diff options
Diffstat (limited to 'epan/dfilter')
35 files changed, 11679 insertions, 0 deletions
diff --git a/epan/dfilter/.editorconfig b/epan/dfilter/.editorconfig new file mode 100644 index 0000000..976affd --- /dev/null +++ b/epan/dfilter/.editorconfig @@ -0,0 +1,19 @@ +# +# Editor configuration +# +# https://editorconfig.org/ +# + +# C +[*.{c,h}] +indent_style = tab +indent_size = tab +tab_width = 8 + +[drange.[ch]] +indent_style = space +indent_size = 4 + +[dfunctions.[ch]] +indent_style = space +indent_size = 4 diff --git a/epan/dfilter/CMakeLists.txt b/epan/dfilter/CMakeLists.txt new file mode 100644 index 0000000..c21129f --- /dev/null +++ b/epan/dfilter/CMakeLists.txt @@ -0,0 +1,119 @@ +# CMakeLists.txt +# +# Wireshark - Network traffic analyzer +# By Gerald Combs <gerald@wireshark.org> +# Copyright 1998 Gerald Combs +# +# SPDX-License-Identifier: GPL-2.0-or-later +# + +set(DFILTER_PUBLIC_HEADERS + dfilter.h + drange.h +) + +set(DFILTER_HEADER_FILES + ${DFILTER_PUBLIC_HEADERS} + dfilter-int.h + dfilter-macro.h + dfilter.h + dfunctions.h + dfvm.h + drange.h + gencode.h + semcheck.h + sttype-field.h + sttype-function.h + sttype-pointer.h + sttype-set.h + sttype-slice.h + sttype-op.h + syntax-tree.h +) + +set(DFILTER_NONGENERATED_FILES + dfilter.c + dfilter-macro.c + dfunctions.c + dfvm.c + drange.c + gencode.c + semcheck.c + sttype-field.c + sttype-function.c + sttype-pointer.c + sttype-set.c + sttype-slice.c + sttype-string.c + sttype-op.c + syntax-tree.c +) +source_group(dfilter FILES ${DFILTER_NONGENERATED_FILES}) + +set(DFILTER_FILES ${DFILTER_NONGENERATED_FILES}) + +add_lex_files(LEX_FILES DFILTER_FILES + scanner.l +) + +add_lemon_files(LEMON_FILES DFILTER_FILES + grammar.lemon +) + +# +# We don't enable -Werror on generated code to make the build a +# little less fragile when configured warnings change. 
+# +set_source_files_properties( + ${DFILTER_NONGENERATED_FILES} + PROPERTIES + COMPILE_FLAGS "${WERROR_COMMON_FLAGS}" +) + +add_library(dfilter OBJECT + + #Included so that Visual Studio can properly put header files in solution + ${DFILTER_HEADER_FILES} + + ${DFILTER_FILES} +) + +target_include_directories(dfilter + PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/epan + ${CMAKE_SOURCE_DIR}/tools/lemon +) + +set_target_properties(dfilter PROPERTIES + FOLDER "Libs/epan/dfilter" + COMPILE_DEFINITIONS "WS_BUILD_DLL" +) + +install(FILES ${DFILTER_PUBLIC_HEADERS} + DESTINATION "${PROJECT_INSTALL_INCLUDEDIR}/epan/dfilter" + COMPONENT "Development" + EXCLUDE_FROM_ALL +) + +CHECKAPI( + NAME + dfilter + SWITCHES + SOURCES + ${DFILTER_NONGENERATED_FILES} +) + +# +# Editor modelines - https://www.wireshark.org/tools/modelines.html +# +# Local variables: +# c-basic-offset: 8 +# tab-width: 8 +# indent-tabs-mode: t +# End: +# +# vi: set shiftwidth=8 tabstop=8 noexpandtab: +# :indentSize=8:tabSize=8:noTabs=false: +# diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h new file mode 100644 index 0000000..7f0be87 --- /dev/null +++ b/epan/dfilter/dfilter-int.h @@ -0,0 +1,192 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DFILTER_INT_H +#define DFILTER_INT_H + +#include "dfilter.h" +#include "syntax-tree.h" + +#include <epan/proto.h> +#include <stdio.h> + +typedef struct { + const header_field_info *hfinfo; + fvalue_t *value; + int proto_layer_num; +} df_reference_t; + +typedef struct { + GPtrArray *array; +} df_cell_t; + +typedef struct { + GPtrArray *ptr; + unsigned idx; +} df_cell_iter_t; + +/* Passed back to user */ +struct epan_dfilter { + GPtrArray *insns; + unsigned num_registers; + df_cell_t *registers; + int *interesting_fields; + int num_interesting_fields; + 
GPtrArray *deprecated; + GSList *warnings; + char *expanded_text; + GHashTable *references; + GHashTable *raw_references; + char *syntax_tree_str; + /* Used to pass arguments to functions. List of Lists (list of registers). */ + GSList *function_stack; + GSList *set_stack; +}; + +typedef struct { + df_error_t *error; + /* more fields. */ +} dfstate_t; + +/* + * State for first stage of compilation (parsing). + */ +typedef struct { + df_error_t *error; /* Must be first struct field. */ + unsigned flags; + stnode_t *st_root; + GPtrArray *deprecated; + stnode_t *lval; + GString *quoted_string; + bool raw_string; + df_loc_t string_loc; + df_loc_t location; +} dfsyntax_t; + +/* + * State for second stage of compilation (semantic check and code generation). + */ +typedef struct { + df_error_t *error; /* Must be first struct field. */ + unsigned flags; + stnode_t *st_root; + unsigned field_count; + GPtrArray *insns; + GHashTable *loaded_fields; + GHashTable *loaded_raw_fields; + GHashTable *interesting_fields; + int next_insn_id; + int next_register; + GPtrArray *deprecated; + GHashTable *references; /* hfinfo -> pointer to array of references */ + GHashTable *raw_references; /* hfinfo -> pointer to array of references */ + char *expanded_text; + wmem_allocator_t *dfw_scope; /* Because we use exceptions for error handling sometimes + cleaning up memory allocations is inconvenient. Memory + allocated from this pool will be freed when the dfwork_t + context is destroyed. */ + GSList *warnings; +} dfwork_t; + +/* Constructor/Destructor prototypes for Lemon Parser */ +void *DfilterAlloc(void *(*)(size_t)); + +void DfilterFree(void *, void (*)(void *)); + +void Dfilter(void *, int, stnode_t *, dfsyntax_t *); + +/* Return value for error in scanner. 
*/ +#define SCAN_FAILED -1 /* not 0, as that means end-of-input */ + +void +dfilter_vfail(void *state, int code, df_loc_t err_loc, + const char *format, va_list args); + +void +dfilter_fail(void *state, int code, df_loc_t err_loc, + const char *format, ...) G_GNUC_PRINTF(4, 5); + +WS_NORETURN +void +dfilter_fail_throw(void *state, int code, df_loc_t err_loc, + const char *format, ...) G_GNUC_PRINTF(4, 5); + +void +dfw_set_error_location(dfwork_t *dfw, df_loc_t err_loc); + +void +add_deprecated_token(dfsyntax_t *dfs, const char *token); + +void +add_compile_warning(dfwork_t *dfw, const char *format, ...); + +void +free_deprecated(GPtrArray *deprecated); + +void +DfilterTrace(FILE *TraceFILE, char *zTracePrompt); + +header_field_info * +dfilter_resolve_unparsed(dfsyntax_t *dfs, const char *name); + +WS_RETNONNULL fvalue_t* +dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, + bool allow_partial_value, header_field_info *hfinfo_value_string); + +WS_RETNONNULL fvalue_t * +dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, + header_field_info *hfinfo_value_string); + +WS_RETNONNULL fvalue_t * +dfilter_fvalue_from_charconst(dfwork_t *dfw, ftenum_t ftype, stnode_t *st); + +const char *tokenstr(int token); + +df_reference_t * +reference_new(const field_info *finfo, bool raw); + +void +reference_free(df_reference_t *ref); + +void +df_cell_append(df_cell_t *rp, fvalue_t *fv); + +GPtrArray * +df_cell_ref(df_cell_t *rp); + +#define df_cell_ptr(rp) ((rp)->array) + +size_t +df_cell_size(const df_cell_t *rp); + +fvalue_t ** +df_cell_array(const df_cell_t *rp); + +bool +df_cell_is_empty(const df_cell_t *rp); + +bool +df_cell_is_null(const df_cell_t *rp); + +/* Pass true to free the array contents when the cell is cleared. */ +void +df_cell_init(df_cell_t *rp, bool free_seg); + +void +df_cell_clear(df_cell_t *rp); + +/* Cell must not be cleared while iter is alive. 
*/ +void +df_cell_iter_init(df_cell_t *rp, df_cell_iter_t *iter); + +fvalue_t * +df_cell_iter_next(df_cell_iter_t *iter); + + +#endif diff --git a/epan/dfilter/dfilter-loc.h b/epan/dfilter/dfilter-loc.h new file mode 100644 index 0000000..adf663e --- /dev/null +++ b/epan/dfilter/dfilter-loc.h @@ -0,0 +1,25 @@ + +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DFILTER_LOC_H +#define DFILTER_LOC_H + +#include <stddef.h> + +typedef struct _dfilter_loc { + long col_start; + size_t col_len; +} df_loc_t; + +extern df_loc_t loc_empty; + +#define DFILTER_LOC_EMPTY loc_empty + +#endif diff --git a/epan/dfilter/dfilter-macro.c b/epan/dfilter/dfilter-macro.c new file mode 100644 index 0000000..1e479be --- /dev/null +++ b/epan/dfilter/dfilter-macro.c @@ -0,0 +1,682 @@ +/* dfilter-macro.c + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER +#include "dfilter-macro.h" + +#ifdef DUMP_DFILTER_MACRO +#include <stdio.h> +#endif +#include <string.h> + +#include "dfilter-int.h" +#include <ftypes/ftypes.h> +#include <epan/uat-int.h> +#include <epan/proto.h> +#include <wsutil/glib-compat.h> + + +static uat_t* dfilter_macro_uat = NULL; +static dfilter_macro_t* macros = NULL; +static unsigned num_macros; + +/* #define DUMP_DFILTER_MACRO */ +#ifdef DUMP_DFILTER_MACRO +void dump_dfilter_macro_t(const dfilter_macro_t *m, const char *function, const char *file, int line); +#define DUMP_MACRO(m) dump_dfilter_macro_t(m, G_STRFUNC, __FILE__, __LINE__) +#else +#define DUMP_MACRO(m) +#endif + +static char* dfilter_macro_resolve(char* name, char** args, df_error_t** error) { + GString* text; + int argc = 0; + dfilter_macro_t* m = NULL; + int* arg_pos_p; + char** 
parts; + char* ret; + unsigned i; + + for (i = 0; i < num_macros; i++) { + dfilter_macro_t* c = &(macros[i]); + if ( c->usable && g_str_equal(c->name,name) ) { + m = c; + break; + } + } + + if (!m) { + if (error != NULL) + *error = df_error_new_printf(DF_ERROR_GENERIC, NULL, "macro '%s' does not exist", name); + return NULL; + } + + DUMP_MACRO(m); + + if (args) { + while(args[argc]) argc++; + } + + if (argc != m->argc) { + if (error != NULL) { + *error = df_error_new_printf(DF_ERROR_GENERIC, NULL, + "wrong number of arguments for macro '%s', expecting %d instead of %d", + name, m->argc, argc); + } + return NULL; + } + + arg_pos_p = m->args_pos; + parts = m->parts; + + text = g_string_new(*(parts++)); + + if (args) { + while (*parts) { + g_string_append_printf(text,"%s%s", + args[*(arg_pos_p++)], + *(parts++)); + } + } + + ret = wmem_strdup(NULL, text->str); + + g_string_free(text,true); + + return ret; +} + +/* Start points to the first character after "${" */ +static bool start_is_field_reference(const char *start) +{ + const char *end; + char saved_c; + const header_field_info *hfinfo; + + end = strchr(start, '#'); + if (end == NULL) + end = strchr(start, '}'); + if (end == NULL) + return false; + + saved_c = *end; + /* This violates constness but we will restore the original string. */ + *(char *)end = '\0'; + /* Search for name in registered fields. */ + + if (start[0] == '@') + start++; + + hfinfo = dfilter_resolve_unparsed(NULL, start); + /* Restore mangled string. */ + *(char *)end = saved_c; + + if (hfinfo == NULL) + return false; + + if (hfinfo->type == FT_PROTOCOL || hfinfo->type == FT_NONE) { + /* Ignore these? */ + return false; + } + + /* It's a field reference so ignore it as a macro. 
*/ + ws_noisy("Ignore field reference ${%s}", start); + return true; +} + +static char* dfilter_macro_apply_recurse(const char* text, unsigned depth, df_error_t** error) { + enum { OUTSIDE, STARTING, NAME, ARGS } state = OUTSIDE; + GString* out; + GString* name = NULL; + GString* arg = NULL; + GPtrArray* args = NULL; + char c; + const char* r = text; + bool changed = false; + + if ( depth > 31) { + if (error != NULL) + *error = df_error_new_msg("too much nesting in macros"); + return NULL; + } + +#define FGS(n) if (n) g_string_free(n,true); n = NULL + +#define FREE_ALL() \ + do { \ + FGS(name); \ + FGS(arg); \ + if (args) { \ + while(args->len) { void* p = g_ptr_array_remove_index_fast(args,0); g_free(p); } \ + g_ptr_array_free(args,true); \ + args = NULL; \ + } \ + } while(0) + + if (error != NULL) + *error = NULL; + out = g_string_sized_new(64); + + while(1) { + c = *r++; + + switch(state) { + case OUTSIDE: { + switch(c) { + case '\0': { + goto finish; + } case '$': { + state = STARTING; + break; + } default: { + g_string_append_c(out,c); + break; + } + } + break; + } case STARTING: { + switch (c) { + case '{': { + if (start_is_field_reference(r)) { + /* We have a field reference, preserve the name with ${} and bail. */ + g_string_append(out,"${"); + state = OUTSIDE; + break; + } + + /* We have a macro, continue. */ + args = g_ptr_array_new(); + arg = g_string_sized_new(32); + name = g_string_sized_new(32); + + state = NAME; + + break; + } case '\0': { + g_string_append_c(out,'$'); + + goto finish; + } default: { + g_string_append_c(out,'$'); + g_string_append_c(out,c); + + state = OUTSIDE; + + break; + } + } + break; + } case NAME: { + if ( g_ascii_isalnum(c) || c == '_' || c == '-' || c == '.' 
) { + g_string_append_c(name,c); + } else if ( c == ':') { + state = ARGS; + } else if ( c == '}') { + char* resolved; + + g_ptr_array_add(args,NULL); + + resolved = dfilter_macro_resolve(name->str, (char**)args->pdata, error); + if (resolved == NULL) + goto on_error; + + changed = true; + + g_string_append(out,resolved); + wmem_free(NULL, resolved); + + FREE_ALL(); + + state = OUTSIDE; + } else if ( c == '\0') { + if (error != NULL) + *error = df_error_new_msg("end of filter in the middle of a macro expression"); + goto on_error; + } else { + if (error != NULL) + *error = df_error_new_msg("invalid character in macro name"); + goto on_error; + } + break; + } case ARGS: { + switch(c) { + case '\0': { + if (error != NULL) + *error = df_error_new_msg("end of filter in the middle of a macro expression"); + goto on_error; + } case ';': { + g_ptr_array_add(args,g_string_free(arg,false)); + + arg = g_string_sized_new(32); + break; + } case '\\': { + c = *r++; + if (c) { + g_string_append_c(arg,c); + break; + } else { + if (error != NULL) + *error = df_error_new_msg("end of filter in the middle of a macro expression"); + goto on_error; + } + } default: { + g_string_append_c(arg,c); + break; + } case '}': { + char* resolved; + g_ptr_array_add(args,g_string_free(arg,false)); + g_ptr_array_add(args,NULL); + + arg = NULL; + + resolved = dfilter_macro_resolve(name->str, (char**)args->pdata, error); + if (resolved == NULL) + goto on_error; + + changed = true; + + g_string_append(out,resolved); + wmem_free(NULL, resolved); + + FREE_ALL(); + + state = OUTSIDE; + break; + } + } + break; + } + } + } + +finish: + { + FREE_ALL(); + + if (changed) { + char* resolved = dfilter_macro_apply_recurse(out->str, depth + 1, error); + g_string_free(out,true); + return resolved; + } else { + char* out_str = wmem_strdup(NULL, out->str); + g_string_free(out,true); + return out_str; + } + } +on_error: + { + FREE_ALL(); + if (error != NULL) { + if (*error == NULL) + *error = 
df_error_new_msg("unknown error in macro expression"); + } + g_string_free(out,true); + return NULL; + } +} + +char* dfilter_macro_apply(const char* text, df_error_t** error) { + return dfilter_macro_apply_recurse(text, 0, error); +} + +static bool macro_update(void* mp, gchar** error) { + dfilter_macro_t* m = (dfilter_macro_t*)mp; + GPtrArray* parts; + GArray* args_pos; + const char* r; + char* w; + char* part; + int argc = 0; + + DUMP_MACRO(m); + + *error = NULL; + + /* Invalidate the display filter in case it's in use */ + if (dfilter_macro_uat && dfilter_macro_uat->post_update_cb) + dfilter_macro_uat->post_update_cb(); + + parts = g_ptr_array_new(); + args_pos = g_array_new(false,false,sizeof(int)); + + m->priv = part = w = g_strdup(m->text); + r = m->text; + g_ptr_array_add(parts,part); + + while (r && *r) { + + switch (*r) { + default: + *(w++) = *(r++); + break; + case '\0': + *w = *r; + goto done; + case '\\': + *(w++) = *(r++); + if(*r) + *(w++) = *(r++); + break; + case '$': { + int cnt = 0; + int arg_pos = 0; + do { + char c = *(r+1); + if (c >= '0' && c <= '9') { + cnt++; + r++; + *(w++) = '\0'; + arg_pos *= 10; + arg_pos += c - '0'; + } else { + break; + } + } while(*r); + + if (cnt) { + *(w++) = '\0'; + r++; + argc = argc < arg_pos ? 
arg_pos : argc; + arg_pos--; + g_array_append_val(args_pos,arg_pos); + g_ptr_array_add(parts,w); + } else { + *(w++) = *(r++); + } + break; + } + } + + } + +done: + g_ptr_array_add(parts,NULL); + + g_free(m->parts); + m->parts = (char **)g_ptr_array_free(parts, false); + + g_free(m->args_pos); + m->args_pos = (int*)(void *)g_array_free(args_pos, false); + + m->argc = argc; + + m->usable = true; + + DUMP_MACRO(m); + + return true; +} + +static void macro_free(void* r) { + dfilter_macro_t* m = (dfilter_macro_t*)r; + + DUMP_MACRO(r); + + g_free(m->name); + g_free(m->text); + g_free(m->priv); + g_free(m->parts); + g_free(m->args_pos); +} + +static void* macro_copy(void* dest, const void* orig, size_t len _U_) { + dfilter_macro_t* d = (dfilter_macro_t*)dest; + const dfilter_macro_t* m = (const dfilter_macro_t*)orig; + + DUMP_MACRO(m); + + d->name = g_strdup(m->name); + d->text = g_strdup(m->text); + d->usable = m->usable; + + if (m->parts) { + unsigned nparts = 0; + + /* + * Copy the contents of m->priv (a "cooked" version + * of m->text) into d->priv. + * + * First we clone m->text into d->priv, this gets + * us a NUL terminated string of the proper length. + * + * Then we loop copying bytes from m->priv into + * d->priv. Since m->priv contains internal ASCII NULs + * we use the length of m->text to stop the copy. + */ + + d->priv = g_strdup(m->text); + { + const char* oldText = m->text; + const char* oldPriv = (const char*)m->priv; + char* newPriv = (char*)d->priv; + while(oldText && *oldText) { + *(newPriv++) = *(oldPriv++); + oldText++; + } + } + + /* + * The contents of the m->parts array contains pointers + * into various sections of m->priv. Since it's + * an argv style array of pointers, this array is + * actually one larger than the number of parts + * to hold the final NULL terminator. 
+ * + * The following copy clones the original m->parts + * array into d->parts but then fixes-up the pointers + * so that they point into the appropriate sections + * of the d->priv. + */ + + do nparts++; while (m->parts[nparts]); + d->parts = (char **)g_memdup2(m->parts,(nparts+1)*(unsigned)sizeof(void*)); + nparts = 0; + while(m->parts[nparts]) { + if(nparts) { + d->parts[nparts] = d->parts[nparts - 1] + (m->parts[nparts] - m->parts[nparts - 1]); + } else { + d->parts[nparts] = (char *)d->priv; + } + nparts++; + } + + /* + * Clone the contents of m->args_pos into d->args_pos. + */ + + d->args_pos = (int *)g_memdup2(m->args_pos,(--nparts)*(unsigned)sizeof(int)); + } + + DUMP_MACRO(d); + + return d; +} + +static bool macro_name_chk(void *mp, const char *in_name, unsigned name_len, + const void *u1 _U_, const void *u2 _U_, char **error) { + dfilter_macro_t* m = (dfilter_macro_t*)mp; + unsigned i; + + if (name_len == 0) { + *error = g_strdup("invalid name"); + return false; + } + + for (i=0; i < name_len; i++) { + if (!(in_name[i] == '_' || g_ascii_isalnum(in_name[i]) ) ) { + *error = g_strdup("invalid char in name"); + return false; + } + } + + /* When loading (!m->name) or when adding/changing an item with a + * different name, check for uniqueness. NOTE: if a duplicate already + * exists (because the user manually edited the file), then this will + * not trigger a warning. */ + if (!m->name || g_strcmp0(m->name, in_name)) { + for (i = 0; i < num_macros; i++) { + /* This is a string field which is always NUL-terminated, + * so no need to check name_len. 
*/ + if (!g_strcmp0(in_name, macros[i].name)) { + *error = ws_strdup_printf("macro '%s' already exists", + in_name); + return false; + } + } + } + + return true; +} + +UAT_CSTRING_CB_DEF(macro,name,dfilter_macro_t) +UAT_CSTRING_CB_DEF(macro,text,dfilter_macro_t) + +void dfilter_macro_init(void) { + static uat_field_t uat_fields[] = { + UAT_FLD_CSTRING_OTHER(macro,name,"Name",macro_name_chk,"The name of the macro."), + /* N.B. it would be nice if there was a field type for display filters (with + auto-completion & colouring), but this wouldn't work here as the filter string + will contain $1, etc... */ + UAT_FLD_CSTRING_ISPRINT(macro,text,"Text","The text this macro resolves to."), + UAT_END_FIELDS + }; + + dfilter_macro_uat = uat_new("Display Filter Macros", + sizeof(dfilter_macro_t), + DFILTER_MACRO_FILENAME, + true, + ¯os, + &num_macros, + UAT_AFFECTS_FIELDS, + "ChDisplayFilterMacrosSection", + macro_copy, + macro_update, + macro_free, + NULL, /* Note: This is set in macros_init () */ + NULL, + uat_fields); +} + +void dfilter_macro_get_uat(uat_t **dfmu_ptr_ptr) { + *dfmu_ptr_ptr = dfilter_macro_uat; +} + +#ifdef DUMP_DFILTER_MACRO +/* + * The dfilter_macro_t has several characteristics that are + * not immediately obvious. The dump_dfilter_filter_macro_t() + * function can be used to help "visualize" the contents of + * a dfilter_macro_t. + * + * Some non-obvious components of this struct include: + * + * m->parts is an argv style array of pointers into the + * m->priv string. + * + * The last pointer of an m->parts array should contain + * NULL to indicate the end of the parts pointer array. + * + * m->priv is a "cooked" copy of the m->text string. + * Any variable substitution indicators within m->text + * ("$1", "$2", ...) will have been replaced with ASCII + * NUL characters within m->priv. 
+ * + * The first element of m->parts array (m-parts[0]) will + * usually have the same pointer value as m->priv (unless + * the dfilter-macro starts off with a variable + * substitution indicator (e.g. "$1"). + */ + +void dump_dfilter_macro_t(const dfilter_macro_t *m, const char *function, const char *file, int line) +{ + printf("\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"); + + if(m == NULL) { + printf(" dfilter_macro_t * == NULL! (via: %s(): %s:%d)\n", function, file, line); + printf("\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); + return; + } + + printf("DUMP of dfilter_macro_t: %p (via: %s(): %s:%d)\n", m, function, file, line); + + printf(" &dfilter_macro->name == %p\n", &m->name); + if(m->name == NULL) { + printf(" ->name == NULL\n"); + } else { + printf(" ->name == %p\n", m->name); + printf(" ->name == <%s>\n", m->name); + } + + printf(" &dfilter_macro->text == %p\n", &m->text); + if(m->text == NULL) { + printf(" ->text == NULL\n"); + } else { + printf(" ->text == %p\n", m->text); + printf(" ->text == <%s>\n", m->text); + } + + printf(" &dfilter_macro->usable == %p\n", &m->usable); + printf(" ->usable == %u\n", m->usable); + + printf(" &dfilter_macro->parts == %p\n", &m->parts); + + if(m->parts == NULL) { + printf(" ->parts == NULL\n"); + } else { + int i = 0; + + while (m->parts[i]) { + printf(" ->parts[%d] == %p\n", i, m->parts[i]); + printf(" ->parts[%d] == <%s>\n", i, m->parts[i]); + i++; + } + printf(" ->parts[%d] == NULL\n", i); + } + + printf(" &dfilter_macro->args_pos == %p\n", &m->args_pos); + if(m->args_pos == NULL) { + printf(" ->args_pos == NULL\n"); + } else { + printf(" ->args_pos == %p\n", m->args_pos); + /*printf(" ->args_pos == <%?>\n", m->args_pos);*/ + } + + printf(" &dfilter_macro->argc == %p\n", &m->argc); + printf(" ->argc == %d\n", m->argc); + + printf(" &dfilter_macro->priv == %p\n", &m->priv); + if(m->priv == NULL) { + printf(" ->priv == NULL\n"); + } else { + printf(" ->priv == %p\n", m->priv); + 
printf(" ->priv == <%s>\n", (char *)m->priv); + } + + printf("\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); +} +#endif + +void dfilter_macro_cleanup(void) +{ +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/dfilter-macro.h b/epan/dfilter/dfilter-macro.h new file mode 100644 index 0000000..477bb40 --- /dev/null +++ b/epan/dfilter/dfilter-macro.h @@ -0,0 +1,48 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _DFILTER_MACRO_H +#define _DFILTER_MACRO_H + +#include <wireshark.h> +#include "dfilter.h" + +#define DFILTER_MACRO_FILENAME "dfilter_macros" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef struct _dfilter_macro_t { + char* name; /* the macro id */ + char* text; /* raw data from file */ + bool usable; /* macro is usable */ + char** parts; /* various segments of text between insertion targets */ + int* args_pos; /* what's to be inserted */ + int argc; /* the expected number of arguments */ + void* priv; /* a copy of text that contains every c-string in parts */ +} dfilter_macro_t; + +/* applies all macros to the given text and returns the resulting string or NULL on failure */ +char* dfilter_macro_apply(const char* text, df_error_t** error); + +void dfilter_macro_init(void); + +struct epan_uat; + +WS_DLL_PUBLIC +void dfilter_macro_get_uat(struct epan_uat **dfmu_ptr_ptr); + +void dfilter_macro_cleanup(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _DFILTER_MACRO_H */ diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c new file mode 100644 index 0000000..73646b7 --- /dev/null +++ 
b/epan/dfilter/dfilter.c @@ -0,0 +1,1009 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER + +#include <stdio.h> +#include <string.h> + +#include "dfilter-int.h" +#include "syntax-tree.h" +#include "gencode.h" +#include "semcheck.h" +#include "dfvm.h" +#include <epan/epan_dissect.h> +#include <epan/exceptions.h> +#include "dfilter.h" +#include "dfilter-macro.h" +#include "scanner_lex.h" +#include <wsutil/wslog.h> +#include <wsutil/ws_assert.h> +#include "grammar.h" + + +#define DFILTER_TOKEN_ID_OFFSET 1 + +/* Holds the singular instance of our Lemon parser object */ +static void* ParserObj = NULL; + +df_loc_t loc_empty = {-1, 0}; + +void +dfilter_vfail(void *state, int code, df_loc_t loc, + const char *format, va_list args) +{ + df_error_t **ptr = &((dfstate_t *)state)->error; + /* If we've already reported one error, don't overwrite it */ + if (*ptr != NULL) + return; + + *ptr = df_error_new_vprintf(code, &loc, format, args); +} + +void +dfilter_fail(void *state, int code, df_loc_t loc, + const char *format, ...) +{ + va_list args; + + va_start(args, format); + dfilter_vfail(state, code, loc, format, args); + va_end(args); +} + +void +dfilter_fail_throw(void *state, int code, df_loc_t loc, const char *format, ...) 
+{ + va_list args; + + va_start(args, format); + dfilter_vfail(state, code, loc, format, args); + va_end(args); + THROW(TypeError); +} + +void +dfw_set_error_location(dfwork_t *dfw, df_loc_t loc) +{ + ws_assert(dfw->error); + dfw->error->loc = loc; +} + +header_field_info * +dfilter_resolve_unparsed(dfsyntax_t *dfs, const char *name) +{ + header_field_info *hfinfo; + + hfinfo = proto_registrar_get_byname(name); + if (hfinfo != NULL) { + /* It's a field name */ + return hfinfo; + } + + hfinfo = proto_registrar_get_byalias(name); + if (hfinfo != NULL) { + /* It's an aliased field name */ + if (dfs) + add_deprecated_token(dfs, name); + return hfinfo; + } + + /* It's not a field. */ + return NULL; +} + +/* Initialize the dfilter module */ +void +dfilter_init(void) +{ + if (ParserObj) { + ws_message("I expected ParserObj to be NULL\n"); + /* Free the Lemon Parser object */ + DfilterFree(ParserObj, g_free); + } + /* Allocate an instance of our Lemon-based parser */ + ParserObj = DfilterAlloc(g_malloc); + + /* Initialize the syntax-tree sub-sub-system */ + sttype_init(); + + dfilter_macro_init(); +} + +/* Clean-up the dfilter module */ +void +dfilter_cleanup(void) +{ + dfilter_macro_cleanup(); + + /* Free the Lemon Parser object */ + if (ParserObj) { + DfilterFree(ParserObj, g_free); + } + + /* Clean up the syntax-tree sub-sub-system */ + sttype_cleanup(); +} + +static dfilter_t* +dfilter_new(GPtrArray *deprecated) +{ + dfilter_t *df; + + df = g_new0(dfilter_t, 1); + df->insns = NULL; + df->function_stack = NULL; + df->set_stack = NULL; + df->warnings = NULL; + if (deprecated) + df->deprecated = g_ptr_array_ref(deprecated); + return df; +} + +/* Given a GPtrArray of instructions (dfvm_insn_t), + * free them. 
*/ +static void +free_insns(GPtrArray *insns) +{ + unsigned int i; + dfvm_insn_t *insn; + + for (i = 0; i < insns->len; i++) { + insn = (dfvm_insn_t *)g_ptr_array_index(insns, i); + dfvm_insn_free(insn); + } + g_ptr_array_free(insns, true); +} + +void +dfilter_free(dfilter_t *df) +{ + if (!df) + return; + + if (df->insns) { + free_insns(df->insns); + } + + g_free(df->interesting_fields); + + g_hash_table_destroy(df->references); + g_hash_table_destroy(df->raw_references); + + if (df->deprecated) + g_ptr_array_unref(df->deprecated); + + if (df->function_stack != NULL) { + ws_critical("Function stack list should be NULL"); + g_slist_free(df->function_stack); + } + + if (df->set_stack != NULL) { + ws_critical("Set stack list should be NULL"); + g_slist_free(df->set_stack); + } + + if (df->warnings) + g_slist_free_full(df->warnings, g_free); + + g_free(df->registers); + g_free(df->expanded_text); + g_free(df->syntax_tree_str); + g_free(df); +} + +static void free_refs_array(void *data) +{ + /* Array data must be freed. 
*/ + (void)g_ptr_array_free(data, true); +} + +static dfsyntax_t* +dfsyntax_new(unsigned flags) +{ + dfsyntax_t *dfs = g_new0(dfsyntax_t, 1); + dfs->flags = flags; + return dfs; +} + +static void +dfsyntax_free(dfsyntax_t *dfs) +{ + if (dfs->error) + df_error_free(&dfs->error); + + if (dfs->st_root) + stnode_free(dfs->st_root); + + if (dfs->deprecated) + g_ptr_array_unref(dfs->deprecated); + + if (dfs->lval) + stnode_free(dfs->lval); + + if (dfs->quoted_string) + g_string_free(dfs->quoted_string, true); + + + + g_free(dfs); +} + +static dfwork_t* +dfwork_new(const char *expanded_text, unsigned flags) +{ + dfwork_t *dfw = g_new0(dfwork_t, 1); + dfw->expanded_text = g_strdup(expanded_text); + dfw->flags = flags; + + dfw->references = + g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, (GDestroyNotify)free_refs_array); + + dfw->raw_references = + g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, (GDestroyNotify)free_refs_array); + + dfw->dfw_scope = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE); + + return dfw; +} + +static void +dfwork_free(dfwork_t *dfw) +{ + if (dfw->st_root) { + stnode_free(dfw->st_root); + } + + if (dfw->loaded_fields) { + g_hash_table_destroy(dfw->loaded_fields); + } + + if (dfw->loaded_raw_fields) { + g_hash_table_destroy(dfw->loaded_raw_fields); + } + + if (dfw->interesting_fields) { + g_hash_table_destroy(dfw->interesting_fields); + } + + if (dfw->references) { + g_hash_table_destroy(dfw->references); + } + + if (dfw->raw_references) { + g_hash_table_destroy(dfw->raw_references); + } + + if (dfw->insns) { + free_insns(dfw->insns); + } + + if (dfw->deprecated) + g_ptr_array_unref(dfw->deprecated); + + if (dfw->warnings) + g_slist_free_full(dfw->warnings, g_free); + + g_free(dfw->expanded_text); + + if (dfw->error) + df_error_free(&dfw->error); + + wmem_destroy_allocator(dfw->dfw_scope); + + /* + * We don't free the error message string; our caller will return + * it to its caller. 
+ */ + g_free(dfw); +} + +const char *tokenstr(int token) +{ + switch (token) { + case TOKEN_TEST_AND: return "TEST_AND"; + case TOKEN_TEST_OR: return "TEST_OR"; + case TOKEN_TEST_XOR: return "TEST_XOR"; + case TOKEN_TEST_ALL_EQ: return "TEST_ALL_EQ"; + case TOKEN_TEST_ANY_EQ: return "TEST_ANY_EQ"; + case TOKEN_TEST_ALL_NE: return "TEST_ALL_NE"; + case TOKEN_TEST_ANY_NE: return "TEST_ANY_NE"; + case TOKEN_TEST_LT: return "TEST_LT"; + case TOKEN_TEST_LE: return "TEST_LE"; + case TOKEN_TEST_GT: return "TEST_GT"; + case TOKEN_TEST_GE: return "TEST_GE"; + case TOKEN_TEST_CONTAINS: return "TEST_CONTAINS"; + case TOKEN_TEST_MATCHES: return "TEST_MATCHES"; + case TOKEN_BITWISE_AND: return "BITWISE_AND"; + case TOKEN_PLUS: return "PLUS"; + case TOKEN_MINUS: return "MINUS"; + case TOKEN_STAR: return "STAR"; + case TOKEN_RSLASH: return "RSLASH"; + case TOKEN_PERCENT: return "PERCENT"; + case TOKEN_TEST_NOT: return "TEST_NOT"; + case TOKEN_STRING: return "STRING"; + case TOKEN_CHARCONST: return "CHARCONST"; + case TOKEN_IDENTIFIER: return "IDENTIFIER"; + case TOKEN_CONSTANT: return "CONSTANT"; + case TOKEN_LITERAL: return "LITERAL"; + case TOKEN_FIELD: return "FIELD"; + case TOKEN_LBRACKET: return "LBRACKET"; + case TOKEN_RBRACKET: return "RBRACKET"; + case TOKEN_COMMA: return "COMMA"; + case TOKEN_RANGE_NODE: return "RANGE_NODE"; + case TOKEN_TEST_IN: return "TEST_IN"; + case TOKEN_LBRACE: return "LBRACE"; + case TOKEN_RBRACE: return "RBRACE"; + case TOKEN_DOTDOT: return "DOTDOT"; + case TOKEN_LPAREN: return "LPAREN"; + case TOKEN_RPAREN: return "RPAREN"; + case TOKEN_DOLLAR: return "DOLLAR"; + case TOKEN_ATSIGN: return "ATSIGN"; + case TOKEN_HASH: return "HASH"; + } + return "<unknown>"; +} + +void +add_deprecated_token(dfsyntax_t *dfs, const char *token) +{ + if (dfs->deprecated == NULL) + dfs->deprecated = g_ptr_array_new_full(0, g_free); + + GPtrArray *deprecated = dfs->deprecated; + + for (unsigned i = 0; i < deprecated->len; i++) { + const char *str = 
g_ptr_array_index(deprecated, i); + if (g_ascii_strcasecmp(token, str) == 0) { + /* It's already in our list */ + return; + } + } + g_ptr_array_add(deprecated, g_strdup(token)); +} + +void +add_compile_warning(dfwork_t *dfw, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + char *msg = ws_strdup_vprintf(format, ap); + va_end(ap); + dfw->warnings = g_slist_prepend(dfw->warnings, msg); +} + +char * +dfilter_expand(const char *expr, df_error_t **err_ret) +{ + return dfilter_macro_apply(expr, err_ret); +} + +static bool +dfwork_parse(const char *expanded_text, dfsyntax_t *dfs) +{ + yyscan_t scanner; + YY_BUFFER_STATE in_buffer; + unsigned token_count = 0; + int token; + + if (df_yylex_init(&scanner) != 0) { + dfs->error = df_error_new_printf(DF_ERROR_GENERIC, NULL, "Can't initialize scanner: %s", g_strerror(errno)); + return false; + } + + in_buffer = df_yy_scan_string(expanded_text, scanner); + df_yyset_extra(dfs, scanner); + +#ifdef NDEBUG + if (dfs->flags & DF_DEBUG_FLEX || dfs->flags & DF_DEBUG_LEMON) { + ws_message("Compile Wireshark without NDEBUG to enable Flex and/or Lemon debug traces"); + } +#else + /* Enable/disable debugging for Flex. */ + df_yyset_debug(dfs->flags & DF_DEBUG_FLEX, scanner); + + /* Enable/disable debugging for Lemon. */ + DfilterTrace(dfs->flags & DF_DEBUG_LEMON ? stderr : NULL, "lemon> "); +#endif + + while (1) { + token = df_yylex(scanner); + + /* Check for scanner failure */ + if (token == SCAN_FAILED) { + ws_noisy("Scanning failed"); + ws_assert(dfs->error != NULL); + break; + } + + /* Check for end-of-input */ + if (token == 0) { + ws_noisy("Scanning finished"); + break; + } + + ws_noisy("(%u) Token %d %s %s", + ++token_count, token, tokenstr(token), + stnode_token(dfs->lval)); + + /* Give the token to the parser */ + Dfilter(ParserObj, token, dfs->lval, dfs); + /* The parser has freed the lval for us. 
*/ + dfs->lval = NULL; + + if (dfs->error) { + break; + } + + } /* while (1) */ + + /* Tell the parser that we have reached the end of input; that + * way, it'll reset its state for the next compile. (We want + * to do that even if we got a syntax error, to make sure the + * parser state is cleaned up; we don't create a new parser + * object when we start a new parse, and don't destroy it when + * the parse finishes.) */ + Dfilter(ParserObj, 0, NULL, dfs); + + /* Free scanner state */ + df_yy_delete_buffer(in_buffer, scanner); + df_yylex_destroy(scanner); + + return dfs->error == NULL; +} + +static dfilter_t * +dfwork_build(dfwork_t *dfw) +{ + dfilter_t *dfilter; + char *tree_str; + + log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree before semantic check", NULL); + + /* Check semantics and do necessary type conversion*/ + if (!dfw_semcheck(dfw)) + return NULL; + + /* Cache tree representation in tree_str. */ + tree_str = NULL; + log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree after successful semantic check", &tree_str); + + if ((dfw->flags & DF_SAVE_TREE) && tree_str == NULL) { + tree_str = dump_syntax_tree_str(dfw->st_root); + } + + /* Create bytecode */ + dfw_gencode(dfw); + + /* Tuck away the bytecode in the dfilter_t */ + dfilter = dfilter_new(dfw->deprecated); + dfilter->insns = dfw->insns; + dfw->insns = NULL; + dfilter->interesting_fields = dfw_interesting_fields(dfw, + &dfilter->num_interesting_fields); + dfilter->expanded_text = dfw->expanded_text; + dfw->expanded_text = NULL; + dfilter->references = dfw->references; + dfw->references = NULL; + dfilter->raw_references = dfw->raw_references; + dfw->raw_references = NULL; + dfilter->warnings = dfw->warnings; + dfw->warnings = NULL; + + if (dfw->flags & DF_SAVE_TREE) { + ws_assert(tree_str); + dfilter->syntax_tree_str = tree_str; + tree_str = NULL; + } + else { + dfilter->syntax_tree_str = NULL; + g_free(tree_str); + tree_str = NULL; + } + + /* Initialize run-time space */ + 
dfilter->num_registers = dfw->next_register; + dfilter->registers = g_new0(df_cell_t, dfilter->num_registers); + + return dfilter; +} + +static dfilter_t * +compile_filter(const char *expanded_text, unsigned flags, df_error_t **err_ptr) +{ + dfsyntax_t *dfs = NULL; + dfwork_t *dfw = NULL; + dfilter_t *dfcode = NULL; + df_error_t *error = NULL; + bool ok; + + dfs = dfsyntax_new(flags); + + ok = dfwork_parse(expanded_text, dfs); + if (!ok) { + error = dfs->error; + dfs->error = NULL; + goto FAILURE; + } + else if (dfs->st_root == NULL) { + /* Is it an empty filter? If so set the dfcode to NULL and return success. + * This can happen if the user clears the display filter toolbar in the UI. + * In that case the compilation succeeds and the NULL dfcode clears the filter + * (show all frames). */ + dfsyntax_free(dfs); + *err_ptr = NULL; + return NULL; + } + + dfw = dfwork_new(expanded_text, dfs->flags); + dfw->st_root = dfs->st_root; + dfs->st_root = NULL; + if (dfs->deprecated) + dfw->deprecated = g_ptr_array_ref(dfs->deprecated); + dfsyntax_free(dfs); + dfs = NULL; + + dfcode = dfwork_build(dfw); + if (dfcode == NULL) { + error = dfw->error; + dfw->error = NULL; + goto FAILURE; + } + + /* SUCCESS */ + dfwork_free(dfw); + return dfcode; + +FAILURE: + if (error == NULL || error->msg == NULL) { + /* We require an error message. 
*/ + ws_critical("Unknown error compiling filter: %s", expanded_text); + error = df_error_new_msg("Unknown error compiling filter"); + } + + ws_assert(err_ptr && error); + *err_ptr = error; + + if (dfs) + dfsyntax_free(dfs); + if (dfw) + dfwork_free(dfw); + return NULL; +} + +static inline bool +compile_failure(df_error_t *error, df_error_t **err_ptr) +{ + ws_assert(error); + ws_debug("Error compiling filter: (%d) %s", error->code, error->msg); + + if (err_ptr) + *err_ptr = error; + else + df_error_free(&error); + + return false; +} + +bool +dfilter_compile_full(const char *text, dfilter_t **dfp, + df_error_t **err_ptr, unsigned flags, + const char *caller) +{ + char *expanded_text; + dfilter_t *dfcode; + df_error_t *error = NULL; + + ws_assert(dfp); + *dfp = NULL; + if (caller == NULL) + caller = "(unknown)"; + + if (text == NULL) { + /* This is a bug. */ + ws_warning("Called from %s() with invalid NULL expression", caller); + if (err_ptr) { + *err_ptr = df_error_new_msg("BUG: NULL text argument is invalid"); + } + return false; + } + + ws_debug("Called from %s() with filter: %s", caller, text); + + if (flags & DF_EXPAND_MACROS) { + expanded_text = dfilter_macro_apply(text, &error); + if (expanded_text == NULL) { + return compile_failure(error, err_ptr); + } + ws_noisy("Expanded text: %s", expanded_text); + } + else { + expanded_text = g_strdup(text); + ws_noisy("Verbatim text: %s", expanded_text); + } + + dfcode = compile_filter(expanded_text, flags, &error); + g_free(expanded_text); + expanded_text = NULL; + + if(error != NULL) { + return compile_failure(error, err_ptr); + } + + *dfp = dfcode; + ws_log(WS_LOG_DOMAIN, LOG_LEVEL_INFO, "Compiled display filter: %s", text); + return true; +} + + +bool +dfilter_apply(dfilter_t *df, proto_tree *tree) +{ + return dfvm_apply(df, tree); +} + +bool +dfilter_apply_edt(dfilter_t *df, epan_dissect_t* edt) +{ + return dfvm_apply(df, edt->tree); +} + + +void +dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree) +{ + 
int i; + + for (i = 0; i < df->num_interesting_fields; i++) { + proto_tree_prime_with_hfid(tree, df->interesting_fields[i]); + } +} + +bool +dfilter_has_interesting_fields(const dfilter_t *df) +{ + return (df->num_interesting_fields > 0); +} + +bool +dfilter_interested_in_field(const dfilter_t *df, int hfid) +{ + int i; + + for (i = 0; i < df->num_interesting_fields; i++) { + if (df->interesting_fields[i] == hfid) { + return true; + } + } + return false; +} + +bool +dfilter_interested_in_proto(const dfilter_t *df, int proto_id) +{ + int i; + + for (i = 0; i < df->num_interesting_fields; i++) { + int df_hfid = df->interesting_fields[i]; + if (proto_registrar_is_protocol(df_hfid)) { + /* XXX: Should we go up to the parent of a pino? + * We can tell if df_hfid is a PINO, but there's + * no function to return the parent proto ID yet. + */ + if (df_hfid == proto_id) { + return true; + } + } else { + if (proto_registrar_get_parent(df_hfid) == proto_id) { + return true; + } + } + } + return false; +} + +bool +dfilter_requires_columns(const dfilter_t *df) +{ + if (df == NULL) { + return false; + } + + /* XXX: Could cache this like packet_cache_proto_handles */ + static int proto_cols = -1; + if (proto_cols == -1) { + proto_cols = proto_get_id_by_filter_name("_ws.col"); + } + ws_assert(proto_cols != -1); + + return dfilter_interested_in_proto(df, proto_cols); +} + +GPtrArray * +dfilter_deprecated_tokens(dfilter_t *df) { + if (df->deprecated && df->deprecated->len > 0) { + return df->deprecated; + } + return NULL; +} + +GSList * +dfilter_get_warnings(dfilter_t *df) +{ + return df->warnings; +} + +void +dfilter_dump(FILE *fp, dfilter_t *df, uint16_t flags) +{ + dfvm_dump(fp, df, flags); +} + +const char * +dfilter_text(dfilter_t *df) +{ + return df->expanded_text; +} + +const char * +dfilter_syntax_tree(dfilter_t *df) +{ + return df->syntax_tree_str; +} + +void +dfilter_log_full(const char *domain, enum ws_log_level level, + const char *file, long line, const char *func, + 
dfilter_t *df, const char *msg) +{ + if (!ws_log_msg_is_active(domain, level)) + return; + + if (df == NULL) { + ws_log_write_always_full(domain, level, file, line, func, + "%s: NULL display filter", msg ? msg : "?"); + return; + } + + char *str = dfvm_dump_str(NULL, df, true); + if (G_UNLIKELY(msg == NULL)) + ws_log_write_always_full(domain, level, file, line, func, "\nFilter:\n %s\n\n%s", dfilter_text(df), str); + else + ws_log_write_always_full(domain, level, file, line, func, "%s:\nFilter:\n %s\n\n%s", msg, dfilter_text(df), str); + g_free(str); +} + +static int +compare_ref_layer(gconstpointer _a, gconstpointer _b) +{ + const df_reference_t *a = *(const df_reference_t **)_a; + const df_reference_t *b = *(const df_reference_t **)_b; + return a->proto_layer_num - b->proto_layer_num; +} + +static void +load_references(GHashTable *table, proto_tree *tree, bool raw) +{ + GHashTableIter iter; + GPtrArray *finfos; + field_info *finfo; + header_field_info *hfinfo; + GPtrArray *refs; + + if (g_hash_table_size(table) == 0) { + /* Nothing to do. 
*/ + return; + } + + g_hash_table_iter_init(&iter, table); + while (g_hash_table_iter_next(&iter, (void **)&hfinfo, (void **)&refs)) { + /* If we have a previous array free the data */ + g_ptr_array_set_size(refs, 0); + + while (hfinfo) { + finfos = proto_find_finfo(tree, hfinfo->id); + if (finfos == NULL) { + hfinfo = hfinfo->same_name_next; + continue; + } + for (unsigned i = 0; i < finfos->len; i++) { + finfo = g_ptr_array_index(finfos, i); + g_ptr_array_add(refs, reference_new(finfo, raw)); + } + g_ptr_array_free(finfos, true); + hfinfo = hfinfo->same_name_next; + } + + g_ptr_array_sort(refs, compare_ref_layer); + } +} + +void +dfilter_load_field_references(const dfilter_t *df, proto_tree *tree) +{ + load_references(df->references, tree, false); + load_references(df->raw_references, tree, true); +} + +void +dfilter_load_field_references_edt(const dfilter_t *df, epan_dissect_t *edt) +{ + dfilter_load_field_references(df, edt->tree); +} + +df_reference_t * +reference_new(const field_info *finfo, bool raw) +{ + df_reference_t *ref = g_new(df_reference_t, 1); + ref->hfinfo = finfo->hfinfo; + if (raw) { + ref->value = dfvm_get_raw_fvalue(finfo); + } + else { + ref->value = fvalue_dup(finfo->value); + } + ref->proto_layer_num = finfo->proto_layer_num; + return ref; +} + +void +reference_free(df_reference_t *ref) +{ + fvalue_free(ref->value); + g_free(ref); +} + +df_error_t * +df_error_new(int code, char *msg, df_loc_t *loc) +{ + df_error_t *err = g_new(df_error_t, 1); + err->code = code; + err->msg = msg; + if (loc) { + err->loc.col_start = loc->col_start; + err->loc.col_len = loc->col_len; + } + else { + err->loc.col_start = -1; + err->loc.col_len = 0; + } + return err; +} + +df_error_t * +df_error_new_vprintf(int code, df_loc_t *loc, const char *fmt, va_list ap) +{ + df_error_t *err = g_new(df_error_t, 1); + err->code = code; + err->msg = ws_strdup_vprintf(fmt, ap); + if (loc) { + err->loc.col_start = loc->col_start; + err->loc.col_len = loc->col_len; + } + else { 
+ err->loc.col_start = -1; + err->loc.col_len = 0; + } + return err; +} + +df_error_t * +df_error_new_printf(int code, df_loc_t *loc, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + df_error_t *err = df_error_new_vprintf(code, loc, fmt, ap); + va_end(ap); + return err; +} + +void +df_error_free(df_error_t **ep) +{ + if (*ep == NULL) + return; + g_free((*ep)->msg); + g_free(*ep); + *ep = NULL; +} + +void +df_cell_append(df_cell_t *rp, fvalue_t *fv) +{ + /* Assert cell has been initialized. */ + ws_assert(rp->array != NULL); + g_ptr_array_add(rp->array, fv); +} + +GPtrArray * +df_cell_ref(df_cell_t *rp) +{ + if (rp->array == NULL) + return NULL; + return g_ptr_array_ref(rp->array); +} + +size_t +df_cell_size(const df_cell_t *rp) +{ + if (rp->array == NULL) + return 0; + return rp->array->len; +} + +fvalue_t ** +df_cell_array(const df_cell_t *rp) +{ + if (rp->array == NULL) + return NULL; + return (fvalue_t **)rp->array->pdata; +} + +bool +df_cell_is_empty(const df_cell_t *rp) +{ + if (rp->array == NULL) + return true; + return rp->array->len == 0; +} + +bool +df_cell_is_null(const df_cell_t *rp) +{ + return rp->array == NULL; +} + +void +df_cell_init(df_cell_t *rp, bool free_seg) +{ + df_cell_clear(rp); + if (free_seg) + rp->array = g_ptr_array_new_with_free_func((GDestroyNotify)fvalue_free); + else + rp->array = g_ptr_array_new(); +} + +void +df_cell_clear(df_cell_t *rp) +{ + if (rp->array) + g_ptr_array_unref(rp->array); + rp->array = NULL; +} + +void +df_cell_iter_init(df_cell_t *rp, df_cell_iter_t *iter) +{ + iter->ptr = rp->array; + iter->idx = 0; +} + +fvalue_t * +df_cell_iter_next(df_cell_iter_t *iter) +{ + if (iter->idx < iter->ptr->len) { + return iter->ptr->pdata[iter->idx++]; + } + return NULL; +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * 
:indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/dfilter.h b/epan/dfilter/dfilter.h new file mode 100644 index 0000000..640e66e --- /dev/null +++ b/epan/dfilter/dfilter.h @@ -0,0 +1,209 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DFILTER_H +#define DFILTER_H + +#include <wireshark.h> + +#include "dfilter-loc.h" +#include <epan/proto.h> + +/* Passed back to user */ +typedef struct epan_dfilter dfilter_t; + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +struct epan_dissect; + +#define DF_ERROR_GENERIC -1 +#define DF_ERROR_UNEXPECTED_END -2 + +typedef struct { + int code; + char *msg; + df_loc_t loc; +} df_error_t; + +df_error_t * +df_error_new(int code, char *msg, df_loc_t *loc); + +df_error_t * +df_error_new_printf(int code, df_loc_t *loc, const char *fmt, ...) +G_GNUC_PRINTF(3, 4); + +#define df_error_new_msg(msg) \ + df_error_new_printf(DF_ERROR_GENERIC, NULL, "%s", msg) + +df_error_t * +df_error_new_vprintf(int code, df_loc_t *loc, const char *fmt, va_list ap); + +WS_DLL_PUBLIC +void +df_error_free(df_error_t **ep); + +/* Module-level initialization */ +void +dfilter_init(void); + +/* Module-level cleanup */ +void +dfilter_cleanup(void); + +/* Perform macro expansion. */ +WS_DLL_PUBLIC +char * +dfilter_expand(const char *expr, df_error_t **err_ret); + +/* Save textual representation of syntax tree (for debugging purposes). */ +#define DF_SAVE_TREE (1U << 0) +/* Perform macro substitution on filter text. */ +#define DF_EXPAND_MACROS (1U << 1) +/* Do an optimization pass on the compiled filter. */ +#define DF_OPTIMIZE (1U << 2) +/* Enable debug trace for flex. */ +#define DF_DEBUG_FLEX (1U << 3) +/* Enable debug trace for lemon. */ +#define DF_DEBUG_LEMON (1U << 4) + +/* Compiles a string to a dfilter_t. 
+ * On success, sets the dfilter* pointed to by dfp + * to either a NULL pointer (if the filter is a null + * filter, as generated by an all-blank string) or to + * a pointer to the newly-allocated dfilter_t + * structure. + * + * On failure, if errpp is not NULL, *errpp is set to + * point to a newly allocated df_error_t describing the + * problem; release it with df_error_free(). If errpp + * is NULL the error is discarded. + * The dfilter* will be set to NULL after a failure. + * + * Returns true on success, false on failure. + */ +WS_DLL_PUBLIC +bool +dfilter_compile_full(const char *text, dfilter_t **dfp, + df_error_t **errpp, unsigned flags, + const char *caller); + +#define dfilter_compile(text, dfp, errp) \ + dfilter_compile_full(text, dfp, errp, \ + DF_EXPAND_MACROS|DF_OPTIMIZE, \ + __func__) + +/* Frees all memory used by dfilter, and frees + * the dfilter itself. */ +WS_DLL_PUBLIC +void +dfilter_free(dfilter_t *df); + +/* Apply compiled dfilter */ +WS_DLL_PUBLIC +bool +dfilter_apply_edt(dfilter_t *df, struct epan_dissect *edt); + +/* Apply compiled dfilter */ +bool +dfilter_apply(dfilter_t *df, proto_tree *tree); + +/* Prime a proto_tree using the fields/protocols used in a dfilter. */ +void +dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree); + +/* Refresh references in a compiled display filter. */ +WS_DLL_PUBLIC +void +dfilter_load_field_references(const dfilter_t *df, proto_tree *tree); + +/* Refresh references in a compiled display filter. 
*/ +WS_DLL_PUBLIC +void +dfilter_load_field_references_edt(const dfilter_t *df, struct epan_dissect *edt); + +/* Check if dfilter has interesting fields */ +bool +dfilter_has_interesting_fields(const dfilter_t *df); + +/* Check if dfilter is interested in a given field + * + * @param df The dfilter + * @param hfid The header field info ID to check + * @return true if the field is interesting to the dfilter + */ +bool +dfilter_interested_in_field(const dfilter_t *df, int hfid); + +/* Check if dfilter is interested in a given protocol + * + * @param df The dfilter + * @param proto_id The protocol ID to check + * @return true if proto_id itself is one of the dfilter's + * interesting fields, or if the dfilter is interested in a + * field whose parent is proto_id + */ +bool +dfilter_interested_in_proto(const dfilter_t *df, int proto_id); + +/* Check if dfilter is interested in the "_ws.col" protocol. + * Returns false when df is NULL. */ +WS_DLL_PUBLIC +bool +dfilter_requires_columns(const dfilter_t *df); + +/* Deprecated tokens recorded for the dfilter, or NULL if there are + * none. The dfilter's own array is returned; no new reference is + * taken for the caller. */ +WS_DLL_PUBLIC +GPtrArray * +dfilter_deprecated_tokens(dfilter_t *df); + +/* Warnings recorded for the dfilter, as the dfilter's own list. */ +WS_DLL_PUBLIC +GSList * +dfilter_get_warnings(dfilter_t *df); + +#define DF_DUMP_REFERENCES (1U << 0) +#define DF_DUMP_SHOW_FTYPE (1U << 1) + +/* Print bytecode of dfilter to fp */ +WS_DLL_PUBLIC +void +dfilter_dump(FILE *fp, dfilter_t *df, uint16_t flags); + +/* Text after macro expansion. */ +WS_DLL_PUBLIC +const char * +dfilter_text(dfilter_t *df); + +/* Text representation of syntax tree (if it was saved, NULL otherwise). 
*/ +WS_DLL_PUBLIC +const char * +dfilter_syntax_tree(dfilter_t *df); + +/* Print bytecode of dfilter to log */ +WS_DLL_PUBLIC +void +dfilter_log_full(const char *domain, enum ws_log_level level, + const char *file, long line, const char *func, + dfilter_t *dfcode, const char *msg); + +#ifdef WS_DEBUG +#define dfilter_log(dfcode, msg) \ + dfilter_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_NOISY, \ + __FILE__, __LINE__, __func__, \ + dfcode, msg) +#else +#define dfilter_log(dfcode, msg) (void)0 +#endif + +#define DFILTER_DEBUG_HERE(dfcode) \ + dfilter_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_ECHO, \ + __FILE__, __LINE__, __func__, \ + dfcode, #dfcode); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* DFILTER_H */ diff --git a/epan/dfilter/dfunctions.c b/epan/dfilter/dfunctions.c new file mode 100644 index 0000000..be36424 --- /dev/null +++ b/epan/dfilter/dfunctions.c @@ -0,0 +1,520 @@ +/* + * Wireshark - Network traffic analyzer + * + * Copyright 2006 Gilbert Ramirez <gram@alumni.rice.edu> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER + +#include <glib.h> + +#include "dfilter-int.h" +#include "dfunctions.h" +#include "sttype-field.h" +#include "semcheck.h" + +#include <string.h> + +#include <ftypes/ftypes.h> +#include <epan/exceptions.h> +#include <wsutil/ws_assert.h> + +#define FAIL(dfw, node, ...) 
\ + do { \ + ws_noisy("Semantic check failed here."); \ + dfilter_fail_throw(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \ + } while (0) + +/* Convert an FT_STRING using a callback function */ +static bool +string_walk(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval, char(*conv_func)(char)) +{ + GPtrArray *arg1; + fvalue_t *arg_fvalue; + fvalue_t *new_ft_string; + const wmem_strbuf_t *src; + wmem_strbuf_t *dst; + + ws_assert(arg_count == 1); + arg1 = stack->data; + if (arg1 == NULL) + return false; + + for (unsigned i = 0; i < arg1->len; i++) { + arg_fvalue = arg1->pdata[i]; + /* XXX - it would be nice to handle FT_TVBUFF, too */ + if (FT_IS_STRING(fvalue_type_ftenum(arg_fvalue))) { + src = fvalue_get_strbuf(arg_fvalue); + dst = wmem_strbuf_new_sized(NULL, src->len); + for (size_t j = 0; j < src->len; j++) { + wmem_strbuf_append_c(dst, conv_func(src->str[j])); + } + new_ft_string = fvalue_new(FT_STRING); + fvalue_set_strbuf(new_ft_string, dst); + df_cell_append(retval, new_ft_string); + } + } + + return true; +} + +/* dfilter function: lower() */ +static bool +df_func_lower(GSList *stack, uint32_t arg_count, df_cell_t *retval) +{ + return string_walk(stack, arg_count, retval, g_ascii_tolower); +} + +/* dfilter function: upper() */ +static bool +df_func_upper(GSList *stack, uint32_t arg_count, df_cell_t *retval) +{ + return string_walk(stack, arg_count, retval, g_ascii_toupper); +} + +/* dfilter function: count() */ +static bool +df_func_count(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval) +{ + GPtrArray *arg1; + fvalue_t *ft_ret; + uint32_t num_items; + + ws_assert(arg_count == 1); + arg1 = stack->data; + if (arg1 == NULL) + return false; + + num_items = arg1->len; + ft_ret = fvalue_new(FT_UINT32); + fvalue_set_uinteger(ft_ret, num_items); + df_cell_append(retval, ft_ret); + + return true; +} + +/* dfilter function: string() */ +static bool +df_func_string(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval) +{ + GPtrArray 
*arg1; + fvalue_t *arg_fvalue; + fvalue_t *new_ft_string; + char *s; + + ws_assert(arg_count == 1); + arg1 = stack->data; + if (arg1 == NULL) + return false; + + for (unsigned i = 0; i < arg1->len; i++) { + arg_fvalue = arg1->pdata[i]; + switch (fvalue_type_ftenum(arg_fvalue)) + { + case FT_UINT8: + case FT_UINT16: + case FT_UINT24: + case FT_UINT32: + case FT_UINT40: + case FT_UINT48: + case FT_UINT56: + case FT_UINT64: + case FT_INT8: + case FT_INT16: + case FT_INT32: + case FT_INT40: + case FT_INT48: + case FT_INT56: + case FT_INT64: + case FT_IPv4: + case FT_IPv6: + case FT_FLOAT: + case FT_DOUBLE: + case FT_ETHER: + case FT_FRAMENUM: + case FT_AX25: + case FT_IPXNET: + case FT_GUID: + case FT_OID: + case FT_EUI64: + case FT_VINES: + case FT_REL_OID: + case FT_SYSTEM_ID: + case FT_FCWWN: + case FT_IEEE_11073_SFLOAT: + case FT_IEEE_11073_FLOAT: + s = fvalue_to_string_repr(NULL, arg_fvalue, FTREPR_DFILTER, BASE_NONE); + /* Ensure we have an allocated string here */ + if (!s) + s = wmem_strdup(NULL, ""); + break; + default: + return true; + } + + new_ft_string = fvalue_new(FT_STRING); + fvalue_set_string(new_ft_string, s); + wmem_free(NULL, s); + df_cell_append(retval, new_ft_string); + } + + return true; +} + +/* Shared implementation of max() and min(): scan every value of every + * argument and keep the one for which fv_cmp(candidate, best) is true. + * A copy of the winner is appended to retval. Returns false when no + * argument supplied any value. */ +static bool +df_func_compare(GSList *stack, uint32_t arg_count, df_cell_t *retval, + bool (*fv_cmp)(const fvalue_t *a, const fvalue_t *b)) +{ + fvalue_t *fv_ret = NULL; + GSList *args; + GPtrArray *arg1; + fvalue_t *arg_fvalue; + uint32_t i; + + for (args = stack, i = 0; i < arg_count; args = args->next, i++) { + arg1 = args->data; + /* The other functions in this file treat a NULL argument + * array as possible; skip it instead of dereferencing it. */ + if (arg1 == NULL) + continue; + for (unsigned j = 0; j < arg1->len; j++) { + arg_fvalue = arg1->pdata[j]; + if (fv_ret == NULL || fv_cmp(arg_fvalue, fv_ret)) { + fv_ret = arg_fvalue; + } + } + } + + if (fv_ret == NULL) + return false; + + df_cell_append(retval, fvalue_dup(fv_ret)); + + return true; +} + +/* Find maximum value. 
*/ +static bool +df_func_max(GSList *stack, uint32_t arg_count, df_cell_t *retval) +{ + return df_func_compare(stack, arg_count, retval, fvalue_gt); +} + +/* Find minimum value. */ +static bool +df_func_min(GSList *stack, uint32_t arg_count, df_cell_t *retval) +{ + return df_func_compare(stack, arg_count, retval, fvalue_lt); +} + +/* dfilter function: abs() + * Appends to retval, for each value of the single argument, either a + * copy of the value or its negation when the value is negative. A + * value whose negation fails is logged and skipped. Returns false + * when the argument is NULL or nothing was appended. */ +static bool +df_func_abs(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval) +{ + GPtrArray *arg1; + fvalue_t *fv_arg, *new_fv; + char *err_msg = NULL; + + ws_assert(arg_count == 1); + arg1 = stack->data; + if (arg1 == NULL) + return false; + + for (unsigned i = 0; i < arg1->len; i++) { + fv_arg = arg1->pdata[i]; + if (fvalue_is_negative(fv_arg)) { + new_fv = fvalue_unary_minus(fv_arg, &err_msg); + if (new_fv == NULL) { + ws_debug("abs: %s", err_msg); + g_free(err_msg); + err_msg = NULL; + /* Do not store a NULL value in the result cell. */ + continue; + } + } + else { + new_fv = fvalue_dup(fv_arg); + } + df_cell_append(retval, new_fv); + } + + return !df_cell_is_empty(retval); +} + +/* For upper() and lower() checks that the parameter passed to + * it is an FT_STRING */ +static ftenum_t +ul_semcheck_is_field_string(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_, + GSList *param_list, df_loc_t func_loc _U_) +{ + header_field_info *hfinfo; + + ws_assert(g_slist_length(param_list) == 1); + stnode_t *st_node = param_list->data; + + if (stnode_type_id(st_node) == STTYPE_FIELD) { + dfw->field_count++; + hfinfo = sttype_field_hfinfo(st_node); + if (FT_IS_STRING(hfinfo->type)) { + return FT_STRING; + } + } + FAIL(dfw, st_node, "Only string type fields can be used as parameter for %s()", func_name); +} + +static ftenum_t +ul_semcheck_is_field(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_, + GSList *param_list, df_loc_t func_loc _U_) +{ + ws_assert(g_slist_length(param_list) == 1); + stnode_t *st_node = param_list->data; + + if (stnode_type_id(st_node) == STTYPE_FIELD) { + dfw->field_count++; + return FT_UINT32; + } + + FAIL(dfw, st_node, "Only fields can be used as parameter for %s()", func_name); +} + +static ftenum_t +ul_semcheck_can_length(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype, + GSList *param_list, df_loc_t func_loc) +{ + ws_assert(g_slist_length(param_list) == 1); + stnode_t *st_node = param_list->data; + + ul_semcheck_is_field(dfw, func_name, lhs_ftype, param_list, func_loc); + if (!ftype_can_length(sttype_field_ftenum(st_node))) { + FAIL(dfw, st_node, "Field %s does not support the %s() function", stnode_todisplay(st_node), func_name); + } + return FT_UINT32; +} + +static ftenum_t +ul_semcheck_string_param(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_, + GSList *param_list, df_loc_t func_loc _U_) +{ + header_field_info *hfinfo; + + ws_assert(g_slist_length(param_list) == 1); + stnode_t *st_node = param_list->data; + + if (stnode_type_id(st_node) == STTYPE_FIELD) { + dfw->field_count++; + hfinfo = sttype_field_hfinfo(st_node); + switch (hfinfo->type) { + case FT_UINT8: + case FT_UINT16: + case FT_UINT24: + case FT_UINT32: + case FT_UINT40: + case FT_UINT48: + case FT_UINT56: + case FT_UINT64: + case FT_INT8: + case FT_INT16: + case FT_INT32: + case FT_INT40: + case FT_INT48: + case FT_INT56: + case FT_INT64: + case FT_IPv4: + case FT_IPv6: + case FT_FLOAT: + case FT_DOUBLE: + case FT_ETHER: + case FT_FRAMENUM: + case FT_AX25: + case FT_IPXNET: + case FT_GUID: + case FT_OID: + case FT_EUI64: + case FT_VINES: + case FT_REL_OID: + case FT_SYSTEM_ID: + case FT_FCWWN: + case FT_IEEE_11073_SFLOAT: + case FT_IEEE_11073_FLOAT: + return FT_STRING; + default: + break; + } + FAIL(dfw, st_node, "String conversion for field \"%s\" is not supported", hfinfo->abbrev); + } + FAIL(dfw, st_node, "Only fields can be used as parameter for %s()", func_name); +} + +/* Check arguments are all the same type and they can be compared. */ +/* + Every STTYPE_LITERAL needs to be resolved to a STTYPE_FVALUE. 
If we don't + have type information (lhs_ftype is FT_NONE) and we have not seen an argument + with a definite type we defer resolving literals to values until we have examined + the entire list of function arguments. If we still cannot resolve to a definite + type after that (all arguments must have the same type) then we give up and + return FT_NONE. +*/ +static ftenum_t +ul_semcheck_compare(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype, + GSList *param_list, df_loc_t func_loc _U_) +{ + stnode_t *arg; + sttype_id_t type; + ftenum_t ftype, ft_arg; + GSList *l; + fvalue_t *fv; + wmem_list_t *literals = NULL; + + ftype = lhs_ftype; + + for (l = param_list; l != NULL; l = l->next) { + arg = l->data; + type = stnode_type_id(arg); + + if (type == STTYPE_ARITHMETIC) { + ft_arg = check_arithmetic(dfw, arg, ftype); + } + else if (type == STTYPE_LITERAL) { + if (ftype != FT_NONE) { + fv = dfilter_fvalue_from_literal(dfw, ftype, arg, false, NULL); + stnode_replace(arg, STTYPE_FVALUE, fv); + ft_arg = fvalue_type_ftenum(fv); + } + else { + if (literals == NULL) { + literals = wmem_list_new(dfw->dfw_scope); + } + wmem_list_append(literals, arg); + ft_arg = FT_NONE; + } + } + else if (type == STTYPE_FUNCTION) { + ft_arg = check_function(dfw, arg, ftype); + } + else if (type == STTYPE_FIELD) { + dfw->field_count++; + ft_arg = sttype_field_ftenum(arg); + } + else if (type == STTYPE_REFERENCE) { + ft_arg = sttype_field_ftenum(arg); + } + else { + FAIL(dfw, arg, "Argument '%s' is not valid for %s()", + stnode_todisplay(arg), func_name); + } + + if (ftype == FT_NONE) { + ftype = ft_arg; + } + if (ft_arg != FT_NONE && ftype != FT_NONE && !compatible_ftypes(ft_arg, ftype)) { + FAIL(dfw, arg, "Arguments to '%s' must be type compatible (expected %s, got %s)", + func_name, ftype_name(ftype), ftype_name(ft_arg)); + } + if (ft_arg != FT_NONE && !ftype_can_cmp(ft_arg)) { + FAIL(dfw, arg, "Argument '%s' to '%s' cannot be ordered", + stnode_todisplay(arg), func_name); + } + } + + if 
(literals != NULL) { + if (ftype != FT_NONE) { + wmem_list_frame_t *fp; + stnode_t *st; + for (fp = wmem_list_head(literals); fp != NULL; fp = wmem_list_frame_next(fp)) { + st = wmem_list_frame_data(fp); + fv = dfilter_fvalue_from_literal(dfw, ftype, st, false, NULL); + stnode_replace(st, STTYPE_FVALUE, fv); + } + } + wmem_destroy_list(literals); + } + + return ftype; +} + +static ftenum_t +ul_semcheck_absolute_value(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype, + GSList *param_list, df_loc_t func_loc _U_) +{ + ws_assert(g_slist_length(param_list) == 1); + stnode_t *st_node; + ftenum_t ftype; + fvalue_t *fv; + + st_node = param_list->data; + + if (stnode_type_id(st_node) == STTYPE_ARITHMETIC) { + ftype = check_arithmetic(dfw, st_node, lhs_ftype); + } + else if (stnode_type_id(st_node) == STTYPE_LITERAL) { + if (lhs_ftype != FT_NONE) { + /* Convert RHS literal to the same ftype as LHS. */ + fv = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_node, false, NULL); + stnode_replace(st_node, STTYPE_FVALUE, fv); + ftype = fvalue_type_ftenum(fv); + } + else { + FAIL(dfw, st_node, "Need a field or field-like value on the LHS."); + } + } + else if (stnode_type_id(st_node) == STTYPE_FUNCTION) { + ftype = check_function(dfw, st_node, lhs_ftype); + } + else if (stnode_type_id(st_node) == STTYPE_FIELD) { + dfw->field_count++; + ftype = sttype_field_ftenum(st_node); + } + else { + ftype = FT_NONE; + } + + if (ftype == FT_NONE) { + FAIL(dfw, st_node, "Type %s is not valid for %s", + stnode_type_name(st_node), func_name); + } + if (!ftype_can_is_negative(ftype)) { + FAIL(dfw, st_node, "'%s' is not a valid argument to '%s'()", + stnode_todisplay(st_node), func_name); + } + return ftype; +} + +/* The table of all display-filter functions */ +static df_func_def_t +df_functions[] = { + { "lower", df_func_lower, 1, 1, ul_semcheck_is_field_string }, + { "upper", df_func_upper, 1, 1, ul_semcheck_is_field_string }, + /* Length function is implemented as a DFVM instruction. 
*/ + { "len", NULL, 1, 1, ul_semcheck_can_length }, + { "count", df_func_count, 1, 1, ul_semcheck_is_field }, + { "string", df_func_string, 1, 1, ul_semcheck_string_param }, + { "max", df_func_max, 1, 0, ul_semcheck_compare }, + { "min", df_func_min, 1, 0, ul_semcheck_compare }, + { "abs", df_func_abs, 1, 1, ul_semcheck_absolute_value }, + { NULL, NULL, 0, 0, NULL } +}; + +/* Lookup a display filter function record by name */ +df_func_def_t* +df_func_lookup(const char *name) +{ + df_func_def_t *func_def; + + func_def = df_functions; + while (func_def->name != NULL) { + if (strcmp(func_def->name, name) == 0) { + return func_def; + } + func_def++; + } + return NULL; +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + * + * vi: set shiftwidth=4 tabstop=8 expandtab: + * :indentSize=4:tabSize=8:noTabs=true: + */ diff --git a/epan/dfilter/dfunctions.h b/epan/dfilter/dfunctions.h new file mode 100644 index 0000000..25485e7 --- /dev/null +++ b/epan/dfilter/dfunctions.h @@ -0,0 +1,40 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * + * Copyright 2006 Gilbert Ramirez <gram@alumni.rice.edu> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DFUNCTIONS_H +#define DFUNCTIONS_H + +#include <glib.h> +#include <ftypes/ftypes.h> +#include "syntax-tree.h" +#include "dfilter-int.h" + +/* Functions take any number of arguments and return 1. 
*/ + +/* The run-time logic of the dfilter function */ +typedef bool (*DFFuncType)(GSList *stack, uint32_t arg_count, df_cell_t *retval); + +/* The semantic check for the dfilter function */ +typedef ftenum_t (*DFSemCheckType)(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype, + GSList *param_list, df_loc_t func_loc); + +/* This is a "function definition" record, holding everything + * we need to know about a function */ +typedef struct { + const char *name; + DFFuncType function; + unsigned min_nargs; + unsigned max_nargs; /* 0 for no limit */ + DFSemCheckType semcheck_param_function; +} df_func_def_t; + +/* Return the function definition record for a function of named "name" */ +df_func_def_t* df_func_lookup(const char *name); + +#endif diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c new file mode 100644 index 0000000..cae7336 --- /dev/null +++ b/epan/dfilter/dfvm.c @@ -0,0 +1,1747 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER + +#include "dfvm.h" + +#include <ftypes/ftypes.h> +#include <wsutil/ws_assert.h> + +static void +debug_register(GSList *reg, uint32_t num); + +const char * +dfvm_opcode_tostr(dfvm_opcode_t code) +{ + switch (code) { + case DFVM_IF_TRUE_GOTO: return "IF_TRUE_GOTO"; + case DFVM_IF_FALSE_GOTO: return "IF_FALSE_GOTO"; + case DFVM_CHECK_EXISTS: return "CHECK_EXISTS"; + case DFVM_CHECK_EXISTS_R: return "CHECK_EXISTS_R"; + case DFVM_NOT: return "NOT"; + case DFVM_RETURN: return "RETURN"; + case DFVM_READ_TREE: return "READ_TREE"; + case DFVM_READ_TREE_R: return "READ_TREE_R"; + case DFVM_READ_REFERENCE: return "READ_REFERENCE"; + case DFVM_READ_REFERENCE_R: return "READ_REFERENCE_R"; + case DFVM_PUT_FVALUE: return "PUT_FVALUE"; + case DFVM_ALL_EQ: return "ALL_EQ"; + case DFVM_ANY_EQ: return "ANY_EQ"; + case DFVM_ALL_NE: return "ALL_NE"; + 
case DFVM_ANY_NE: return "ANY_NE"; + case DFVM_ALL_GT: return "ALL_GT"; + case DFVM_ANY_GT: return "ANY_GT"; + case DFVM_ALL_GE: return "ALL_GE"; + case DFVM_ANY_GE: return "ANY_GE"; + case DFVM_ALL_LT: return "ALL_LT"; + case DFVM_ANY_LT: return "ANY_LT"; + case DFVM_ALL_LE: return "ALL_LE"; + case DFVM_ANY_LE: return "ANY_LE"; + case DFVM_ALL_CONTAINS: return "ALL_CONTAINS"; + case DFVM_ANY_CONTAINS: return "ANY_CONTAINS"; + case DFVM_ALL_MATCHES: return "ALL_MATCHES"; + case DFVM_ANY_MATCHES: return "ANY_MATCHES"; + case DFVM_SET_ALL_IN: return "SET_ALL_IN"; + case DFVM_SET_ANY_IN: return "SET_ANY_IN"; + case DFVM_SET_ALL_NOT_IN: return "SET_ALL_NOT_IN"; + case DFVM_SET_ANY_NOT_IN: return "SET_ANY_NOT_IN"; + case DFVM_SET_ADD: return "SET_ADD"; + case DFVM_SET_ADD_RANGE: return "SET_ADD_RANGE"; + case DFVM_SET_CLEAR: return "SET_CLEAR"; + case DFVM_SLICE: return "SLICE"; + case DFVM_LENGTH: return "LENGTH"; + case DFVM_BITWISE_AND: return "BITWISE_AND"; + case DFVM_UNARY_MINUS: return "UNARY_MINUS"; + case DFVM_ADD: return "ADD"; + case DFVM_SUBTRACT: return "SUBTRACT"; + case DFVM_MULTIPLY: return "MULTIPLY"; + case DFVM_DIVIDE: return "DIVIDE"; + case DFVM_MODULO: return "MODULO"; + case DFVM_CALL_FUNCTION: return "CALL_FUNCTION"; + case DFVM_STACK_PUSH: return "STACK_PUSH"; + case DFVM_STACK_POP: return "STACK_POP"; + case DFVM_NOT_ALL_ZERO: return "NOT_ALL_ZERO"; + } + return "(fix-opcode-string)"; +} + +dfvm_insn_t* +dfvm_insn_new(dfvm_opcode_t op) +{ + dfvm_insn_t *insn; + + insn = g_new(dfvm_insn_t, 1); + insn->op = op; + insn->arg1 = NULL; + insn->arg2 = NULL; + insn->arg3 = NULL; + return insn; +} + +static void +dfvm_value_free(dfvm_value_t *v) +{ + switch (v->type) { + case FVALUE: + g_ptr_array_unref(v->value.fvalue_p); + break; + case DRANGE: + drange_free(v->value.drange); + break; + case PCRE: + ws_regex_free(v->value.pcre); + break; + case EMPTY: + case HFINFO: + case RAW_HFINFO: + case INSN_NUMBER: + case REGISTER: + case INTEGER: + case 
FUNCTION_DEF: + break; + } + g_free(v); +} + +dfvm_value_t* +dfvm_value_ref(dfvm_value_t *v) +{ + if (v == NULL) + return NULL; + v->ref_count++; + return v; +} + +void +dfvm_value_unref(dfvm_value_t *v) +{ + ws_assert(v); + v->ref_count--; + if (v->ref_count > 0) + return; + dfvm_value_free(v); +} + +void +dfvm_insn_free(dfvm_insn_t *insn) +{ + if (insn->arg1) { + dfvm_value_unref(insn->arg1); + } + if (insn->arg2) { + dfvm_value_unref(insn->arg2); + } + if (insn->arg3) { + dfvm_value_unref(insn->arg3); + } + g_free(insn); +} + + +dfvm_value_t* +dfvm_value_new(dfvm_value_type_t type) +{ + dfvm_value_t *v; + + v = g_new(dfvm_value_t, 1); + v->type = type; + v->ref_count = 0; + return v; +} + +dfvm_value_t* +dfvm_value_new_fvalue(fvalue_t *fv) +{ + dfvm_value_t *v = dfvm_value_new(FVALUE); + v->value.fvalue_p = g_ptr_array_new_full(1, (GDestroyNotify)fvalue_free); + g_ptr_array_add(v->value.fvalue_p, fv); + return v; +} + +dfvm_value_t* +dfvm_value_new_hfinfo(header_field_info *hfinfo, bool raw) +{ + dfvm_value_t *v; + + if (raw) + v = dfvm_value_new(RAW_HFINFO); + else + v = dfvm_value_new(HFINFO); + v->value.hfinfo = hfinfo; + return v; +} + +dfvm_value_t* +dfvm_value_new_register(int reg) +{ + dfvm_value_t *v = dfvm_value_new(REGISTER); + v->value.numeric = reg; + return v; +} + +dfvm_value_t* +dfvm_value_new_drange(drange_t *dr) +{ + dfvm_value_t *v = dfvm_value_new(DRANGE); + v->value.drange = dr; + return v; +} + +dfvm_value_t* +dfvm_value_new_funcdef(df_func_def_t *funcdef) +{ + dfvm_value_t *v = dfvm_value_new(FUNCTION_DEF); + v->value.funcdef = funcdef; + return v; +} + +dfvm_value_t* +dfvm_value_new_pcre(ws_regex_t *re) +{ + dfvm_value_t *v = dfvm_value_new(PCRE); + v->value.pcre = re; + return v; +} + +dfvm_value_t* +dfvm_value_new_guint(unsigned num) +{ + dfvm_value_t *v = dfvm_value_new(INTEGER); + v->value.numeric = num; + return v; +} + +static char * +dfvm_value_tostr(dfvm_value_t *v) +{ + char *s; + + if (!v) + return NULL; + + switch (v->type) { + 
case HFINFO: + s = ws_strdup(v->value.hfinfo->abbrev); + break; + case RAW_HFINFO: + s = ws_strdup_printf("@%s", v->value.hfinfo->abbrev); + break; + case FVALUE: + s = fvalue_to_debug_repr(NULL, dfvm_value_get_fvalue(v)); + break; + case DRANGE: + s = drange_tostr(v->value.drange); + break; + case PCRE: + s = ws_strdup(ws_regex_pattern(v->value.pcre)); + break; + case REGISTER: + s = ws_strdup_printf("R%"G_GUINT32_FORMAT, v->value.numeric); + break; + case FUNCTION_DEF: + s = ws_strdup(v->value.funcdef->name); + break; + case INTEGER: + s = ws_strdup_printf("%"G_GUINT32_FORMAT, v->value.numeric); + break; + default: + s = ws_strdup("FIXME"); + } + return s; +} + +static char * +value_type_tostr(dfvm_value_t *v, bool show_ftype) +{ + const char *s; + + if (!v || !show_ftype) + return ws_strdup(""); + + switch (v->type) { + case HFINFO: + s = ftype_name(v->value.hfinfo->type); + break; + case RAW_HFINFO: + s = "FT_BYTES"; + break; + case FVALUE: + s = fvalue_type_name(dfvm_value_get_fvalue(v)); + break; + default: + return ws_strdup(""); + break; + } + return ws_strdup_printf(" <%s>", s); +} + +static GSList * +dump_str_stack_push(GSList *stack, const char *str) +{ + return g_slist_prepend(stack, g_strdup(str)); +} + +static GSList * +dump_str_stack_pop(GSList *stack, uint32_t count) +{ + while (stack && count-- > 0) { + g_free(stack->data); + stack = g_slist_delete_link(stack, stack); + } + return stack; +} + +static void +append_call_function(wmem_strbuf_t *buf, const char *func, uint32_t nargs, + GSList *stack_print) +{ + uint32_t idx; + GString *gs; + GSList *l; + const char *sep = ""; + + wmem_strbuf_append_printf(buf, "%s(", func); + if (nargs > 0) { + gs = g_string_new(NULL); + for (l = stack_print, idx = 0; l != NULL && idx < nargs; idx++, l = l->next) { + g_string_prepend(gs, sep); + g_string_prepend(gs, l->data); + sep = ", "; + } + wmem_strbuf_append(buf, gs->str); + g_string_free(gs, true); + } + wmem_strbuf_append(buf, ")"); +} + +static void 
+indent(wmem_strbuf_t *buf, size_t offset, size_t start) +{ + size_t pos = buf->len - start; + if (pos >= offset) + return; + wmem_strbuf_append_c_count(buf, ' ', offset - pos); +} +#define indent1(buf, start) indent(buf, 24, start) +#define indent2(buf, start) indent(buf, 16, start) + +static void +append_to_register(wmem_strbuf_t *buf, const char *reg) +{ + wmem_strbuf_append_printf(buf, " -> %s", reg); +} + +static void +append_op_args(wmem_strbuf_t *buf, dfvm_insn_t *insn, GSList **stack_print, + uint16_t flags) +{ + dfvm_value_t *arg1, *arg2, *arg3; + char *arg1_str, *arg2_str, *arg3_str; + char *arg1_str_type, *arg2_str_type, *arg3_str_type; + size_t col_start; + + arg1 = insn->arg1; + arg2 = insn->arg2; + arg3 = insn->arg3; + arg1_str = dfvm_value_tostr(arg1); + arg2_str = dfvm_value_tostr(arg2); + arg3_str = dfvm_value_tostr(arg3); + arg1_str_type = value_type_tostr(arg1, flags & DF_DUMP_SHOW_FTYPE); + arg2_str_type = value_type_tostr(arg2, flags & DF_DUMP_SHOW_FTYPE); + arg3_str_type = value_type_tostr(arg3, flags & DF_DUMP_SHOW_FTYPE); + + col_start = buf->len; + + switch (insn->op) { + case DFVM_CHECK_EXISTS: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + break; + + case DFVM_CHECK_EXISTS_R: + wmem_strbuf_append_printf(buf, "%s#[%s]%s", + arg1_str, arg2_str, arg1_str_type); + break; + + case DFVM_READ_TREE: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_READ_TREE_R: + wmem_strbuf_append_printf(buf, "%s#[%s]%s", + arg1_str, arg3_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_READ_REFERENCE: + wmem_strbuf_append_printf(buf, "${%s}%s", + arg1_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_READ_REFERENCE_R: + wmem_strbuf_append_printf(buf, "${%s#[%s]}%s", + arg1_str, arg3_str, arg1_str_type); + indent2(buf, 
col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_PUT_FVALUE: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_CALL_FUNCTION: + append_call_function(buf, arg1_str, arg3->value.numeric, *stack_print); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_STACK_PUSH: + wmem_strbuf_append_printf(buf, "%s", arg1_str); + *stack_print = dump_str_stack_push(*stack_print, arg1_str); + break; + + case DFVM_STACK_POP: + wmem_strbuf_append_printf(buf, "%s", arg1_str); + *stack_print = dump_str_stack_pop(*stack_print, arg1->value.numeric); + break; + + case DFVM_SLICE: + wmem_strbuf_append_printf(buf, "%s[%s]%s", + arg1_str, arg3_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_LENGTH: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_ALL_EQ: + wmem_strbuf_append_printf(buf, "%s%s === %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ANY_EQ: + wmem_strbuf_append_printf(buf, "%s%s == %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_NE: + wmem_strbuf_append_printf(buf, "%s%s != %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ANY_NE: + wmem_strbuf_append_printf(buf, "%s%s !== %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_GT: + case DFVM_ANY_GT: + wmem_strbuf_append_printf(buf, "%s%s > %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_GE: + case DFVM_ANY_GE: + wmem_strbuf_append_printf(buf, "%s%s >= %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_LT: + case DFVM_ANY_LT: + wmem_strbuf_append_printf(buf, "%s%s < %s%s", + arg1_str, 
arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_LE: + case DFVM_ANY_LE: + wmem_strbuf_append_printf(buf, "%s%s <= %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_NOT_ALL_ZERO: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + break; + + case DFVM_ALL_CONTAINS: + case DFVM_ANY_CONTAINS: + wmem_strbuf_append_printf(buf, "%s%s contains %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_ALL_MATCHES: + case DFVM_ANY_MATCHES: + wmem_strbuf_append_printf(buf, "%s%s matches %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_SET_ALL_IN: + case DFVM_SET_ANY_IN: + case DFVM_SET_ALL_NOT_IN: + case DFVM_SET_ANY_NOT_IN: + wmem_strbuf_append_printf(buf, "%s%s", + arg1_str, arg1_str_type); + break; + + case DFVM_SET_ADD: + wmem_strbuf_append_printf(buf, "%s%s", arg1_str, arg1_str_type); + break; + + case DFVM_SET_ADD_RANGE: + wmem_strbuf_append_printf(buf, "%s%s .. 
%s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + break; + + case DFVM_BITWISE_AND: + wmem_strbuf_append_printf(buf, "%s%s & %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_UNARY_MINUS: + wmem_strbuf_append_printf(buf, "-%s%s", + arg1_str, arg1_str_type); + indent2(buf, col_start); + append_to_register(buf, arg2_str); + break; + + case DFVM_ADD: + wmem_strbuf_append_printf(buf, "%s%s + %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_SUBTRACT: + wmem_strbuf_append_printf(buf, "%s%s - %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_MULTIPLY: + wmem_strbuf_append_printf(buf, "%s%s * %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_DIVIDE: + wmem_strbuf_append_printf(buf, "%s%s / %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_MODULO: + wmem_strbuf_append_printf(buf, "%s%s %% %s%s", + arg1_str, arg1_str_type, arg2_str, arg2_str_type); + indent2(buf, col_start); + append_to_register(buf, arg3_str); + break; + + case DFVM_IF_TRUE_GOTO: + case DFVM_IF_FALSE_GOTO: + wmem_strbuf_append_printf(buf, "%u", arg1->value.numeric); + break; + + case DFVM_NOT: + case DFVM_RETURN: + case DFVM_SET_CLEAR: + ws_assert_not_reached(); + } + + g_free(arg1_str); + g_free(arg2_str); + g_free(arg3_str); + g_free(arg1_str_type); + g_free(arg2_str_type); + g_free(arg3_str_type); +} + +static void +append_references(wmem_strbuf_t *buf, GHashTable *references, bool raw) +{ + GHashTableIter ref_iter; + void *key, *value; + char *str; + unsigned i; + + g_hash_table_iter_init(&ref_iter, references); + 
while (g_hash_table_iter_next(&ref_iter, &key, &value)) { + const char *abbrev = ((header_field_info *)key)->abbrev; + GPtrArray *refs_array = value; + df_reference_t *ref; + + if (raw) + wmem_strbuf_append_printf(buf, " ${@%s} = {", abbrev); + else + wmem_strbuf_append_printf(buf, " ${%s} = {", abbrev); + for (i = 0; i < refs_array->len; i++) { + if (i != 0) { + wmem_strbuf_append(buf, ", "); + } + ref = refs_array->pdata[i]; + str = fvalue_to_debug_repr(NULL, ref->value); + wmem_strbuf_append_printf(buf, "%s <%s>", str, fvalue_type_name(ref->value)); + g_free(str); + } + wmem_strbuf_append(buf, "}\n"); + } +} + +char * +dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, uint16_t flags) +{ + int id, length; + dfvm_insn_t *insn; + wmem_strbuf_t *buf; + GSList *stack_print = NULL; + size_t col_start; + + buf = wmem_strbuf_new(alloc, NULL); + + if (flags & DF_DUMP_REFERENCES) { + if (g_hash_table_size(df->references) > 0) { + wmem_strbuf_append(buf, "References:\n"); + append_references(buf, df->references, false); + } + else { + wmem_strbuf_append(buf, "References: (none)\n"); + } + wmem_strbuf_append_c(buf, '\n'); + } + + if (flags & DF_DUMP_REFERENCES) { + if (g_hash_table_size(df->raw_references) > 0) { + wmem_strbuf_append(buf, "Raw references:\n"); + append_references(buf, df->raw_references, true); + } + else { + wmem_strbuf_append(buf, "Raw references: (none)\n"); + } + wmem_strbuf_append_c(buf, '\n'); + } + + wmem_strbuf_append(buf, "Instructions:"); + + length = df->insns->len; + for (id = 0; id < length; id++) { + insn = g_ptr_array_index(df->insns, id); + col_start = buf->len; + wmem_strbuf_append_printf(buf, "\n %04d %s", id, dfvm_opcode_tostr(insn->op)); + + switch (insn->op) { + case DFVM_NOT: + case DFVM_RETURN: + case DFVM_SET_CLEAR: + /* Nothing here */ + break; + default: + indent1(buf, col_start); + append_op_args(buf, insn, &stack_print, flags); + break; + } + } + + return wmem_strbuf_finalize(buf); +} + +void +dfvm_dump(FILE *f, dfilter_t 
*df, uint16_t flags) +{ + char *str = dfvm_dump_str(NULL, df, flags); + fputs(str, f); + fputc('\n', f); + wmem_free(NULL, str); +} + +static int +compare_finfo_layer(gconstpointer _a, gconstpointer _b) +{ + const field_info *a = *(const field_info **)_a; + const field_info *b = *(const field_info **)_b; + return a->proto_layer_num - b->proto_layer_num; +} + +static bool +drange_contains_layer(drange_t *dr, int num, int length) +{ + drange_node *rn; + GSList *list = dr->range_list; + int lower, upper; + + while (list) { + rn = list->data; + lower = rn->start_offset; + if (lower < 0) { + lower += length + 1; + } + if (rn->ending == DRANGE_NODE_END_T_LENGTH) { + upper = lower + rn->length - 1; + } + else if (rn->ending == DRANGE_NODE_END_T_OFFSET) { + upper = rn->end_offset; + } + else if (rn->ending == DRANGE_NODE_END_T_TO_THE_END) { + upper = INT_MAX; + } + else { + ws_assert_not_reached(); + } + + if (num >= lower && num <= upper) { /* inclusive */ + return true; + } + + list = g_slist_next(list); + } + return false; +} + +fvalue_t * +dfvm_get_raw_fvalue(const field_info *fi) +{ + GByteArray *bytes; + fvalue_t *fv; + int length, tvb_length; + + /* + * XXX - a field can have a length that runs past + * the end of the tvbuff. Ideally, that should + * be fixed when adding an item to the protocol + * tree, but checking the length when doing + * that could be expensive. Until we fix that, + * we'll do the check here. + */ + tvb_length = tvb_captured_length_remaining(fi->ds_tvb, fi->start); + if (tvb_length < 0) { + return NULL; + } + length = fi->length; + if (length > tvb_length) + length = tvb_length; + + bytes = g_byte_array_new(); + g_byte_array_append(bytes, tvb_get_ptr(fi->ds_tvb, fi->start, length), length); + + fv = fvalue_new(FT_BYTES); + fvalue_set_byte_array(fv, bytes); + return fv; +} + +static size_t +filter_finfo_fvalues(df_cell_t *rp, GPtrArray *finfos, drange_t *range, bool raw) +{ + int length; /* maximum proto layer number. The numbers are sequential. 
*/ + field_info *last_finfo, *finfo; + fvalue_t *fv; + int cookie = -1; + bool cookie_matches = false; + int layer; + size_t count = 0; + + g_ptr_array_sort(finfos, compare_finfo_layer); + last_finfo = finfos->pdata[finfos->len - 1]; + length = last_finfo->proto_layer_num; + + for (unsigned i = 0; i < finfos->len; i++) { + finfo = finfos->pdata[i]; + layer = finfo->proto_layer_num; + if (cookie == layer) { + if (cookie_matches) { + if (rp != NULL) { + if (raw) + fv = dfvm_get_raw_fvalue(finfo); + else + fv = finfo->value; + df_cell_append(rp, fv); + } + count++; + } + } + else { + cookie = layer; + cookie_matches = drange_contains_layer(range, layer, length); + if (cookie_matches) { + if (rp != NULL) { + if (raw) + fv = dfvm_get_raw_fvalue(finfo); + else + fv = finfo->value; + df_cell_append(rp, fv); + } + count++; + } + } + } + return count; +} + +static bool +read_tree_finfos(df_cell_t *rp, proto_tree *tree, + header_field_info *hfinfo, drange_t *range, bool raw) +{ + GPtrArray *finfos; + field_info *finfo; + fvalue_t *fv; + + /* The caller should NOT free the GPtrArray. */ + finfos = proto_get_finfo_ptr_array(tree, hfinfo->id); + if (finfos == NULL || g_ptr_array_len(finfos) == 0) { + return false; + } + if (range) { + return filter_finfo_fvalues(rp, finfos, range, raw) > 0; + } + + for (unsigned i = 0; i < finfos->len; i++) { + finfo = g_ptr_array_index(finfos, i); + if (raw) + fv = dfvm_get_raw_fvalue(finfo); + else + fv = finfo->value; + df_cell_append(rp, fv); + } + return true; +} + +/* Reads a field from the proto_tree and loads the fvalues into a register, + * if that field has not already been read. 
*/ +static bool +read_tree(dfilter_t *df, proto_tree *tree, + dfvm_value_t *arg1, dfvm_value_t *arg2, + dfvm_value_t *arg3) +{ + drange_t *range = NULL; + bool raw; + df_cell_t *rp; + + header_field_info *hfinfo = arg1->value.hfinfo; + raw = arg1->type == RAW_HFINFO; + + int reg = arg2->value.numeric; + + if (arg3) { + range = arg3->value.drange; + } + + rp = &df->registers[reg]; + + /* Already loaded in this run of the dfilter? */ + if (!df_cell_is_null(rp)) { + return !df_cell_is_empty(rp); + } + + if (raw) { + df_cell_init(rp, true); + } + else { + // These values are referenced only, do not try to free it later. + df_cell_init(rp, false); + } + + while (hfinfo) { + read_tree_finfos(rp, tree, hfinfo, range, raw); + hfinfo = hfinfo->same_name_next; + } + + return !df_cell_is_empty(rp); +} + +static void +filter_refs_fvalues(df_cell_t *rp, GPtrArray *refs_array, drange_t *range) +{ + int length; /* maximum proto layer number. The numbers are sequential. */ + df_reference_t *last_ref = NULL; + int cookie = -1; + bool cookie_matches = false; + + if (!refs_array || refs_array->len == 0) { + return; + } + + /* refs array is sorted. 
*/ + last_ref = refs_array->pdata[refs_array->len - 1]; + length = last_ref->proto_layer_num; + + for (unsigned i = 0; i < refs_array->len; i++) { + df_reference_t *ref = refs_array->pdata[i]; + int layer = ref->proto_layer_num; + + if (range == NULL) { + df_cell_append(rp, ref->value); + continue; + } + + if (cookie == layer) { + if (cookie_matches) { + df_cell_append(rp, ref->value); + } + } + else { + cookie = layer; + cookie_matches = drange_contains_layer(range, layer, length); + if (cookie_matches) { + df_cell_append(rp, ref->value); + } + } + } +} + +static bool +read_reference(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2, + dfvm_value_t *arg3) +{ + df_cell_t *rp; + GPtrArray *refs; + drange_t *range = NULL; + bool raw; + + header_field_info *hfinfo = arg1->value.hfinfo; + raw = arg1->type == RAW_HFINFO; + + int reg = arg2->value.numeric; + + if (arg3) { + range = arg3->value.drange; + } + + rp = &df->registers[reg]; + + /* Already loaded in this run of the dfilter? */ + if (!df_cell_is_null(rp)) { + return !df_cell_is_empty(rp); + } + + refs = g_hash_table_lookup(raw ? df->raw_references : df->references, hfinfo); + if (refs == NULL || refs->len == 0) { + return false; + } + + // These values are referenced only, do not try to free it later. 
+ df_cell_init(rp, false); + filter_refs_fvalues(rp, refs, range); + /* The range may have filtered out every reference. Report that as a + * failed read, exactly like the already-loaded path does, so callers + * never run a test on an empty register. */ + return !df_cell_is_empty(rp); +} + +enum match_how { + MATCH_ANY, + MATCH_ALL +}; + +typedef ft_bool_t (*DFVMCompareFunc)(const fvalue_t*, const fvalue_t*); +typedef ft_bool_t (*DFVMTestFunc)(const fvalue_t*); + +/* Applies match_func to every (fv1, fv2) pair. MATCH_ALL fails on the first + * FT_FALSE, MATCH_ANY succeeds on the first FT_TRUE; with no decisive pair + * the result is true for MATCH_ALL and false for MATCH_ANY. */ +static bool +cmp_test_internal(enum match_how how, DFVMCompareFunc match_func, + GPtrArray *fv1, GPtrArray *fv2) +{ + bool want_all = (how == MATCH_ALL); + bool want_any = (how == MATCH_ANY); + ft_bool_t have_match; + + for (size_t idx1 = 0; idx1 < fv1->len; idx1++) { + for (size_t idx2 = 0; idx2 < fv2->len; idx2++) { + have_match = match_func(fv1->pdata[idx1], fv2->pdata[idx2]); + if (want_all && have_match == FT_FALSE) { + return false; + } + else if (want_any && have_match == FT_TRUE) { + return true; + } + } + } + /* want_all || !want_any */ + return want_all; +} + +/* Single-operand counterpart of cmp_test_internal() over fv_count values. */ +static bool +cmp_test_unary(enum match_how how, DFVMTestFunc test_func, + const fvalue_t **fv_ptr, size_t fv_count) +{ + bool want_all = (how == MATCH_ALL); + bool want_any = (how == MATCH_ANY); + ft_bool_t have_match; + + for (size_t idx = 0; idx < fv_count; idx++) { + have_match = test_func(fv_ptr[idx]); + if (want_all && have_match == FT_FALSE) { + return false; + } + else if (want_any && have_match == FT_TRUE) { + return true; + } + } + /* want_all || !want_any */ + return want_all; +} + +static bool +all_test_unary(dfilter_t *df, DFVMTestFunc func, dfvm_value_t *arg1) +{ + ws_assert(arg1->type == REGISTER); + df_cell_t *rp = &df->registers[arg1->value.numeric]; + return cmp_test_unary(MATCH_ALL, func, + (const fvalue_t **)df_cell_array(rp), df_cell_size(rp)); +} + +static bool +cmp_test(dfilter_t *df, DFVMCompareFunc cmp, + dfvm_value_t *arg1, dfvm_value_t *arg2, + enum match_how how) +{ + GPtrArray *fv1, *fv2; + + if (arg1->type == REGISTER) { + fv1 = df_cell_ptr(&df->registers[arg1->value.numeric]); + } + else if (arg1->type == FVALUE) { + fv1 = arg1->value.fvalue_p; + } + else { + ws_assert_not_reached(); + } + + if (arg2->type == 
REGISTER) { + fv2 = df_cell_ptr(&df->registers[arg2->value.numeric]); + } + else if (arg2->type == FVALUE) { + fv2 = arg2->value.fvalue_p; + } + else { + ws_assert_not_reached(); + } + + return cmp_test_internal(how, cmp, fv1, fv2); +} + +/* cmp(A) <=> cmp(a1) OR cmp(a2) OR cmp(a3) OR ... */ +static inline bool +any_test(dfilter_t *df, DFVMCompareFunc cmp, + dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + return cmp_test(df, cmp, arg1, arg2, MATCH_ANY); +} + +/* cmp(A) <=> cmp(a1) AND cmp(a2) AND cmp(a3) AND ... */ +static bool +all_test(dfilter_t *df, DFVMCompareFunc cmp, + dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + return cmp_test(df, cmp, arg1, arg2, MATCH_ALL); +} + +static bool +any_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + df_cell_t *rp = &df->registers[arg1->value.numeric]; + ws_regex_t *re = arg2->value.pcre; + + const fvalue_t **fv_ptr = (const fvalue_t **)df_cell_array(rp); + + for (size_t idx = 0; idx < df_cell_size(rp); idx++) { + if (fvalue_matches(fv_ptr[idx], re) == FT_TRUE) { + return true; + } + } + return false; +} + +static bool +all_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + df_cell_t *rp = &df->registers[arg1->value.numeric]; + ws_regex_t *re = arg2->value.pcre; + + const fvalue_t **fv_ptr = (const fvalue_t **)df_cell_array(rp); + + for (size_t idx = 0; idx < df_cell_size(rp); idx++) { + if (fvalue_matches(fv_ptr[idx], re) == FT_FALSE) { + return false; + } + } + return true; +} + +static bool +test_in_internal(fvalue_t *fv, GPtrArray *range[2]) +{ + GPtrArray *low = range[0]; + GPtrArray *high = range[1]; + bool low_ok = false, high_ok = false; + + if (high) { + /* range */ + for (unsigned i = 0; i < high->len; i++) { + if (fvalue_le(fv, high->pdata[i]) == FT_TRUE) { + high_ok = true; + break; + } + } + if (!high_ok) { + return false; + } + ws_assert(low); + for (unsigned i = 0; i < low->len; i++) { + if (fvalue_ge(fv, low->pdata[i]) == FT_TRUE) { + low_ok = true; + break; + } + } + } + else { 
+ /* single element */ + for (unsigned i = 0; i < low->len; i++) { + if (fvalue_eq(fv, low->pdata[i]) == FT_TRUE) { + low_ok = true; + break; + } + } + } + + return low_ok; +} + +static bool +any_in(dfilter_t *df, dfvm_value_t *arg1) +{ + df_cell_t *rp = &df->registers[arg1->value.numeric]; + GPtrArray *value; + GSList *stack; + bool ok; + + /* If the read failed we jump over the membership test. */ + ws_assert(!df_cell_is_empty(rp)); + value = df_cell_ptr(rp); + + for (size_t i = 0; i < value->len; i++) { + stack = df->set_stack; + ok = false; + while (stack) { + if (test_in_internal(value->pdata[i], stack->data)) { + ok = true; + break; + } + stack = stack->next; + } + if (ok) { + return true; + } + } + return false; +} + +static bool +all_in(dfilter_t *df, dfvm_value_t *arg1) +{ + df_cell_t *rp = &df->registers[arg1->value.numeric]; + GPtrArray *value; + GSList *stack; + bool ok; + + /* If the read failed we jump over the membership test. */ + ws_assert(!df_cell_is_empty(rp)); + value = df_cell_ptr(rp); + + for (size_t i = 0; i < value->len; i++) { + stack = df->set_stack; + ok = false; + while (stack) { + if (test_in_internal(value->pdata[i], stack->data)) { + ok = true; + break; + } + stack = stack->next; + } + if (!ok) { + return false; + } + } + return true; +} + +/* Clear registers that were populated during evaluation. + * If we created the values, then these will be freed as well. */ +static void +free_register_overhead(dfilter_t* df) +{ + for (unsigned i = 0; i < df->num_registers; i++) { + df_cell_clear(&df->registers[i]); + } +} + +/* Takes the list of fvalue_t's in a register, uses fvalue_slice() + * to make a new list of fvalue_t's (which are byte-slices), + * and puts the new list into a new register. 
*/ +static void +mk_slice(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg, + dfvm_value_t *drange_arg) +{ + df_cell_t *from_rp, *to_rp; + df_cell_iter_t from_iter; + fvalue_t *old_fv; + fvalue_t *new_fv; + + to_rp = &df->registers[to_arg->value.numeric]; + df_cell_init(to_rp, true); + from_rp = &df->registers[from_arg->value.numeric]; + drange_t *drange = drange_arg->value.drange; + + df_cell_iter_init(from_rp, &from_iter); + while ((old_fv = df_cell_iter_next(&from_iter)) != NULL) { + new_fv = fvalue_slice(old_fv, drange); + /* Assert here because semcheck.c should have + * already caught the cases in which a slice + * cannot be made. */ + ws_assert(new_fv); + df_cell_append(to_rp, new_fv); + } +} + +static void +mk_length(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg) +{ + df_cell_t *from_rp, *to_rp; + df_cell_iter_t from_iter; + fvalue_t *old_fv; + fvalue_t *new_fv; + + to_rp = &df->registers[to_arg->value.numeric]; + df_cell_init(to_rp, true); + from_rp = &df->registers[from_arg->value.numeric]; + + df_cell_iter_init(from_rp, &from_iter); + while ((old_fv = df_cell_iter_next(&from_iter)) != NULL) { + new_fv = fvalue_new(FT_UINT32); + fvalue_set_uinteger(new_fv, (uint32_t)fvalue_length2(old_fv)); + df_cell_append(to_rp, new_fv); + } +} + +static bool +call_function(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2, + dfvm_value_t *arg3) +{ + df_func_def_t *funcdef; + bool accum; + df_cell_t *rp_return; + uint32_t arg_count; + + + funcdef = arg1->value.funcdef; + rp_return = &df->registers[arg2->value.numeric]; + arg_count = arg3->value.numeric; + + // Functions create a new value, so own it. 
+ df_cell_init(rp_return, true); + + accum = funcdef->function(df->function_stack, arg_count, rp_return); + return accum; +} + +static void debug_op_error(const fvalue_t *v1, const fvalue_t *v2, const char *op, const char *msg) +{ + char *s1 = fvalue_to_debug_repr(NULL, v1); + char *s2 = fvalue_to_debug_repr(NULL, v2); + ws_noisy("Error: %s %s %s: %s", s1, op, s2, msg); + g_free(s1); + g_free(s2); +} + +/* Used for temporary debugging only, don't leave in production code (at + * a minimum WS_DEBUG_HERE must be replaced by another log level). */ +static void _U_ +debug_register(GSList *reg, uint32_t num) +{ + wmem_strbuf_t *buf; + GSList *l; + char *s; + + buf = wmem_strbuf_new(NULL, NULL); + + wmem_strbuf_append_printf(buf, "Reg#%"G_GUINT32_FORMAT" = { ", num); + for (l = reg; l != NULL; l = l->next) { + s = fvalue_to_debug_repr(NULL, l->data); + wmem_strbuf_append_printf(buf, "%s <%s>", s, fvalue_type_name(l->data)); + g_free(s); + if (l->next != NULL) { + wmem_strbuf_append(buf, ", "); + } + } + wmem_strbuf_append_c(buf, '}'); + WS_DEBUG_HERE("%s", wmem_strbuf_get_str(buf)); + wmem_strbuf_destroy(buf); +} + + +typedef fvalue_t* (*DFVMBinaryFunc)(const fvalue_t*, const fvalue_t*, char **); + +static void +mk_binary_internal(DFVMBinaryFunc func, GPtrArray *fv1, GPtrArray *fv2, df_cell_t *retval) +{ + fvalue_t *result; + char *err_msg = NULL; + + for (size_t i = 0; i < fv1->len; i++) { + for (size_t j = 0; j < fv2->len; j++) { + result = func(fv1->pdata[i], fv2->pdata[j], &err_msg); + if (result == NULL) { + debug_op_error(fv1->pdata[i], fv2->pdata[i], "&", err_msg); + g_free(err_msg); + err_msg = NULL; + } + else { + df_cell_append(retval, result); + } + } + } +} + +static void +mk_binary(dfilter_t *df, DFVMBinaryFunc func, + dfvm_value_t *arg1, dfvm_value_t *arg2, dfvm_value_t *to_arg) +{ + GPtrArray *val1, *val2; + df_cell_t *to_rp; + + if (arg1->type == REGISTER) { + val1 = df_cell_ptr(&df->registers[arg1->value.numeric]); + } + else if (arg1->type == FVALUE) { 
+ val1 = arg1->value.fvalue_p; + } + else { + ws_assert_not_reached(); + } + + if (arg2->type == REGISTER) { + val2 = df_cell_ptr(&df->registers[arg2->value.numeric]); + } + else if (arg2->type == FVALUE) { + val2 = arg2->value.fvalue_p; + } + else { + ws_assert_not_reached(); + } + + to_rp = &df->registers[to_arg->value.numeric]; + df_cell_init(to_rp, true); + + mk_binary_internal(func, val1, val2, to_rp); + //debug_register(result, to_arg->value.numeric); +} + +static void +mk_minus_internal(GPtrArray *fv, df_cell_t *retval) +{ + fvalue_t *result; + char *err_msg = NULL; + + for (size_t i = 0; i < fv->len; i++) { + result = fvalue_unary_minus(fv->pdata[i], &err_msg); + if (result == NULL) { + ws_noisy("unary_minus: %s", err_msg); + g_free(err_msg); + err_msg = NULL; + } + else { + df_cell_append(retval, result); + } + } +} + +static void +mk_minus(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg) +{ + GPtrArray *val; + df_cell_t *to_rp; + + if (arg1->type == REGISTER) { + val = df_cell_ptr(&df->registers[arg1->value.numeric]); + } + else if (arg1->type == FVALUE) { + val = arg1->value.fvalue_p; + } + else { + ws_assert_not_reached(); + } + + to_rp = &df->registers[to_arg->value.numeric]; + df_cell_init(to_rp, true); + + mk_minus_internal(val, to_rp); +} + +static void +put_fvalue(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg) +{ + df_cell_t *to_rp = &df->registers[to_arg->value.numeric]; + /* Memory is owned by the dfvm_value_t. 
*/ + df_cell_init(to_rp, false); + df_cell_append(to_rp, dfvm_value_get_fvalue(arg1)); +} + +static void +stack_push(dfilter_t *df, dfvm_value_t *arg1) +{ + GPtrArray *arg; + + if (arg1->type == FVALUE) { + arg = g_ptr_array_ref(arg1->value.fvalue_p); + } + else if (arg1->type == REGISTER) { + arg = df_cell_ref(&df->registers[arg1->value.numeric]); + } + else { + ws_assert_not_reached(); + } + df->function_stack = g_slist_prepend(df->function_stack, arg); +} + +static void +stack_pop(dfilter_t *df, dfvm_value_t *arg1) +{ + unsigned count = arg1->value.numeric; + + for (unsigned i = 0; i < count; i++) { + /* Free top of stack data. */ + g_ptr_array_unref(df->function_stack->data); + /* Remove top of stack. */ + df->function_stack = g_slist_delete_link(df->function_stack, df->function_stack); + } +} + +static void +set_push(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + GPtrArray **range; + + /* We don't need to use reference counting because the lifetime of each + * arg is guaranteed to outlive the set stack. 
*/ + + range = g_new0(GPtrArray *, 2); + + if (arg1->type == FVALUE) { + range[0] = arg1->value.fvalue_p; + } + else if (arg1->type == REGISTER) { + range[0] = df_cell_ptr(&df->registers[arg1->value.numeric]); + } + else { + ws_assert_not_reached(); + } + + if (arg2) { + if (arg2->type == FVALUE) { + range[1] = arg2->value.fvalue_p; + } + else if (arg2->type == REGISTER) { + range[1] = df_cell_ptr(&df->registers[arg2->value.numeric]); + } + else { + ws_assert_not_reached(); + } + } + + df->set_stack = g_slist_prepend(df->set_stack, range); +} + +static void +set_clear(dfilter_t *df) +{ + g_slist_free_full(df->set_stack, g_free); + df->set_stack = NULL; +} + +static bool +check_exists_finfos(proto_tree *tree, header_field_info *hfinfo, drange_t *range) +{ + GPtrArray *finfos; + + finfos = proto_get_finfo_ptr_array(tree, hfinfo->id); + if (finfos == NULL || g_ptr_array_len(finfos) == 0) { + return false; + } + if (range == NULL) { + return true; + } + return filter_finfo_fvalues(NULL, finfos, range, false) > 0; +} + +static bool +check_exists(proto_tree *tree, dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + header_field_info *hfinfo; + drange_t *range = NULL; + + hfinfo = arg1->value.hfinfo; + if (arg2) + range = arg2->value.drange; + + while (hfinfo) { + if (check_exists_finfos(tree, hfinfo, range)) { + return true; + } + hfinfo = hfinfo->same_name_next; + } + + return false; +} + +bool +dfvm_apply(dfilter_t *df, proto_tree *tree) +{ + int id, length; + bool accum = true; + dfvm_insn_t *insn; + dfvm_value_t *arg1; + dfvm_value_t *arg2; + dfvm_value_t *arg3 = NULL; + + ws_assert(tree); + + length = df->insns->len; + + for (id = 0; id < length; id++) { + + AGAIN: + insn = g_ptr_array_index(df->insns, id); + arg1 = insn->arg1; + arg2 = insn->arg2; + arg3 = insn->arg3; + + switch (insn->op) { + case DFVM_CHECK_EXISTS: + accum = check_exists(tree, arg1, NULL); + break; + + case DFVM_CHECK_EXISTS_R: + accum = check_exists(tree, arg1, arg2); + break; + + case DFVM_READ_TREE: 
+ accum = read_tree(df, tree, arg1, arg2, NULL); + break; + + case DFVM_READ_TREE_R: + accum = read_tree(df, tree, arg1, arg2, arg3); + break; + + case DFVM_READ_REFERENCE: + accum = read_reference(df, arg1, arg2, NULL); + break; + + case DFVM_READ_REFERENCE_R: + accum = read_reference(df, arg1, arg2, arg3); + break; + + case DFVM_PUT_FVALUE: + put_fvalue(df, arg1, arg2); + break; + + case DFVM_CALL_FUNCTION: + accum = call_function(df, arg1, arg2, arg3); + break; + + case DFVM_STACK_PUSH: + stack_push(df, arg1); + break; + + case DFVM_STACK_POP: + stack_pop(df, arg1); + break; + + case DFVM_SLICE: + mk_slice(df, arg1, arg2, arg3); + break; + + case DFVM_LENGTH: + mk_length(df, arg1, arg2); + break; + + case DFVM_ALL_EQ: + accum = all_test(df, fvalue_eq, arg1, arg2); + break; + + case DFVM_ANY_EQ: + accum = any_test(df, fvalue_eq, arg1, arg2); + break; + + case DFVM_ALL_NE: + accum = all_test(df, fvalue_ne, arg1, arg2); + break; + + case DFVM_ANY_NE: + accum = any_test(df, fvalue_ne, arg1, arg2); + break; + + case DFVM_ALL_GT: + accum = all_test(df, fvalue_gt, arg1, arg2); + break; + + case DFVM_ANY_GT: + accum = any_test(df, fvalue_gt, arg1, arg2); + break; + + case DFVM_ALL_GE: + accum = all_test(df, fvalue_ge, arg1, arg2); + break; + + case DFVM_ANY_GE: + accum = any_test(df, fvalue_ge, arg1, arg2); + break; + + case DFVM_ALL_LT: + accum = all_test(df, fvalue_lt, arg1, arg2); + break; + + case DFVM_ANY_LT: + accum = any_test(df, fvalue_lt, arg1, arg2); + break; + + case DFVM_ALL_LE: + accum = all_test(df, fvalue_le, arg1, arg2); + break; + + case DFVM_ANY_LE: + accum = any_test(df, fvalue_le, arg1, arg2); + break; + + case DFVM_BITWISE_AND: + mk_binary(df, fvalue_bitwise_and, arg1, arg2, arg3); + break; + + case DFVM_ADD: + mk_binary(df, fvalue_add, arg1, arg2, arg3); + break; + + case DFVM_SUBTRACT: + mk_binary(df, fvalue_subtract, arg1, arg2, arg3); + break; + + case DFVM_MULTIPLY: + mk_binary(df, fvalue_multiply, arg1, arg2, arg3); + break; + + case 
DFVM_DIVIDE: + mk_binary(df, fvalue_divide, arg1, arg2, arg3); + break; + + case DFVM_MODULO: + mk_binary(df, fvalue_modulo, arg1, arg2, arg3); + break; + + case DFVM_NOT_ALL_ZERO: + accum = !all_test_unary(df, fvalue_is_zero, arg1); + break; + + case DFVM_ALL_CONTAINS: + accum = all_test(df, fvalue_contains, arg1, arg2); + break; + + case DFVM_ANY_CONTAINS: + accum = any_test(df, fvalue_contains, arg1, arg2); + break; + + case DFVM_ALL_MATCHES: + accum = all_matches(df, arg1, arg2); + break; + + case DFVM_ANY_MATCHES: + accum = any_matches(df, arg1, arg2); + break; + + case DFVM_SET_ADD: + set_push(df, arg1, NULL); + break; + + case DFVM_SET_ADD_RANGE: + set_push(df, arg1, arg2); + break; + + case DFVM_SET_ALL_IN: + accum = all_in(df, arg1); + break; + + case DFVM_SET_ANY_IN: + accum = any_in(df, arg1); + break; + + case DFVM_SET_ALL_NOT_IN: + accum = !all_in(df, arg1); + break; + + case DFVM_SET_ANY_NOT_IN: + accum = !any_in(df, arg1); + break; + + case DFVM_SET_CLEAR: + set_clear(df); + break; + + case DFVM_UNARY_MINUS: + mk_minus(df, arg1, arg2); + break; + + case DFVM_NOT: + accum = !accum; + break; + + case DFVM_RETURN: + free_register_overhead(df); + return accum; + + case DFVM_IF_TRUE_GOTO: + if (accum) { + id = arg1->value.numeric; + goto AGAIN; + } + break; + + case DFVM_IF_FALSE_GOTO: + if (!accum) { + id = arg1->value.numeric; + goto AGAIN; + } + break; + } + } + + ws_assert_not_reached(); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h new file mode 100644 index 0000000..4e2a0ad --- /dev/null +++ b/epan/dfilter/dfvm.h @@ -0,0 +1,160 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * 
SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DFVM_H +#define DFVM_H + +#include <wsutil/regex.h> +#include <epan/proto.h> +#include "dfilter-int.h" +#include "syntax-tree.h" +#include "drange.h" +#include "dfunctions.h" + +typedef enum { + EMPTY, + FVALUE, + HFINFO, + RAW_HFINFO, + INSN_NUMBER, + REGISTER, + INTEGER, + DRANGE, + FUNCTION_DEF, + PCRE +} dfvm_value_type_t; + +typedef struct { + dfvm_value_type_t type; + + union { + GPtrArray *fvalue_p; /* Always has length == 1 */ + uint32_t numeric; + drange_t *drange; + header_field_info *hfinfo; + df_func_def_t *funcdef; + ws_regex_t *pcre; + } value; + + int ref_count; +} dfvm_value_t; + +#define dfvm_value_get_fvalue(val) ((val)->value.fvalue_p->pdata[0]) + +typedef enum { + + DFVM_IF_TRUE_GOTO, + DFVM_IF_FALSE_GOTO, + DFVM_CHECK_EXISTS, + DFVM_CHECK_EXISTS_R, + DFVM_NOT, + DFVM_RETURN, + DFVM_READ_TREE, + DFVM_READ_TREE_R, + DFVM_READ_REFERENCE, + DFVM_READ_REFERENCE_R, + DFVM_PUT_FVALUE, + DFVM_ALL_EQ, + DFVM_ANY_EQ, + DFVM_ALL_NE, + DFVM_ANY_NE, + DFVM_ALL_GT, + DFVM_ANY_GT, + DFVM_ALL_GE, + DFVM_ANY_GE, + DFVM_ALL_LT, + DFVM_ANY_LT, + DFVM_ALL_LE, + DFVM_ANY_LE, + DFVM_ALL_CONTAINS, + DFVM_ANY_CONTAINS, + DFVM_ALL_MATCHES, + DFVM_ANY_MATCHES, + DFVM_SET_ALL_IN, + DFVM_SET_ANY_IN, + DFVM_SET_ALL_NOT_IN, + DFVM_SET_ANY_NOT_IN, + DFVM_SET_ADD, + DFVM_SET_ADD_RANGE, + DFVM_SET_CLEAR, + DFVM_SLICE, + DFVM_LENGTH, + DFVM_BITWISE_AND, + DFVM_UNARY_MINUS, + DFVM_ADD, + DFVM_SUBTRACT, + DFVM_MULTIPLY, + DFVM_DIVIDE, + DFVM_MODULO, + DFVM_CALL_FUNCTION, + DFVM_STACK_PUSH, + DFVM_STACK_POP, + DFVM_NOT_ALL_ZERO, +} dfvm_opcode_t; + +const char * +dfvm_opcode_tostr(dfvm_opcode_t code); + +typedef struct { + int id; + dfvm_opcode_t op; + dfvm_value_t *arg1; + dfvm_value_t *arg2; + dfvm_value_t *arg3; +} dfvm_insn_t; + +dfvm_insn_t* +dfvm_insn_new(dfvm_opcode_t op); + +void +dfvm_insn_free(dfvm_insn_t *insn); + +dfvm_value_t* +dfvm_value_new(dfvm_value_type_t type); + +dfvm_value_t* 
+dfvm_value_ref(dfvm_value_t *v); + +void +dfvm_value_unref(dfvm_value_t *v); + +dfvm_value_t* +dfvm_value_new_fvalue(fvalue_t *fv); + +dfvm_value_t* +dfvm_value_new_hfinfo(header_field_info *hfinfo, bool raw); + +dfvm_value_t* +dfvm_value_new_register(int reg); + +dfvm_value_t* +dfvm_value_new_drange(drange_t *dr); + +dfvm_value_t* +dfvm_value_new_funcdef(df_func_def_t *funcdef); + +dfvm_value_t* +dfvm_value_new_pcre(ws_regex_t *re); + +dfvm_value_t* +dfvm_value_new_guint(unsigned num); + +void +dfvm_dump(FILE *f, dfilter_t *df, uint16_t flags); + +char * +dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, uint16_t flags); + +bool +dfvm_apply(dfilter_t *df, proto_tree *tree); + +fvalue_t * +dfvm_get_raw_fvalue(const field_info *fi); + +#endif diff --git a/epan/dfilter/drange.c b/epan/dfilter/drange.c new file mode 100644 index 0000000..e0aada4 --- /dev/null +++ b/epan/dfilter/drange.c @@ -0,0 +1,406 @@ +/* drange.c + * Routines for providing general range support to the dfilter library + * + * Copyright (c) 2000 by Ed Warnicke <hagbard@physics.rutgers.edu> + * + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1999 Gerald Combs + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#include "drange.h" + +#include <errno.h> +#include <stdlib.h> + + +/* drange_node constructor */ +drange_node* +drange_node_new(void) +{ + drange_node* new_range_node; + + new_range_node = g_new(drange_node,1); + new_range_node->start_offset = 0; + new_range_node->length = 0; + new_range_node->end_offset = 0; + new_range_node->ending = DRANGE_NODE_END_T_UNINITIALIZED; + return new_range_node; +} + +static bool +drange_str_to_gint32(const char *s, int32_t *pint, char **endptr, char **err_ptr) +{ + long integer; + + errno = 0; + integer = strtol(s, endptr, 0); + if (errno == EINVAL || *endptr == s) { + /* This isn't a valid number. 
*/ + *err_ptr = ws_strdup_printf("\"%s\" is not a valid number.", s); + return false; + } + if (errno == ERANGE || integer > INT32_MAX || integer < INT32_MIN) { + *err_ptr = ws_strdup_printf("\"%s\" causes an integer overflow.", s); + return false; + } + *pint = (int32_t)integer; + return true; +} + +/* drange_node constructor from string */ +drange_node* +drange_node_from_str(const char *range_str, char **err_ptr) +{ + const char *str; + char *endptr; + int32_t lower, upper = 0; + drange_node_end_t end = DRANGE_NODE_END_T_UNINITIALIZED; + drange_node *dn; + bool ok; + + /* + * The following syntax governs slices: + * [i:j] i = start_offset, j = length + * [i-j] i = start_offset, j = end_offset, inclusive. + * [i] i = start_offset, length = 1 + * [:j] start_offset = 0, length = j + * [i:] start_offset = i, end_offset = end_of_field + */ + + str = range_str; + if (*str == ':') { + lower = 0; + /* Do not advance 'str' here. */ + } + else { + if (!drange_str_to_gint32(str, &lower, &endptr, err_ptr)) + return NULL; + str = endptr; + } + + while (*str != '\0' && g_ascii_isspace(*str)) + str++; + + if (*str == '-') { + str++; + end = DRANGE_NODE_END_T_OFFSET; + ok = drange_str_to_gint32(str, &upper, &endptr, err_ptr); + str = endptr; + } + else if (*str == ':') { + str++; + if (*str == '\0') { + end = DRANGE_NODE_END_T_TO_THE_END; + ok = true; + } + else { + end = DRANGE_NODE_END_T_LENGTH; + ok = drange_str_to_gint32(str, &upper, &endptr, err_ptr); + str = endptr; + } + } + else if (*str == '\0') { + end = DRANGE_NODE_END_T_LENGTH; + upper = 1; + ok = true; + } + else { + ok = false; + } + + while (*str != '\0' && g_ascii_isspace(*str)) + str++; + + if (!ok || *str != '\0') { + *err_ptr = ws_strdup_printf("\"%s\" is not a valid range.", range_str); + return NULL; + } + + dn = drange_node_new(); + drange_node_set_start_offset(dn, lower); + switch (end) { + case DRANGE_NODE_END_T_LENGTH: + if (upper <= 0) { + *err_ptr = ws_strdup_printf("Range %s isn't valid " + "because 
length %d isn't positive", + range_str, upper); + drange_node_free(dn); + return NULL; + } + drange_node_set_length(dn, upper); + break; + case DRANGE_NODE_END_T_OFFSET: + if ((lower < 0 && upper > 0) || (lower > 0 && upper < 0)) { + *err_ptr = ws_strdup_printf("Range %s isn't valid " + "because %d and %d have different signs", + range_str, lower, upper); + drange_node_free(dn); + return NULL; + } + if (upper <= lower) { + *err_ptr = ws_strdup_printf("Range %s isn't valid " + "because %d is greater or equal than %d", + range_str, lower, upper); + drange_node_free(dn); + return NULL; + } + drange_node_set_end_offset(dn, upper); + break; + case DRANGE_NODE_END_T_TO_THE_END: + drange_node_set_to_the_end(dn); + break; + default: + ws_assert_not_reached(); + break; + } + + return dn; +} + +static drange_node* +drange_node_dup(drange_node *org) +{ + drange_node *new_range_node; + + if (!org) + return NULL; + + new_range_node = g_new(drange_node,1); + new_range_node->start_offset = org->start_offset; + new_range_node->length = org->length; + new_range_node->end_offset = org->end_offset; + new_range_node->ending = org->ending; + return new_range_node; +} + +/* drange_node destructor */ +void +drange_node_free(drange_node* drnode) +{ + g_free(drnode); +} + +/* drange_node accessors */ +int +drange_node_get_start_offset(drange_node* drnode) +{ + ws_assert(drnode->ending != DRANGE_NODE_END_T_UNINITIALIZED); + return drnode->start_offset; +} + +int +drange_node_get_length(drange_node* drnode) +{ + ws_assert(drnode->ending == DRANGE_NODE_END_T_LENGTH); + return drnode->length; +} + +int +drange_node_get_end_offset(drange_node* drnode) +{ + ws_assert(drnode->ending == DRANGE_NODE_END_T_OFFSET); + return drnode->end_offset; +} + +drange_node_end_t +drange_node_get_ending(drange_node* drnode) +{ + ws_assert(drnode->ending != DRANGE_NODE_END_T_UNINITIALIZED); + return drnode->ending; +} + +/* drange_node mutators */ +void +drange_node_set_start_offset(drange_node* drnode, int 
offset) +{ + drnode->start_offset = offset; +} + +void +drange_node_set_length(drange_node* drnode, int length) +{ + drnode->length = length; + drnode->ending = DRANGE_NODE_END_T_LENGTH; +} + +void +drange_node_set_end_offset(drange_node* drnode, int offset) +{ + drnode->end_offset = offset; + drnode->ending = DRANGE_NODE_END_T_OFFSET; +} + + +void +drange_node_set_to_the_end(drange_node* drnode) +{ + drnode->ending = DRANGE_NODE_END_T_TO_THE_END; +} + +/* drange constructor */ +drange_t * +drange_new(drange_node* drnode) +{ + drange_t * new_drange; + new_drange = g_new(drange_t,1); + new_drange->range_list = NULL; + new_drange->has_total_length = true; + new_drange->total_length = 0; + new_drange->min_start_offset = INT_MAX; + new_drange->max_start_offset = INT_MIN; + + if (drnode) + drange_append_drange_node(new_drange, drnode); + + return new_drange; +} + +static void +drange_append_wrapper(void *data, void *user_data) +{ + drange_node *drnode = (drange_node *)data; + drange_t *dr = (drange_t *)user_data; + + drange_append_drange_node(dr, drnode); +} + +drange_t * +drange_new_from_list(GSList *list) +{ + drange_t *new_drange; + + new_drange = drange_new(NULL); + g_slist_foreach(list, drange_append_wrapper, new_drange); + return new_drange; +} + +drange_t * +drange_dup(drange_t *org) +{ + drange_t *new_drange; + GSList *p; + + if (!org) + return NULL; + + new_drange = drange_new(NULL); + for (p = org->range_list; p; p = p->next) { + drange_node *drnode = (drange_node *)p->data; + drange_append_drange_node(new_drange, drange_node_dup(drnode)); + } + return new_drange; +} + + +/* drange destructor */ +void +drange_free(drange_t * dr) +{ + drange_node_free_list(dr->range_list); + g_free(dr); +} + +/* Call drange_node destructor on all list items */ +void +drange_node_free_list(GSList* list) +{ + g_slist_free_full(list, g_free); +} + +/* drange accessors */ +bool drange_has_total_length(drange_t * dr) { return dr->has_total_length; } +int 
drange_get_total_length(drange_t * dr) { return dr->total_length; } +int drange_get_min_start_offset(drange_t * dr) { return dr->min_start_offset; } +int drange_get_max_start_offset(drange_t * dr) { return dr->max_start_offset; } + +static void +update_drange_with_node(drange_t *dr, drange_node *drnode) +{ + if(drnode->ending == DRANGE_NODE_END_T_TO_THE_END){ + dr->has_total_length = false; + } + else if(dr->has_total_length){ + dr->total_length += drnode->length; + } + if(drnode->start_offset < dr->min_start_offset){ + dr->min_start_offset = drnode->start_offset; + } + if(drnode->start_offset > dr->max_start_offset){ + dr->max_start_offset = drnode->start_offset; + } +} + +/* drange mutators */ +void +drange_prepend_drange_node(drange_t * dr, drange_node* drnode) +{ + if(drnode != NULL){ + dr->range_list = g_slist_prepend(dr->range_list,drnode); + update_drange_with_node(dr, drnode); + } +} + +void +drange_append_drange_node(drange_t * dr, drange_node* drnode) +{ + if(drnode != NULL){ + dr->range_list = g_slist_append(dr->range_list,drnode); + update_drange_with_node(dr, drnode); + } +} + +void +drange_foreach_drange_node(drange_t * dr, GFunc func, void *funcdata) +{ + g_slist_foreach(dr->range_list,func,funcdata); +} + +char * +drange_node_tostr(const drange_node *rn) +{ + if (rn->ending == DRANGE_NODE_END_T_TO_THE_END) + return ws_strdup_printf("%d:", rn->start_offset); + else if(rn->ending == DRANGE_NODE_END_T_OFFSET) + return ws_strdup_printf("%d-%d", rn->start_offset, rn->end_offset); + else if (rn->ending == DRANGE_NODE_END_T_LENGTH) + return ws_strdup_printf("%d:%d", rn->start_offset, rn->length); + else + return ws_strdup_printf("%d/%d/%d/U", rn->start_offset, rn->length, rn->end_offset); +} + +char * +drange_tostr(const drange_t *dr) +{ + GString *repr = g_string_new(""); + GSList *range_list = dr->range_list; + char *s; + + while (range_list) { + s = drange_node_tostr(range_list->data); + g_string_append(repr, s); + g_free(s); + range_list = 
g_slist_next(range_list); + if (range_list != NULL) { + g_string_append_c(repr, ','); + } + } + + return g_string_free(repr, false); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + * + * vi: set shiftwidth=4 tabstop=8 expandtab: + * :indentSize=4:tabSize=8:noTabs=true: + */ diff --git a/epan/dfilter/drange.h b/epan/dfilter/drange.h new file mode 100644 index 0000000..8162afd --- /dev/null +++ b/epan/dfilter/drange.h @@ -0,0 +1,97 @@ +/** @file + * + * Routines for providing general range support to the dfilter library + * + * Copyright (c) 2000 by Ed Warnicke <hagbard@physics.rutgers.edu> + * + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1999 Gerald Combs + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef __DRANGE_H__ +#define __DRANGE_H__ + +#include <wireshark.h> + +/* Please don't directly manipulate these structs. Please use + * the methods provided. If you REALLY can't do what you need to + * do with the methods provided please write new methods that do + * what you need, put them into the drange object here, and limit + * your direct manipulation of the drange and drange_node structs to + * here. 
+ */ + +typedef enum { + DRANGE_NODE_END_T_UNINITIALIZED, + DRANGE_NODE_END_T_LENGTH, + DRANGE_NODE_END_T_OFFSET, + DRANGE_NODE_END_T_TO_THE_END +} drange_node_end_t; + +typedef struct _drange_node { + int start_offset; + int length; + int end_offset; + drange_node_end_t ending; +} drange_node; + +typedef struct _drange { + GSList* range_list; + bool has_total_length; + int total_length; + int min_start_offset; + int max_start_offset; +} drange_t; + +/* drange_node constructor */ +drange_node* drange_node_new(void); + +/* drange_node constructor from string */ +drange_node* drange_node_from_str(const char *range_str, char **err_ptr); + +/* drange_node destructor */ +void drange_node_free(drange_node* drnode); + +/* Call drange_node destructor on all list items */ +void drange_node_free_list(GSList* list); + +/* drange_node accessors */ +int drange_node_get_start_offset(drange_node* drnode); +int drange_node_get_length(drange_node* drnode); +int drange_node_get_end_offset(drange_node* drnode); +drange_node_end_t drange_node_get_ending(drange_node* drnode); + +/* drange_node mutators */ +void drange_node_set_start_offset(drange_node* drnode, int offset); +void drange_node_set_length(drange_node* drnode, int length); +void drange_node_set_end_offset(drange_node* drnode, int offset); +void drange_node_set_to_the_end(drange_node* drnode); + +/* drange constructor */ +drange_t * drange_new(drange_node* drnode); +drange_t * drange_new_from_list(GSList *list); +drange_t * drange_dup(drange_t *org); + +/* drange destructor, only use this if you used drange_new() to create + * the drange + */ +void drange_free(drange_t* dr); + +/* drange accessors */ +bool drange_has_total_length(drange_t* dr); +int drange_get_total_length(drange_t* dr); +int drange_get_min_start_offset(drange_t* dr); +int drange_get_max_start_offset(drange_t* dr); + +/* drange mutators */ +void drange_append_drange_node(drange_t* dr, drange_node* drnode); +void drange_prepend_drange_node(drange_t* dr, drange_node* 
drnode); +void drange_foreach_drange_node(drange_t* dr, GFunc func, void *funcdata); + +char *drange_node_tostr(const drange_node *rn); + +char *drange_tostr(const drange_t *dr); + +#endif /* ! __DRANGE_H__ */ diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c new file mode 100644 index 0000000..6c4da95 --- /dev/null +++ b/epan/dfilter/gencode.c @@ -0,0 +1,897 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#include "gencode.h" +#include "dfvm.h" +#include "syntax-tree.h" +#include "sttype-field.h" +#include "sttype-slice.h" +#include "sttype-op.h" +#include "sttype-set.h" +#include "sttype-function.h" +#include "ftypes/ftypes.h" +#include <wsutil/ws_assert.h> + +static void +fixup_jumps(void *data, void *user_data); + +static void +gencode(dfwork_t *dfw, stnode_t *st_node); + +static dfvm_value_t * +gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr); + +static dfvm_opcode_t +select_opcode(dfvm_opcode_t op, stmatch_t how) +{ + if (how == STNODE_MATCH_DEF) + return op; + + switch (op) { + case DFVM_ALL_EQ: + case DFVM_ALL_NE: + case DFVM_ALL_GT: + case DFVM_ALL_GE: + case DFVM_ALL_LT: + case DFVM_ALL_LE: + case DFVM_ALL_CONTAINS: + case DFVM_ALL_MATCHES: + case DFVM_SET_ALL_IN: + case DFVM_SET_ALL_NOT_IN: + return how == STNODE_MATCH_ALL ? op : op + 1; + case DFVM_ANY_EQ: + case DFVM_ANY_NE: + case DFVM_ANY_GT: + case DFVM_ANY_GE: + case DFVM_ANY_LT: + case DFVM_ANY_LE: + case DFVM_ANY_CONTAINS: + case DFVM_ANY_MATCHES: + case DFVM_SET_ANY_IN: + case DFVM_SET_ANY_NOT_IN: + return how == STNODE_MATCH_ANY ? 
op : op - 1; + default: + break; + } + ws_assert_not_reached(); +} + +static void +dfw_append_insn(dfwork_t *dfw, dfvm_insn_t *insn) +{ + insn->id = dfw->next_insn_id; + dfw->next_insn_id++; + g_ptr_array_add(dfw->insns, insn); +} + +static void +dfw_append_stack_push(dfwork_t *dfw, dfvm_value_t *arg1) +{ + dfvm_insn_t *insn; + + insn = dfvm_insn_new(DFVM_STACK_PUSH); + insn->arg1 = dfvm_value_ref(arg1); + dfw_append_insn(dfw, insn); +} + +static void +dfw_append_stack_pop(dfwork_t *dfw, unsigned count) +{ + dfvm_insn_t *insn; + dfvm_value_t *val; + + insn = dfvm_insn_new(DFVM_STACK_POP); + val = dfvm_value_new_guint(count); + insn->arg1 = dfvm_value_ref(val); + dfw_append_insn(dfw, insn); +} + +static void +dfw_append_set_add_range(dfwork_t *dfw, dfvm_value_t *arg1, dfvm_value_t *arg2) +{ + dfvm_insn_t *insn; + + insn = dfvm_insn_new(DFVM_SET_ADD_RANGE); + insn->arg1 = dfvm_value_ref(arg1); + insn->arg2 = dfvm_value_ref(arg2); + dfw_append_insn(dfw, insn); +} + +static void +dfw_append_set_add(dfwork_t *dfw, dfvm_value_t *arg1) +{ + dfvm_insn_t *insn; + + insn = dfvm_insn_new(DFVM_SET_ADD); + insn->arg1 = dfvm_value_ref(arg1); + dfw_append_insn(dfw, insn); +} + +static dfvm_value_t * +dfw_append_jump(dfwork_t *dfw) +{ + dfvm_insn_t *insn; + dfvm_value_t *jmp; + + insn = dfvm_insn_new(DFVM_IF_FALSE_GOTO); + jmp = dfvm_value_new(INSN_NUMBER); + insn->arg1 = dfvm_value_ref(jmp); + dfw_append_insn(dfw, insn); + return jmp; +} + +/* returns register number */ +static dfvm_value_t * +dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo, + drange_t *range, + bool raw) +{ + dfvm_insn_t *insn; + int reg = -1; + dfvm_value_t *reg_val, *val1, *val3; + bool added_new_hfinfo = false; + GHashTable *loaded_fields; + void *loaded_key; + + /* Rewind to find the first field of this name. 
*/ + while (hfinfo->same_name_prev_id != -1) { + hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id); + } + + if (raw) + loaded_fields = dfw->loaded_raw_fields; + else + loaded_fields = dfw->loaded_fields; + + /* Keep track of which registers + * were used for which hfinfo's so that we + * can re-use registers. */ + /* Re-use only if we are not using a range (layer filter). */ + loaded_key = g_hash_table_lookup(loaded_fields, hfinfo); + if (loaded_key != NULL) { + if (range == NULL) { + /* + * Reg's are stored in the hash as reg+1, so + * that the non-existence of a hfinfo in + * the hash, or 0, can be differentiated from + * a hfinfo being loaded into register #0. + */ + reg = GPOINTER_TO_INT(loaded_key) - 1; + } + else { + reg = dfw->next_register++; + } + } + else { + reg = dfw->next_register++; + g_hash_table_insert(loaded_fields, + hfinfo, GINT_TO_POINTER(reg + 1)); + + added_new_hfinfo = true; + } + + val1 = dfvm_value_new_hfinfo(hfinfo, raw); + reg_val = dfvm_value_new_register(reg); + if (range) { + val3 = dfvm_value_new_drange(range); + insn = dfvm_insn_new(DFVM_READ_TREE_R); + } + else { + val3 = NULL; + insn = dfvm_insn_new(DFVM_READ_TREE); + } + insn->arg1 = dfvm_value_ref(val1); + insn->arg2 = dfvm_value_ref(reg_val); + insn->arg3 = dfvm_value_ref(val3); + dfw_append_insn(dfw, insn); + + if (added_new_hfinfo) { + while (hfinfo) { + /* Record the FIELD_ID in hash of interesting fields. */ + g_hash_table_add(dfw->interesting_fields, &hfinfo->id); + hfinfo = hfinfo->same_name_next; + } + } + + return reg_val; +} + +/* returns register number */ +static dfvm_value_t * +dfw_append_read_reference(dfwork_t *dfw, header_field_info *hfinfo, + drange_t *range, + bool raw) +{ + dfvm_insn_t *insn; + dfvm_value_t *reg_val, *val1, *val3; + GPtrArray *refs_array; + + /* Rewind to find the first field of this name. 
*/ + while (hfinfo->same_name_prev_id != -1) { + hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id); + } + + /* We can't reuse registers with a filter so just skip + * that optimization and don't reuse them at all. */ + val1 = dfvm_value_new_hfinfo(hfinfo, raw); + reg_val = dfvm_value_new_register(dfw->next_register++); + if (range) { + val3 = dfvm_value_new_drange(range); + insn = dfvm_insn_new(DFVM_READ_REFERENCE_R); + } + else { + val3 = NULL; + insn = dfvm_insn_new(DFVM_READ_REFERENCE); + } + insn->arg1 = dfvm_value_ref(val1); + insn->arg2 = dfvm_value_ref(reg_val); + insn->arg3 = dfvm_value_ref(val3); + dfw_append_insn(dfw, insn); + + refs_array = g_ptr_array_new_with_free_func((GDestroyNotify)reference_free); + if (raw) + g_hash_table_insert(dfw->raw_references, hfinfo, refs_array); + else + g_hash_table_insert(dfw->references, hfinfo, refs_array); + + /* Record the FIELD_ID in hash of interesting fields. */ + while (hfinfo) { + /* Record the FIELD_ID in hash of interesting fields. 
*/ + g_hash_table_add(dfw->interesting_fields, &hfinfo->id); + hfinfo = hfinfo->same_name_next; + } + + return reg_val; +} + +/* returns register number */ +static dfvm_value_t * +dfw_append_mk_slice(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr) +{ + stnode_t *entity; + dfvm_insn_t *insn; + dfvm_value_t *reg_val, *val1, *val3; + + entity = sttype_slice_entity(node); + + insn = dfvm_insn_new(DFVM_SLICE); + val1 = gen_entity(dfw, entity, jumps_ptr); + insn->arg1 = dfvm_value_ref(val1); + reg_val = dfvm_value_new_register(dfw->next_register++); + insn->arg2 = dfvm_value_ref(reg_val); + val3 = dfvm_value_new_drange(sttype_slice_drange_steal(node)); + insn->arg3 = dfvm_value_ref(val3); + sttype_slice_remove_drange(node); + dfw_append_insn(dfw, insn); + + return reg_val; +} + +/* returns register number */ +_U_ static dfvm_value_t * +dfw_append_put_fvalue(dfwork_t *dfw, fvalue_t *fv) +{ + dfvm_insn_t *insn; + dfvm_value_t *reg_val, *val1; + + insn = dfvm_insn_new(DFVM_PUT_FVALUE); + val1 = dfvm_value_new_fvalue(fv); + insn->arg1 = dfvm_value_ref(val1); + reg_val = dfvm_value_new_register(dfw->next_register++); + insn->arg2 = dfvm_value_ref(reg_val); + dfw_append_insn(dfw, insn); + + return reg_val; +} + +/* returns register number that the length's result will be in. */ +static dfvm_value_t * +dfw_append_length(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr) +{ + GSList *params; + dfvm_insn_t *insn; + dfvm_value_t *reg_val, *val_arg; + + /* Create the new DFVM instruction */ + insn = dfvm_insn_new(DFVM_LENGTH); + /* Create input argument */ + params = sttype_function_params(node); + ws_assert(params); + ws_assert(g_slist_length(params) == 1); + val_arg = gen_entity(dfw, params->data, jumps_ptr); + insn->arg1 = dfvm_value_ref(val_arg); + /* Destination. 
*/ + reg_val = dfvm_value_new_register(dfw->next_register++); + insn->arg2 = dfvm_value_ref(reg_val); + + dfw_append_insn(dfw, insn); + return reg_val; +} + +/* returns register number that the function's result will be in. */ +static dfvm_value_t * +dfw_append_function(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr) +{ + GSList *params; + dfvm_value_t *jmp; + dfvm_insn_t *insn; + dfvm_value_t *reg_val, *val1, *val3, *val_arg; + unsigned count; + GSList *params_jumps = NULL; + + if (strcmp(sttype_function_name(node), "len") == 0) { + /* Replace len() function call with DFVM_LENGTH instruction. */ + return dfw_append_length(dfw, node, jumps_ptr); + } + + /* Create the new DFVM instruction */ + insn = dfvm_insn_new(DFVM_CALL_FUNCTION); + val1 = dfvm_value_new_funcdef(sttype_function_funcdef(node)); + insn->arg1 = dfvm_value_ref(val1); + reg_val = dfvm_value_new_register(dfw->next_register++); + insn->arg2 = dfvm_value_ref(reg_val); + + /* Create input arguments */ + params = sttype_function_params(node); + ws_assert(params); + count = 0; + while (params) { + val_arg = gen_entity(dfw, params->data, &params_jumps); + /* If a parameter fails to generate, jump here. + * Note: stack_push NULL register is valid. 
*/ + g_slist_foreach(params_jumps, fixup_jumps, dfw); + g_slist_free(params_jumps); + params_jumps = NULL; + dfw_append_stack_push(dfw, val_arg); + count++; + params = params->next; + } + val3 = dfvm_value_new_guint(count); + insn->arg3 = dfvm_value_ref(val3); + dfw_append_insn(dfw, insn); + dfw_append_stack_pop(dfw, count); + + /* We need another instruction to jump to another exit + * place, if the call() of our function failed for some reason */ + insn = dfvm_insn_new(DFVM_IF_FALSE_GOTO); + jmp = dfvm_value_new(INSN_NUMBER); + insn->arg1 = dfvm_value_ref(jmp); + dfw_append_insn(dfw, insn); + *jumps_ptr = g_slist_prepend(*jumps_ptr, jmp); + + return reg_val; +} + +/** + * Adds an instruction for a relation operator where the values are already + * loaded in registers. + */ +static void +gen_relation_insn(dfwork_t *dfw, dfvm_opcode_t op, + dfvm_value_t *arg1, dfvm_value_t *arg2, + dfvm_value_t *arg3) +{ + dfvm_insn_t *insn; + + insn = dfvm_insn_new(op); + insn->arg1 = dfvm_value_ref(arg1); + insn->arg2 = dfvm_value_ref(arg2); + insn->arg3 = dfvm_value_ref(arg3); + dfw_append_insn(dfw, insn); +} + +static void +gen_relation(dfwork_t *dfw, dfvm_opcode_t op, stmatch_t how, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + GSList *jumps = NULL; + dfvm_value_t *val1, *val2; + + /* Create code for the LHS and RHS of the relation */ + val1 = gen_entity(dfw, st_arg1, &jumps); + val2 = gen_entity(dfw, st_arg2, &jumps); + + /* Then combine them in a DFVM insruction */ + op = select_opcode(op, how); + gen_relation_insn(dfw, op, val1, val2, NULL); + + /* If either of the relation arguments need an "exit" instruction + * to jump to (on failure), mark them */ + g_slist_foreach(jumps, fixup_jumps, dfw); + g_slist_free(jumps); + jumps = NULL; +} + +static void +fixup_jumps(void *data, void *user_data) +{ + dfvm_value_t *jmp = (dfvm_value_t*)data; + dfwork_t *dfw = (dfwork_t*)user_data; + + if (jmp) { + jmp->value.numeric = dfw->next_insn_id; + } +} + +/* Generate the code for the in 
operator. Pushes set values into a stack + * and then evaluates membership in a single instruction. */ +static void +gen_relation_in(dfwork_t *dfw, dfvm_opcode_t op, stmatch_t how, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + dfvm_insn_t *insn; + GSList *jumps = NULL; + GSList *node_jumps = NULL; + dfvm_value_t *val1, *val2, *val3; + stnode_t *node1, *node2; + GSList *nodelist_head, *nodelist; + + /* Create code for the LHS of the relation */ + val1 = gen_entity(dfw, st_arg1, &jumps); + + /* Create code to populate the set stack */ + nodelist_head = nodelist = stnode_steal_data(st_arg2); + while (nodelist) { + node1 = nodelist->data; + nodelist = g_slist_next(nodelist); + node2 = nodelist->data; + nodelist = g_slist_next(nodelist); + + if (node2) { + /* Range element. */ + val2 = gen_entity(dfw, node1, &node_jumps); + val3 = gen_entity(dfw, node2, &node_jumps); + dfw_append_set_add_range(dfw, val2, val3); + } else { + /* Normal element. */ + val2 = gen_entity(dfw, node1, &node_jumps); + dfw_append_set_add(dfw, val2); + } + + /* If an item is not present, just jump to the next item */ + g_slist_foreach(node_jumps, fixup_jumps, dfw); + g_slist_free(node_jumps); + node_jumps = NULL; + } + set_nodelist_free(nodelist_head); + + /* Create code for the set on the RHS of the relation */ + insn = dfvm_insn_new(select_opcode(op, how)); + insn->arg1 = dfvm_value_ref(val1); + dfw_append_insn(dfw, insn); + + /* Add instruction to clear the whole stack */ + insn = dfvm_insn_new(DFVM_SET_CLEAR); + dfw_append_insn(dfw, insn); + + /* Jump here if the LHS entity was not present */ + g_slist_foreach(jumps, fixup_jumps, dfw); + g_slist_free(jumps); + jumps = NULL; +} + +static dfvm_value_t * +gen_arithmetic(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr) +{ + stnode_t *left, *right; + stnode_op_t st_op; + dfvm_value_t *reg_val, *val1, *val2 = NULL; + dfvm_opcode_t op; + + sttype_oper_get(st_arg, &st_op, &left, &right); + + if (st_op == STNODE_OP_UNARY_MINUS) { + op = 
DFVM_UNARY_MINUS; + } + else if (st_op == STNODE_OP_ADD) { + op = DFVM_ADD; + } + else if (st_op == STNODE_OP_SUBTRACT) { + op = DFVM_SUBTRACT; + } + else if (st_op == STNODE_OP_MULTIPLY) { + op = DFVM_MULTIPLY; + } + else if (st_op == STNODE_OP_DIVIDE) { + op = DFVM_DIVIDE; + } + else if (st_op == STNODE_OP_MODULO) { + op = DFVM_MODULO; + } + else if (st_op == STNODE_OP_BITWISE_AND) { + op = DFVM_BITWISE_AND; + } + else { + ws_assert_not_reached(); + } + + val1 = gen_entity(dfw, left, jumps_ptr); + if (right == NULL) { + /* Generate unary DFVM instruction. */ + reg_val = dfvm_value_new_register(dfw->next_register++); + gen_relation_insn(dfw, op, val1, reg_val, NULL); + return reg_val; + } + + val2 = gen_entity(dfw, right, jumps_ptr); + reg_val = dfvm_value_new_register(dfw->next_register++); + gen_relation_insn(dfw, op, val1, val2, reg_val); + return reg_val; +} + +/* Parse an entity, returning the value (register or constant) that it gets + * put into. If jumps_ptr is not NULL, loading a field or reference prepends + * the result of dfw_append_jump() to *jumps_ptr; the calling code should + * set those jumps to the place to jump to, to return to the calling code, + * if the load of a field from the proto_tree fails.
*/ +static dfvm_value_t * +gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr) +{ + sttype_id_t e_type; + dfvm_value_t *val; + header_field_info *hfinfo; + drange_t *range = NULL; + bool raw; + e_type = stnode_type_id(st_arg); + + if (e_type == STTYPE_FIELD) { + hfinfo = sttype_field_hfinfo(st_arg); + range = sttype_field_drange_steal(st_arg); + raw = sttype_field_raw(st_arg); + val = dfw_append_read_tree(dfw, hfinfo, range, raw); + if (jumps_ptr != NULL) { + *jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw)); + } + } + else if (e_type == STTYPE_REFERENCE) { + hfinfo = sttype_field_hfinfo(st_arg); + range = sttype_field_drange_steal(st_arg); + raw = sttype_field_raw(st_arg); + val = dfw_append_read_reference(dfw, hfinfo, range, raw); + if (jumps_ptr != NULL) { + *jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw)); + } + } + else if (e_type == STTYPE_FVALUE) { + val = dfvm_value_new_fvalue(stnode_steal_data(st_arg)); + } + else if (e_type == STTYPE_SLICE) { + val = dfw_append_mk_slice(dfw, st_arg, jumps_ptr); + } + else if (e_type == STTYPE_FUNCTION) { + val = dfw_append_function(dfw, st_arg, jumps_ptr); + } + else if (e_type == STTYPE_PCRE) { + val = dfvm_value_new_pcre(stnode_steal_data(st_arg)); + } + else if (e_type == STTYPE_ARITHMETIC) { + val = gen_arithmetic(dfw, st_arg, jumps_ptr); + } + else { + ws_error("Invalid sttype: %s", stnode_type_name(st_arg)); + } + return val; +} + +static void +gen_exists(dfwork_t *dfw, stnode_t *st_node) +{ + dfvm_insn_t *insn; + dfvm_value_t *val1, *val2 = NULL; + header_field_info *hfinfo; + drange_t *range = NULL; + + hfinfo = sttype_field_hfinfo(st_node); + range = sttype_field_drange_steal(st_node); + + /* Rewind to find the first field of this name. */ + while (hfinfo->same_name_prev_id != -1) { + hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id); + } + + /* Ignore "rawness" for existence tests. 
*/ + val1 = dfvm_value_new_hfinfo(hfinfo, false); + if (range) { + val2 = dfvm_value_new_drange(range); + } + + if (val2) { + insn = dfvm_insn_new(DFVM_CHECK_EXISTS_R); + insn->arg1 = dfvm_value_ref(val1); + insn->arg2 = dfvm_value_ref(val2); + } + else { + insn = dfvm_insn_new(DFVM_CHECK_EXISTS); + insn->arg1 = dfvm_value_ref(val1); + } + dfw_append_insn(dfw, insn); + + /* Record the FIELD_ID in hash of interesting fields. */ + while (hfinfo) { + g_hash_table_add(dfw->interesting_fields, &hfinfo->id); + hfinfo = hfinfo->same_name_next; + } +} + +static void +gen_notzero(dfwork_t *dfw, stnode_t *st_node) +{ + dfvm_insn_t *insn; + dfvm_value_t *val1; + GSList *jumps = NULL; + + val1 = gen_arithmetic(dfw, st_node, &jumps); + insn = dfvm_insn_new(DFVM_NOT_ALL_ZERO); + insn->arg1 = dfvm_value_ref(val1); + dfw_append_insn(dfw, insn); + g_slist_foreach(jumps, fixup_jumps, dfw); + g_slist_free(jumps); +} + +static void +gen_exists_slice(dfwork_t *dfw, stnode_t *st_node) +{ + dfvm_insn_t *insn; + dfvm_value_t *val1, *reg_val; + GSList *jumps = NULL; + + val1 = gen_entity(dfw, st_node, &jumps); + /* Compute length. */ + insn = dfvm_insn_new(DFVM_LENGTH); + insn->arg1 = dfvm_value_ref(val1); + reg_val = dfvm_value_new_register(dfw->next_register++); + insn->arg2 = dfvm_value_ref(reg_val); + dfw_append_insn(dfw, insn); + /* Check length is not zero. */ + insn = dfvm_insn_new(DFVM_NOT_ALL_ZERO); + insn->arg1 = dfvm_value_ref(reg_val); + dfw_append_insn(dfw, insn); + /* Fixup jumps. 
*/ + g_slist_foreach(jumps, fixup_jumps, dfw); + g_slist_free(jumps); +} + +static void +gen_test(dfwork_t *dfw, stnode_t *st_node) +{ + stnode_op_t st_op; + stmatch_t st_how; + stnode_t *st_arg1, *st_arg2; + dfvm_insn_t *insn; + dfvm_value_t *jmp; + + + sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2); + st_how = sttype_test_get_match(st_node); + + switch (st_op) { + case STNODE_OP_UNINITIALIZED: + ws_assert_not_reached(); + break; + + case STNODE_OP_NOT: + gencode(dfw, st_arg1); + insn = dfvm_insn_new(DFVM_NOT); + dfw_append_insn(dfw, insn); + break; + + case STNODE_OP_AND: + gencode(dfw, st_arg1); + + insn = dfvm_insn_new(DFVM_IF_FALSE_GOTO); + jmp = dfvm_value_new(INSN_NUMBER); + insn->arg1 = dfvm_value_ref(jmp); + dfw_append_insn(dfw, insn); + + gencode(dfw, st_arg2); + jmp->value.numeric = dfw->next_insn_id; + break; + + case STNODE_OP_OR: + gencode(dfw, st_arg1); + + insn = dfvm_insn_new(DFVM_IF_TRUE_GOTO); + jmp = dfvm_value_new(INSN_NUMBER); + insn->arg1 = dfvm_value_ref(jmp); + dfw_append_insn(dfw, insn); + + gencode(dfw, st_arg2); + jmp->value.numeric = dfw->next_insn_id; + break; + + case STNODE_OP_ALL_EQ: + gen_relation(dfw, DFVM_ALL_EQ, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_ANY_EQ: + gen_relation(dfw, DFVM_ANY_EQ, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_ALL_NE: + gen_relation(dfw, DFVM_ALL_NE, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_ANY_NE: + gen_relation(dfw, DFVM_ANY_NE, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_GT: + gen_relation(dfw, DFVM_ANY_GT, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_GE: + gen_relation(dfw, DFVM_ANY_GE, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_LT: + gen_relation(dfw, DFVM_ANY_LT, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_LE: + gen_relation(dfw, DFVM_ANY_LE, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_CONTAINS: + gen_relation(dfw, DFVM_ANY_CONTAINS, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_MATCHES: + 
gen_relation(dfw, DFVM_ANY_MATCHES, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_IN: + gen_relation_in(dfw, DFVM_SET_ANY_IN, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_NOT_IN: + gen_relation_in(dfw, DFVM_SET_ANY_NOT_IN, st_how, st_arg1, st_arg2); + break; + + case STNODE_OP_BITWISE_AND: + case STNODE_OP_UNARY_MINUS: + case STNODE_OP_ADD: + case STNODE_OP_SUBTRACT: + case STNODE_OP_MULTIPLY: + case STNODE_OP_DIVIDE: + case STNODE_OP_MODULO: + ws_assert_not_reached(); + break; + } +} + +static void +gencode(dfwork_t *dfw, stnode_t *st_node) +{ + switch (stnode_type_id(st_node)) { + case STTYPE_TEST: + gen_test(dfw, st_node); + break; + case STTYPE_FIELD: + gen_exists(dfw, st_node); + break; + case STTYPE_ARITHMETIC: + gen_notzero(dfw, st_node); + break; + case STTYPE_SLICE: + gen_exists_slice(dfw, st_node); + break; + default: + ws_assert_not_reached(); + } +} + + +static void +optimize(dfwork_t *dfw) +{ + int id, id1, length; + dfvm_insn_t *insn, *insn1, *prev; + dfvm_value_t *arg1; + + length = dfw->insns->len; + + for (id = 0, prev = NULL; id < length; prev = insn, id++) { + insn = (dfvm_insn_t *)g_ptr_array_index(dfw->insns, id); + arg1 = insn->arg1; + if (insn->op == DFVM_IF_TRUE_GOTO || insn->op == DFVM_IF_FALSE_GOTO) { + /* Try to optimize branch jumps */ + dfvm_opcode_t revert = (insn->op == DFVM_IF_FALSE_GOTO) ? 
DFVM_IF_TRUE_GOTO : DFVM_IF_FALSE_GOTO; + id1 = arg1->value.numeric; + for (;;) { + insn1 = (dfvm_insn_t*)g_ptr_array_index(dfw->insns, id1); + if (insn1->op == revert) { + /* Skip this one; it is always false and the branch is not taken */ + id1 = id1 +1; + continue; + } + if (insn1->op == DFVM_READ_TREE && prev && prev->op == DFVM_READ_TREE && + prev->arg2->value.numeric == insn1->arg2->value.numeric) { + /* Skip this one; hack if it's the same register it's the same field + * and it returns the same value */ + id1 = id1 +1; + continue; + } + if (insn1->op == insn->op) { + /* The branch jumps to the same branch instruction so + * coalesce the jumps */ + arg1 = insn1->arg1; + id1 = arg1->value.numeric; + continue; + } + /* Finished */ + arg1 = insn->arg1; + arg1->value.numeric = id1; + break; + } + } + } +} + +void +dfw_gencode(dfwork_t *dfw) +{ + dfw->insns = g_ptr_array_new(); + dfw->loaded_fields = g_hash_table_new(g_direct_hash, g_direct_equal); + dfw->loaded_raw_fields = g_hash_table_new(g_direct_hash, g_direct_equal); + dfw->interesting_fields = g_hash_table_new(g_int_hash, g_int_equal); + gencode(dfw, dfw->st_root); + dfw_append_insn(dfw, dfvm_insn_new(DFVM_RETURN)); + if (dfw->flags & DF_OPTIMIZE) { + optimize(dfw); + } +} + + +typedef struct { + int i; + int *fields; +} hash_key_iterator; + +static void +get_hash_key(void *key, void *value _U_, void *user_data) +{ + int field_id = *(int *)key; + hash_key_iterator *hki = (hash_key_iterator *)user_data; + + hki->fields[hki->i] = field_id; + hki->i++; +} + +int* +dfw_interesting_fields(dfwork_t *dfw, int *caller_num_fields) +{ + int num_fields = g_hash_table_size(dfw->interesting_fields); + + hash_key_iterator hki; + + if (num_fields == 0) { + *caller_num_fields = 0; + return NULL; + } + + hki.fields = g_new(int, num_fields); + hki.i = 0; + + g_hash_table_foreach(dfw->interesting_fields, get_hash_key, &hki); + *caller_num_fields = num_fields; + return hki.fields; +} + +/* + * Editor modelines - 
https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/gencode.h b/epan/dfilter/gencode.h new file mode 100644 index 0000000..94191b6 --- /dev/null +++ b/epan/dfilter/gencode.h @@ -0,0 +1,21 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef GENCODE_H +#define GENCODE_H + +#include "dfilter-int.h" + +void +dfw_gencode(dfwork_t *dfw); + +int* +dfw_interesting_fields(dfwork_t *dfw, int *caller_num_fields); + +#endif diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon new file mode 100644 index 0000000..af997ca --- /dev/null +++ b/epan/dfilter/grammar.lemon @@ -0,0 +1,559 @@ + +%include { +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER + +#include <assert.h> + +#include "dfilter-int.h" +#include "syntax-tree.h" +#include "sttype-field.h" +#include "sttype-slice.h" +#include "sttype-op.h" +#include "sttype-function.h" +#include "sttype-set.h" +#include "drange.h" + +#include "grammar.h" + +#ifdef _MSC_VER +#pragma warning(disable:4671) +#endif + +static stnode_t * +new_function(dfsyntax_t *dfs, stnode_t *node); + +#define FAIL(dfs, node, ...) 
\ + do { \ + ws_noisy("Parsing failed here."); \ + dfilter_fail(dfs, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \ + } while (0) + +DIAG_OFF_LEMON() +} /* end of %include */ + +%code { +DIAG_ON_LEMON() +} + +/* Parser Information */ +%name Dfilter +%token_prefix TOKEN_ +%extra_argument {dfsyntax_t *dfs} + +/* Terminal and Non-Terminal types and destructors */ +%token_type {stnode_t*} +%token_destructor { + (void)dfs; + stnode_free($$); +} + +%default_type {stnode_t*} +%default_destructor {stnode_free($$);} + +%type range_node_list {GSList*} +%destructor range_node_list {drange_node_free_list($$);} + +%type func_params_list {GSList*} +%destructor func_params_list {st_funcparams_free($$);} + +%type set_list {GSList*} +%destructor set_list {set_nodelist_free($$);} + +%type set_element_list {GSList*} +%destructor set_element_list {set_nodelist_free($$);} + +/* This is called as soon as a syntax error happens. After that, +any "error" symbols are shifted, if possible. */ +%syntax_error { + if (!TOKEN) { + dfilter_fail(dfs, DF_ERROR_UNEXPECTED_END, DFILTER_LOC_EMPTY, "Unexpected end of filter expression."); + return; + } + FAIL(dfs, TOKEN, "\"%s\" was unexpected in this context.", stnode_token(TOKEN)); +} + +/* ----------------- The grammar -------------- */ + +/* Associativity */ +%left TEST_OR. +%left TEST_XOR. +%left TEST_AND. +%right TEST_NOT. +%nonassoc TEST_ALL_EQ TEST_ANY_EQ TEST_ALL_NE TEST_ANY_NE TEST_LT TEST_LE TEST_GT TEST_GE + TEST_CONTAINS TEST_MATCHES. +%left BITWISE_AND. +%left PLUS MINUS. +%left STAR RSLASH PERCENT. +%nonassoc UNARY_PLUS UNARY_MINUS. + +/* Top-level targets */ +sentence ::= expr(X). { dfs->st_root = X; } +sentence ::= . { dfs->st_root = NULL; } + +expr(X) ::= relation(R). { X = R; } +expr(X) ::= arithmetic_expr(E). { X = E; } + +/* Logical tests */ +expr(X) ::= expr(Y) TEST_AND(T) expr(Z). +{ + X = T; + sttype_oper_set2(X, STNODE_OP_AND, Y, Z); + stnode_merge_location(X, Y, Z); +} + +expr(X) ::= expr(Y) TEST_OR(T) expr(Z). 
+{ + X = T; + sttype_oper_set2(X, STNODE_OP_OR, Y, Z); + stnode_merge_location(X, Y, Z); +} + +expr(X) ::= expr(Y) TEST_XOR(T) expr(Z). +{ + stnode_t *A = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY); + sttype_oper_set2(A, STNODE_OP_OR, stnode_dup(Y), stnode_dup(Z)); + + stnode_t *B = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY); + sttype_oper_set2(B, STNODE_OP_AND, Y, Z); + + stnode_t *C = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY); + sttype_oper_set1(C, STNODE_OP_NOT, B); + + X = T; + sttype_oper_set2(X, STNODE_OP_AND, A, C); + stnode_merge_location(X, Y, Z); +} + +expr(X) ::= TEST_NOT(T) expr(Y). +{ + X = T; + sttype_oper_set1(X, STNODE_OP_NOT, Y); + stnode_merge_location(X, T, Y); +} + +/* Any expression inside parens is simply that expression */ +expr(X) ::= LPAREN(L) expr(Y) RPAREN(R). +{ + X = Y; + stnode_merge_location(X, L, R); + stnode_free(L); + stnode_free(R); +} + +/* Entities, or things that can be compared/tested/checked */ +atom(A) ::= STRING(S). { A = S; } +atom(A) ::= CHARCONST(N). { A = N; } +atom(A) ::= LITERAL(S). { A = S; } +atom(A) ::= CONSTANT(C). { A = C; } + +named_field(X) ::= FIELD(F). +{ + X = F; +} + +named_field(X) ::= IDENTIFIER(U). +{ + X = U; + const char *name = stnode_token(U); + header_field_info *hfinfo = dfilter_resolve_unparsed(dfs, name); + if (hfinfo == NULL) { + stnode_replace(X, STTYPE_LITERAL, g_strdup(name)); + } + else { + stnode_replace(X, STTYPE_FIELD, hfinfo); + } +} + +layered_field(X) ::= named_field(F). +{ + X = F; +} + +layered_field(X) ::= named_field(F) HASH LBRACKET range_node_list(L) RBRACKET(R). +{ + X = F; + if (stnode_type_id(X) != STTYPE_FIELD) { + FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X)); + } + else { + sttype_field_set_range(X, L); + } + g_slist_free(L); + stnode_merge_location(X, F, R); + stnode_free(R); +} + +layered_field(X) ::= named_field(F) HASH INTEGER(N). 
+{ + X = F; + if (stnode_type_id(X) != STTYPE_FIELD) { + FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X)); + } + else { + char *err_msg = NULL; + drange_node *range = drange_node_from_str(stnode_token(N), &err_msg); + if (err_msg != NULL) { + FAIL(dfs, N, "%s", err_msg); + g_free(err_msg); + } + sttype_field_set_range1(X, range); + } + stnode_merge_location(X, F, N); + stnode_free(N); +} + +rawable_field(X) ::= layered_field(F). +{ + X = F; +} + +rawable_field(X) ::= ATSIGN(A) layered_field(F). +{ + X = F; + if (stnode_type_id(X) != STTYPE_FIELD) { + FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X)); + } + else { + sttype_field_set_raw(X, true); + } + stnode_merge_location(X, A, F); + stnode_free(A); +} + +reference(X) ::= DOLLAR(D) LBRACE rawable_field(F) RBRACE(R). +{ + /* convert field to reference */ + if (stnode_type_id(F) != STTYPE_FIELD) { + X = stnode_new(STTYPE_REFERENCE, NULL, NULL, stnode_location(F)); + FAIL(dfs, F, "\"%s\" is not a valid protocol or protocol field.", stnode_token(F)); + } + else { + X = stnode_new(STTYPE_REFERENCE, sttype_field_hfinfo(F), NULL, stnode_location(F)); + sttype_field_set_drange(X, sttype_field_drange_steal(F)); + sttype_field_set_raw(X, sttype_field_raw(F)); + } + stnode_merge_location(X, D, R); + stnode_free(F); + stnode_free(D); + stnode_free(R); +} + +entity(E) ::= atom(A). { E = A; } +entity(E) ::= slice(R). { E = R; } +entity(E) ::= function(F). { E = F; } +entity(E) ::= rawable_field(F). { E = F; } +entity(E) ::= reference(R). { E = R; } + +arithmetic_expr(T) ::= entity(N). +{ + T = N; +} + +arithmetic_expr(T) ::= PLUS(P) arithmetic_expr(N). [UNARY_PLUS] +{ + T = N; + stnode_merge_location(T, P, N); + stnode_free(P); +} + +arithmetic_expr(T) ::= MINUS(M) arithmetic_expr(N). 
[UNARY_MINUS] +{ + T = M; + sttype_oper_set1(T, STNODE_OP_UNARY_MINUS, N); + stnode_merge_location(T, M, N); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) BITWISE_AND(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_BITWISE_AND, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) PLUS(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_ADD, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) MINUS(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_SUBTRACT, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) STAR(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_MULTIPLY, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) RSLASH(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_DIVIDE, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= arithmetic_expr(F) PERCENT(O) arithmetic_expr(M). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_MODULO, F, M); + stnode_merge_location(T, F, M); +} + +arithmetic_expr(T) ::= LBRACE(L) arithmetic_expr(F) RBRACE(R). +{ + T = F; + stnode_merge_location(T, L, R); + stnode_free(L); + stnode_free(R); +} + +/* Relational tests */ +cmp_op(O) ::= TEST_ALL_EQ(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_ALL_EQ); +} + +cmp_op(O) ::= TEST_ANY_EQ(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_ANY_EQ); +} + +cmp_op(O) ::= TEST_ALL_NE(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_ALL_NE); +} + +cmp_op(O) ::= TEST_ANY_NE(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_ANY_NE); +} + +cmp_op(O) ::= TEST_GT(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_GT); +} + +cmp_op(O) ::= TEST_GE(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_GE); +} + +cmp_op(O) ::= TEST_LT(L). +{ + O = L; + sttype_oper_set_op(O, STNODE_OP_LT); +} + +cmp_op(O) ::= TEST_LE(L). 
+{ + O = L; + sttype_oper_set_op(O, STNODE_OP_LE); +} + +comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) arithmetic_expr(F). +{ + T = O; + sttype_oper_set2_args(O, E, F); + stnode_merge_location(T, E, F); +} + +/* 'a == b == c' or 'a < b <= c <= d < e' */ +comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) comparison_test(R). +{ + stnode_t *L, *F; + + F = R; + while (stnode_type_id(F) == STTYPE_TEST) { + sttype_oper_get(F, NULL, &F, NULL); + } + + L = O; + sttype_oper_set2_args(L, E, stnode_dup(F)); + + T = stnode_new_empty(STTYPE_TEST); + sttype_oper_set2(T, STNODE_OP_AND, L, R); + stnode_merge_location(T, E, R); +} + +relation_test(T) ::= comparison_test(C). { T = C; } + +relation_test(T) ::= entity(E) TEST_CONTAINS(L) entity(F). +{ + T = L; + sttype_oper_set2(T, STNODE_OP_CONTAINS, E, F); + stnode_merge_location(T, E, F); +} + +relation_test(T) ::= entity(E) TEST_MATCHES(L) entity(F). +{ + T = L; + sttype_oper_set2(T, STNODE_OP_MATCHES, E, F); + stnode_merge_location(T, E, F); +} + +relation_test(T) ::= entity(E) TEST_IN(O) set(S). +{ + T = O; + sttype_oper_set2(T, STNODE_OP_IN, E, S); + stnode_merge_location(T, E, S); +} + +relation_test(T) ::= entity(E) TEST_NOT TEST_IN(O) set(S). +{ + T = O; + sttype_oper_set2(O, STNODE_OP_NOT_IN, E, S); + stnode_merge_location(T, E, S); +} + +relation(R) ::= relation_test(T). { R = T; } + +relation(R) ::= ANY(A) relation_test(T). +{ + R = T; + sttype_test_set_match(R, STNODE_MATCH_ANY); + stnode_merge_location(R, A, T); + stnode_free(A); +} + +relation(R) ::= ALL(A) relation_test(T). +{ + R = T; + sttype_test_set_match(R, STNODE_MATCH_ALL); + stnode_merge_location(R, A, T); + stnode_free(A); +} + +/* Sets */ + +set_element_list(N) ::= arithmetic_expr(X). +{ + N = g_slist_append(NULL, X); + N = g_slist_append(N, NULL); +} + +set_element_list(N) ::= arithmetic_expr(X) DOTDOT arithmetic_expr(Y). +{ + N = g_slist_append(NULL, X); + N = g_slist_append(N, Y); +} + +set_list(L) ::= set_element_list(N). 
+{ + L = g_slist_concat(NULL, N); +} + +set_list(L) ::= set_list(P) COMMA set_element_list(N). +{ + L = g_slist_concat(P, N); +} + +set(S) ::= LBRACE(LB) set_list(L) RBRACE(RB). +{ + S = stnode_new(STTYPE_SET, L, NULL, DFILTER_LOC_EMPTY); + stnode_merge_location(S, LB, RB); + stnode_free(LB); + stnode_free(RB); +} + +/* Slices */ + +slice(R) ::= entity(E) LBRACKET range_node_list(L) RBRACKET. +{ + R = stnode_new(STTYPE_SLICE, NULL, NULL, DFILTER_LOC_EMPTY); + sttype_slice_set(R, E, L); + + /* Delete the list, but not the drange_nodes that + * the list contains. */ + g_slist_free(L); +} + +range_node_list(L) ::= RANGE_NODE(N). +{ + char *err_msg = NULL; + drange_node *rn = drange_node_from_str(stnode_token(N), &err_msg); + if (err_msg != NULL) { + FAIL(dfs, N, "%s", err_msg); + g_free(err_msg); + } + L = g_slist_append(NULL, rn); + stnode_free(N); +} + +range_node_list(L) ::= range_node_list(P) COMMA RANGE_NODE(N). +{ + char *err_msg = NULL; + drange_node *rn = drange_node_from_str(stnode_token(N), &err_msg); + if (err_msg != NULL) { + FAIL(dfs, N, "%s", err_msg); + g_free(err_msg); + } + L = g_slist_append(P, rn); + stnode_free(N); +} + +/* Functions */ + +%code { + static stnode_t * + new_function(dfsyntax_t *dfs, stnode_t *node) + { + const char *name = stnode_token(node); + + df_func_def_t *def = df_func_lookup(name); + if (!def) { + FAIL(dfs, node, "Function '%s' does not exist", name); + } + stnode_replace(node, STTYPE_FUNCTION, def); + return node; + } +} + +/* A function can have one or more parameters */ +function(F) ::= IDENTIFIER(U) LPAREN func_params_list(P) RPAREN(R). +{ + F = new_function(dfs, U); + sttype_function_set_params(F, P); + stnode_merge_location(F, U, R); + stnode_free(R); +} + +function ::= CONSTANT(U) LPAREN func_params_list RPAREN. +{ + FAIL(dfs, U, "Function '%s' does not exist", stnode_token(U)); +} + +/* A function can have zero parameters. */ +function(F) ::= IDENTIFIER(U) LPAREN RPAREN(R). 
+{ + F = new_function(dfs, U); + stnode_merge_location(F, U, R); + stnode_free(R); +} + +function ::= CONSTANT(U) LPAREN RPAREN. +{ + FAIL(dfs, U, "Function '%s' does not exist", stnode_token(U)); +} + +func_params_list(P) ::= arithmetic_expr(E). +{ + P = g_slist_append(NULL, E); +} + +func_params_list(P) ::= func_params_list(L) COMMA arithmetic_expr(E). +{ + P = g_slist_append(L, E); +} diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l new file mode 100644 index 0000000..e5565d2 --- /dev/null +++ b/epan/dfilter/scanner.l @@ -0,0 +1,968 @@ +%top { +/* Include this before everything else, for various large-file definitions */ +#include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER +#include <wireshark.h> + +#include <stdlib.h> +#include <errno.h> + +#include <wsutil/str_util.h> + +#include "dfilter-int.h" +#include "syntax-tree.h" +#include "grammar.h" +#include "dfunctions.h" +} + +/* + * Always generate warnings. + */ +%option warn + +/* + * We want a reentrant scanner. + */ +%option reentrant + +/* + * We don't use input, so don't generate code for it. + */ +%option noinput + +/* + * We don't use unput, so don't generate code for it. + */ +%option nounput + +/* + * We don't read interactively from the terminal. + */ +%option never-interactive + +/* + * Prefix scanner routines with "df_yy" rather than "yy", so this scanner + * can coexist with other scanners. + */ +%option prefix="df_yy" + +/* + * We're reading from a string, so we don't need yywrap. + */ +%option noyywrap + +/* + * The type for the dfs we keep for a scanner. + */ +%option extra-type="dfsyntax_t *" + +%{ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Disable diagnostics in the code generated by Flex. 
+ */ +DIAG_OFF_FLEX() + +WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id); +#define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED)) +#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST)) +#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC)) + +WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value); +WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value); +WS_WARN_UNUSED static int set_lval_constant(dfsyntax_t *dfs, const char *value, const char *token_value); +WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value); + +WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value); +WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string); +WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string); + +static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c); +static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn); +static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep); + +static void update_location(dfsyntax_t *dfs, const char *text); +static void update_string_loc(dfsyntax_t *dfs, const char *text); + +#define FAIL(...) \ + do { \ + ws_noisy("Scanning failed here."); \ + dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \ + } while (0) + +%} + +FunctionIdentifier [[:alpha:]_][[:alnum:]_]* + +/* + * Cannot start with '-'. * Some protocol name can contain '-', for example "mac-lte". + * Note that some protocol names start with a number, for example "9p". 
This is + * handled as a special case for numeric patterns. + * Some protocol names contain dots, e.g: _ws.expert + * Protocol or protocol field cannot contain DOTDOT anywhere. + */ +VarIdentifier [[:alnum:]_][[:alnum:]_-]* +ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})* + +hex2 [[:xdigit:]]{2} +ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2} +HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2} +DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2} + +hex4 [[:xdigit:]]{4} +DotQuadMacAddress {hex4}\.{hex4}\.{hex4} + +ColonBytes ({hex2}:)|({hex2}(:{hex2})+) +HyphenBytes {hex2}(-{hex2})+ +DotBytes {hex2}(\.{hex2})+ + +DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5] +IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet} + +h16 [0-9A-Fa-f]{1,4} +ls32 {h16}:{h16}|{IPv4Address} +IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?:: + +V4CidrPrefix \/[[:digit:]]{1,2} +V6CidrPrefix \/[[:digit:]]{1,3} + +/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */ +StartAlphabet [[:alnum:]_:] +Alphabet [[:alnum:]_:/-] +LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)* + +Exponent ([eE][+-]?[[:digit:]]+) +HexExponent ([pP][+-]?[[:digit:]]+) + +%x RANGE +%x LAYER +%x DQUOTE +%x SQUOTE + +%% + +[[:blank:]\n\r]+ { + update_location(yyextra, yytext); +} + +"(" return simple(TOKEN_LPAREN); +")" return simple(TOKEN_RPAREN); +"," return simple(TOKEN_COMMA); +"{" return simple(TOKEN_LBRACE); +".." 
return simple(TOKEN_DOTDOT); +"}" return simple(TOKEN_RBRACE); +"$" return simple(TOKEN_DOLLAR); +"@" return simple(TOKEN_ATSIGN); +"any" return simple(TOKEN_ANY); +"all" return simple(TOKEN_ALL); + +"==" return test(TOKEN_TEST_ANY_EQ); +"eq" return test(TOKEN_TEST_ANY_EQ); +"any_eq" return test(TOKEN_TEST_ANY_EQ); +"!=" return test(TOKEN_TEST_ALL_NE); +"ne" return test(TOKEN_TEST_ALL_NE); +"all_ne" return test(TOKEN_TEST_ALL_NE); +"===" return test(TOKEN_TEST_ALL_EQ); +"all_eq" return test(TOKEN_TEST_ALL_EQ); +"!==" return test(TOKEN_TEST_ANY_NE); +"any_ne" return test(TOKEN_TEST_ANY_NE); +">" return test(TOKEN_TEST_GT); +"gt" return test(TOKEN_TEST_GT); +">=" return test(TOKEN_TEST_GE); +"ge" return test(TOKEN_TEST_GE); +"<" return test(TOKEN_TEST_LT); +"lt" return test(TOKEN_TEST_LT); +"<=" return test(TOKEN_TEST_LE); +"le" return test(TOKEN_TEST_LE); +"contains" return test(TOKEN_TEST_CONTAINS); +"~" return test(TOKEN_TEST_MATCHES); +"matches" return test(TOKEN_TEST_MATCHES); +"!" return test(TOKEN_TEST_NOT); +"not" return test(TOKEN_TEST_NOT); +"&&" return test(TOKEN_TEST_AND); +"and" return test(TOKEN_TEST_AND); +"||" return test(TOKEN_TEST_OR); +"or" return test(TOKEN_TEST_OR); +"^^" return test(TOKEN_TEST_XOR); +"xor" return test(TOKEN_TEST_XOR); +"in" return test(TOKEN_TEST_IN); + +"+" return math(TOKEN_PLUS); +"-" return math(TOKEN_MINUS); +"*" return math(TOKEN_STAR); +"/" return math(TOKEN_RSLASH); +"%" return math(TOKEN_PERCENT); +"&" return math(TOKEN_BITWISE_AND); +"bitwise_and" return math(TOKEN_BITWISE_AND); + +"#" { + BEGIN(LAYER); + return simple(TOKEN_HASH); +} + +<LAYER>[[:digit:]]+ { + BEGIN(INITIAL); + update_location(yyextra, yytext); + return set_lval_simple(yyextra, TOKEN_INTEGER, yytext, STTYPE_UNINITIALIZED); +} + +<LAYER>[^[:digit:][] { + update_location(yyextra, yytext); + FAIL("Expected digit or \"[\", not \"%s\"", yytext); + return SCAN_FAILED; +} + +<INITIAL,LAYER>"[" { + BEGIN(RANGE); + return simple(TOKEN_LBRACKET); +} + 
+<RANGE>[^],]+ { + update_location(yyextra, yytext); + return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED); +} + +<RANGE>"," { + return simple(TOKEN_COMMA); +} + +<RANGE>"]" { + BEGIN(INITIAL); + return simple(TOKEN_RBRACKET); +} + +<RANGE><<EOF>> { + update_location(yyextra, yytext); + FAIL("The right bracket was missing from a slice."); + return SCAN_FAILED; +} + +[rR]{0,1}\042 { + /* start quote of a quoted string */ + /* + * The example of how to scan for strings was taken from + * the flex manual, from the section "Start Conditions". + * See: https://westes.github.io/flex/manual/Start-Conditions.html + */ + BEGIN(DQUOTE); + update_location(yyextra, yytext); + yyextra->string_loc = yyextra->location; + + yyextra->quoted_string = g_string_new(NULL); + + if (yytext[0] == 'r' || yytext[0] == 'R') { + /* + * This is a raw string (like in Python). Rules: 1) The two + * escape sequences are \\ and \". 2) Backslashes are + * preserved. 3) Double quotes in the string must be escaped. + * Corollary: Strings cannot end with an odd number of + * backslashes. + * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator) + * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\'. 
'\\', '\0'} + */ + yyextra->raw_string = true; + } + else { + yyextra->raw_string = false; + } +} + +<DQUOTE><<EOF>> { + /* unterminated string */ + update_string_loc(yyextra, yytext); + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + FAIL("The final quote was missing from a quoted string."); + return SCAN_FAILED; +} + +<DQUOTE>\042 { + /* end quote */ + BEGIN(INITIAL); + update_string_loc(yyextra, yytext); + int token = set_lval_quoted_string(yyextra, yyextra->quoted_string); + yyextra->quoted_string = NULL; + yyextra->string_loc.col_start = -1; + return token; +} + +<DQUOTE>\\[0-7]{1,3} { + /* octal sequence */ + update_string_loc(yyextra, yytext); + if (yyextra->raw_string) { + g_string_append(yyextra->quoted_string, yytext); + } + else { + unsigned long result; + result = strtoul(yytext + 1, NULL, 8); + if (result > 0xff) { + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + FAIL("%s is larger than 255.", yytext); + return SCAN_FAILED; + } + g_string_append_c(yyextra->quoted_string, (char) result); + } +} + +<DQUOTE>\\x[[:xdigit:]]{1,2} { + /* hex sequence */ + /* + * C standard does not place a limit on the number of hex + * digits after \x... but we do. \xNN can have 1 or two Ns, not more. 
+ */ + update_string_loc(yyextra, yytext); + if (yyextra->raw_string) { + g_string_append(yyextra->quoted_string, yytext); + } + else { + unsigned long result; + result = strtoul(yytext + 2, NULL, 16); + g_string_append_c(yyextra->quoted_string, (char) result); + } +} + +<DQUOTE>\\u[[:xdigit:]]{0,4} { + /* universal character name */ + update_string_loc(yyextra, yytext); + if (yyextra->raw_string) { + g_string_append(yyextra->quoted_string, yytext); + } + else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) { + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + return SCAN_FAILED; + } +} + +<DQUOTE>\\U[[:xdigit:]]{0,8} { + /* universal character name */ + update_string_loc(yyextra, yytext); + if (yyextra->raw_string) { + g_string_append(yyextra->quoted_string, yytext); + } + else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) { + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + return SCAN_FAILED; + } +} + + +<DQUOTE>\\. 
{ + /* escaped character */ + update_string_loc(yyextra, yytext); + if (yyextra->raw_string) { + g_string_append(yyextra->quoted_string, yytext); + } + else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) { + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + return SCAN_FAILED; + } +} + +<DQUOTE>[^\\\042]+ { + /* non-escaped string */ + update_string_loc(yyextra, yytext); + g_string_append(yyextra->quoted_string, yytext); +} + + +\047 { + /* start quote of a quoted character value */ + BEGIN(SQUOTE); + update_location(yyextra, yytext); + yyextra->string_loc = yyextra->location; + + yyextra->quoted_string = g_string_new("'"); +} + +<SQUOTE><<EOF>> { + /* unterminated character value */ + update_string_loc(yyextra, yytext); + g_string_free(yyextra->quoted_string, true); + yyextra->quoted_string = NULL; + FAIL("The final quote was missing from a character constant."); + return SCAN_FAILED; +} + +<SQUOTE>\047 { + /* end quote */ + BEGIN(INITIAL); + update_string_loc(yyextra, yytext); + g_string_append_c(yyextra->quoted_string, '\''); + int token = set_lval_charconst(yyextra, yyextra->quoted_string); + yyextra->quoted_string = NULL; + yyextra->string_loc.col_start = -1; + return token; +} + +<SQUOTE>\\. { + /* escaped character */ + update_string_loc(yyextra, yytext); + g_string_append(yyextra->quoted_string, yytext); +} + +<SQUOTE>[^\\\047]+ { + /* non-escaped string */ + update_string_loc(yyextra, yytext); + g_string_append(yyextra->quoted_string, yytext); +} + + /* NOTE: None of the patterns below can match ".." anywhere in the token string. */ + + /* MAC address. */ + +{ColonMacAddress}|{HyphenMacAddress} { + /* MAC Address. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + +{DotMacAddress}|{DotQuadMacAddress} { + /* MAC Address, can also be a field. */ + update_location(yyextra, yytext); + return set_lval_unparsed(yyextra, yytext, yytext); +} + + /* IP address. 
*/ + +{IPv4Address}{V4CidrPrefix}? { + /* IPv4 with or without prefix. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + +{IPv6Address}{V6CidrPrefix}? { + /* IPv6 with or without prefix. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + + /* Integer */ + +[[:digit:]][[:digit:]]* { + /* Numeric or field. */ + update_location(yyextra, yytext); + /* Check if we have a protocol or protocol field, otherwise assume a literal. */ + /* It is only reasonable to assume a literal here, instead of a + * (possibly non-existent) protocol field, because protocol field filter names + * should not start with a digit (the lexical syntax for numbers). */ + header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext); + if (hfinfo != NULL) { + return set_lval_field(yyextra, hfinfo, yytext); + } + return set_lval_literal(yyextra, yytext, yytext); +} + +0[bBxX]?[[:xdigit:]]+ { + /* Binary or octal or hexadecimal. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + + /* Floating point. */ + +[[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}? { + /* Decimal float with optional exponent. */ + /* Significand cannot have any side omitted. */ + update_location(yyextra, yytext); + /* Check if we have a protocol or protocol field, otherwise assume a literal. */ + /* It is only reasonable to assume a literal here, instead of a + * (possibly non-existent) protocol field, because protocol field filter names + * should not start with a digit (the lexical syntax for numbers). */ + header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext); + if (hfinfo != NULL) { + return set_lval_field(yyextra, hfinfo, yytext); + } + return set_lval_literal(yyextra, yytext, yytext); +} + +0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? { + /* Hexadecimal float with optional exponent. 
Can't be a field because + * field cannot begin with 0x. */ + /* Significand cannot have any side omitted. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + +:[[:xdigit:]]+ { + /* Numeric prefixed with ':'. */ + update_location(yyextra, yytext); + return set_lval_literal(yyextra, yytext + 1, yytext); +} + +[[:xdigit:]]+ { + /* Numeric or field. */ + update_location(yyextra, yytext); + return set_lval_unparsed(yyextra, yytext, yytext); +} + + /* Bytes. */ + +:?{ColonBytes} { + /* Bytes. */ + update_location(yyextra, yytext); + if (yytext[0] == ':') + return set_lval_literal(yyextra, yytext + 1, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + +:?{HyphenBytes} { + /* Bytes. */ + update_location(yyextra, yytext); + if (yytext[0] == ':') + return set_lval_literal(yyextra, yytext + 1, yytext); + return set_lval_literal(yyextra, yytext, yytext); +} + +:?{DotBytes} { + /* DotBytes, can be a field without ':' prefix. */ + update_location(yyextra, yytext); + if (yytext[0] == ':') + return set_lval_literal(yyextra, yytext + 1, yytext); + return set_lval_unparsed(yyextra, yytext, yytext); +} + + /* Identifier (protocol/field/function name). */ + + /* This must come before the ProtoFieldIdentifier rule to match function names. */ +{FunctionIdentifier} { + /* Identifier (field or function) or constant (bytes without separator). */ + /* We use CONSTANT instead of LITERAL because the difference is significant + * in the syntactical grammar. */ + update_location(yyextra, yytext); + header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext); + if (hfinfo != NULL) { + return set_lval_identifier(yyextra, yytext, yytext); + } + df_func_def_t *def = df_func_lookup(yytext); + if (def != NULL) { + return set_lval_identifier(yyextra, yytext, yytext); + } + return set_lval_constant(yyextra, yytext, yytext); +} + +\.{ProtoFieldIdentifier} { + /* Identifier, prefixed with a '.'. 
*/ + update_location(yyextra, yytext); + const char *name = yytext + 1; + header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, name); + if (hfinfo == NULL) { + FAIL("\"%s\" is not a valid protocol or protocol field.", name); + return SCAN_FAILED; + } + return set_lval_field(yyextra, hfinfo, yytext); +} + +{ProtoFieldIdentifier} { + /* Catch-all for protocol values. Can also be a literal. */ + update_location(yyextra, yytext); + return set_lval_identifier(yyextra, yytext, yytext); +} + +{LiteralValue} { + /* Catch-all for semantic values. */ + update_location(yyextra, yytext); + /* We use literal here because identifiers (using unparsed) should have + * matched one of the previous rules. */ + return set_lval_literal(yyextra, yytext, yytext); +} + +. { + /* Default */ + update_location(yyextra, yytext); + if (isprint_string(yytext)) + FAIL("\"%s\" was unexpected in this context.", yytext); + else + FAIL("Non-printable ASCII characters may only appear inside double-quotes."); + return SCAN_FAILED; +} + +%% + +/* + * Turn diagnostics back on, so we check the code that we've written. 
+ */ +DIAG_ON_FLEX() + +static void +_update_location(dfsyntax_t *dfs, size_t len) +{ + dfs->location.col_start += (long)dfs->location.col_len; + dfs->location.col_len = len; +} + +static void +update_location(dfsyntax_t *dfs, const char *text) +{ + _update_location(dfs, strlen(text)); +} + +static void +update_string_loc(dfsyntax_t *dfs, const char *text) +{ + size_t len = strlen(text); + dfs->string_loc.col_len += len; + _update_location(dfs, len); +} + +static int +set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id) +{ + dfs->lval = stnode_new(type_id, NULL, g_strdup(token_value), dfs->location); + return token; +} + +static int +set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value) +{ + dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location); + return TOKEN_LITERAL; +} + +static int +set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value) +{ + dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location); + return TOKEN_IDENTIFIER; +} + +static int +set_lval_constant(dfsyntax_t *dfs, const char *value, const char *token_value) +{ + dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location); + return TOKEN_CONSTANT; +} + +static int +set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value) +{ + int token; + const header_field_info *hfinfo = dfilter_resolve_unparsed(dfs, value); + if (hfinfo != NULL) { + token = set_lval_field(dfs, hfinfo, token_value); + } + else { + token = set_lval_literal(dfs, value, token_value); + } + stnode_set_flags(dfs->lval, STFLAG_UNPARSED); + return token; +} + +static int +set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value) +{ + dfs->lval = stnode_new(STTYPE_FIELD, (void *)hfinfo, g_strdup(token_value), dfs->location); + return TOKEN_FIELD; +} + +static int 
+set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string) +{ + char *token_value; + + token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true); + dfs->lval = stnode_new(STTYPE_STRING, quoted_string, token_value, dfs->string_loc); + return TOKEN_STRING; +} + +static int +set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string) +{ + unsigned long number; + bool ok; + + char *token_value = g_string_free(quoted_string, false); + ok = parse_charconst(dfs, token_value, &number); + if (!ok) { + g_free(token_value); + return SCAN_FAILED; + } + dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc); + return TOKEN_CHARCONST; +} + +static bool +append_escaped_char(dfsyntax_t *dfs, GString *str, char c) +{ + switch (c) { + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '\\': + case '\'': + case '\"': + break; + default: + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, + "\\%c is not a valid character escape sequence", c); + return false; + } + + g_string_append_c(str, c); + return true; +} + +static bool +parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep) +{ + uint64_t val; + char *endptr; + int ndigits; + + if (str[0] != '\\') + return false; + + if (str[1] == 'u') + ndigits = 4; + else if (str[1] == 'U') + ndigits = 8; + else + return false; + + for (int i = 2; i < ndigits + 2; i++) { + if (!g_ascii_isxdigit(str[i])) { + return false; + } + } + + errno = 0; + val = g_ascii_strtoull(str + 2, &endptr, 16); /* skip leading 'u' or 'U' */ + + if (errno != 0 || endptr == str || val > UINT32_MAX) { + return false; + } + + /* + * Ref: https://en.cppreference.com/w/c/language/escape + * Range of universal character names + * + * If a universal 
character name corresponds to a code point that is + * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a + * surrogate code point (the range 0xD800-0xDFFF, inclusive), or + * greater than 0x10FFFF, i.e. not a Unicode code point (since C23), + * the program is ill-formed. In other words, members of basic source + * character set and control characters (in ranges 0x0-0x1F and + * 0x7F-0x9F) cannot be expressed in universal character names. + */ + if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60) + return false; + else if (val >= 0xD800 && val <= 0xDFFF) + return false; + else if (val > 0x10FFFF) + return false; + + *valuep = (gunichar)val; + if (ret_endptr) + *ret_endptr = endptr; + return true; +} + +static bool +append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn) +{ + gunichar val; + + if (!parse_universal_character_name(dfs, ucn, NULL, &val)) { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn); + return false; + } + + g_string_append_unichar(str, val); + return true; +} + +static bool +parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep) +{ + const char *cp; + unsigned long value; + gunichar unival; + char *endptr; + + cp = s + 1; /* skip the leading ' */ + if (*cp == '\'') { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant."); + return false; + } + + if (*cp == '\\') { + /* + * C escape sequence. + * An escape sequence is an octal number \NNN, + * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v + * that stands for the byte value of the equivalent + * C-escape in ASCII encoding. 
+ */ + cp++; + /* Each case below must leave cp on the closing quote for the final length check. */ + switch (*cp) { + + case '\0': + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); + return false; + + case 'a': + value = '\a'; + cp++; + break; + + case 'b': + value = '\b'; + cp++; + break; + + case 'f': + value = '\f'; + cp++; + break; + + case 'n': + value = '\n'; + cp++; + break; + + case 'r': + value = '\r'; + cp++; + break; + + case 't': + value = '\t'; + cp++; + break; + + case 'v': + value = '\v'; + cp++; + break; + + case '\'': + value = '\''; + cp++; + break; + + case '\\': + value = '\\'; + cp++; + break; + + case '"': + value = '"'; + cp++; + break; + + case 'x': + cp++; + if (*cp >= '0' && *cp <= '9') + value = *cp - '0'; + else if (*cp >= 'A' && *cp <= 'F') + value = 10 + (*cp - 'A'); + else if (*cp >= 'a' && *cp <= 'f') + value = 10 + (*cp - 'a'); + else { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); + return false; + } + cp++; + /* The second hex digit is optional: consume it only when present, so a one-digit escape keeps cp on the closing quote. */ + if (*cp != '\'') { + value <<= 4; + if (*cp >= '0' && *cp <= '9') + value |= *cp - '0'; + else if (*cp >= 'A' && *cp <= 'F') + value |= 10 + (*cp - 'A'); + else if (*cp >= 'a' && *cp <= 'f') + value |= 10 + (*cp - 'a'); + else { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); + return false; + } + cp++; + } + break; + + case 'u': + case 'U': + if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s); + return false; + } + value = (unsigned long)unival; + cp = endptr; + break; + + default: + /* Octal */ + if (*cp >= '0' && *cp <= '7') + value = *cp - '0'; + else { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); + return false; + } + if (*(cp + 1) != '\'') { + cp++; + value <<= 3; + if (*cp >= '0' && *cp <= '7') + value |= *cp - '0'; + else { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a 
valid character constant.", s); + return false; + } + if (*(cp + 1) != '\'') { + cp++; + value <<= 3; + if (*cp >= '0' && *cp <= '7') + value |= *cp - '0'; + else { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); + return false; + } + } + } + if (value > 0xFF) { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s); + return false; + } + cp++; + } + } else { + value = *cp++; + if (!g_ascii_isprint(value)) { + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value); + return false; + } + } + + if ((*cp != '\'') || (*(cp + 1) != '\0')){ + dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s); + return false; + } + + *valuep = value; + return true; +} diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c new file mode 100644 index 0000000..fec84a9 --- /dev/null +++ b/epan/dfilter/semcheck.c @@ -0,0 +1,1599 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER + +#include <string.h> + +#include "dfilter-int.h" +#include "semcheck.h" +#include "syntax-tree.h" +#include "sttype-field.h" +#include "sttype-slice.h" +#include "sttype-op.h" +#include "sttype-set.h" +#include "sttype-function.h" +#include "sttype-pointer.h" + +#include <epan/exceptions.h> +#include <epan/packet.h> + +#include <wsutil/ws_assert.h> +#include <wsutil/wslog.h> + +#include <ftypes/ftypes.h> + + +#define FAIL(dfw, node, ...) 
\ + do { \ + ws_noisy("Semantic check failed here."); \ + dfilter_fail_throw(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \ + } while (0) + +#define FAIL_HERE(dfw) \ + do { \ + ws_noisy("Semantic check failed here."); \ + THROW(TypeError); \ + } while (0) + +typedef bool (*FtypeCanFunc)(enum ftenum); + +static ftenum_t +check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2, + ftenum_t lhs_ftype); + +static void +check_relation(dfwork_t *dfw, stnode_op_t st_op, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2); + +static void +semcheck(dfwork_t *dfw, stnode_t *st_node); + +static fvalue_t * +mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s, + df_loc_t loc); + +/* Compares two ftenum_t's and decides if they're + * compatible or not (if they're the same basic type) */ +bool +compatible_ftypes(ftenum_t a, ftenum_t b) +{ + switch (a) { + case FT_NONE: + case FT_BOOLEAN: + case FT_PROTOCOL: + case FT_ABSOLUTE_TIME: + case FT_RELATIVE_TIME: + case FT_IEEE_11073_SFLOAT: + case FT_IEEE_11073_FLOAT: + case FT_IPv4: + case FT_IPv6: + return a == b; + + case FT_FLOAT: /* XXX - should be able to compare with INT */ + case FT_DOUBLE: /* XXX - should be able to compare with INT */ + switch (b) { + case FT_FLOAT: + case FT_DOUBLE: + return true; + default: + return false; + } + + case FT_ETHER: + case FT_BYTES: + case FT_UINT_BYTES: + case FT_GUID: + case FT_OID: + case FT_AX25: + case FT_VINES: + case FT_FCWWN: + case FT_REL_OID: + case FT_SYSTEM_ID: + + return (b == FT_ETHER || b == FT_BYTES || b == FT_UINT_BYTES || b == FT_GUID || b == FT_OID || b == FT_AX25 || b == FT_VINES || b == FT_FCWWN || b == FT_REL_OID || b == FT_SYSTEM_ID); + + case FT_UINT8: + case FT_UINT16: + case FT_UINT24: + case FT_UINT32: + case FT_CHAR: + case FT_FRAMENUM: + case FT_IPXNET: + return ftype_can_val_to_uinteger(b); + + case FT_UINT40: 
+ case FT_UINT48: + case FT_UINT56: + case FT_UINT64: + case FT_EUI64: + return ftype_can_val_to_uinteger64(b); + + case FT_INT8: + case FT_INT16: + case FT_INT24: + case FT_INT32: + return ftype_can_val_to_sinteger(b); + + case FT_INT40: + case FT_INT48: + case FT_INT56: + case FT_INT64: + return ftype_can_val_to_sinteger64(b); + + case FT_STRING: + case FT_STRINGZ: + case FT_UINT_STRING: + case FT_STRINGZPAD: + case FT_STRINGZTRUNC: + switch (b) { + case FT_STRING: + case FT_STRINGZ: + case FT_UINT_STRING: + case FT_STRINGZPAD: + case FT_STRINGZTRUNC: + return true; + default: + return false; + } + + case FT_NUM_TYPES: + ws_assert_not_reached(); + } + + ws_assert_not_reached(); + return false; +} + +/* Don't set the error message if it's already set. */ +#define SET_ERROR(dfw, str) \ + do { \ + if ((str) != NULL && (dfw)->error == NULL) { \ + (dfw)->error = df_error_new(DF_ERROR_GENERIC, str, NULL); \ + } \ + else { \ + g_free(str); \ + } \ + } while (0) + +/* Gets an fvalue from a string, and sets the error message on failure. */ +WS_RETNONNULL +fvalue_t* +dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, + bool allow_partial_value, header_field_info *hfinfo_value_string) +{ + fvalue_t *fv; + const char *s = stnode_data(st); + char *error_message = NULL; + + fv = fvalue_from_literal(ftype, s, allow_partial_value, &error_message); + SET_ERROR(dfw, error_message); + + if (fv == NULL && hfinfo_value_string) { + /* check value_string */ + fv = mk_fvalue_from_val_string(dfw, hfinfo_value_string, s, stnode_location(st)); + /* + * Ignore previous errors if this can be mapped + * to an item from value_string. + */ + if (fv) { + df_error_free(&dfw->error); + add_compile_warning(dfw, "Interpreting the symbol \u2039%s\u203A as a %s value string. " + "Writing value strings without double quotes is deprecated. 
" + "Please use \"%s\" instead", + s, ftype_pretty_name(hfinfo_value_string->type), s); + } + } + if (fv == NULL) { + dfw_set_error_location(dfw, stnode_location(st)); + FAIL_HERE(dfw); + } + + return fv; +} + +/* Gets an fvalue from a string, and sets the error message on failure. */ +WS_RETNONNULL +fvalue_t * +dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, + header_field_info *hfinfo_value_string) +{ + fvalue_t *fv; + const GString *gs = stnode_string(st); + char *error_message = NULL; + + fv = fvalue_from_string(ftype, gs->str, gs->len, &error_message); + SET_ERROR(dfw, error_message); + + if (fv == NULL && hfinfo_value_string) { + fv = mk_fvalue_from_val_string(dfw, hfinfo_value_string, gs->str, stnode_location(st)); + /* + * Ignore previous errors if this can be mapped + * to an item from value_string. + */ + if (fv) { + df_error_free(&dfw->error); + } + } + if (fv == NULL) { + dfw_set_error_location(dfw, stnode_location(st)); + FAIL_HERE(dfw); + } + + return fv; +} + +/* Creates a FT_UINT32 fvalue with a given value. */ +static fvalue_t* +mk_uint32_fvalue(uint32_t val) +{ + fvalue_t *fv; + + fv = fvalue_new(FT_UINT32); + fvalue_set_uinteger(fv, val); + + return fv; +} + +/* Creates a FT_UINT64 fvalue with a given value. */ +static fvalue_t* +mk_uint64_fvalue(uint64_t val) +{ + fvalue_t *fv; + + fv = fvalue_new(FT_UINT64); + fvalue_set_uinteger64(fv, val); + + return fv; +} + +/* Creates a FT_BOOLEAN fvalue with a given value. */ +static fvalue_t* +mk_boolean_fvalue(bool val) +{ + fvalue_t *fv; + + fv = fvalue_new(FT_BOOLEAN); + fvalue_set_uinteger64(fv, val); + + return fv; +} + +/* Try to make an fvalue from a string using a value_string or true_false_string. + * This works only for ftypes that are integers. Returns the created fvalue_t* + * or NULL if impossible. */ +static fvalue_t* +mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s, + df_loc_t loc) +{ + /* Early return? 
*/ + switch(hfinfo->type) { + case FT_NONE: + case FT_PROTOCOL: + case FT_FLOAT: + case FT_DOUBLE: + case FT_IEEE_11073_SFLOAT: + case FT_IEEE_11073_FLOAT: + case FT_ABSOLUTE_TIME: + case FT_RELATIVE_TIME: + case FT_IPv4: + case FT_IPv6: + case FT_IPXNET: + case FT_AX25: + case FT_VINES: + case FT_FCWWN: + case FT_ETHER: + case FT_BYTES: + case FT_UINT_BYTES: + case FT_STRING: + case FT_STRINGZ: + case FT_UINT_STRING: + case FT_STRINGZPAD: + case FT_STRINGZTRUNC: + case FT_EUI64: + case FT_GUID: + case FT_OID: + case FT_REL_OID: + case FT_SYSTEM_ID: + case FT_FRAMENUM: /* hfinfo->strings contains ft_framenum_type_t, not strings */ + return NULL; + + case FT_BOOLEAN: + case FT_CHAR: + case FT_UINT8: + case FT_UINT16: + case FT_UINT24: + case FT_UINT32: + case FT_UINT40: + case FT_UINT48: + case FT_UINT56: + case FT_UINT64: + case FT_INT8: + case FT_INT16: + case FT_INT24: + case FT_INT32: + case FT_INT40: + case FT_INT48: + case FT_INT56: + case FT_INT64: + break; + + case FT_NUM_TYPES: + ws_assert_not_reached(); + } + + /* Do val_strings exist? */ + if (!hfinfo->strings) { + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "%s cannot accept strings as values.", + hfinfo->abbrev); + return NULL; + } + + /* Reset the error message, since *something* interesting will happen, + * and the error message will be more interesting than any error message + * I happen to have now. 
*/ + df_error_free(&dfw->error); + + if (hfinfo->type == FT_BOOLEAN) { + const true_false_string *tf = (const true_false_string *)hfinfo->strings; + + if (g_ascii_strcasecmp(s, tf->true_string) == 0) { + return mk_boolean_fvalue(true); + } + if (g_ascii_strcasecmp(s, tf->false_string) == 0) { + return mk_boolean_fvalue(false); + } + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.", + s, hfinfo->abbrev); + } + else if (hfinfo->display & BASE_RANGE_STRING) { + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [range] strings as values.", + hfinfo->abbrev); + } + else if (hfinfo->display & BASE_VAL64_STRING) { + const val64_string *vals = (const val64_string *)hfinfo->strings; + + while (vals->strptr != NULL) { + if (g_ascii_strcasecmp(s, vals->strptr) == 0) { + return mk_uint64_fvalue(vals->value); + } + vals++; + } + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.", + s, hfinfo->abbrev); + } + else if (hfinfo->display == BASE_CUSTOM) { + /* If a user wants to match against a custom string, we would + * somehow have to have the integer value here to pass it in + * to the custom-display function. But we don't have an + * integer, we have the string they're trying to match. 
+ * -><- + */ + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [custom] strings as values.", + hfinfo->abbrev); + } + else { + const value_string *vals = (const value_string *)hfinfo->strings; + if (hfinfo->display & BASE_EXT_STRING) + vals = VALUE_STRING_EXT_VS_P((const value_string_ext *) vals); + + while (vals->strptr != NULL) { + if (g_ascii_strcasecmp(s, vals->strptr) == 0) { + return mk_uint32_fvalue(vals->value); + } + vals++; + } + dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.", + s, hfinfo->abbrev); + } + return NULL; +} + +static bool +is_bytes_type(enum ftenum type) +{ + switch(type) { + case FT_AX25: + case FT_VINES: + case FT_FCWWN: + case FT_ETHER: + case FT_BYTES: + case FT_UINT_BYTES: + case FT_IPv6: + case FT_GUID: + case FT_OID: + case FT_REL_OID: + case FT_SYSTEM_ID: + return true; + + case FT_NONE: + case FT_PROTOCOL: + case FT_FLOAT: + case FT_DOUBLE: + case FT_IEEE_11073_SFLOAT: + case FT_IEEE_11073_FLOAT: + case FT_ABSOLUTE_TIME: + case FT_RELATIVE_TIME: + case FT_IPv4: + case FT_IPXNET: + case FT_STRING: + case FT_STRINGZ: + case FT_UINT_STRING: + case FT_STRINGZPAD: + case FT_STRINGZTRUNC: + case FT_BOOLEAN: + case FT_FRAMENUM: + case FT_CHAR: + case FT_UINT8: + case FT_UINT16: + case FT_UINT24: + case FT_UINT32: + case FT_UINT40: + case FT_UINT48: + case FT_UINT56: + case FT_UINT64: + case FT_INT8: + case FT_INT16: + case FT_INT24: + case FT_INT32: + case FT_INT40: + case FT_INT48: + case FT_INT56: + case FT_INT64: + case FT_EUI64: + return false; + + case FT_NUM_TYPES: + ws_assert_not_reached(); + } + + ws_assert_not_reached(); + return false; +} + +/* Check the semantics of an existence test. 
*/ +static void +check_exists(dfwork_t *dfw, stnode_t *st_arg1) +{ + LOG_NODE(st_arg1); + + switch (stnode_type_id(st_arg1)) { + case STTYPE_FIELD: + /* This is OK */ + dfw->field_count++; + break; + case STTYPE_REFERENCE: + case STTYPE_STRING: + case STTYPE_LITERAL: + case STTYPE_CHARCONST: + FAIL(dfw, st_arg1, "\"%s\" is neither a field nor a protocol name.", + stnode_todisplay(st_arg1)); + break; + + case STTYPE_FUNCTION: + /* XXX - Maybe we should change functions so they can return fields, + * in which case the 'exist' should be fine. */ + FAIL(dfw, st_arg1, "You cannot test whether a function is present."); + break; + + case STTYPE_SET: + case STTYPE_UNINITIALIZED: + case STTYPE_NUM_TYPES: + case STTYPE_TEST: + case STTYPE_FVALUE: + case STTYPE_PCRE: + case STTYPE_ARITHMETIC: + case STTYPE_SLICE: + ws_assert_not_reached(); + } +} + +ftenum_t +check_slice(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype) +{ + stnode_t *entity1; + header_field_info *hfinfo1; + ftenum_t ftype1; + + LOG_NODE(st); + + entity1 = sttype_slice_entity(st); + ws_assert(entity1); + + if (stnode_type_id(entity1) == STTYPE_FIELD) { + dfw->field_count++; + hfinfo1 = sttype_field_hfinfo(entity1); + ftype1 = sttype_field_ftenum(entity1); + + if (!ftype_can_slice(ftype1)) { + FAIL(dfw, entity1, "\"%s\" is a %s and cannot be sliced into a sequence of bytes.", + hfinfo1->abbrev, ftype_pretty_name(ftype1)); + } + } else if (stnode_type_id(entity1) == STTYPE_FUNCTION) { + ftype1 = check_function(dfw, entity1, lhs_ftype); + + if (!ftype_can_slice(ftype1)) { + FAIL(dfw, entity1, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.", + sttype_function_name(entity1), ftype_pretty_name(ftype1)); + } + } else if (stnode_type_id(entity1) == STTYPE_SLICE) { + ftype1 = check_slice(dfw, entity1, lhs_ftype); + } else { + FAIL(dfw, entity1, "Range is not supported for entity %s", + stnode_todisplay(entity1)); + } + + return FT_IS_STRING(ftype1) ? 
FT_STRING : FT_BYTES; +} + +#define IS_FIELD_ENTITY(ft) \ + ((ft) == STTYPE_FIELD || \ + (ft) == STTYPE_REFERENCE) + +static void +convert_to_bytes(stnode_t *arg) +{ + stnode_t *entity1; + drange_node *rn; + + entity1 = stnode_dup(arg); + rn = drange_node_new(); + drange_node_set_start_offset(rn, 0); + drange_node_set_to_the_end(rn); + + stnode_replace(arg, STTYPE_SLICE, NULL); + sttype_slice_set1(arg, entity1, rn); +} + +ftenum_t +check_function(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype) +{ + df_func_def_t *funcdef; + GSList *params; + unsigned nparams; + + LOG_NODE(st_node); + + funcdef = sttype_function_funcdef(st_node); + params = sttype_function_params(st_node); + nparams = g_slist_length(params); + + if (nparams < funcdef->min_nargs) { + FAIL(dfw, st_node, "Function %s needs at least %u arguments.", + funcdef->name, funcdef->min_nargs); + } else if (funcdef->max_nargs > 0 && nparams > funcdef->max_nargs) { + FAIL(dfw, st_node, "Function %s can only accept %u arguments.", + funcdef->name, funcdef->max_nargs); + } + + return funcdef->semcheck_param_function(dfw, funcdef->name, lhs_ftype, params, + stnode_location(st_node)); +} + +WS_RETNONNULL +fvalue_t * +dfilter_fvalue_from_charconst(dfwork_t *dfw, ftenum_t ftype, stnode_t *st) +{ + fvalue_t *fvalue; + unsigned long *nump = stnode_data(st); + char *error_message = NULL; + + fvalue = fvalue_from_charconst(ftype, *nump, &error_message); + SET_ERROR(dfw, error_message); + + if (fvalue == NULL) { + dfw_set_error_location(dfw, stnode_location(st)); + FAIL_HERE(dfw); + } + + return fvalue; +} + +/* If the LHS of a relation test is a FIELD, run some checks + * and possibly some modifications of syntax tree nodes. 
*/ +static void +check_relation_LHS_FIELD(dfwork_t *dfw, stnode_op_t st_op _U_, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + sttype_id_t type2; + header_field_info *hfinfo1; + ftenum_t ftype1, ftype2; + fvalue_t *fvalue; + + LOG_NODE(st_node); + + if (stnode_type_id(st_arg1) == STTYPE_FIELD) + dfw->field_count++; + + hfinfo1 = sttype_field_hfinfo(st_arg1); + ftype1 = sttype_field_ftenum(st_arg1); + if (!can_func(ftype1)) { + FAIL(dfw, st_arg1, "%s (type=%s) cannot participate in %s comparison.", + hfinfo1->abbrev, ftype_pretty_name(ftype1), + stnode_todisplay(st_node)); + } + + type2 = stnode_type_id(st_arg2); + + if (IS_FIELD_ENTITY(type2)) { + ftype2 = sttype_field_ftenum(st_arg2); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + /* Do this check even though you'd think that if + * they're compatible, then can_func() would pass. 
*/ + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + if (type2 == STTYPE_FIELD) { + dfw->field_count++; + } + } + else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL) { + /* Skip incompatible fields */ + while (hfinfo1->same_name_prev_id != -1 && + ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) || + (type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) { + hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id); + ftype1 = hfinfo1->type; + } + + if (type2 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1); + } + else { + fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1); + } + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_CHARCONST) { + fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_SLICE) { + ftype2 = check_slice(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + + if (!is_bytes_type(ftype1)) { + if (!ftype_can_slice(ftype1)) { + FAIL(dfw, st_arg1, "\"%s\" is a %s and cannot be converted into a sequence of bytes.", + hfinfo1->abbrev, + ftype_pretty_name(ftype1)); + } + + /* Convert entire field to bytes */ + convert_to_bytes(st_arg1); + } + } + else if (type2 == STTYPE_FUNCTION) { + ftype2 = check_function(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) and return value of %s() (type=%s) are not of compatible types.", + 
hfinfo1->abbrev, ftype_pretty_name(ftype1), + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.", + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_PCRE) { + ws_assert(st_op == STNODE_OP_MATCHES); + } + else if (type2 == STTYPE_ARITHMETIC) { + ftype2 = check_arithmetic(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else { + ws_assert_not_reached(); + } +} + +static void +check_relation_LHS_FVALUE(dfwork_t *dfw, stnode_op_t st_op _U_, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + sttype_id_t type1, type2; + header_field_info *hfinfo2 = NULL; + ftenum_t ftype2; + fvalue_t *fvalue; + + LOG_NODE(st_node); + + type2 = stnode_type_id(st_arg2); + + if (IS_FIELD_ENTITY(type2)) { + hfinfo2 = sttype_field_hfinfo(st_arg2); + ftype2 = sttype_field_ftenum(st_arg2); + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + if (type2 == STTYPE_FIELD) { + dfw->field_count++; + } + } + else if (type2 == STTYPE_STRING || + type2 == STTYPE_LITERAL || + type2 == STTYPE_CHARCONST || + type2 == STTYPE_PCRE) { + FAIL(dfw, st_node, "Constant expression is invalid."); + } + else if (type2 == STTYPE_SLICE) { + ftype2 = check_slice(dfw, st_arg2, FT_NONE); + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), 
ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_FUNCTION) { + ftype2 = check_function(dfw, st_arg2, FT_NONE); + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.", + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_ARITHMETIC) { + ftype2 = check_arithmetic(dfw, st_arg2, FT_NONE); + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else { + ws_assert_not_reached(); + } + + type1 = stnode_type_id(st_arg1); + if (type1 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype2, st_arg1, hfinfo2); + } + else if (type1 == STTYPE_LITERAL) { + fvalue = dfilter_fvalue_from_literal(dfw, ftype2, st_arg1, allow_partial_value, hfinfo2); + } + else if (type1 == STTYPE_CHARCONST) { + fvalue = dfilter_fvalue_from_charconst(dfw, ftype2, st_arg1); + } + else { + ws_assert_not_reached(); + } + stnode_replace(st_arg1, STTYPE_FVALUE, fvalue); +} + +static void +check_relation_LHS_SLICE(dfwork_t *dfw, stnode_op_t st_op _U_, + FtypeCanFunc can_func _U_, + bool allow_partial_value, + stnode_t *st_node _U_, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + sttype_id_t type2; + ftenum_t ftype1, ftype2; + fvalue_t *fvalue; + + LOG_NODE(st_node); + + ftype1 = check_slice(dfw, st_arg1, FT_NONE); + if (!can_func(ftype1)) { + FAIL(dfw, st_arg1, "%s cannot participate in %s comparison.", + stnode_todisplay(st_arg1), stnode_todisplay(st_node)); + } + + type2 = stnode_type_id(st_arg2); + + if (IS_FIELD_ENTITY(type2)) { + ftype2 = sttype_field_ftenum(st_arg2); + + if (!is_bytes_type(ftype2)) { + if (!ftype_can_slice(ftype2)) { + FAIL(dfw, st_arg2, "\"%s\" is a %s and cannot be converted into a sequence of bytes.", + stnode_todisplay(st_arg2), + ftype_pretty_name(ftype2)); + } + + /* Convert entire field to bytes */ + 
convert_to_bytes(st_arg2); + } + if (type2 == STTYPE_FIELD) { + dfw->field_count++; + } + } + else if (type2 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_LITERAL) { + fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_CHARCONST) { + fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_SLICE) { + ftype2 = check_slice(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_FUNCTION) { + ftype2 = check_function(dfw, st_arg2, ftype1); + + if (!is_bytes_type(ftype2)) { + if (!ftype_can_slice(ftype2)) { + FAIL(dfw, st_arg2, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.", + sttype_function_name(st_arg2), + ftype_pretty_name(ftype2)); + } + + /* Convert function result to bytes */ + convert_to_bytes(st_arg2); + } + } + else if (type2 == STTYPE_PCRE) { + ws_assert(st_op == STNODE_OP_MATCHES); + } + else if (type2 == STTYPE_ARITHMETIC) { + ftype2 = check_arithmetic(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else { + ws_assert_not_reached(); + } +} + +/* If 
the LHS of a relation test is a FUNCTION, run some checks + * and possibly some modifications of syntax tree nodes. */ +static void +check_relation_LHS_FUNCTION(dfwork_t *dfw, stnode_op_t st_op _U_, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2) +{ + sttype_id_t type2; + ftenum_t ftype1, ftype2; + fvalue_t *fvalue; + + LOG_NODE(st_node); + + ftype1 = check_function(dfw, st_arg1, FT_NONE); + if (ftype1 == FT_NONE) { + FAIL(dfw, st_arg1, "Constant expression is invalid on the LHS."); + } + if (!can_func(ftype1)) { + FAIL(dfw, st_arg1, "Function %s (type=%s) cannot participate in %s comparison.", + sttype_function_name(st_arg1), ftype_pretty_name(ftype1), + stnode_todisplay(st_node)); + } + + type2 = stnode_type_id(st_arg2); + + if (IS_FIELD_ENTITY(type2)) { + ftype2 = sttype_field_ftenum(st_arg2); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "Function %s and %s are not of compatible types.", + sttype_function_name(st_arg2), stnode_todisplay(st_arg2)); + } + /* Do this check even though you'd think that if + * they're compatible, then can_func() would pass. 
*/ + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + if (type2 == STTYPE_FIELD) { + dfw->field_count++; + } + } + else if (type2 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_LITERAL) { + fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_CHARCONST) { + fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_SLICE) { + ftype2 = check_slice(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + + if (!is_bytes_type(ftype1)) { + if (!ftype_can_slice(ftype1)) { + FAIL(dfw, st_arg1, "Function \"%s\" is a %s and cannot be converted into a sequence of bytes.", + sttype_function_name(st_arg1), + ftype_pretty_name(ftype1)); + } + + /* Convert function result to bytes */ + convert_to_bytes(st_arg1); + } + } + else if (type2 == STTYPE_FUNCTION) { + ftype2 = check_function(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "Return values of function %s (type=%s) and function %s (type=%s) are not of compatible types.", + sttype_function_name(st_arg1), ftype_pretty_name(ftype1), sttype_function_name(st_arg1), ftype_pretty_name(ftype2)); + } + + /* Do this check even though you'd think that if + * they're compatible, then can_func() would pass. 
*/ + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "Return value of %s (type=%s) cannot participate in specified comparison.", + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_PCRE) { + ws_assert(st_op == STNODE_OP_MATCHES); + } + else if (type2 == STTYPE_ARITHMETIC) { + ftype2 = check_arithmetic(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else { + ws_assert_not_reached(); + } +} + +static void +check_relation_LHS_ARITHMETIC(dfwork_t *dfw, stnode_op_t st_op _U_, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2) +{ + sttype_id_t type2; + ftenum_t ftype1, ftype2; + fvalue_t *fvalue; + + LOG_NODE(st_node); + + ftype1 = check_arithmetic(dfw, st_arg1, FT_NONE); + if (ftype1 == FT_NONE) { + FAIL(dfw, st_arg1, "Constant expression is invalid on the LHS."); + } + if (!can_func(ftype1)) { + FAIL(dfw, st_arg1, "Result with type %s cannot participate in %s comparison.", + ftype_pretty_name(ftype1), + stnode_todisplay(st_node)); + } + + type2 = stnode_type_id(st_arg2); + + if (IS_FIELD_ENTITY(type2)) { + ftype2 = sttype_field_ftenum(st_arg2); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + if (type2 == STTYPE_FIELD) { + dfw->field_count++; + } + } + else if (type2 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); + 
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_LITERAL) { + fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_CHARCONST) { + fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + } + else if (type2 == STTYPE_SLICE) { + ftype2 = check_slice(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + + if (!is_bytes_type(ftype1)) { + if (!ftype_can_slice(ftype1)) { + FAIL(dfw, st_arg1, "Result is a %s and cannot be converted into a sequence of bytes.", + ftype_pretty_name(ftype1)); + } + + /* Convert expression result to bytes */ + convert_to_bytes(st_arg1); + } + } + else if (type2 == STTYPE_FUNCTION) { + ftype2 = check_function(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "Result (type=%s) and return value of %s() (type=%s) are not of compatible types.", + ftype_pretty_name(ftype1), + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.", + sttype_function_name(st_arg2), ftype_pretty_name(ftype2)); + } + } + else if (type2 == STTYPE_PCRE) { + ws_assert(st_op == STNODE_OP_MATCHES); + } + else if (type2 == STTYPE_ARITHMETIC) { + ftype2 = check_arithmetic(dfw, st_arg2, ftype1); + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_arg2, "%s and %s are not of compatible types.", + stnode_todisplay(st_arg1), stnode_todisplay(st_arg2)); + } + if (!can_func(ftype2)) { + FAIL(dfw, 
st_arg2, "%s (type=%s) cannot participate in specified comparison.", + stnode_todisplay(st_arg2), ftype_pretty_name(ftype2)); + } + } + else { + ws_assert_not_reached(); + } +} + +/* Check the semantics of any relational test. */ +static void +check_relation(dfwork_t *dfw, stnode_op_t st_op, + FtypeCanFunc can_func, bool allow_partial_value, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2) +{ + LOG_NODE(st_node); + + switch (stnode_type_id(st_arg1)) { + case STTYPE_FIELD: + case STTYPE_REFERENCE: + check_relation_LHS_FIELD(dfw, st_op, can_func, + allow_partial_value, st_node, st_arg1, st_arg2); + break; + case STTYPE_SLICE: + check_relation_LHS_SLICE(dfw, st_op, can_func, + allow_partial_value, st_node, st_arg1, st_arg2); + break; + case STTYPE_FUNCTION: + check_relation_LHS_FUNCTION(dfw, st_op, can_func, + allow_partial_value, st_node, st_arg1, st_arg2); + break; + case STTYPE_ARITHMETIC: + check_relation_LHS_ARITHMETIC(dfw, st_op, can_func, + allow_partial_value, st_node, st_arg1, st_arg2); + break; + case STTYPE_LITERAL: + case STTYPE_STRING: + case STTYPE_CHARCONST: + check_relation_LHS_FVALUE(dfw, st_op, can_func, + allow_partial_value, st_node, st_arg1, st_arg2); + break; + default: + /* Should not happen. */ + FAIL(dfw, st_arg1, "(FIXME) Syntax node type \"%s\" is invalid for relation \"%s\".", + stnode_type_name(st_arg1), stnode_todisplay(st_node)); + } +} + +static void +check_warning_contains_RHS_FIELD(dfwork_t *dfw, stnode_t *st_node _U_, + stnode_t *st_arg1 _U_, stnode_t *st_arg2) +{ + const char *token = stnode_token(st_arg2); + header_field_info *hfinfo = sttype_field_hfinfo(st_arg2); + fvalue_t *fvalue = fvalue_from_literal(FT_BYTES, token, true, NULL); + if (fvalue != NULL) { + char *repr = fvalue_to_string_repr(dfw->dfw_scope, fvalue, FTREPR_DFILTER, 0); + add_compile_warning(dfw, "Interpreting \"%s\" as %s instead of %s. 
" + "Consider writing \"%s\" or \".%s\" to remove this warning", + token, hfinfo->name, ftype_pretty_name(FT_BYTES), + repr, hfinfo->abbrev); + fvalue_free(fvalue); + } +} + +static void +check_relation_contains(dfwork_t *dfw, stnode_t *st_node, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + LOG_NODE(st_node); + + if (stnode_type_id(st_arg2) == STTYPE_FIELD && stnode_get_flags(st_arg2, STFLAG_UNPARSED)) { + check_warning_contains_RHS_FIELD(dfw, st_node, st_arg1, st_arg2); + } + + switch (stnode_type_id(st_arg1)) { + case STTYPE_FIELD: + case STTYPE_REFERENCE: + check_relation_LHS_FIELD(dfw, STNODE_OP_CONTAINS, ftype_can_contains, + true, st_node, st_arg1, st_arg2); + break; + case STTYPE_FUNCTION: + check_relation_LHS_FUNCTION(dfw, STNODE_OP_CONTAINS, ftype_can_contains, + true, st_node, st_arg1, st_arg2); + break; + case STTYPE_SLICE: + check_relation_LHS_SLICE(dfw, STNODE_OP_CONTAINS, ftype_can_contains, + true, st_node, st_arg1, st_arg2); + break; + default: + FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.", + stnode_todisplay(st_node), stnode_todisplay(st_arg1)); + } +} + + +static void +check_relation_matches(dfwork_t *dfw, stnode_t *st_node, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + ws_regex_t *pcre; + char *errmsg = NULL; + GString *patt; + + LOG_NODE(st_node); + + if (stnode_type_id(st_arg2) != STTYPE_STRING) { + FAIL(dfw, st_arg2, "Matches requires a double quoted string on the right side."); + } + + patt = stnode_string(st_arg2); + ws_debug("Compile regex pattern: %s", stnode_token(st_arg2)); + + pcre = ws_regex_compile_ex(patt->str, patt->len, &errmsg, WS_REGEX_CASELESS|WS_REGEX_NEVER_UTF); + if (errmsg) { + dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg2), "Regex compilation error: %s.", errmsg); + g_free(errmsg); + ws_noisy("Semantic check failed here with a regex syntax error"); + THROW(TypeError); + } + + stnode_replace(st_arg2, STTYPE_PCRE, pcre); + + switch (stnode_type_id(st_arg1)) { + case 
STTYPE_FIELD: + case STTYPE_REFERENCE: + check_relation_LHS_FIELD(dfw, STNODE_OP_MATCHES, ftype_can_matches, + true, st_node, st_arg1, st_arg2); + break; + case STTYPE_FUNCTION: + check_relation_LHS_FUNCTION(dfw, STNODE_OP_MATCHES, ftype_can_matches, + true, st_node, st_arg1, st_arg2); + break; + case STTYPE_SLICE: + check_relation_LHS_SLICE(dfw, STNODE_OP_MATCHES, ftype_can_matches, + true, st_node, st_arg1, st_arg2); + break; + default: + FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.", + stnode_todisplay(st_node), stnode_todisplay(st_arg1)); + } +} + +static void +check_relation_in(dfwork_t *dfw, stnode_t *st_node _U_, + stnode_t *st_arg1, stnode_t *st_arg2) +{ + GSList *nodelist; + stnode_t *node_left, *node_right; + + LOG_NODE(st_node); + + if (stnode_type_id(st_arg1) != STTYPE_FIELD) { + FAIL(dfw, st_arg1, "Only a field may be tested for membership in a set."); + } + /* Checked in the grammar parser. */ + ws_assert(stnode_type_id(st_arg2) == STTYPE_SET); + + /* Attempt to interpret one element of the set at a time. Each + * element is represented by two items in the list, the element + * value and NULL. Both will be replaced by a lower and upper + * value if the element is a range. */ + nodelist = stnode_data(st_arg2); + while (nodelist) { + node_left = nodelist->data; + + /* Don't let a range on the RHS affect the LHS field. 
*/ + if (stnode_type_id(node_left) == STTYPE_SLICE) { + FAIL(dfw, node_left, "A slice may not appear inside a set."); + break; + } + + nodelist = g_slist_next(nodelist); + ws_assert(nodelist); + node_right = nodelist->data; + if (node_right) { + check_relation_LHS_FIELD(dfw, STNODE_OP_GE, ftype_can_cmp, + false, st_node, st_arg1, node_left); + check_relation_LHS_FIELD(dfw, STNODE_OP_LE, ftype_can_cmp, + false, st_node, st_arg1, node_right); + } else { + check_relation_LHS_FIELD(dfw, STNODE_OP_ANY_EQ, ftype_can_eq, + false, st_node, st_arg1, node_left); + } + nodelist = g_slist_next(nodelist); + } +} + +/* Check the semantics of any type of TEST */ +static void +check_test(dfwork_t *dfw, stnode_t *st_node) +{ + stnode_op_t st_op; + stnode_t *st_arg1, *st_arg2; + + LOG_NODE(st_node); + + sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2); + + switch (st_op) { + case STNODE_OP_NOT: + semcheck(dfw, st_arg1); + break; + case STNODE_OP_AND: + case STNODE_OP_OR: + semcheck(dfw, st_arg1); + semcheck(dfw, st_arg2); + break; + case STNODE_OP_ALL_EQ: + case STNODE_OP_ANY_EQ: + case STNODE_OP_ALL_NE: + case STNODE_OP_ANY_NE: + check_relation(dfw, st_op, ftype_can_eq, false, st_node, st_arg1, st_arg2); + break; + case STNODE_OP_GT: + case STNODE_OP_GE: + case STNODE_OP_LT: + case STNODE_OP_LE: + check_relation(dfw, st_op, ftype_can_cmp, false, st_node, st_arg1, st_arg2); + break; + case STNODE_OP_CONTAINS: + check_relation_contains(dfw, st_node, st_arg1, st_arg2); + break; + case STNODE_OP_MATCHES: + check_relation_matches(dfw, st_node, st_arg1, st_arg2); + break; + case STNODE_OP_IN: + case STNODE_OP_NOT_IN: + check_relation_in(dfw, st_node, st_arg1, st_arg2); + break; + + case STNODE_OP_UNINITIALIZED: + case STNODE_OP_UNARY_MINUS: + case STNODE_OP_BITWISE_AND: + case STNODE_OP_ADD: + case STNODE_OP_SUBTRACT: + case STNODE_OP_MULTIPLY: + case STNODE_OP_DIVIDE: + case STNODE_OP_MODULO: + ws_assert_not_reached(); + } +} + +static void +check_nonzero(dfwork_t *dfw, stnode_t 
*st_node) +{ + ftenum_t ftype = FT_NONE; + + LOG_NODE(st_node); + + switch (stnode_type_id(st_node)) { + case STTYPE_ARITHMETIC: + ftype = check_arithmetic(dfw, st_node, FT_NONE); + break; + case STTYPE_SLICE: + ftype = check_slice(dfw, st_node, FT_NONE); + break; + default: + ws_assert_not_reached(); + break; + } + + if (ftype == FT_NONE) { + FAIL(dfw, st_node, "Constant expression is invalid."); + } +} + +static const char * +op_to_error_msg(stnode_op_t st_op) +{ + switch (st_op) { + case STNODE_OP_UNARY_MINUS: + return "cannot be negated"; + case STNODE_OP_ADD: + return "cannot be added"; + case STNODE_OP_SUBTRACT: + return "cannot be subtracted"; + case STNODE_OP_MULTIPLY: + return "cannot be multiplied"; + case STNODE_OP_DIVIDE: + return "cannot be divided"; + case STNODE_OP_MODULO: + return "does not support modulo operation"; + case STNODE_OP_BITWISE_AND: + return "does not support bitwise AND"; + default: + return "cannot FIXME"; + } +} + +static ftenum_t +check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op, + stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2, + ftenum_t lhs_ftype) +{ + ftenum_t ftype1, ftype2; + FtypeCanFunc can_func = NULL; + + LOG_NODE(st_node); + + if (st_op == STNODE_OP_UNARY_MINUS) { + ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype); + if (ftype1 == FT_NONE) + return FT_NONE; + if (!ftype_can_unary_minus(ftype1)) { + FAIL(dfw, st_arg1, "%s %s.", + ftype_name(ftype1), op_to_error_msg(st_op)); + } + if (stnode_type_id(st_arg1) == STTYPE_FVALUE) { + /* Pre-compute constant unary minus result */ + char *err_msg; + fvalue_t *new_fv = fvalue_unary_minus(stnode_data(st_arg1), &err_msg); + if (new_fv == NULL) { + dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg1), + "%s: %s", stnode_todisplay(st_arg1), err_msg); + g_free(err_msg); + FAIL_HERE(dfw); + } + /* Replaces unary operator with result */ + stnode_replace(st_node, STTYPE_FVALUE, new_fv); + } + return ftype1; + } + + switch (st_op) { + case STNODE_OP_ADD: + can_func 
= ftype_can_add; + break; + case STNODE_OP_SUBTRACT: + can_func = ftype_can_subtract; + break; + case STNODE_OP_MULTIPLY: + can_func = ftype_can_multiply; + break; + case STNODE_OP_DIVIDE: + can_func = ftype_can_divide; + break; + case STNODE_OP_MODULO: + can_func = ftype_can_modulo; + break; + case STNODE_OP_BITWISE_AND: + can_func = ftype_can_bitwise_and; + break; + default: + ws_assert_not_reached(); + } + + ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype); + if (ftype1 == FT_NONE) { + FAIL(dfw, st_arg1, "Unknown type for left side of %s", stnode_todisplay(st_node)); + } + if (!can_func(ftype1)) { + FAIL(dfw, st_arg1, "%s %s.", + ftype_name(ftype1), op_to_error_msg(st_op)); + } + + ftype2 = check_arithmetic(dfw, st_arg2, ftype1); + if (!can_func(ftype2)) { + FAIL(dfw, st_arg2, "%s %s.", + ftype_name(ftype2), op_to_error_msg(st_op)); + } + + if (!compatible_ftypes(ftype1, ftype2)) { + FAIL(dfw, st_node, "%s and %s are not compatible.", + ftype_name(ftype1), ftype_name(ftype2)); + } + + return ftype1; +} + +ftenum_t +check_arithmetic(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype) +{ + sttype_id_t type; + stnode_op_t st_op; + stnode_t *st_arg1, *st_arg2; + ftenum_t ftype; + + LOG_NODE(st_node); + + type = stnode_type_id(st_node); + + switch (type) { + case STTYPE_LITERAL: + if (lhs_ftype != FT_NONE) { + fvalue_t *fvalue = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_node, false, NULL); + stnode_replace(st_node, STTYPE_FVALUE, fvalue); + ftype = fvalue_type_ftenum(fvalue); + } + else { + ftype = FT_NONE; + } + break; + + case STTYPE_FIELD: + dfw->field_count++; + /* fall-through */ + case STTYPE_REFERENCE: + ftype = sttype_field_ftenum(st_node); + break; + + case STTYPE_FUNCTION: + ftype = check_function(dfw, st_node, lhs_ftype); + break; + + case STTYPE_SLICE: + ftype = check_slice(dfw, st_node, lhs_ftype); + break; + + case STTYPE_FVALUE: + ftype = fvalue_type_ftenum(stnode_data(st_node)); + break; + + case STTYPE_ARITHMETIC: + sttype_oper_get(st_node, 
&st_op, &st_arg1, &st_arg2); + ftype = check_arithmetic_LHS(dfw, st_op, st_node, st_arg1, st_arg2, lhs_ftype); + break; + + default: + FAIL(dfw, st_node, "%s is not a valid arithmetic operation.", + stnode_todisplay(st_node)); + } + + return ftype; +} + + +/* Check the entire syntax tree. */ +static void +semcheck(dfwork_t *dfw, stnode_t *st_node) +{ + LOG_NODE(st_node); + + dfw->field_count = 0; + + switch (stnode_type_id(st_node)) { + case STTYPE_TEST: + check_test(dfw, st_node); + break; + case STTYPE_ARITHMETIC: + case STTYPE_SLICE: + check_nonzero(dfw, st_node); + break; + default: + check_exists(dfw, st_node); + } + + if (dfw->field_count == 0) { + FAIL(dfw, st_node, "Constant expression is invalid."); + } +} + + +/* Check the syntax tree for semantic errors, and convert + * some of the nodes into the form they need to be in order to + * later generate the DFVM bytecode. */ +bool +dfw_semcheck(dfwork_t *dfw) +{ + volatile bool ok_filter = true; + + ws_debug("Starting semantic check (dfw = %p)", dfw); + + /* Instead of having to check for errors at every stage of + * the semantic-checking, the semantic-checking code will + * throw an exception if a problem is found. */ + TRY { + semcheck(dfw, dfw->st_root); + } + CATCH(TypeError) { + ok_filter = false; + } + ENDTRY; + + ws_debug("Semantic check (dfw = %p) returns %s", + dfw, ok_filter ? 
"TRUE" : "FALSE"); + + return ok_filter; +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/semcheck.h b/epan/dfilter/semcheck.h new file mode 100644 index 0000000..261bdc6 --- /dev/null +++ b/epan/dfilter/semcheck.h @@ -0,0 +1,31 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef SEMCHECK_H +#define SEMCHECK_H + +#include "dfilter-int.h" + +bool +dfw_semcheck(dfwork_t *dfw); + +ftenum_t +check_arithmetic(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype); + +ftenum_t +check_function(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype); + +ftenum_t +check_slice(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype); + +bool +compatible_ftypes(ftenum_t a, ftenum_t b); + +#endif diff --git a/epan/dfilter/sttype-field.c b/epan/dfilter/sttype-field.c new file mode 100644 index 0000000..8df1c84 --- /dev/null +++ b/epan/dfilter/sttype-field.c @@ -0,0 +1,235 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* The ideas in this code came from Ed Warnicke's original implementation + * of dranges for the old display filter code (Ethereal 0.8.15 and before). + * The code is different, but definitely inspired by his code. 
+ */ + +#include "config.h" + +#include <glib.h> + +#include <epan/proto.h> +#include "sttype-field.h" +#include <wsutil/ws_assert.h> + +typedef struct { + uint32_t magic; + header_field_info *hfinfo; + drange_t *drange; + bool raw; +} field_t; + +#define FIELD_MAGIC 0xfc2002cf + +static void * +field_new(void *hfinfo) +{ + field_t *field; + + field = g_new(field_t, 1); + field->magic = FIELD_MAGIC; + field->hfinfo = hfinfo; + field->drange = NULL; + field->raw = false; + + return field; +} + +static void * +field_dup(gconstpointer data) +{ + const field_t *org = data; + field_t *field; + + ws_assert_magic(org, FIELD_MAGIC); + field = field_new(NULL); + field->hfinfo = org->hfinfo; + field->drange = drange_dup(org->drange); + field->raw = org->raw; + + return field; +} + +static void +field_free(void *data) +{ + field_t *field = data; + ws_assert_magic(field, FIELD_MAGIC); + + if (field->drange) + drange_free(field->drange); + g_free(field); +} + +static char * +field_tostr(const void *data, bool pretty _U_) +{ + const field_t *field = data; + ws_assert_magic(field, FIELD_MAGIC); + wmem_strbuf_t *repr; + char *drange_str = NULL; + + + repr = wmem_strbuf_new(NULL, NULL); + + if (field->raw) { + wmem_strbuf_append_c(repr, '@'); + } + + wmem_strbuf_append(repr, field->hfinfo->abbrev); + + if (field->drange) { + drange_str = drange_tostr(field->drange); + wmem_strbuf_append_printf(repr, "#[%s]", drange_str); + g_free(drange_str); + } + + if (field->raw) { + wmem_strbuf_append(repr, " <FT_BYTES>"); + } + else { + wmem_strbuf_append_printf(repr, " <%s>", + ftype_name(field->hfinfo->type)); + } + + return wmem_strbuf_finalize(repr); +} + +header_field_info * +sttype_field_hfinfo(stnode_t *node) +{ + field_t *field = node->data; + ws_assert_magic(field, FIELD_MAGIC); + return field->hfinfo; +} + +ftenum_t +sttype_field_ftenum(stnode_t *node) +{ + field_t *field = node->data; + ws_assert_magic(field, FIELD_MAGIC); + if (field->raw) + return FT_BYTES; + return 
field->hfinfo->type; +} + +drange_t * +sttype_field_drange(stnode_t *node) +{ + field_t *field = node->data; + ws_assert_magic(field, FIELD_MAGIC); + return field->drange; +} + +bool +sttype_field_raw(stnode_t *node) +{ + field_t *field = node->data; + ws_assert_magic(field, FIELD_MAGIC); + return field->raw; +} + +drange_t * +sttype_field_drange_steal(stnode_t *node) +{ + field_t *field; + drange_t *dr; + + field = stnode_data(node); + ws_assert_magic(field, FIELD_MAGIC); + dr = field->drange; + field->drange = NULL; + return dr; +} + +/* Set the field's range from a list of drange nodes; the field must not already have a range. */ +void +sttype_field_set_range(stnode_t *node, GSList* drange_list) +{ + field_t *field = stnode_data(node); + ws_assert_magic(field, FIELD_MAGIC); + ws_assert(field->drange == NULL); + field->drange = drange_new_from_list(drange_list); +} + +void +sttype_field_set_range1(stnode_t *node, drange_node *rn) +{ + field_t *field = stnode_data(node); + ws_assert_magic(field, FIELD_MAGIC); + ws_assert(field->drange == NULL); + field->drange = drange_new(rn); +} + +void +sttype_field_set_drange(stnode_t *node, drange_t *dr) +{ + field_t *field = stnode_data(node); + ws_assert_magic(field, FIELD_MAGIC); + ws_assert(field->drange == NULL); + field->drange = dr; +} + +void +sttype_field_set_raw(stnode_t *node, bool raw) +{ + field_t *field = stnode_data(node); + ws_assert_magic(field, FIELD_MAGIC); + field->raw = raw; +} + +char * +sttype_field_set_number(stnode_t *node, const char *number_str) +{ + char *err_msg = NULL; + drange_node *rn = drange_node_from_str(number_str, &err_msg); + if (err_msg != NULL) + return err_msg; + + sttype_field_set_range1(node, rn); + return NULL; +} + +void +sttype_register_field(void) +{ + static sttype_t field_type = { + STTYPE_FIELD, + "FIELD", + field_new, + field_free, + field_dup, + field_tostr + }; + static sttype_t reference_type = { + STTYPE_REFERENCE, + "REFERENCE", + field_new, + field_free, + field_dup, + field_tostr + }; + + sttype_register(&field_type); + 
sttype_register(&reference_type); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-field.h b/epan/dfilter/sttype-field.h new file mode 100644 index 0000000..b890b84 --- /dev/null +++ b/epan/dfilter/sttype-field.h @@ -0,0 +1,54 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_FIELD_H +#define STTYPE_FIELD_H + +#include "dfilter-int.h" +#include "drange.h" + + +header_field_info * +sttype_field_hfinfo(stnode_t *node); + +ftenum_t +sttype_field_ftenum(stnode_t *node); + +drange_t * +sttype_field_drange(stnode_t *node); + +drange_t * +sttype_field_drange_steal(stnode_t *node); + +bool +sttype_field_raw(stnode_t *node); + +/* Set a range */ +void +sttype_field_set_range(stnode_t *node, GSList* drange_list); + +void +sttype_field_set_range1(stnode_t *node, drange_node *rn); + +void +sttype_field_set_drange(stnode_t *node, drange_t *dr); + +void +sttype_field_set_raw(stnode_t *node, bool raw); + +char * +sttype_field_set_number(stnode_t *node, const char *number_str); + +/* Clear the 'drange' variable to remove responsibility for + * freeing it. 
*/ +void +sttype_field_remove_drange(stnode_t *node); + +#endif diff --git a/epan/dfilter/sttype-function.c b/epan/dfilter/sttype-function.c new file mode 100644 index 0000000..4bdd854 --- /dev/null +++ b/epan/dfilter/sttype-function.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2006 by Gilbert Ramirez <gram@alumni.rice.edu> + * + * Wireshark - Network traffic analyzer + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#include "syntax-tree.h" +#include "sttype-function.h" +#include <wsutil/ws_assert.h> + +typedef struct { + uint32_t magic; + df_func_def_t *funcdef; + GSList *params; +} function_t; + +#define FUNCTION_MAGIC 0xe10f0f99 + +static void * +function_new(void *funcdef) +{ + function_t *stfuncrec; + + stfuncrec = g_new(function_t, 1); + + stfuncrec->magic = FUNCTION_MAGIC; + stfuncrec->funcdef = funcdef; + stfuncrec->params = NULL; + + return stfuncrec; +} + +static void * +function_dup(gconstpointer data) +{ + const function_t *org = data; + function_t *stfuncrec; + GSList *p; + + stfuncrec = function_new(org->funcdef); + + for (p = org->params; p; p = p->next) { + const stnode_t *param = p->data; + stfuncrec->params = g_slist_append(stfuncrec->params, stnode_dup(param)); + } + return stfuncrec; +} + +static char * +function_tostr(const void *data, bool pretty) +{ + const function_t *stfuncrec = data; + const df_func_def_t *def = stfuncrec->funcdef; + GSList *params = stfuncrec->params; + GString *repr = g_string_new(""); + + ws_assert(def); + + if (pretty) { + g_string_printf(repr, "%s(", def->name); + while (params != NULL) { + ws_assert(params->data); + g_string_append(repr, stnode_tostr(params->data, pretty)); + params = params->next; + if (params != NULL) { + g_string_append(repr, ", "); + } + } + g_string_append_c(repr, ')'); + } + else { + g_string_printf(repr, "%s#%u", def->name, g_slist_length(params)); + } + + return g_string_free(repr, false); +} + +static void +slist_stnode_free(void *data) +{ + stnode_free(data); +} 
+ +void +st_funcparams_free(GSList *params) +{ + g_slist_free_full(params, slist_stnode_free); +} + +static void +function_free(void *value) +{ + function_t *stfuncrec = value; + ws_assert_magic(stfuncrec, FUNCTION_MAGIC); + st_funcparams_free(stfuncrec->params); + g_free(stfuncrec); +} + + +/* Set the parameters for a function stnode_t. */ +void +sttype_function_set_params(stnode_t *node, GSList *params) +{ + + function_t *stfuncrec; + + stfuncrec = stnode_data(node); + ws_assert_magic(stfuncrec, FUNCTION_MAGIC); + + stfuncrec->params = params; +} + +/* Get the function-definition record for a function stnode_t. */ +df_func_def_t* +sttype_function_funcdef(stnode_t *node) +{ + function_t *stfuncrec; + + stfuncrec = stnode_data(node); + ws_assert_magic(stfuncrec, FUNCTION_MAGIC); + return stfuncrec->funcdef; +} + +const char * +sttype_function_name(stnode_t *node) +{ + function_t *stfuncrec; + + stfuncrec = stnode_data(node); + ws_assert_magic(stfuncrec, FUNCTION_MAGIC); + return stfuncrec->funcdef->name; +} + +/* Get the parameters for a function stnode_t. 
*/ +GSList* +sttype_function_params(stnode_t *node) +{ + function_t *stfuncrec; + + stfuncrec = stnode_data(node); + ws_assert_magic(stfuncrec, FUNCTION_MAGIC); + return stfuncrec->params; +} + + +void +sttype_register_function(void) +{ + static sttype_t function_type = { + STTYPE_FUNCTION, + "FUNCTION", + function_new, + function_free, + function_dup, + function_tostr + }; + + sttype_register(&function_type); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-function.h b/epan/dfilter/sttype-function.h new file mode 100644 index 0000000..6f1cb3e --- /dev/null +++ b/epan/dfilter/sttype-function.h @@ -0,0 +1,32 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_FUNCTION_H +#define STTYPE_FUNCTION_H + +#include "dfilter-int.h" +#include "dfunctions.h" + +/* Set the parameters for a function stnode_t. */ +void +sttype_function_set_params(stnode_t *node, GSList *params); + +/* Get the function-definition record for a function stnode_t. */ +df_func_def_t* sttype_function_funcdef(stnode_t *node); + +const char *sttype_function_name(stnode_t *node); + +/* Get the parameters for a function stnode_t. 
*/ +GSList* sttype_function_params(stnode_t *node); + +/* Free the memory of a param list */ +void st_funcparams_free(GSList *params); + +#endif diff --git a/epan/dfilter/sttype-op.c b/epan/dfilter/sttype-op.c new file mode 100644 index 0000000..2f83f45 --- /dev/null +++ b/epan/dfilter/sttype-op.c @@ -0,0 +1,409 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "syntax-tree.h" +#include "sttype-op.h" + +typedef struct { + uint32_t magic; + stnode_op_t op; + stmatch_t how; + stnode_t *val1; + stnode_t *val2; +} oper_t; + +#define OPER_MAGIC 0xab9009ba + +static void * +oper_new(void *junk _U_) +{ + oper_t *oper; + + ws_assert(junk == NULL); + + oper = g_new(oper_t, 1); + + oper->magic = OPER_MAGIC; + oper->op = STNODE_OP_UNINITIALIZED; + oper->how = STNODE_MATCH_DEF; + oper->val1 = NULL; + oper->val2 = NULL; + + return oper; +} + +static void * +oper_dup(gconstpointer data) +{ + const oper_t *org = data; + oper_t *oper; + + oper = oper_new(NULL); + oper->op = org->op; + oper->how = org->how; + oper->val1 = stnode_dup(org->val1); + oper->val2 = stnode_dup(org->val2); + + return oper; +} + +static void +oper_free(void *value) +{ + oper_t *oper = value; + ws_assert_magic(oper, OPER_MAGIC); + + if (oper->val1) + stnode_free(oper->val1); + if (oper->val2) + stnode_free(oper->val2); + + g_free(oper); +} + +static char * +oper_todisplay(const oper_t *oper) +{ + const char *s = "<notset>"; + + switch(oper->op) { + case STNODE_OP_NOT: + s = "!"; + break; + case STNODE_OP_AND: + s = "&&"; + break; + case STNODE_OP_OR: + s = "||"; + break; + case STNODE_OP_ALL_EQ: + s = "==="; + break; + case STNODE_OP_ANY_EQ: + s = "=="; + break; + case STNODE_OP_ALL_NE: + s = "!="; + break; + case STNODE_OP_ANY_NE: + s = "~="; + break; + case STNODE_OP_GT: + s = ">"; + break; + case STNODE_OP_GE: + s = ">="; + break; + case STNODE_OP_LT: + s = "<"; + 
break; + case STNODE_OP_LE: + s = "<="; + break; + case STNODE_OP_BITWISE_AND: + s = "&"; + break; + case STNODE_OP_ADD: + s = "+"; + break; + case STNODE_OP_UNARY_MINUS: + case STNODE_OP_SUBTRACT: + s = "-"; + break; + case STNODE_OP_MULTIPLY: + s = "*"; + break; + case STNODE_OP_DIVIDE: + s = "/"; + break; + case STNODE_OP_MODULO: + s = "%"; + break; + case STNODE_OP_CONTAINS: + s = "contains"; + break; + case STNODE_OP_MATCHES: + s = "matches"; + break; + case STNODE_OP_IN: + s = "in"; + break; + case STNODE_OP_NOT_IN: + s = "not in"; + break; + case STNODE_OP_UNINITIALIZED: + s = "<uninitialized>"; + break; + } + return g_strdup(s); +} + +static char * +oper_todebug(const oper_t *oper) +{ + const char *s = "<notset>"; + + switch(oper->op) { + case STNODE_OP_NOT: + s = "TEST_NOT"; + break; + case STNODE_OP_AND: + s = "TEST_AND"; + break; + case STNODE_OP_OR: + s = "TEST_OR"; + break; + case STNODE_OP_ALL_EQ: + s = "TEST_ALL_EQ"; + break; + case STNODE_OP_ANY_EQ: + s = "TEST_ANY_EQ"; + break; + case STNODE_OP_ALL_NE: + s = "TEST_ALL_NE"; + break; + case STNODE_OP_ANY_NE: + s = "TEST_ANY_NE"; + break; + case STNODE_OP_GT: + s = "TEST_GT"; + break; + case STNODE_OP_GE: + s = "TEST_GE"; + break; + case STNODE_OP_LT: + s = "TEST_LT"; + break; + case STNODE_OP_LE: + s = "TEST_LE"; + break; + case STNODE_OP_BITWISE_AND: + s = "OP_BITWISE_AND"; + break; + case STNODE_OP_UNARY_MINUS: + s = "OP_UNARY_MINUS"; + break; + case STNODE_OP_ADD: + s = "OP_ADD"; + break; + case STNODE_OP_SUBTRACT: + s = "OP_SUBTRACT"; + break; + case STNODE_OP_MULTIPLY: + s = "OP_MULTIPLY"; + break; + case STNODE_OP_DIVIDE: + s = "OP_DIVIDE"; + break; + case STNODE_OP_MODULO: + s = "OP_MODULO"; + break; + case STNODE_OP_CONTAINS: + s = "TEST_CONTAINS"; + break; + case STNODE_OP_MATCHES: + s = "TEST_MATCHES"; + break; + case STNODE_OP_IN: + s = "TEST_IN"; + break; + case STNODE_OP_NOT_IN: + s = "TEST_NOT_IN"; + break; + case STNODE_OP_UNINITIALIZED: + s = "<uninitialized>"; + break; + } + + if 
(oper->how == STNODE_MATCH_ALL) + return g_strdup_printf("ALL %s", s); + if (oper->how == STNODE_MATCH_ANY) + return g_strdup_printf("ANY %s", s); + return g_strdup(s); +} + +static char * +oper_tostr(const void *value, bool pretty) +{ + const oper_t *oper = value; + ws_assert_magic(oper, OPER_MAGIC); + + if (pretty) + return oper_todisplay(oper); + return oper_todebug(oper); +} + +static int +num_operands(stnode_op_t op) +{ + switch(op) { + case STNODE_OP_UNINITIALIZED: + break; + case STNODE_OP_NOT: + case STNODE_OP_UNARY_MINUS: + return 1; + case STNODE_OP_AND: + case STNODE_OP_OR: + case STNODE_OP_ALL_EQ: + case STNODE_OP_ANY_EQ: + case STNODE_OP_ALL_NE: + case STNODE_OP_ANY_NE: + case STNODE_OP_GT: + case STNODE_OP_GE: + case STNODE_OP_LT: + case STNODE_OP_LE: + case STNODE_OP_BITWISE_AND: + case STNODE_OP_ADD: + case STNODE_OP_SUBTRACT: + case STNODE_OP_MULTIPLY: + case STNODE_OP_DIVIDE: + case STNODE_OP_MODULO: + case STNODE_OP_CONTAINS: + case STNODE_OP_MATCHES: + case STNODE_OP_IN: + case STNODE_OP_NOT_IN: + return 2; + } + ws_assert_not_reached(); + return -1; +} + + +void +sttype_oper_set1(stnode_t *node, stnode_op_t op, stnode_t *val1) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + + ws_assert(num_operands(op) == 1); + oper->op = op; + oper->val1 = val1; + oper->val2 = NULL; +} + +void +sttype_oper_set2(stnode_t *node, stnode_op_t op, stnode_t *val1, stnode_t *val2) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + + ws_assert(num_operands(op) == 2); + oper->op = op; + oper->val1 = val1; + oper->val2 = val2; +} + +void +sttype_oper_set1_args(stnode_t *node, stnode_t *val1) +{ + oper_t *oper; + + oper = (oper_t*)stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + + ws_assert(num_operands(oper->op) == 1); + oper->val1 = val1; + oper->val2 = NULL; +} + +void +sttype_oper_set2_args(stnode_t *node, stnode_t *val1, stnode_t *val2) +{ + oper_t *oper; + + oper = (oper_t*)stnode_data(node); + 
ws_assert_magic(oper, OPER_MAGIC); + + ws_assert(num_operands(oper->op) == 2); + oper->val1 = val1; + oper->val2 = val2; +} + +void +sttype_oper_set_op(stnode_t *node, stnode_op_t op) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + ws_assert(oper->op == STNODE_OP_UNINITIALIZED); + oper->op = op; +} + +/* Return the operator stored in a TEST/ARITHMETIC node. The oper_t lives + * in the node's data pointer, not in the stnode_t itself, so fetch it with + * stnode_data() before checking the magic and reading the operator. */ +stnode_op_t +sttype_oper_get_op(stnode_t *node) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + return oper->op; +} + +void +sttype_oper_get(stnode_t *node, stnode_op_t *p_op, stnode_t **p_val1, stnode_t **p_val2) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + + if (p_op) + *p_op = oper->op; + if (p_val1) + *p_val1 = oper->val1; + if (p_val2) + *p_val2 = oper->val2; +} + +void +sttype_test_set_match(stnode_t *node, stmatch_t how) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + oper->how = how; +} + +stmatch_t +sttype_test_get_match(stnode_t *node) +{ + oper_t *oper = stnode_data(node); + ws_assert_magic(oper, OPER_MAGIC); + return oper->how; +} + +void +sttype_register_opers(void) +{ + static sttype_t test_type = { + STTYPE_TEST, + "TEST", + oper_new, + oper_free, + oper_dup, + oper_tostr + }; + static sttype_t arithmetic_type = { + STTYPE_ARITHMETIC, + "ARITHMETIC", + oper_new, + oper_free, + oper_dup, + oper_tostr + }; + + sttype_register(&test_type); + sttype_register(&arithmetic_type); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-op.h b/epan/dfilter/sttype-op.h new file mode 100644 index 0000000..ca99981 --- /dev/null +++ b/epan/dfilter/sttype-op.h @@ -0,0 +1,43 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * 
SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_TEST_H +#define STTYPE_TEST_H + +#include "syntax-tree.h" + +void +sttype_oper_set1(stnode_t *node, stnode_op_t op, stnode_t *val1); + +void +sttype_oper_set2(stnode_t *node, stnode_op_t op, stnode_t *val1, stnode_t *val2); + +void +sttype_oper_set1_args(stnode_t *node, stnode_t *val1); + +void +sttype_oper_set2_args(stnode_t *node, stnode_t *val1, stnode_t *val2); + +void +sttype_oper_set_op(stnode_t *node, stnode_op_t op); + +stnode_op_t +sttype_oper_get_op(stnode_t *node); + +void +sttype_oper_get(stnode_t *node, stnode_op_t *p_op, stnode_t **p_val1, stnode_t **p_val2); + +void +sttype_test_set_match(stnode_t *node, stmatch_t how); + +stmatch_t +sttype_test_get_match(stnode_t *node); + +#endif diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c new file mode 100644 index 0000000..2a29287 --- /dev/null +++ b/epan/dfilter/sttype-pointer.c @@ -0,0 +1,149 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" +#include "sttype-pointer.h" + +#include "ftypes/ftypes.h" +#include "sttype-field.h" +#include "syntax-tree.h" +#include <epan/proto.h> // For BASE_NONE + +static void +sttype_fvalue_free(void *value) +{ + fvalue_t *fvalue = value; + + /* If the data was not claimed with stnode_steal_data(), free it. */ + if (fvalue) { + fvalue_free(fvalue); + } +} + +static void +pcre_free(void *value) +{ + ws_regex_t *pcre = value; + + /* If the data was not claimed with stnode_steal_data(), free it. 
*/ + if (pcre) { + ws_regex_free(pcre); + } +} + +static char * +sttype_fvalue_tostr(const void *data, bool pretty) +{ + const fvalue_t *fvalue = data; + + char *s, *repr; + + s = fvalue_to_string_repr(NULL, fvalue, FTREPR_DFILTER, BASE_NONE); + if (pretty) + repr = g_strdup(s); + else + repr = ws_strdup_printf("%s <%s>", s, fvalue_type_name(fvalue)); + g_free(s); + return repr; +} + +static char * +pcre_tostr(const void *data, bool pretty _U_) +{ + return g_strdup(ws_regex_pattern(data)); +} + +static char * +charconst_tostr(const void *data, bool pretty _U_) +{ + unsigned long num = *(const unsigned long *)data; + + if (num > 0x7f) + goto out; + + switch (num) { + case 0: return g_strdup("'\\0'"); + case '\a': return g_strdup("'\\a'"); + case '\b': return g_strdup("'\\b'"); + case '\f': return g_strdup("'\\f'"); + case '\n': return g_strdup("'\\n'"); + case '\r': return g_strdup("'\\r'"); + case '\t': return g_strdup("'\\t'"); + case '\v': return g_strdup("'\\v'"); + case '\'': return g_strdup("'\\''"); + case '\\': return g_strdup("'\\\\'"); + default: + break; + } + + if (g_ascii_isprint(num)) + return ws_strdup_printf("'%c'", (int)num); +out: + return ws_strdup_printf("'\\x%02lx'", num); +} + +/* Return the field type of a FIELD, REFERENCE or FVALUE node, or FT_NONE + * for any other node type. */ +ftenum_t +sttype_pointer_ftenum(stnode_t *node) +{ + switch (node->type->id) { + case STTYPE_FIELD: + case STTYPE_REFERENCE: + /* Field and reference nodes carry a field_t in + * node->data, not a bare header_field_info, so go + * through the accessor (it also reports raw fields + * as FT_BYTES) instead of casting the pointer. */ + return sttype_field_ftenum(node); + case STTYPE_FVALUE: + return fvalue_type_ftenum(node->data); + default: + break; + } + return FT_NONE; +} + +void +sttype_register_pointer(void) +{ + static sttype_t fvalue_type = { + STTYPE_FVALUE, + "FVALUE", + NULL, + sttype_fvalue_free, + NULL, + sttype_fvalue_tostr + }; + static sttype_t pcre_type = { + STTYPE_PCRE, + "PCRE", + NULL, + pcre_free, + NULL, + pcre_tostr + }; + static sttype_t charconst_type = { + STTYPE_CHARCONST, + "CHARCONST", + NULL, + g_free, + NULL, + charconst_tostr + }; + + sttype_register(&fvalue_type); + sttype_register(&pcre_type); + sttype_register(&charconst_type); 
+} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-pointer.h b/epan/dfilter/sttype-pointer.h new file mode 100644 index 0000000..54aa28e --- /dev/null +++ b/epan/dfilter/sttype-pointer.h @@ -0,0 +1,20 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_POINTER_H +#define STTYPE_POINTER_H + +#include "dfilter-int.h" +#include <epan/ftypes/ftypes.h> + +ftenum_t +sttype_pointer_ftenum(stnode_t *node); + +#endif diff --git a/epan/dfilter/sttype-set.c b/epan/dfilter/sttype-set.c new file mode 100644 index 0000000..35b2114 --- /dev/null +++ b/epan/dfilter/sttype-set.c @@ -0,0 +1,99 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#include "syntax-tree.h" +#include "sttype-set.h" +#include <wsutil/ws_assert.h> + +/* + * The GSList stores a list of elements of the set. Each element is represented + * by two list items: (lower, upper) in case of a value range or (value, NULL) + * if the element is not a range value. + */ + +static void +slist_stnode_free(void *data) +{ + if (data) { + stnode_free(data); + } +} + +void +set_nodelist_free(GSList *params) +{ + g_slist_free_full(params, slist_stnode_free); +} + +static void +sttype_set_free(void *value) +{ + /* If the data was not claimed with stnode_steal_data(), free it. 
*/ + if (value) { + set_nodelist_free(value); + } +} + +static char * +sttype_set_tostr(const void *data, bool pretty) +{ + const GSList* nodelist = data; + stnode_t *lower, *upper; + GString *repr = g_string_new(""); + + while (nodelist) { + lower = nodelist->data; + g_string_append(repr, stnode_tostr(lower, pretty)); + + /* Set elements are always in pairs; upper may be null. */ + nodelist = g_slist_next(nodelist); + ws_assert(nodelist); + upper = nodelist->data; + if (upper != NULL) { + g_string_append(repr, ".."); + g_string_append(repr, stnode_tostr(upper, pretty)); + } + + nodelist = g_slist_next(nodelist); + if (nodelist != NULL) { + g_string_append_c(repr, ' '); + } + } + + return g_string_free(repr, false); +} + +void +sttype_register_set(void) +{ + static sttype_t set_type = { + STTYPE_SET, + "SET", + NULL, + sttype_set_free, + NULL, + sttype_set_tostr + }; + + sttype_register(&set_type); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-set.h b/epan/dfilter/sttype-set.h new file mode 100644 index 0000000..7b2670b --- /dev/null +++ b/epan/dfilter/sttype-set.h @@ -0,0 +1,24 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_SET_H +#define STTYPE_SET_H + +#include <wireshark.h> + +#include "syntax-tree.h" + +bool +sttype_set_convert_to_range(stnode_t **node_left, stnode_t **node_right); + +void +set_nodelist_free(GSList *params); + +#endif diff --git a/epan/dfilter/sttype-slice.c b/epan/dfilter/sttype-slice.c new file mode 100644 index 0000000..a2bff76 --- /dev/null +++ b/epan/dfilter/sttype-slice.c @@ -0,0 +1,193 @@ +/* + * Wireshark - Network 
traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* The ideas in this code came from Ed Warnicke's original implementation + * of dranges for the old display filter code (Ethereal 0.8.15 and before). + * The code is different, but definitely inspired by his code. + */ + +#include "config.h" + +#include <glib.h> + +#include <epan/proto.h> +#include "drange.h" +#include "sttype-slice.h" +#include <wsutil/ws_assert.h> + +typedef struct { + uint32_t magic; + stnode_t *entity; + drange_t *drange; +} slice_t; + +#define SLICE_MAGIC 0xec0990ce + +static void * +slice_new(void *junk _U_) +{ + slice_t *slice; + + ws_assert(junk == NULL); + + slice = g_new(slice_t, 1); + + slice->magic = SLICE_MAGIC; + slice->entity = NULL; + slice->drange = NULL; + + return slice; +} + +static void * +slice_dup(gconstpointer data) +{ + const slice_t *org = data; + slice_t *slice; + + slice = slice_new(NULL); + slice->entity = stnode_dup(org->entity); + slice->drange = drange_dup(org->drange); + + return slice; +} + +static void +slice_free(void *value) +{ + slice_t *slice = value; + ws_assert_magic(slice, SLICE_MAGIC); + + if (slice->drange) + drange_free(slice->drange); + + if (slice->entity) + stnode_free(slice->entity); + + g_free(slice); +} + +static char * +slice_tostr(const void *data, bool pretty) +{ + const slice_t *slice = data; + ws_assert_magic(slice, SLICE_MAGIC); + + char *repr, *drange_str; + + drange_str = drange_tostr(slice->drange); + repr = ws_strdup_printf("%s[%s]", + stnode_tostr(slice->entity, pretty), + drange_str); + g_free(drange_str); + + return repr; +} + +void +sttype_slice_remove_drange(stnode_t *node) +{ + slice_t *slice; + + slice = stnode_data(node); + ws_assert_magic(slice, SLICE_MAGIC); + + slice->drange = NULL; +} + +drange_t * +sttype_slice_drange_steal(stnode_t *node) +{ + slice_t *slice; + drange_t *dr; + + slice = stnode_data(node); + 
ws_assert_magic(slice, SLICE_MAGIC); + dr = slice->drange; + slice->drange = NULL; + return dr; +} + +/* Set a slice */ +void +sttype_slice_set(stnode_t *node, stnode_t *entity, GSList* drange_list) +{ + slice_t *slice; + + slice = stnode_data(node); + ws_assert_magic(slice, SLICE_MAGIC); + + slice->entity = entity; + + slice->drange = drange_new_from_list(drange_list); +} + +void +sttype_slice_set1(stnode_t *node, stnode_t *entity, drange_node *rn) +{ + GSList *drange_list = g_slist_append(NULL, rn); + sttype_slice_set(node, entity, drange_list); + g_slist_free(drange_list); +} + +void +sttype_slice_set_drange(stnode_t *node, stnode_t *field, drange_t *dr) +{ + slice_t *slice; + + slice = stnode_data(node); + ws_assert_magic(slice, SLICE_MAGIC); + + slice->entity = field; + + slice->drange = dr; +} + +stnode_t * +sttype_slice_entity(stnode_t *node) +{ + slice_t *slice = node->data; + ws_assert_magic(slice, SLICE_MAGIC); + return slice->entity; +} + +drange_t * +sttype_slice_drange(stnode_t *node) +{ + slice_t *slice = node->data; + ws_assert_magic(slice, SLICE_MAGIC); + return slice->drange; +} + +void +sttype_register_slice(void) +{ + static sttype_t slice_type = { + STTYPE_SLICE, + "SLICE", + slice_new, + slice_free, + slice_dup, + slice_tostr + }; + + sttype_register(&slice_type); +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ diff --git a/epan/dfilter/sttype-slice.h b/epan/dfilter/sttype-slice.h new file mode 100644 index 0000000..005675d --- /dev/null +++ b/epan/dfilter/sttype-slice.h @@ -0,0 +1,42 @@ +/** @file + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STTYPE_SLICE_H +#define 
STTYPE_SLICE_H + +#include "syntax-tree.h" +#include "drange.h" + + +stnode_t * +sttype_slice_entity(stnode_t *node); + +drange_t * +sttype_slice_drange(stnode_t *node); + +drange_t * +sttype_slice_drange_steal(stnode_t *node); + +/* Set a range */ +void +sttype_slice_set(stnode_t *node, stnode_t *field, GSList* drange_list); + +void +sttype_slice_set1(stnode_t *node, stnode_t *field, drange_node *rn); + +void +sttype_slice_set_drange(stnode_t *node, stnode_t *field, drange_t *dr); + +/* Clear the 'drange' variable to remove responsibility for + * freeing it. */ +void +sttype_slice_remove_drange(stnode_t *node); + +#endif diff --git a/epan/dfilter/sttype-string.c b/epan/dfilter/sttype-string.c new file mode 100644 index 0000000..62fe203 --- /dev/null +++ b/epan/dfilter/sttype-string.c @@ -0,0 +1,88 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 2001 Gerald Combs + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "syntax-tree.h" +#include <wsutil/str_util.h> + +static void * +string_dup(gconstpointer string) +{ + return g_strdup(string); +} + +static void +string_free(void *value) +{ + g_free(value); +} + +static char * +string_tostr(const void *data, bool pretty _U_) +{ + return g_strdup(data); +} + +static void * +gstring_dup(gconstpointer value) +{ + const GString *gs = value; + return g_string_new_len(gs->str, gs->len); +} + +static void +gstring_free(void *value) +{ + g_string_free(value, true); +} + +static char * +gstring_tostr(const void *value, bool pretty _U_) +{ + const GString *gs = value; + return ws_escape_string_len(NULL, gs->str, gs->len, false); +} + + +void +sttype_register_string(void) +{ + static sttype_t string_type = { + STTYPE_STRING, + "STRING", + NULL, + gstring_free, + gstring_dup, + gstring_tostr + }; + + static sttype_t literal_type = { + STTYPE_LITERAL, + "LITERAL", + NULL, + string_free, + string_dup, + string_tostr + }; + + sttype_register(&string_type); + 
sttype_register(&literal_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c
new file mode 100644
index 0000000..b16d63b
--- /dev/null
+++ b/epan/dfilter/syntax-tree.c
@@ -0,0 +1,533 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include "syntax-tree.h"
+#include <wsutil/wmem/wmem.h>
+#include <wsutil/str_util.h>
+#include <wsutil/glib-compat.h>
+#include "sttype-op.h"
+#include "sttype-function.h"
+#include "dfilter-int.h"
+
+/* Keep track of sttype_t's via their sttype_id_t number */
+static sttype_t* type_list[STTYPE_NUM_TYPES];
+
+/* Magic number stamped into every stnode_t by stnode_new() and
+ * stnode_dup(), and checked through ws_assert_magic(). */
+#define STNODE_MAGIC 0xe9b00b9e
+
+/* Register every built-in node type. sttype_register() asserts that an
+ * id is registered only once, so this must not run twice. */
+void
+sttype_init(void)
+{
+	sttype_register_field();
+	sttype_register_function();
+	sttype_register_pointer();
+	sttype_register_set();
+	sttype_register_slice();
+	sttype_register_string();
+	sttype_register_opers();
+}
+/* Counterpart of sttype_init(); the registered descriptors are static
+ * objects, so there is currently nothing to release. */
+void
+sttype_cleanup(void)
+{
+	/* nothing to do */
+}
+
+/* Record a type descriptor in type_list under its id. The pointer itself
+ * is stored (no copy is made), so the descriptor must outlive its use. */
+void
+sttype_register(sttype_t *type)
+{
+	sttype_id_t type_id;
+
+	type_id = type->id;
+
+	/* Check input */
+	ws_assert(type_id < STTYPE_NUM_TYPES);
+
+	/* Don't re-register. */
+	ws_assert(type_list[type_id] == NULL);
+
+	type_list[type_id] = type;
+}
+/* Return the descriptor registered for type_id. Asserts that the id is
+ * in range and that a descriptor was registered for it. */
+static sttype_t*
+sttype_lookup(sttype_id_t type_id)
+{
+	sttype_t *result;
+
+	/* Check input */
+	ws_assert(type_id < STTYPE_NUM_TYPES);
+
+	result = type_list[type_id];
+
+	/* Check output.
*/
+	ws_assert(result != NULL);
+
+	return result;
+}
+/* Reset a node to the uninitialized state without freeing the node
+ * itself: the payload is released through func_free of its type (when
+ * both exist), the three cached strings are freed, and type, data,
+ * location and flags are reset. A node without a type must not carry
+ * data. */
+void
+stnode_clear(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	if (node->type) {
+		if (node->type->func_free && node->data) {
+			node->type->func_free(node->data);
+		}
+	}
+	else {
+		ws_assert(!node->data);
+	}
+
+	node->type = NULL;
+	node->data = NULL;
+	g_free(node->repr_display);
+	node->repr_display = NULL;
+	g_free(node->repr_debug);
+	node->repr_debug = NULL;
+	g_free(node->repr_token);
+	node->repr_token = NULL;
+	node->location.col_start = -1;
+	node->location.col_len = 0;
+	node->flags = 0;
+}
+/* Initialize a node that currently has neither type nor data (asserted).
+ * The token pointer is stored as given, not copied; stnode_clear() later
+ * passes it to g_free(). For STTYPE_UNINITIALIZED the type and data stay
+ * NULL; otherwise data goes through func_new of the type when there is
+ * one, and is stored directly when there is none. */
+void
+stnode_init(stnode_t *node, sttype_id_t type_id, void *data, char *token, df_loc_t loc)
+{
+	sttype_t *type;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+	ws_assert(!node->type);
+	ws_assert(!node->data);
+	node->repr_display = NULL;
+	node->repr_debug = NULL;
+	node->repr_token = token;
+	node->location = loc;
+	node->flags = 0;
+
+	if (type_id == STTYPE_UNINITIALIZED) {
+		node->type = NULL;
+		node->data = NULL;
+	}
+	else {
+		/* Creating an initialized node with a NULL pointer is
+		 * allowed and needs to be safe. The parser relies on that.
*/
+		type = sttype_lookup(type_id);
+		ws_assert(type);
+		node->type = type;
+		if (type->func_new) {
+			node->data = type->func_new(data);
+		}
+		else {
+			node->data = data;
+		}
+	}
+}
+/* Give an existing node a new type and payload while keeping its token
+ * text, location and flags. The token is duplicated first because
+ * stnode_clear() frees the original. */
+void
+stnode_replace(stnode_t *node, sttype_id_t type_id, void *data)
+{
+	char *token = g_strdup(node->repr_token);
+	df_loc_t loc = node->location;
+	uint16_t flags = node->flags;
+	stnode_clear(node);
+	stnode_init(node, type_id, data, token, loc);
+	node->flags = flags;
+}
+/* Allocate a zero-filled node, stamp the magic number and initialize it
+ * with stnode_init(). */
+stnode_t*
+stnode_new(sttype_id_t type_id, void *data, char *token, df_loc_t loc)
+{
+	stnode_t *node;
+
+	node = g_new0(stnode_t, 1);
+	node->magic = STNODE_MAGIC;
+
+	stnode_init(node, type_id, data, token, loc);
+
+	return node;
+}
+/* Create a node of the given type with NULL data, NULL token and the
+ * location {-1, 0}. */
+stnode_t*
+stnode_new_empty(sttype_id_t type_id)
+{
+	df_loc_t loc = {-1, 0};
+	return stnode_new(type_id, NULL, NULL, loc);
+}
+/* Return a copy of node. The token text is duplicated, the cached
+ * display/debug strings are not carried over, and the payload is
+ * duplicated with func_dup of the type when there is one; without a
+ * func_dup the copy shares the data pointer of the original. */
+stnode_t*
+stnode_dup(const stnode_t *node)
+{
+	stnode_t *new;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+	new = g_new(stnode_t, 1);
+	new->magic = STNODE_MAGIC;
+	new->repr_display = NULL;
+	new->repr_debug = NULL;
+	new->repr_token = g_strdup(node->repr_token);
+	new->location = node->location;
+	new->flags = node->flags;
+
+	new->type = node->type;
+	if (node->type == NULL)
+		new->data = NULL;
+	else if (node->type->func_dup)
+		new->data = node->type->func_dup(node->data);
+	else
+		new->data = node->data;
+
+	return new;
+}
+/* Clear the node (payload and cached strings) and free the node. */
+void
+stnode_free(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	stnode_clear(node);
+	g_free(node);
+}
+/* Return the registered name of the node type, or the fixed text
+ * UNINITIALIZED for a node without a type. */
+const char*
+stnode_type_name(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	if (node->type)
+		return node->type->name;
+	else
+		return "UNINITIALIZED";
+}
+/* Return the type id of the node, or STTYPE_UNINITIALIZED for a node
+ * without a type. */
+sttype_id_t
+stnode_type_id(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	if (node->type)
+		return node->type->id;
+	else
+		return STTYPE_UNINITIALIZED;
+}
+/* Return the payload pointer; the node keeps it. */
+void *
+stnode_data(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	return node->data;
+}
+
+GString *
+stnode_string(stnode_t *node)
+{
+
ws_assert(stnode_type_id(node) == STTYPE_STRING);
+	return stnode_data(node);
+}
+/* Detach the payload from the node and return it. The node keeps its
+ * type but its data pointer becomes NULL, so stnode_clear() will not
+ * free the returned payload. Asserts that there was a payload. */
+void *
+stnode_steal_data(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	void *data = node->data;
+	ws_assert(data);
+	node->data = NULL;
+	return data;
+}
+/* Return the token text stored in the node (may be NULL); the node
+ * keeps the string. */
+const char *
+stnode_token(stnode_t *node)
+{
+	return node->repr_token;
+}
+/* Return the location of the node by value. */
+df_loc_t
+stnode_location(stnode_t *node)
+{
+	return node->location;
+}
+
+void
+stnode_set_location(stnode_t *node, df_loc_t loc)
+{
+	node->location = loc;
+}
+/* True when at least one of the bits in flags is set on the node. */
+bool
+stnode_get_flags(stnode_t *node, uint16_t flags)
+{
+	return node->flags & flags;
+}
+/* Set the given bits on the node; bits already set are kept. */
+void
+stnode_set_flags(stnode_t *node, uint16_t flags)
+{
+	node->flags |= flags;
+}
+
+/* Finds the first and last location from a set and creates
+ * a new location from start of first (col_start) to end of
+ * last (col_start + col_len). Sets the result to dst.
+ * n1 is always taken as the first location. n2 becomes the last one
+ * only when its col_start is non-negative and greater than that of n1;
+ * otherwise the result is simply the location of n1. */
+void
+stnode_merge_location(stnode_t *dst, stnode_t *n1, stnode_t *n2)
+{
+	df_loc_t first, last;
+	df_loc_t loc2;
+
+	first = last = stnode_location(n1);
+	loc2 = stnode_location(n2);
+	if (loc2.col_start >= 0 && loc2.col_start > first.col_start)
+		last = loc2;
+	dst->location.col_start = first.col_start;
+	dst->location.col_len = last.col_start - first.col_start + last.col_len;
+}
+/* True for operator nodes, i.e. TEST and ARITHMETIC. Evaluates its
+ * argument more than once. */
+#define IS_OPERATOR(node) \
+	(stnode_type_id(node) == STTYPE_TEST || \
+	stnode_type_id(node) == STTYPE_ARITHMETIC)
+/* Build a newly allocated string for node. The text comes from
+ * func_tostr of the type, or is the placeholder FIXME when the type has
+ * none. In pretty mode that text is returned unchanged. In debug mode
+ * operators are returned unchanged and every other node is wrapped as
+ * TYPENAME(text). Dereferences node->type, so the node must be typed. */
+static char *
+_node_tostr(stnode_t *node, bool pretty)
+{
+	char *s, *repr;
+
+	if (node->type->func_tostr == NULL)
+		s = g_strdup("FIXME");
+	else
+		s = node->type->func_tostr(node->data, pretty);
+
+	if (pretty)
+		return s;
+
+	if (IS_OPERATOR(node)) {
+		repr = s;
+	}
+	else {
+		repr = ws_strdup_printf("%s(%s)", stnode_type_name(node), s);
+		g_free(s);
+	}
+
+	return repr;
+}
+/* Return a string form of node. The string is cached in the node
+ * (repr_display for pretty, repr_debug otherwise), replacing and freeing
+ * the previously cached one, so callers must not free it and an earlier
+ * result of the same kind becomes invalid after the next call. */
+const char *
+stnode_tostr(stnode_t *node, bool pretty)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	if (pretty && IS_OPERATOR(node) && node->repr_token != NULL) {
+		/* Some operators can have synonyms, like "or" and "||".
+		 * Show the user the same representation as he typed. */
+		g_free(node->repr_display);
+		node->repr_display = g_strdup(node->repr_token);
+		return node->repr_display;
+	}
+
+	char *str = _node_tostr(node, pretty);
+
+	if (pretty) {
+		g_free(node->repr_display);
+		node->repr_display = str;
+	}
+	else {
+		g_free(node->repr_debug);
+		node->repr_debug = str;
+	}
+
+	return str;
+}
+/* Format node as a one-line record (magic, type name, debug string and
+ * location). The result comes from wmem_strbuf_finalize() on a buffer
+ * created with the NULL allocator; the callers in this file release it
+ * with g_free(). */
+static char *
+sprint_node(stnode_t *node)
+{
+	wmem_strbuf_t *buf = wmem_strbuf_new(NULL, NULL);
+
+	wmem_strbuf_append_printf(buf, "{ ");
+	wmem_strbuf_append_printf(buf, "magic = 0x%"PRIx32", ", node->magic);
+	wmem_strbuf_append_printf(buf, "type = %s, ", stnode_type_name(node));
+	wmem_strbuf_append_printf(buf, "data = %s, ", stnode_todebug(node));
+	wmem_strbuf_append_printf(buf, "location = %ld:%zu",
+			node->location.col_start, node->location.col_len);
+	wmem_strbuf_append_printf(buf, " }");
+	return wmem_strbuf_finalize(buf);
+}
+/* Log a single node under the label msg. Nothing is formatted unless the
+ * level is active for this log domain; a NULL node is reported as such. */
+void
+log_node_full(enum ws_log_level level,
+			const char *file, int line, const char *func,
+			stnode_t *node, const char *msg)
+{
+	if (!ws_log_msg_is_active(WS_LOG_DOMAIN, level))
+		return;
+
+	if (node == NULL) {
+		ws_log_write_always_full(WS_LOG_DOMAIN, level,
+					file, line, func, "%s is NULL", msg);
+		return;
+	}
+
+	char *str = sprint_node(node);
+
+	ws_log_write_always_full(WS_LOG_DOMAIN, level, file, line, func,
+				"%s = %s", msg, str);
+
+	g_free(str);
+}
+/* Log an operator node: the operands are fetched with sttype_oper_get()
+ * and each present side is formatted with sprint_node(). Same level and
+ * NULL handling as log_node_full(). */
+void
+log_test_full(enum ws_log_level level,
+			const char *file, int line, const char *func,
+			stnode_t *node, const char *msg)
+{
+	if (!ws_log_msg_is_active(WS_LOG_DOMAIN, level))
+		return;
+
+	if (node == NULL) {
+		ws_log_write_always_full(WS_LOG_DOMAIN, level,
+					file, line, func, "%s is NULL", msg);
+		return;
+	}
+
+	stnode_op_t st_op;
+	stnode_t *st_lhs = NULL, *st_rhs = NULL;
+	char *lhs = NULL, *rhs = NULL;
+
+	sttype_oper_get(node, &st_op, &st_lhs, &st_rhs);
+
+	if (st_lhs)
+		lhs = sprint_node(st_lhs);
+	if (st_rhs)
+		rhs = sprint_node(st_rhs);
+
+
ws_log_write_always_full(WS_LOG_DOMAIN, level, file, line, func,
+				"%s:\n LHS = %s\n RHS = %s",
+				stnode_todebug(node),
+				lhs ? lhs : "NULL",
+				rhs ? rhs : "NULL");
+
+	g_free(lhs);
+	g_free(rhs);
+}
+/* Start one line of the tree dump: level * 2 spaces followed by the
+ * level number and a trailing space. */
+static void
+indent(wmem_strbuf_t *buf, int level)
+{
+	for (int i = 0; i < level * 2; i++) {
+		wmem_strbuf_append_c(buf, ' ');
+	}
+	wmem_strbuf_append_printf(buf, "% 2d ", level);
+}
+/* Append a textual dump of the subtree rooted at node to buf.
+ * - TEST and ARITHMETIC nodes print their debug string, then each
+ *   operand one level deeper; a right operand without a left one is
+ *   treated as impossible.
+ * - SET nodes print their element count and each element as a
+ *   lower [.. upper] pair; the list holds two entries per element.
+ * - FUNCTION nodes print their debug string, then each parameter one
+ *   level deeper.
+ * - Any other node prints its debug string.
+ * No newline is written after the last line of the subtree. */
+static void
+visit_tree(wmem_strbuf_t *buf, stnode_t *node, int level)
+{
+	stnode_t *left, *right;
+	stnode_t *lower, *upper;
+	GSList *params;
+	GSList *nodelist;
+
+	if (stnode_type_id(node) == STTYPE_TEST ||
+			stnode_type_id(node) == STTYPE_ARITHMETIC) {
+		wmem_strbuf_append_printf(buf, "%s:\n", stnode_todebug(node));
+		sttype_oper_get(node, NULL, &left, &right);
+		if (left && right) {
+			indent(buf, level + 1);
+			visit_tree(buf, left, level + 1);
+			wmem_strbuf_append_c(buf, '\n');
+			indent(buf, level + 1);
+			visit_tree(buf, right, level + 1);
+		}
+		else if (left) {
+			indent(buf, level + 1);
+			visit_tree(buf, left, level + 1);
+		}
+		else if (right) {
+			ws_assert_not_reached();
+		}
+	}
+	else if (stnode_type_id(node) == STTYPE_SET) {
+		nodelist = stnode_data(node);
+		wmem_strbuf_append_printf(buf, "SET(#%u):\n", g_slist_length(nodelist) / 2);
+		while (nodelist) {
+			indent(buf, level + 1);
+			lower = nodelist->data;
+			wmem_strbuf_append(buf, stnode_tostr(lower, false));
+			/* Set elements are always in pairs; upper may be null. */
+			nodelist = g_slist_next(nodelist);
+			ws_assert(nodelist);
+			upper = nodelist->data;
+			if (upper != NULL) {
+				wmem_strbuf_append(buf, " .. ");
+				wmem_strbuf_append(buf, stnode_tostr(upper, false));
+			}
+			nodelist = g_slist_next(nodelist);
+			if (nodelist != NULL) {
+				wmem_strbuf_append_c(buf, '\n');
+			}
+		}
+	}
+	else if (stnode_type_id(node) == STTYPE_FUNCTION) {
+		wmem_strbuf_append_printf(buf, "%s:\n", stnode_todebug(node));
+		params = sttype_function_params(node);
+		while (params) {
+			indent(buf, level + 1);
+			visit_tree(buf, params->data, level + 1);
+			if (params->next != NULL) {
+				wmem_strbuf_append_c(buf, '\n');
+			}
+			params = params->next;
+		}
+	}
+	else {
+		wmem_strbuf_append(buf, stnode_todebug(node));
+	}
+}
+/* Return the dump of the tree rooted at root as a newly built string,
+ * starting at level 0. The string comes from wmem_strbuf_finalize() on
+ * the NULL allocator; log_syntax_tree() releases it with g_free(). */
+char *
+dump_syntax_tree_str(stnode_t *root)
+{
+	wmem_strbuf_t *buf = wmem_strbuf_new(NULL, NULL);
+	indent(buf, 0);
+	visit_tree(buf, root, 0);
+	return wmem_strbuf_finalize(buf);
+}
+/* Log the tree dump under the label msg when the level is active for
+ * the dfilter domain. When the level is not active the function returns
+ * early and *cache_ptr is left untouched. */
+void
+log_syntax_tree(enum ws_log_level level, stnode_t *root, const char *msg, char **cache_ptr)
+{
+	if (!ws_log_msg_is_active(LOG_DOMAIN_DFILTER, level))
+		return;
+
+	char *str = dump_syntax_tree_str(root);
+
+	ws_log_write_always_full(LOG_DOMAIN_DFILTER, level, NULL, -1, NULL,
+				"%s:\n%s", msg, str);
+	/* With a cache_ptr the dump is handed over to the caller, who then
+	 * has to free it; without one it is released here. */
+	if (cache_ptr) {
+		*cache_ptr = str;
+	}
+	else {
+		g_free(str);
+	}
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h
new file mode 100644
index 0000000..332f6f2
--- /dev/null
+++ b/epan/dfilter/syntax-tree.h
@@ -0,0 +1,234 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SYNTAX_TREE_H
+#define SYNTAX_TREE_H
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <glib.h>
+
+#include <wsutil/ws_assert.h>
+#include <wsutil/wslog.h>
+#include <epan/ftypes/ftypes.h>
+#include "dfilter-loc.h"
+
+/** @file
+ */
+/* Identifiers of the syntax-tree node types. STTYPE_NUM_TYPES is not a
+ * type: it is the number of ids and sizes the registration table. */
+typedef enum {
+	STTYPE_UNINITIALIZED,
+	STTYPE_TEST,
+	STTYPE_LITERAL,
+	STTYPE_REFERENCE,
+	STTYPE_STRING,
+	STTYPE_CHARCONST,
+	STTYPE_FIELD,
+	STTYPE_FVALUE,
+	STTYPE_SLICE,
+	STTYPE_FUNCTION,
+	STTYPE_SET,
+	STTYPE_PCRE,
+	STTYPE_ARITHMETIC,
+	STTYPE_NUM_TYPES
+} sttype_id_t;
+/* Per-type callbacks operating on the payload (data) of a node. */
+typedef void * (*STTypeNewFunc)(void *);
+typedef void * (*STTypeDupFunc)(gconstpointer);
+typedef void (*STTypeFreeFunc)(void *);
+typedef char* (*STTypeToStrFunc)(gconstpointer, bool pretty);
+
+
+/* Type information */
+typedef struct {
+	sttype_id_t id;
+	const char *name;	/* returned by stnode_type_name() */
+	STTypeNewFunc func_new;	/* optional; turns the data given to stnode_init() into the payload */
+	STTypeFreeFunc func_free;	/* optional; releases the payload in stnode_clear() */
+	STTypeDupFunc func_dup;	/* optional; without it stnode_dup() shares the payload pointer */
+	STTypeToStrFunc func_tostr;	/* optional; without it the string form is a placeholder */
+} sttype_t;
+
+
+/* Lexical value is ambiguous (can be a protocol field or a literal). */
+#define STFLAG_UNPARSED (1 << 0)
+
+/** Node (type instance) information */
+typedef struct {
+	uint32_t magic;	/* sanity marker checked by ws_assert_magic() */
+	sttype_t *type;	/* NULL while the node is uninitialized */
+	void * data;	/* payload, interpreted by the callbacks of the type */
+	char *repr_token;	/* token text; freed when the node is cleared */
+	char *repr_display;	/* cached result of stnode_tostr(node, true) */
+	char *repr_debug;	/* cached result of stnode_tostr(node, false) */
+	df_loc_t location;
+	uint16_t flags;	/* STFLAG_* bits */
+} stnode_t;
+/* Operator codes, as reported through sttype_oper_get(). */
+typedef enum {
+	STNODE_OP_UNINITIALIZED,
+	STNODE_OP_NOT,
+	STNODE_OP_AND,
+	STNODE_OP_OR,
+	STNODE_OP_ALL_EQ,
+	STNODE_OP_ANY_EQ,
+	STNODE_OP_ALL_NE,
+	STNODE_OP_ANY_NE,
+	STNODE_OP_GT,
+	STNODE_OP_GE,
+	STNODE_OP_LT,
+	STNODE_OP_LE,
+	STNODE_OP_CONTAINS,
+	STNODE_OP_MATCHES,
+	STNODE_OP_IN,
+	STNODE_OP_NOT_IN,
+	STNODE_OP_BITWISE_AND,
+	STNODE_OP_UNARY_MINUS,
+	STNODE_OP_ADD,
+	STNODE_OP_SUBTRACT,
+	STNODE_OP_MULTIPLY,
+	STNODE_OP_DIVIDE,
+	STNODE_OP_MODULO,
+} stnode_op_t;
+
+typedef enum {
+	STNODE_MATCH_DEF,
+	STNODE_MATCH_ANY,
+	STNODE_MATCH_ALL,
+} stmatch_t;
+
+/* These are the sttype_t registration function prototypes.
*/
+void sttype_register_field(void);
+void sttype_register_function(void);
+void sttype_register_pointer(void);
+void sttype_register_set(void);
+void sttype_register_slice(void);
+void sttype_register_string(void);
+void sttype_register_opers(void);
+/* Register all built-in node types; call once before creating nodes. */
+void
+sttype_init(void);
+
+void
+sttype_cleanup(void);
+/* Register one type descriptor; the pointer is kept, not copied. */
+void
+sttype_register(sttype_t *type);
+/* Allocate and initialize a node. The token pointer is stored as given
+ * and is freed with g_free() when the node is cleared or freed. */
+stnode_t*
+stnode_new(sttype_id_t type_id, void *data, char *token, df_loc_t loc);
+/* Allocate a node of the given type with no data and no token. */
+stnode_t*
+stnode_new_empty(sttype_id_t type_id);
+/* Copy a node; the payload is copied only when the type has a dup
+ * callback, otherwise the pointer is shared. */
+stnode_t*
+stnode_dup(const stnode_t *org);
+/* Release payload and strings of a node and reset it to uninitialized;
+ * the node itself stays allocated. */
+void
+stnode_clear(stnode_t *node);
+/* Initialize a node that has no type and no data yet. */
+void
+stnode_init(stnode_t *node, sttype_id_t type_id, void *data, char *token, df_loc_t loc);
+/* Change type and payload of a node, keeping token, location and flags. */
+void
+stnode_replace(stnode_t *node, sttype_id_t type_id, void *data);
+/* Clear the node and free it. */
+void
+stnode_free(stnode_t *node);
+
+const char*
+stnode_type_name(stnode_t *node);
+
+sttype_id_t
+stnode_type_id(stnode_t *node);
+
+void *
+stnode_data(stnode_t *node);
+/* Payload of a STRING node as a GString; asserts the node type. */
+GString *
+stnode_string(stnode_t *node);
+/* Take the payload out of the node; the node no longer frees it. */
+void *
+stnode_steal_data(stnode_t *node);
+
+const char *
+stnode_token(stnode_t *node);
+
+df_loc_t
+stnode_location(stnode_t *node);
+
+void
+stnode_set_location(stnode_t *node, df_loc_t loc);
+/* True when any of the given flag bits is set on the node. */
+bool
+stnode_get_flags(stnode_t *node, uint16_t flags);
+/* Set the given flag bits on the node. */
+void
+stnode_set_flags(stnode_t *node, uint16_t flags);
+/* Store in dst the location spanning from the start of n1 to the end of
+ * n2 (or just n1 when n2 does not start after it). */
+void
+stnode_merge_location(stnode_t *dst, stnode_t *n1, stnode_t *n2);
+/* String form of a node. The result is cached in and owned by the node:
+ * do not free it, and expect it to be replaced by the next call with the
+ * same pretty value. */
+const char *
+stnode_tostr(stnode_t *node, bool pretty);
+/* User-facing (pretty) string form. */
+#define stnode_todisplay(node) stnode_tostr(node, true)
+/* Debug string form; non-operator nodes are wrapped as TYPENAME(text). */
+#define stnode_todebug(node) stnode_tostr(node, false)
+/* Backends of the log_node() and log_test() macros. */
+void
+log_node_full(enum ws_log_level level,
+			const char *file, int line, const char *func,
+			stnode_t *node, const char *msg);
+
+void
+log_test_full(enum ws_log_level level,
+			const char *file, int line, const char *func,
+			stnode_t *node, const char *msg);
+
+#ifdef WS_DEBUG
+#define log_node(node) \
+	log_node_full(LOG_LEVEL_NOISY, __FILE__, __LINE__, __func__, node, #node)
+#define log_test(node) \
+	log_test_full(LOG_LEVEL_NOISY, __FILE__,
__LINE__, __func__, node, #node)
+#define LOG_NODE(node) \
+	do { \
+		if (stnode_type_id(node) == STTYPE_TEST) \
+			log_test(node); \
+		else \
+			log_node(node); \
+	} while (0)
+#else
+#define log_node(node) (void)0
+#define log_test(node) (void)0
+#define LOG_NODE(node) (void)0
+#endif
+/* Return a multi-line, indented dump of the tree rooted at root. The
+ * caller releases the string; log_syntax_tree() does so with g_free(). */
+char *
+dump_syntax_tree_str(stnode_t *root);
+/* Log the tree dump under the label msg if the level is active. When
+ * cache_ptr is non-NULL the dump string is stored there and the caller
+ * must free it; when the level is not active *cache_ptr is not written. */
+void
+log_syntax_tree(enum ws_log_level, stnode_t *root, const char *msg, char **cache_ptr);
+/* Debug-only sanity check: asserts that obj is non-NULL and logs at
+ * error level when its magic field differs from mnum. Without WS_DEBUG
+ * the macro expands to a no-op and does not evaluate its arguments. */
+#ifdef WS_DEBUG
+#define ws_assert_magic(obj, mnum) \
+	do { \
+		ws_assert(obj); \
+		if ((obj)->magic != (mnum)) { \
+			ws_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_ERROR, \
+				__FILE__, __LINE__, __func__, \
+				"Magic num is 0x%08"PRIx32", " \
+				"but should be 0x%08"PRIx32, \
+				(obj)->magic, (mnum)); \
+		} \
+	} while(0)
+#else
+#define ws_assert_magic(obj, mnum) (void)0
+#endif
+
+#endif /* SYNTAX_TREE_H */ |