summaryrefslogtreecommitdiffstats
path: root/epan/dfilter
diff options
context:
space:
mode:
Diffstat (limited to 'epan/dfilter')
-rw-r--r--epan/dfilter/.editorconfig19
-rw-r--r--epan/dfilter/CMakeLists.txt119
-rw-r--r--epan/dfilter/dfilter-int.h192
-rw-r--r--epan/dfilter/dfilter-loc.h25
-rw-r--r--epan/dfilter/dfilter-macro.c682
-rw-r--r--epan/dfilter/dfilter-macro.h48
-rw-r--r--epan/dfilter/dfilter.c1009
-rw-r--r--epan/dfilter/dfilter.h209
-rw-r--r--epan/dfilter/dfunctions.c520
-rw-r--r--epan/dfilter/dfunctions.h40
-rw-r--r--epan/dfilter/dfvm.c1747
-rw-r--r--epan/dfilter/dfvm.h160
-rw-r--r--epan/dfilter/drange.c406
-rw-r--r--epan/dfilter/drange.h97
-rw-r--r--epan/dfilter/gencode.c897
-rw-r--r--epan/dfilter/gencode.h21
-rw-r--r--epan/dfilter/grammar.lemon559
-rw-r--r--epan/dfilter/scanner.l968
-rw-r--r--epan/dfilter/semcheck.c1599
-rw-r--r--epan/dfilter/semcheck.h31
-rw-r--r--epan/dfilter/sttype-field.c235
-rw-r--r--epan/dfilter/sttype-field.h54
-rw-r--r--epan/dfilter/sttype-function.c176
-rw-r--r--epan/dfilter/sttype-function.h32
-rw-r--r--epan/dfilter/sttype-op.c409
-rw-r--r--epan/dfilter/sttype-op.h43
-rw-r--r--epan/dfilter/sttype-pointer.c149
-rw-r--r--epan/dfilter/sttype-pointer.h20
-rw-r--r--epan/dfilter/sttype-set.c99
-rw-r--r--epan/dfilter/sttype-set.h24
-rw-r--r--epan/dfilter/sttype-slice.c193
-rw-r--r--epan/dfilter/sttype-slice.h42
-rw-r--r--epan/dfilter/sttype-string.c88
-rw-r--r--epan/dfilter/syntax-tree.c533
-rw-r--r--epan/dfilter/syntax-tree.h234
35 files changed, 11679 insertions, 0 deletions
diff --git a/epan/dfilter/.editorconfig b/epan/dfilter/.editorconfig
new file mode 100644
index 0000000..976affd
--- /dev/null
+++ b/epan/dfilter/.editorconfig
@@ -0,0 +1,19 @@
+#
+# Editor configuration
+#
+# https://editorconfig.org/
+#
+
+# C
+[*.{c,h}]
+indent_style = tab
+indent_size = tab
+tab_width = 8
+
+[drange.[ch]]
+indent_style = space
+indent_size = 4
+
+[dfunctions.[ch]]
+indent_style = space
+indent_size = 4
diff --git a/epan/dfilter/CMakeLists.txt b/epan/dfilter/CMakeLists.txt
new file mode 100644
index 0000000..c21129f
--- /dev/null
+++ b/epan/dfilter/CMakeLists.txt
@@ -0,0 +1,119 @@
+# CMakeLists.txt
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 1998 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+
+set(DFILTER_PUBLIC_HEADERS
+ dfilter.h
+ drange.h
+)
+
+#
+# Every header that belongs to the dfilter library. The list is only handed
+# to add_library() so that IDE generators show the headers in the project.
+# The public headers come in through DFILTER_PUBLIC_HEADERS, so they are not
+# repeated below.
+#
+set(DFILTER_HEADER_FILES
+	${DFILTER_PUBLIC_HEADERS}
+	dfilter-int.h
+	dfilter-loc.h
+	dfilter-macro.h
+	dfunctions.h
+	dfvm.h
+	gencode.h
+	semcheck.h
+	sttype-field.h
+	sttype-function.h
+	sttype-pointer.h
+	sttype-set.h
+	sttype-slice.h
+	sttype-op.h
+	syntax-tree.h
+)
+
+# Hand-written C sources. This list is also the one that gets the -Werror
+# flags below and the one passed to CHECKAPI().
+set(DFILTER_NONGENERATED_FILES
+ dfilter.c
+ dfilter-macro.c
+ dfunctions.c
+ dfvm.c
+ drange.c
+ gencode.c
+ semcheck.c
+ sttype-field.c
+ sttype-function.c
+ sttype-pointer.c
+ sttype-set.c
+ sttype-slice.c
+ sttype-string.c
+ sttype-op.c
+ syntax-tree.c
+)
+source_group(dfilter FILES ${DFILTER_NONGENERATED_FILES})
+
+# DFILTER_FILES starts as the hand-written sources; the two project macros
+# below are given the variable name, presumably so they can append the
+# sources generated from scanner.l and grammar.lemon (defined elsewhere).
+set(DFILTER_FILES ${DFILTER_NONGENERATED_FILES})
+
+add_lex_files(LEX_FILES DFILTER_FILES
+ scanner.l
+)
+
+add_lemon_files(LEMON_FILES DFILTER_FILES
+ grammar.lemon
+)
+
+#
+# We don't enable -Werror on generated code to make the build a
+# little less fragile when configured warnings change.
+#
+set_source_files_properties(
+ ${DFILTER_NONGENERATED_FILES}
+ PROPERTIES
+ COMPILE_FLAGS "${WERROR_COMMON_FLAGS}"
+)
+
+# OBJECT library: the sources are compiled here but not archived or linked
+# into a library of their own by this file.
+add_library(dfilter OBJECT
+
+ #Included so that Visual Studio can properly put header files in solution
+ ${DFILTER_HEADER_FILES}
+
+ ${DFILTER_FILES}
+)
+
+# The binary dir is on the include path because the generated scanner and
+# grammar headers are included by the sources (e.g. "scanner_lex.h",
+# "grammar.h" in dfilter.c).
+target_include_directories(dfilter
+ PRIVATE
+ ${CMAKE_CURRENT_BINARY_DIR}
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_SOURCE_DIR}/epan
+ ${CMAKE_SOURCE_DIR}/tools/lemon
+)
+
+set_target_properties(dfilter PROPERTIES
+ FOLDER "Libs/epan/dfilter"
+ COMPILE_DEFINITIONS "WS_BUILD_DLL"
+)
+
+# Only the public headers are installed, and only when the "Development"
+# component is requested explicitly (EXCLUDE_FROM_ALL).
+install(FILES ${DFILTER_PUBLIC_HEADERS}
+ DESTINATION "${PROJECT_INSTALL_INCLUDEDIR}/epan/dfilter"
+ COMPONENT "Development"
+ EXCLUDE_FROM_ALL
+)
+
+# Project-defined API check, run over the hand-written sources only.
+CHECKAPI(
+ NAME
+ dfilter
+ SWITCHES
+ SOURCES
+ ${DFILTER_NONGENERATED_FILES}
+)
+
+#
+# Editor modelines - https://www.wireshark.org/tools/modelines.html
+#
+# Local variables:
+# c-basic-offset: 8
+# tab-width: 8
+# indent-tabs-mode: t
+# End:
+#
+# vi: set shiftwidth=8 tabstop=8 noexpandtab:
+# :indentSize=8:tabSize=8:noTabs=false:
+#
diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h
new file mode 100644
index 0000000..7f0be87
--- /dev/null
+++ b/epan/dfilter/dfilter-int.h
@@ -0,0 +1,192 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DFILTER_INT_H
+#define DFILTER_INT_H
+
+#include "dfilter.h"
+#include "syntax-tree.h"
+
+#include <epan/proto.h>
+#include <stdio.h>
+
+/*
+ * A field value held as a filter reference. Instances are created by
+ * reference_new() from a field_info and released with reference_free().
+ */
+typedef struct {
+ const header_field_info *hfinfo;
+ fvalue_t *value;
+ int proto_layer_num;
+} df_reference_t;
+
+/*
+ * A cell wrapping a GPtrArray. df_cell_append() adds an fvalue_t to it and
+ * df_cell_array() exposes the contents as an array of fvalue_t pointers.
+ */
+typedef struct {
+ GPtrArray *array;
+} df_cell_t;
+
+/* Cursor over a cell: set up by df_cell_iter_init(), advanced by df_cell_iter_next(). */
+typedef struct {
+ GPtrArray *ptr;
+ unsigned idx;
+} df_cell_iter_t;
+
+/* Passed back to user */
+/*
+ * Several members (insns, deprecated, warnings, expanded_text, references,
+ * raw_references) have same-named counterparts in the compile-time dfwork_t.
+ * NOTE(review): presumably they are handed over from dfwork_t on success;
+ * confirm in dfilter.c.
+ */
+struct epan_dfilter {
+ GPtrArray *insns;
+ unsigned num_registers; /* presumably the element count of 'registers' - TODO confirm */
+ df_cell_t *registers;
+ int *interesting_fields;
+ int num_interesting_fields;
+ GPtrArray *deprecated;
+ GSList *warnings;
+ char *expanded_text;
+ GHashTable *references;
+ GHashTable *raw_references;
+ char *syntax_tree_str;
+ /* Used to pass arguments to functions. List of Lists (list of registers). */
+ GSList *function_stack;
+ GSList *set_stack;
+};
+
+/*
+ * Common prefix of dfsyntax_t and dfwork_t. dfilter_vfail() receives its
+ * state as a void pointer and casts it to dfstate_t * to reach the error
+ * slot, which is why 'error' has to be the first member of both structs.
+ */
+typedef struct {
+ df_error_t *error;
+ /* more fields. */
+} dfstate_t;
+
+/*
+ * State for first stage of compilation (parsing).
+ */
+typedef struct {
+ df_error_t *error; /* Must be first struct field. */
+ unsigned flags;
+ stnode_t *st_root;
+ GPtrArray *deprecated;
+ stnode_t *lval;
+ GString *quoted_string;
+ bool raw_string;
+ df_loc_t string_loc;
+ df_loc_t location;
+} dfsyntax_t;
+
+/*
+ * State for second stage of compilation (semantic check and code generation).
+ */
+typedef struct {
+ df_error_t *error; /* Must be first struct field. */
+ unsigned flags;
+ stnode_t *st_root;
+ unsigned field_count;
+ GPtrArray *insns;
+ GHashTable *loaded_fields;
+ GHashTable *loaded_raw_fields;
+ GHashTable *interesting_fields;
+ int next_insn_id;
+ int next_register;
+ GPtrArray *deprecated;
+ GHashTable *references; /* hfinfo -> pointer to array of references */
+ GHashTable *raw_references; /* hfinfo -> pointer to array of references */
+ char *expanded_text;
+ wmem_allocator_t *dfw_scope; /* Because we use exceptions for error handling sometimes
+ cleaning up memory allocations is inconvenient. Memory
+ allocated from this pool will be freed when the dfwork_t
+ context is destroyed. */
+ GSList *warnings;
+} dfwork_t;
+
+/* Constructor/Destructor prototypes for Lemon Parser */
+void *DfilterAlloc(void *(*)(size_t));
+
+void DfilterFree(void *, void (*)(void *));
+
+void Dfilter(void *, int, stnode_t *, dfsyntax_t *);
+
+/* Return value for error in scanner. */
+#define SCAN_FAILED -1 /* not 0, as that means end-of-input */
+
+/*
+ * Error reporting. 'state' is a dfsyntax_t * or dfwork_t * passed as void *;
+ * the implementation accesses it through dfstate_t. Only the first error is
+ * kept: if an error is already recorded these calls leave it untouched.
+ */
+void
+dfilter_vfail(void *state, int code, df_loc_t err_loc,
+ const char *format, va_list args);
+
+void
+dfilter_fail(void *state, int code, df_loc_t err_loc,
+ const char *format, ...) G_GNUC_PRINTF(4, 5);
+
+/* Same as dfilter_fail(), then throws TypeError; never returns. */
+WS_NORETURN
+void
+dfilter_fail_throw(void *state, int code, df_loc_t err_loc,
+ const char *format, ...) G_GNUC_PRINTF(4, 5);
+
+/* Overwrites the location of the already recorded error; asserts that one exists. */
+void
+dfw_set_error_location(dfwork_t *dfw, df_loc_t err_loc);
+
+void
+add_deprecated_token(dfsyntax_t *dfs, const char *token);
+
+void
+add_compile_warning(dfwork_t *dfw, const char *format, ...);
+
+void
+free_deprecated(GPtrArray *deprecated);
+
+void
+DfilterTrace(FILE *TraceFILE, char *zTracePrompt);
+
+/*
+ * Looks 'name' up as a registered field name and then as a field alias.
+ * 'dfs' may be NULL (dfilter-macro.c calls it that way).
+ */
+header_field_info *
+dfilter_resolve_unparsed(dfsyntax_t *dfs, const char *name);
+
+WS_RETNONNULL fvalue_t*
+dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
+ bool allow_partial_value, header_field_info *hfinfo_value_string);
+
+WS_RETNONNULL fvalue_t *
+dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
+ header_field_info *hfinfo_value_string);
+
+WS_RETNONNULL fvalue_t *
+dfilter_fvalue_from_charconst(dfwork_t *dfw, ftenum_t ftype, stnode_t *st);
+
+const char *tokenstr(int token);
+
+df_reference_t *
+reference_new(const field_info *finfo, bool raw);
+
+void
+reference_free(df_reference_t *ref);
+
+/* Cell (df_cell_t) accessors. */
+void
+df_cell_append(df_cell_t *rp, fvalue_t *fv);
+
+GPtrArray *
+df_cell_ref(df_cell_t *rp);
+
+/* Direct access to the cell's underlying GPtrArray. */
+#define df_cell_ptr(rp) ((rp)->array)
+
+size_t
+df_cell_size(const df_cell_t *rp);
+
+fvalue_t **
+df_cell_array(const df_cell_t *rp);
+
+bool
+df_cell_is_empty(const df_cell_t *rp);
+
+bool
+df_cell_is_null(const df_cell_t *rp);
+
+/* Pass true to free the array contents when the cell is cleared. */
+void
+df_cell_init(df_cell_t *rp, bool free_seg);
+
+void
+df_cell_clear(df_cell_t *rp);
+
+/* Cell must not be cleared while iter is alive. */
+void
+df_cell_iter_init(df_cell_t *rp, df_cell_iter_t *iter);
+
+fvalue_t *
+df_cell_iter_next(df_cell_iter_t *iter);
+
+
+#endif
diff --git a/epan/dfilter/dfilter-loc.h b/epan/dfilter/dfilter-loc.h
new file mode 100644
index 0000000..adf663e
--- /dev/null
+++ b/epan/dfilter/dfilter-loc.h
@@ -0,0 +1,25 @@
+
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DFILTER_LOC_H
+#define DFILTER_LOC_H
+
+#include <stddef.h>
+
+/*
+ * A location inside the filter text, used when reporting errors.
+ * NOTE(review): the member names suggest a start column and a length in
+ * characters; confirm against the scanner.
+ */
+typedef struct _dfilter_loc {
+ long col_start;
+ size_t col_len;
+} df_loc_t;
+
+/* Defined in dfilter.c as {-1, 0}. */
+extern df_loc_t loc_empty;
+
+/* Value to pass when no location is available. */
+#define DFILTER_LOC_EMPTY loc_empty
+
+#endif
diff --git a/epan/dfilter/dfilter-macro.c b/epan/dfilter/dfilter-macro.c
new file mode 100644
index 0000000..1e479be
--- /dev/null
+++ b/epan/dfilter/dfilter-macro.c
@@ -0,0 +1,682 @@
+/* dfilter-macro.c
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+#include "dfilter-macro.h"
+
+#ifdef DUMP_DFILTER_MACRO
+#include <stdio.h>
+#endif
+#include <string.h>
+
+#include "dfilter-int.h"
+#include <ftypes/ftypes.h>
+#include <epan/uat-int.h>
+#include <epan/proto.h>
+#include <wsutil/glib-compat.h>
+
+
+/* UAT that stores the macros; created in dfilter_macro_init(). */
+static uat_t* dfilter_macro_uat = NULL;
+/* Macro records and their count; both addresses are handed to uat_new(). */
+static dfilter_macro_t* macros = NULL;
+static unsigned num_macros;
+
+/*
+ * Uncomment the define below to have DUMP_MACRO() print the macro record;
+ * otherwise DUMP_MACRO() expands to nothing.
+ */
+/* #define DUMP_DFILTER_MACRO */
+#ifdef DUMP_DFILTER_MACRO
+void dump_dfilter_macro_t(const dfilter_macro_t *m, const char *function, const char *file, int line);
+#define DUMP_MACRO(m) dump_dfilter_macro_t(m, G_STRFUNC, __FILE__, __LINE__)
+#else
+#define DUMP_MACRO(m)
+#endif
+
+/*
+ * Expand a single macro invocation.
+ *
+ * name   name of the macro to look up among the usable entries of 'macros'.
+ * args   NULL-terminated array of argument strings (may be NULL).
+ * error  if not NULL, receives a newly created df_error_t on failure.
+ *
+ * Returns the expanded text, allocated with wmem_strdup(NULL, ...) (the
+ * caller releases it with wmem_free(NULL, ...)), or NULL if the macro does
+ * not exist, the argument count does not match, or the macro body refers to
+ * an argument position outside the supplied arguments.
+ */
+static char* dfilter_macro_resolve(char* name, char** args, df_error_t** error) {
+	GString* text;
+	int argc = 0;
+	dfilter_macro_t* m = NULL;
+	int* arg_pos_p;
+	char** parts;
+	char* ret;
+	unsigned i;
+
+	for (i = 0; i < num_macros; i++) {
+		dfilter_macro_t* c = &(macros[i]);
+		if ( c->usable && g_str_equal(c->name,name) ) {
+			m = c;
+			break;
+		}
+	}
+
+	if (!m) {
+		if (error != NULL)
+			*error = df_error_new_printf(DF_ERROR_GENERIC, NULL, "macro '%s' does not exist", name);
+		return NULL;
+	}
+
+	DUMP_MACRO(m);
+
+	if (args) {
+		while(args[argc]) argc++;
+	}
+
+	if (argc != m->argc) {
+		if (error != NULL) {
+			*error = df_error_new_printf(DF_ERROR_GENERIC, NULL,
+				"wrong number of arguments for macro '%s', expecting %d instead of %d",
+				name, m->argc, argc);
+		}
+		return NULL;
+	}
+
+	arg_pos_p = m->args_pos;
+	parts = m->parts;
+
+	/* The first part is the text before the first argument reference. */
+	text = g_string_new(*(parts++));
+
+	if (args) {
+		/* Every further part is preceded by one argument. */
+		while (*parts) {
+			int pos = *(arg_pos_p++);
+
+			/*
+			 * The stored position is the number written in the
+			 * macro text minus one, so "$0" is stored as -1.
+			 * Never index outside the argument array.
+			 */
+			if (pos < 0 || pos >= argc) {
+				if (error != NULL) {
+					*error = df_error_new_printf(DF_ERROR_GENERIC, NULL,
+						"macro '%s' refers to an invalid argument number",
+						name);
+				}
+				g_string_free(text,true);
+				return NULL;
+			}
+
+			g_string_append_printf(text,"%s%s",
+				args[pos],
+				*(parts++));
+		}
+	}
+
+	ret = wmem_strdup(NULL, text->str);
+
+	g_string_free(text,true);
+
+	return ret;
+}
+
+/*
+ * Start points to the first character after "${".
+ *
+ * Returns true if the name that follows (up to the first '#' or '}', with
+ * an optional leading '@' skipped) resolves to a registered field whose type
+ * is neither FT_PROTOCOL nor FT_NONE; such a "${...}" is a field reference
+ * and must not be expanded as a macro. Returns false otherwise, including
+ * when no terminator is found.
+ *
+ * The input string is not modified; the name is looked up from a copy.
+ */
+static bool start_is_field_reference(const char *start)
+{
+	const char *end;
+	char *name;
+	const header_field_info *hfinfo;
+	bool is_reference;
+
+	/*
+	 * Stop at whichever terminator comes first, so that a '#' belonging
+	 * to a later "${...}" is not mistaken for the end of this name.
+	 */
+	end = strpbrk(start, "#}");
+	if (end == NULL)
+		return false;
+
+	if (start[0] == '@')
+		start++;
+
+	/* Search for name in registered fields. */
+	name = g_strndup(start, (size_t)(end - start));
+	hfinfo = dfilter_resolve_unparsed(NULL, name);
+
+	if (hfinfo == NULL) {
+		is_reference = false;
+	} else if (hfinfo->type == FT_PROTOCOL || hfinfo->type == FT_NONE) {
+		/* Ignore these? */
+		is_reference = false;
+	} else {
+		/* It's a field reference so ignore it as a macro. */
+		ws_noisy("Ignore field reference ${%s}", name);
+		is_reference = true;
+	}
+
+	g_free(name);
+	return is_reference;
+}
+
+/*
+ * Expand every "${name}" / "${name:arg1;arg2}" macro invocation in 'text'.
+ *
+ * The text is scanned once with a small state machine. If at least one macro
+ * was expanded, the result is scanned again (depth + 1) so that macros
+ * produced by an expansion are expanded too; the recursion is refused once
+ * depth exceeds 31. A "${...}" that start_is_field_reference() recognises is
+ * copied through unchanged.
+ *
+ * text   NUL-terminated filter text.
+ * depth  current recursion depth (0 for the initial call).
+ * error  if not NULL, set to NULL on entry and to a new df_error_t on failure.
+ *
+ * Returns a string allocated with wmem (NULL scope), or NULL on error.
+ */
+static char* dfilter_macro_apply_recurse(const char* text, unsigned depth, df_error_t** error) {
+ /* OUTSIDE: plain text; STARTING: just saw '$'; NAME: inside "${name"; ARGS: after the ':' */
+ enum { OUTSIDE, STARTING, NAME, ARGS } state = OUTSIDE;
+ GString* out;
+ GString* name = NULL;
+ GString* arg = NULL;
+ GPtrArray* args = NULL;
+ char c;
+ const char* r = text;
+ bool changed = false;
+
+ if ( depth > 31) {
+ if (error != NULL)
+ *error = df_error_new_msg("too much nesting in macros");
+ return NULL;
+ }
+
+/* Free a GString (if set) and reset the variable. */
+#define FGS(n) if (n) g_string_free(n,true); n = NULL
+
+/* Release the per-invocation scratch state: name, current arg and collected args. */
+#define FREE_ALL() \
+ do { \
+ FGS(name); \
+ FGS(arg); \
+ if (args) { \
+ while(args->len) { void* p = g_ptr_array_remove_index_fast(args,0); g_free(p); } \
+ g_ptr_array_free(args,true); \
+ args = NULL; \
+ } \
+ } while(0)
+
+ if (error != NULL)
+ *error = NULL;
+ out = g_string_sized_new(64);
+
+ while(1) {
+ c = *r++;
+
+ switch(state) {
+ case OUTSIDE: {
+ switch(c) {
+ case '\0': {
+ goto finish;
+ } case '$': {
+ /* Do not emit the '$' yet; it may start a macro. */
+ state = STARTING;
+ break;
+ } default: {
+ g_string_append_c(out,c);
+ break;
+ }
+ }
+ break;
+ } case STARTING: {
+ switch (c) {
+ case '{': {
+ if (start_is_field_reference(r)) {
+ /* We have a field reference, preserve the name with ${} and bail. */
+ g_string_append(out,"${");
+ state = OUTSIDE;
+ break;
+ }
+
+ /* We have a macro, continue. */
+ args = g_ptr_array_new();
+ arg = g_string_sized_new(32);
+ name = g_string_sized_new(32);
+
+ state = NAME;
+
+ break;
+ } case '\0': {
+ /* A trailing lone '$' is kept as is. */
+ g_string_append_c(out,'$');
+
+ goto finish;
+ } default: {
+ /* '$' not followed by '{': emit both characters unchanged. */
+ g_string_append_c(out,'$');
+ g_string_append_c(out,c);
+
+ state = OUTSIDE;
+
+ break;
+ }
+ }
+ break;
+ } case NAME: {
+ if ( g_ascii_isalnum(c) || c == '_' || c == '-' || c == '.' ) {
+ g_string_append_c(name,c);
+ } else if ( c == ':') {
+ state = ARGS;
+ } else if ( c == '}') {
+ /* Invocation without arguments: args holds only the NULL terminator. */
+ char* resolved;
+
+ g_ptr_array_add(args,NULL);
+
+ resolved = dfilter_macro_resolve(name->str, (char**)args->pdata, error);
+ if (resolved == NULL)
+ goto on_error;
+
+ changed = true;
+
+ g_string_append(out,resolved);
+ wmem_free(NULL, resolved);
+
+ FREE_ALL();
+
+ state = OUTSIDE;
+ } else if ( c == '\0') {
+ if (error != NULL)
+ *error = df_error_new_msg("end of filter in the middle of a macro expression");
+ goto on_error;
+ } else {
+ if (error != NULL)
+ *error = df_error_new_msg("invalid character in macro name");
+ goto on_error;
+ }
+ break;
+ } case ARGS: {
+ switch(c) {
+ case '\0': {
+ if (error != NULL)
+ *error = df_error_new_msg("end of filter in the middle of a macro expression");
+ goto on_error;
+ } case ';': {
+ /* ';' ends the current argument; its buffer is handed over to args. */
+ g_ptr_array_add(args,g_string_free(arg,false));
+
+ arg = g_string_sized_new(32);
+ break;
+ } case '\\': {
+ /* Backslash: take the next character literally. */
+ c = *r++;
+ if (c) {
+ g_string_append_c(arg,c);
+ break;
+ } else {
+ if (error != NULL)
+ *error = df_error_new_msg("end of filter in the middle of a macro expression");
+ goto on_error;
+ }
+ } default: {
+ g_string_append_c(arg,c);
+ break;
+ } case '}': {
+ /* End of the invocation: store the last argument and terminate the list. */
+ char* resolved;
+ g_ptr_array_add(args,g_string_free(arg,false));
+ g_ptr_array_add(args,NULL);
+
+ /* The buffer now belongs to args; avoid a double free in FREE_ALL(). */
+ arg = NULL;
+
+ resolved = dfilter_macro_resolve(name->str, (char**)args->pdata, error);
+ if (resolved == NULL)
+ goto on_error;
+
+ changed = true;
+
+ g_string_append(out,resolved);
+ wmem_free(NULL, resolved);
+
+ FREE_ALL();
+
+ state = OUTSIDE;
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+finish:
+ {
+ FREE_ALL();
+
+ if (changed) {
+ /* Something was expanded: rescan the result for further macros. */
+ char* resolved = dfilter_macro_apply_recurse(out->str, depth + 1, error);
+ g_string_free(out,true);
+ return resolved;
+ } else {
+ char* out_str = wmem_strdup(NULL, out->str);
+ g_string_free(out,true);
+ return out_str;
+ }
+ }
+on_error:
+ {
+ FREE_ALL();
+ if (error != NULL) {
+ /* Make sure the caller always gets an error object on failure. */
+ if (*error == NULL)
+ *error = df_error_new_msg("unknown error in macro expression");
+ }
+ g_string_free(out,true);
+ return NULL;
+ }
+}
+
+/*
+ * Public entry point for macro expansion: expands all macros in 'text',
+ * starting the recursive expansion at nesting depth 0. Returns the expanded
+ * string, or NULL on failure (with *error set when 'error' is not NULL).
+ */
+char* dfilter_macro_apply(const char* text, df_error_t** error) {
+	const unsigned initial_depth = 0;
+
+	return dfilter_macro_apply_recurse(text, initial_depth, error);
+}
+
+/*
+ * UAT update callback: (re)build the parsed form of a macro from m->text.
+ *
+ * m->priv becomes a writable copy of the text in which every "$<digits>"
+ * reference is overwritten with NUL bytes. m->parts becomes a NULL-terminated
+ * array of pointers to the text segments inside m->priv, and m->args_pos
+ * holds, for each reference, the referenced number minus one. m->argc is the
+ * highest number referenced. A backslash copies the following character
+ * verbatim (the backslash itself is kept).
+ *
+ * Always returns true and leaves *error NULL.
+ *
+ * NOTE(review): "$0" is stored as position -1; users of args_pos have to
+ * reject negative positions.
+ */
+static bool macro_update(void* mp, gchar** error) {
+	dfilter_macro_t* m = (dfilter_macro_t*)mp;
+	GPtrArray* parts;
+	GArray* args_pos;
+	const char* r;
+	char* w;
+	char* part;
+	int argc = 0;
+
+	DUMP_MACRO(m);
+
+	*error = NULL;
+
+	/* Invalidate the display filter in case it's in use */
+	if (dfilter_macro_uat && dfilter_macro_uat->post_update_cb)
+		dfilter_macro_uat->post_update_cb();
+
+	parts = g_ptr_array_new();
+	args_pos = g_array_new(false,false,sizeof(int));
+
+	/*
+	 * Release the buffer of a previous update before replacing it; the
+	 * old m->parts, which points into it, is replaced below as well.
+	 */
+	g_free(m->priv);
+	m->priv = part = w = g_strdup(m->text);
+	r = m->text;
+	g_ptr_array_add(parts,part);
+
+	/* r reads m->text, w writes m->priv; both advance in lockstep. */
+	while (r && *r) {
+
+		switch (*r) {
+			default:
+				*(w++) = *(r++);
+				break;
+			case '\0':
+				/* Not reachable: the loop condition already stops at NUL. */
+				*w = *r;
+				goto done;
+			case '\\':
+				*(w++) = *(r++);
+				if(*r)
+					*(w++) = *(r++);
+				break;
+			case '$': {
+				int cnt = 0;
+				int arg_pos = 0;
+				/* Consume the decimal digits following the '$'. */
+				do {
+					char c = *(r+1);
+					if (c >= '0' && c <= '9') {
+						cnt++;
+						r++;
+						*(w++) = '\0';
+						arg_pos *= 10;
+						arg_pos += c - '0';
+					} else {
+						break;
+					}
+				} while(*r);
+
+				if (cnt) {
+					/* Blank the last digit and start a new part after it. */
+					*(w++) = '\0';
+					r++;
+					argc = argc < arg_pos ? arg_pos : argc;
+					arg_pos--;
+					g_array_append_val(args_pos,arg_pos);
+					g_ptr_array_add(parts,w);
+				} else {
+					/* A '$' without digits is ordinary text. */
+					*(w++) = *(r++);
+				}
+				break;
+			}
+		}
+
+	}
+
+done:
+	g_ptr_array_add(parts,NULL);
+
+	g_free(m->parts);
+	m->parts = (char **)g_ptr_array_free(parts, false);
+
+	g_free(m->args_pos);
+	m->args_pos = (int*)(void *)g_array_free(args_pos, false);
+
+	m->argc = argc;
+
+	m->usable = true;
+
+	DUMP_MACRO(m);
+
+	return true;
+}
+
+/*
+ * UAT free callback: releases everything owned by a macro record. The
+ * record itself is not freed here.
+ */
+static void macro_free(void* r) {
+	dfilter_macro_t* rec = (dfilter_macro_t*)r;
+
+	DUMP_MACRO(r);
+
+	/* Parsed form first, then the strings it was built from. */
+	g_free(rec->args_pos);
+	g_free(rec->parts);
+	g_free(rec->priv);
+	g_free(rec->text);
+	g_free(rec->name);
+}
+
+/*
+ * UAT copy callback: make 'dest' a deep copy of the macro record 'orig'.
+ *
+ * name and text are duplicated; usable and argc are copied. If the source
+ * has been parsed (m->parts is set), priv, parts and args_pos are cloned so
+ * that the new parts array points into the new priv buffer. Otherwise those
+ * three members are set to NULL. Returns 'dest'.
+ */
+static void* macro_copy(void* dest, const void* orig, size_t len _U_) {
+	dfilter_macro_t* d = (dfilter_macro_t*)dest;
+	const dfilter_macro_t* m = (const dfilter_macro_t*)orig;
+
+	DUMP_MACRO(m);
+
+	d->name = g_strdup(m->name);
+	d->text = g_strdup(m->text);
+	d->usable = m->usable;
+	d->argc = m->argc;
+
+	if (m->parts) {
+		unsigned nparts = 0;
+		unsigned i;
+
+		/*
+		 * Copy the contents of m->priv (a "cooked" version
+		 * of m->text) into d->priv.
+		 *
+		 * First we clone m->text into d->priv, this gets
+		 * us a NUL terminated string of the proper length.
+		 *
+		 * Then we copy the bytes of m->priv over it. Since
+		 * m->priv contains internal ASCII NULs we use the
+		 * length of m->text to bound the copy.
+		 */
+
+		d->priv = g_strdup(m->text);
+		if (d->priv != NULL && m->priv != NULL)
+			memcpy(d->priv, m->priv, strlen(m->text));
+
+		/*
+		 * The m->parts array contains pointers into various
+		 * sections of m->priv, the first one being m->priv
+		 * itself. Since it's an argv style array of pointers,
+		 * the array is one larger than the number of parts to
+		 * hold the final NULL terminator.
+		 *
+		 * Clone the array, then rebase every pointer so that
+		 * it points to the same offset inside d->priv.
+		 */
+
+		while (m->parts[nparts])
+			nparts++;
+		d->parts = (char **)g_memdup2(m->parts,(nparts+1)*(unsigned)sizeof(void*));
+		for (i = 0; i < nparts; i++)
+			d->parts[i] = (char *)d->priv + (m->parts[i] - m->parts[0]);
+
+		/*
+		 * Clone the contents of m->args_pos into d->args_pos.
+		 * There is one argument position between two
+		 * consecutive parts, i.e. one less than parts.
+		 */
+
+		d->args_pos = (int *)g_memdup2(m->args_pos,
+			(nparts > 0 ? nparts - 1 : 0)*(unsigned)sizeof(int));
+	} else {
+		/* Not parsed yet: leave no dangling or shared pointers behind. */
+		d->priv = NULL;
+		d->parts = NULL;
+		d->args_pos = NULL;
+	}
+
+	DUMP_MACRO(d);
+
+	return d;
+}
+
+/*
+ * UAT field check callback for the macro name.
+ *
+ * A name is accepted when it is not empty, consists only of ASCII
+ * alphanumerics and '_', and - unless it is the record's current name -
+ * is not already used by another macro. On rejection *error receives a
+ * newly allocated message and false is returned.
+ */
+static bool macro_name_chk(void *mp, const char *in_name, unsigned name_len,
+		const void *u1 _U_, const void *u2 _U_, char **error) {
+	dfilter_macro_t* rec = (dfilter_macro_t*)mp;
+	unsigned idx;
+
+	if (name_len == 0) {
+		*error = g_strdup("invalid name");
+		return false;
+	}
+
+	for (idx = 0; idx < name_len; idx++) {
+		const char ch = in_name[idx];
+
+		if (ch != '_' && !g_ascii_isalnum(ch)) {
+			*error = g_strdup("invalid char in name");
+			return false;
+		}
+	}
+
+	/* Keeping the current name is always fine; nothing more to check. */
+	if (rec->name && g_strcmp0(rec->name, in_name) == 0)
+		return true;
+
+	/* When loading (!rec->name) or when adding/changing an item with a
+	 * different name, check for uniqueness. NOTE: if a duplicate already
+	 * exists (because the user manually edited the file), then this will
+	 * not trigger a warning. */
+	for (idx = 0; idx < num_macros; idx++) {
+		/* This a string field which is always NUL-terminated,
+		 * so no need to check name_len. */
+		if (g_strcmp0(in_name, macros[idx].name) == 0) {
+			*error = ws_strdup_printf("macro '%s' already exists",
+					in_name);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+UAT_CSTRING_CB_DEF(macro,name,dfilter_macro_t)
+UAT_CSTRING_CB_DEF(macro,text,dfilter_macro_t)
+
+/*
+ * Creates the "Display Filter Macros" UAT, backed by the file named
+ * DFILTER_MACRO_FILENAME, with two string fields per record (name and text)
+ * and macro_copy/macro_update/macro_free as record callbacks. The record
+ * array and its length are published through 'macros' and 'num_macros'.
+ */
+void dfilter_macro_init(void) {
+ static uat_field_t uat_fields[] = {
+ UAT_FLD_CSTRING_OTHER(macro,name,"Name",macro_name_chk,"The name of the macro."),
+ /* N.B. it would be nice if there was a field type for display filters (with
+ auto-completion & colouring), but this wouldn't work here as the filter string
+ will contain $1, etc... */
+ UAT_FLD_CSTRING_ISPRINT(macro,text,"Text","The text this macro resolves to."),
+ UAT_END_FIELDS
+ };
+
+ dfilter_macro_uat = uat_new("Display Filter Macros",
+ sizeof(dfilter_macro_t),
+ DFILTER_MACRO_FILENAME,
+ true,
+ &macros,
+ &num_macros,
+ UAT_AFFECTS_FIELDS,
+ "ChDisplayFilterMacrosSection",
+ macro_copy,
+ macro_update,
+ macro_free,
+ NULL, /* Note: This is set in macros_init () */
+ NULL,
+ uat_fields);
+}
+
+/*
+ * Hands the macro UAT to the caller through the out parameter. The stored
+ * value is NULL until dfilter_macro_init() has run.
+ */
+void dfilter_macro_get_uat(uat_t **dfmu_ptr_ptr)
+{
+	uat_t *current_uat = dfilter_macro_uat;
+
+	*dfmu_ptr_ptr = current_uat;
+}
+
+#ifdef DUMP_DFILTER_MACRO
+/*
+ * The dfilter_macro_t has several characteristics that are
+ * not immediately obvious. The dump_dfilter_macro_t()
+ * function can be used to help "visualize" the contents of
+ * a dfilter_macro_t.
+ *
+ * Some non-obvious components of this struct include:
+ *
+ * m->parts is an argv style array of pointers into the
+ * m->priv string.
+ *
+ * The last pointer of an m->parts array should contain
+ * NULL to indicate the end of the parts pointer array.
+ *
+ * m->priv is a "cooked" copy of the m->text string.
+ * Any variable substitution indicators within m->text
+ * ("$1", "$2", ...) will have been replaced with ASCII
+ * NUL characters within m->priv.
+ *
+ * The first element of the m->parts array (m->parts[0])
+ * has the same pointer value as m->priv; when the
+ * dfilter-macro starts off with a variable substitution
+ * indicator (e.g. "$1") it points to an empty string.
+ */
+
+/*
+ * Prints every member of 'm' (address and value) to stdout, together with
+ * the call site given by function/file/line. Only compiled when
+ * DUMP_DFILTER_MACRO is defined; normally reached through DUMP_MACRO().
+ */
+void dump_dfilter_macro_t(const dfilter_macro_t *m, const char *function, const char *file, int line)
+{
+ printf("\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n");
+
+ if(m == NULL) {
+ printf(" dfilter_macro_t * == NULL! (via: %s(): %s:%d)\n", function, file, line);
+ printf("\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+ return;
+ }
+
+ printf("DUMP of dfilter_macro_t: %p (via: %s(): %s:%d)\n", m, function, file, line);
+
+ printf(" &dfilter_macro->name == %p\n", &m->name);
+ if(m->name == NULL) {
+ printf(" ->name == NULL\n");
+ } else {
+ printf(" ->name == %p\n", m->name);
+ printf(" ->name == <%s>\n", m->name);
+ }
+
+ printf(" &dfilter_macro->text == %p\n", &m->text);
+ if(m->text == NULL) {
+ printf(" ->text == NULL\n");
+ } else {
+ printf(" ->text == %p\n", m->text);
+ printf(" ->text == <%s>\n", m->text);
+ }
+
+ printf(" &dfilter_macro->usable == %p\n", &m->usable);
+ printf(" ->usable == %u\n", m->usable);
+
+ printf(" &dfilter_macro->parts == %p\n", &m->parts);
+
+ if(m->parts == NULL) {
+ printf(" ->parts == NULL\n");
+ } else {
+ int i = 0;
+
+ /* Walk the NULL-terminated parts array. */
+ while (m->parts[i]) {
+ printf(" ->parts[%d] == %p\n", i, m->parts[i]);
+ printf(" ->parts[%d] == <%s>\n", i, m->parts[i]);
+ i++;
+ }
+ printf(" ->parts[%d] == NULL\n", i);
+ }
+
+ printf(" &dfilter_macro->args_pos == %p\n", &m->args_pos);
+ if(m->args_pos == NULL) {
+ printf(" ->args_pos == NULL\n");
+ } else {
+ printf(" ->args_pos == %p\n", m->args_pos);
+ /*printf(" ->args_pos == <%?>\n", m->args_pos);*/
+ }
+
+ printf(" &dfilter_macro->argc == %p\n", &m->argc);
+ printf(" ->argc == %d\n", m->argc);
+
+ printf(" &dfilter_macro->priv == %p\n", &m->priv);
+ if(m->priv == NULL) {
+ printf(" ->priv == NULL\n");
+ } else {
+ printf(" ->priv == %p\n", m->priv);
+ /* Printed with %s, so only the text up to the first embedded NUL shows. */
+ printf(" ->priv == <%s>\n", (char *)m->priv);
+ }
+
+ printf("\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+}
+#endif
+
+/*
+ * Counterpart of dfilter_macro_init(). Currently empty: nothing is released
+ * here.
+ */
+void dfilter_macro_cleanup(void)
+{
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/dfilter-macro.h b/epan/dfilter/dfilter-macro.h
new file mode 100644
index 0000000..477bb40
--- /dev/null
+++ b/epan/dfilter/dfilter-macro.h
@@ -0,0 +1,48 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _DFILTER_MACRO_H
+#define _DFILTER_MACRO_H
+
+#include <wireshark.h>
+#include "dfilter.h"
+
+#define DFILTER_MACRO_FILENAME "dfilter_macros"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * One display filter macro record. name and text are the stored fields;
+ * the remaining members are the parsed form derived from text.
+ */
+typedef struct _dfilter_macro_t {
+ char* name; /* the macro id */
+ char* text; /* raw data from file */
+ bool usable; /* macro is usable */
+ char** parts; /* NULL-terminated array of the text segments between insertion targets */
+ int* args_pos; /* for each insertion target, the zero-based index of the argument to insert */
+ int argc; /* the expected number of arguments */
+ void* priv; /* a copy of text that contains every c-string in parts */
+} dfilter_macro_t;
+
+/* applies all macros to the given text and returns the resulting string or NULL on failure */
+char* dfilter_macro_apply(const char* text, df_error_t** error);
+
+/* Creates the macro UAT. */
+void dfilter_macro_init(void);
+
+struct epan_uat;
+
+/* Stores the macro UAT in *dfmu_ptr_ptr. */
+WS_DLL_PUBLIC
+void dfilter_macro_get_uat(struct epan_uat **dfmu_ptr_ptr);
+
+void dfilter_macro_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _DFILTER_MACRO_H */
diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c
new file mode 100644
index 0000000..73646b7
--- /dev/null
+++ b/epan/dfilter/dfilter.c
@@ -0,0 +1,1009 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include <stdio.h>
+#include <string.h>
+
+#include "dfilter-int.h"
+#include "syntax-tree.h"
+#include "gencode.h"
+#include "semcheck.h"
+#include "dfvm.h"
+#include <epan/epan_dissect.h>
+#include <epan/exceptions.h>
+#include "dfilter.h"
+#include "dfilter-macro.h"
+#include "scanner_lex.h"
+#include <wsutil/wslog.h>
+#include <wsutil/ws_assert.h>
+#include "grammar.h"
+
+
+#define DFILTER_TOKEN_ID_OFFSET 1
+
+/* Holds the singular instance of our Lemon parser object */
+static void* ParserObj = NULL;
+
+df_loc_t loc_empty = {-1, 0};
+
+/*
+ * Store a compile error in the state object, building the message from a
+ * printf-style format and a va_list.
+ *
+ * Only the first error is kept: when the state already holds an error the
+ * call returns without touching it.
+ */
+void
+dfilter_vfail(void *state, int code, df_loc_t loc,
+		const char *format, va_list args)
+{
+	dfstate_t *st = (dfstate_t *)state;
+
+	/* If we've already reported one error, don't overwrite it */
+	if (st->error == NULL) {
+		st->error = df_error_new_vprintf(code, &loc, format, args);
+	}
+}
+
+/*
+ * Variadic front end for dfilter_vfail(): packs the arguments that follow
+ * 'format' into a va_list and forwards them together with 'state', 'code'
+ * and 'loc'.
+ */
+void
+dfilter_fail(void *state, int code, df_loc_t loc,
+ const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ dfilter_vfail(state, code, loc, format, args);
+ va_end(args);
+}
+
+/*
+ * Same as dfilter_fail(), but after recording the error it raises the
+ * TypeError exception with THROW(), so control does not return normally
+ * to the caller.
+ */
+void
+dfilter_fail_throw(void *state, int code, df_loc_t loc, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ dfilter_vfail(state, code, loc, format, args);
+ va_end(args);
+ THROW(TypeError);
+}
+
+/*
+ * Replace the location stored in the error attached to 'dfw'.
+ * An error must already be set; this is asserted.
+ */
+void
+dfw_set_error_location(dfwork_t *dfw, df_loc_t loc)
+{
+ ws_assert(dfw->error);
+ dfw->error->loc = loc;
+}
+
+/*
+ * Look up 'name' as a registered field name, falling back to the table of
+ * field aliases. When the match comes from an alias and 'dfs' is not NULL,
+ * the name is added to the deprecated tokens of 'dfs'.
+ *
+ * Returns the matching header_field_info, or NULL if 'name' is neither a
+ * field name nor an alias.
+ */
+header_field_info *
+dfilter_resolve_unparsed(dfsyntax_t *dfs, const char *name)
+{
+	header_field_info *field = proto_registrar_get_byname(name);
+
+	if (field == NULL) {
+		/* Not a field name; maybe it is an aliased field name. */
+		field = proto_registrar_get_byalias(name);
+		if (field != NULL && dfs != NULL)
+			add_deprecated_token(dfs, name);
+	}
+
+	/* Still NULL here means it's not a field. */
+	return field;
+}
+
+/*
+ * Initialize the dfilter module: allocate the single Lemon parser object
+ * (ParserObj), then initialize the syntax-tree types and the filter macros.
+ * If a parser object already exists, a message is logged and the old
+ * object is freed before a new one is allocated.
+ */
+void
+dfilter_init(void)
+{
+ if (ParserObj) {
+ ws_message("I expected ParserObj to be NULL\n");
+ /* Free the Lemon Parser object */
+ DfilterFree(ParserObj, g_free);
+ }
+ /* Allocate an instance of our Lemon-based parser */
+ ParserObj = DfilterAlloc(g_malloc);
+
+ /* Initialize the syntax-tree sub-sub-system */
+ sttype_init();
+
+ dfilter_macro_init();
+}
+
+/*
+ * Clean-up the dfilter module: run the macro cleanup hook, free the Lemon
+ * parser object and clean up the syntax-tree types.
+ *
+ * ParserObj is reset to NULL after it is freed. dfilter_init() frees any
+ * non-NULL ParserObj it finds, so leaving the stale pointer behind would
+ * make an init-after-cleanup (or a second cleanup) free the parser twice.
+ */
+void
+dfilter_cleanup(void)
+{
+	dfilter_macro_cleanup();
+
+	/* Free the Lemon Parser object */
+	if (ParserObj) {
+		DfilterFree(ParserObj, g_free);
+		ParserObj = NULL;
+	}
+
+	/* Clean up the syntax-tree sub-sub-system */
+	sttype_cleanup();
+}
+
+/*
+ * Allocate an empty dfilter_t. When 'deprecated' is not NULL the new
+ * filter takes its own reference on that array.
+ */
+static dfilter_t*
+dfilter_new(GPtrArray *deprecated)
+{
+	/* g_new0() zero-fills the structure, so insns, function_stack,
+	 * set_stack and warnings all start out as NULL. */
+	dfilter_t *filter = g_new0(dfilter_t, 1);
+
+	if (deprecated != NULL) {
+		filter->deprecated = g_ptr_array_ref(deprecated);
+	}
+	return filter;
+}
+
+/*
+ * Free every instruction (dfvm_insn_t) stored in 'insns', then free the
+ * array itself together with its element storage.
+ */
+static void
+free_insns(GPtrArray *insns)
+{
+	for (unsigned int idx = 0; idx < insns->len; idx++) {
+		dfvm_insn_free((dfvm_insn_t *)g_ptr_array_index(insns, idx));
+	}
+	g_ptr_array_free(insns, true);
+}
+
+/*
+ * Free a compiled filter and everything it owns. A NULL 'df' is accepted
+ * and ignored.
+ *
+ * The function and set stacks are expected to be empty at this point; if
+ * one is not, a critical message is logged and the list is freed anyway.
+ */
+void
+dfilter_free(dfilter_t *df)
+{
+	if (!df)
+		return;
+
+	if (df->insns) {
+		free_insns(df->insns);
+	}
+
+	g_free(df->interesting_fields);
+
+	/* Guard the tables like the other members (and like dfwork_free()
+	 * does): g_hash_table_destroy() complains about a NULL table. */
+	if (df->references)
+		g_hash_table_destroy(df->references);
+	if (df->raw_references)
+		g_hash_table_destroy(df->raw_references);
+
+	if (df->deprecated)
+		g_ptr_array_unref(df->deprecated);
+
+	if (df->function_stack != NULL) {
+		ws_critical("Function stack list should be NULL");
+		g_slist_free(df->function_stack);
+	}
+
+	if (df->set_stack != NULL) {
+		ws_critical("Set stack list should be NULL");
+		g_slist_free(df->set_stack);
+	}
+
+	if (df->warnings)
+		g_slist_free_full(df->warnings, g_free);
+
+	g_free(df->registers);
+	g_free(df->expanded_text);
+	g_free(df->syntax_tree_str);
+	g_free(df);
+}
+
+/* Value destructor used for the reference hash tables created in
+ * dfwork_new(): frees the GPtrArray passed as 'data' together with its
+ * element storage. */
+static void free_refs_array(void *data)
+{
+ /* Array data must be freed. */
+ (void)g_ptr_array_free(data, true);
+}
+
+/* Allocate a zero-filled dfsyntax_t and store 'flags' in it. */
+static dfsyntax_t*
+dfsyntax_new(unsigned flags)
+{
+ dfsyntax_t *dfs = g_new0(dfsyntax_t, 1);
+ dfs->flags = flags;
+ return dfs;
+}
+
+/*
+ * Release a dfsyntax_t and whatever it still owns: the error, the syntax
+ * tree root, its reference on the deprecated-token array, the pending
+ * lval node and the quoted-string buffer.
+ */
+static void
+dfsyntax_free(dfsyntax_t *dfs)
+{
+	/* df_error_free() is a no-op when no error is set. */
+	df_error_free(&dfs->error);
+
+	if (dfs->st_root != NULL) {
+		stnode_free(dfs->st_root);
+	}
+	if (dfs->deprecated != NULL) {
+		g_ptr_array_unref(dfs->deprecated);
+	}
+	if (dfs->lval != NULL) {
+		stnode_free(dfs->lval);
+	}
+	if (dfs->quoted_string != NULL) {
+		g_string_free(dfs->quoted_string, true);
+	}
+
+	g_free(dfs);
+}
+
+/*
+ * Allocate the work structure used to build a filter. It gets its own
+ * copy of 'expanded_text', the given 'flags', two empty reference tables
+ * (keyed by pointer, values freed with free_refs_array()) and a fresh
+ * wmem allocator in dfw_scope.
+ */
+static dfwork_t*
+dfwork_new(const char *expanded_text, unsigned flags)
+{
+	GDestroyNotify free_value = (GDestroyNotify)free_refs_array;
+	dfwork_t *work = g_new0(dfwork_t, 1);
+
+	work->expanded_text = g_strdup(expanded_text);
+	work->flags = flags;
+	work->references = g_hash_table_new_full(g_direct_hash,
+			g_direct_equal, NULL, free_value);
+	work->raw_references = g_hash_table_new_full(g_direct_hash,
+			g_direct_equal, NULL, free_value);
+	work->dfw_scope = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
+
+	return work;
+}
+
+/*
+ * Release a dfwork_t and everything still attached to it. Members that
+ * were handed over to another owner must have been set to NULL by the
+ * caller beforehand.
+ */
+static void
+dfwork_free(dfwork_t *dfw)
+{
+ if (dfw->st_root) {
+ stnode_free(dfw->st_root);
+ }
+
+ if (dfw->loaded_fields) {
+ g_hash_table_destroy(dfw->loaded_fields);
+ }
+
+ if (dfw->loaded_raw_fields) {
+ g_hash_table_destroy(dfw->loaded_raw_fields);
+ }
+
+ if (dfw->interesting_fields) {
+ g_hash_table_destroy(dfw->interesting_fields);
+ }
+
+ if (dfw->references) {
+ g_hash_table_destroy(dfw->references);
+ }
+
+ if (dfw->raw_references) {
+ g_hash_table_destroy(dfw->raw_references);
+ }
+
+ if (dfw->insns) {
+ free_insns(dfw->insns);
+ }
+
+ if (dfw->deprecated)
+ g_ptr_array_unref(dfw->deprecated);
+
+ if (dfw->warnings)
+ g_slist_free_full(dfw->warnings, g_free);
+
+ g_free(dfw->expanded_text);
+
+ if (dfw->error)
+ df_error_free(&dfw->error);
+
+ wmem_destroy_allocator(dfw->dfw_scope);
+
+ /*
+ * Any error still attached to dfw has been freed above. A caller that
+ * wants to pass the error on must detach it (set dfw->error to NULL)
+ * before calling this function, as compile_filter() does.
+ */
+ g_free(dfw);
+}
+
+/*
+ * Map a parser token ID (one of the TOKEN_* constants) to its name without
+ * the TOKEN_ prefix. Returns "<unknown>" for any value not listed below.
+ * The returned strings are literals and must not be freed.
+ */
+const char *tokenstr(int token)
+{
+ switch (token) {
+ case TOKEN_TEST_AND: return "TEST_AND";
+ case TOKEN_TEST_OR: return "TEST_OR";
+ case TOKEN_TEST_XOR: return "TEST_XOR";
+ case TOKEN_TEST_ALL_EQ: return "TEST_ALL_EQ";
+ case TOKEN_TEST_ANY_EQ: return "TEST_ANY_EQ";
+ case TOKEN_TEST_ALL_NE: return "TEST_ALL_NE";
+ case TOKEN_TEST_ANY_NE: return "TEST_ANY_NE";
+ case TOKEN_TEST_LT: return "TEST_LT";
+ case TOKEN_TEST_LE: return "TEST_LE";
+ case TOKEN_TEST_GT: return "TEST_GT";
+ case TOKEN_TEST_GE: return "TEST_GE";
+ case TOKEN_TEST_CONTAINS: return "TEST_CONTAINS";
+ case TOKEN_TEST_MATCHES: return "TEST_MATCHES";
+ case TOKEN_BITWISE_AND: return "BITWISE_AND";
+ case TOKEN_PLUS: return "PLUS";
+ case TOKEN_MINUS: return "MINUS";
+ case TOKEN_STAR: return "STAR";
+ case TOKEN_RSLASH: return "RSLASH";
+ case TOKEN_PERCENT: return "PERCENT";
+ case TOKEN_TEST_NOT: return "TEST_NOT";
+ case TOKEN_STRING: return "STRING";
+ case TOKEN_CHARCONST: return "CHARCONST";
+ case TOKEN_IDENTIFIER: return "IDENTIFIER";
+ case TOKEN_CONSTANT: return "CONSTANT";
+ case TOKEN_LITERAL: return "LITERAL";
+ case TOKEN_FIELD: return "FIELD";
+ case TOKEN_LBRACKET: return "LBRACKET";
+ case TOKEN_RBRACKET: return "RBRACKET";
+ case TOKEN_COMMA: return "COMMA";
+ case TOKEN_RANGE_NODE: return "RANGE_NODE";
+ case TOKEN_TEST_IN: return "TEST_IN";
+ case TOKEN_LBRACE: return "LBRACE";
+ case TOKEN_RBRACE: return "RBRACE";
+ case TOKEN_DOTDOT: return "DOTDOT";
+ case TOKEN_LPAREN: return "LPAREN";
+ case TOKEN_RPAREN: return "RPAREN";
+ case TOKEN_DOLLAR: return "DOLLAR";
+ case TOKEN_ATSIGN: return "ATSIGN";
+ case TOKEN_HASH: return "HASH";
+ }
+ return "<unknown>";
+}
+
+/*
+ * Remember 'token' as a deprecated token of this compilation. The list is
+ * created on first use and owns a private copy of each string. A token
+ * that is already listed (compared ASCII case-insensitively) is not added
+ * a second time.
+ */
+void
+add_deprecated_token(dfsyntax_t *dfs, const char *token)
+{
+	if (dfs->deprecated == NULL)
+		dfs->deprecated = g_ptr_array_new_full(0, g_free);
+
+	GPtrArray *known = dfs->deprecated;
+	unsigned pos = 0;
+
+	while (pos < known->len) {
+		const char *entry = g_ptr_array_index(known, pos);
+		if (g_ascii_strcasecmp(token, entry) == 0) {
+			/* It's already in our list */
+			return;
+		}
+		pos++;
+	}
+	g_ptr_array_add(known, g_strdup(token));
+}
+
+/*
+ * Format a printf-style warning message and prepend it to the warning list
+ * of 'dfw'. The list stores the newly allocated string.
+ */
+void
+add_compile_warning(dfwork_t *dfw, const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ char *msg = ws_strdup_vprintf(format, ap);
+ va_end(ap);
+ dfw->warnings = g_slist_prepend(dfw->warnings, msg);
+}
+
+/*
+ * Perform macro expansion on 'expr'. This is a direct call to
+ * dfilter_macro_apply(), whose result and error are passed through.
+ */
+char *
+dfilter_expand(const char *expr, df_error_t **err_ret)
+{
+ return dfilter_macro_apply(expr, err_ret);
+}
+
+/*
+ * Run the scanner over 'expanded_text' and feed each token to the shared
+ * Lemon parser object (ParserObj), using 'dfs' as the scanner/parser state.
+ *
+ * Scanning stops at end of input, on scanner failure, or as soon as an
+ * error has been recorded in dfs->error. The parser is always sent the
+ * end-of-input token afterwards so that it is reset for the next compile.
+ *
+ * Returns true if no error was recorded in dfs->error, false otherwise.
+ */
+static bool
+dfwork_parse(const char *expanded_text, dfsyntax_t *dfs)
+{
+ yyscan_t scanner;
+ YY_BUFFER_STATE in_buffer;
+ unsigned token_count = 0;
+ int token;
+
+ if (df_yylex_init(&scanner) != 0) {
+ dfs->error = df_error_new_printf(DF_ERROR_GENERIC, NULL, "Can't initialize scanner: %s", g_strerror(errno));
+ return false;
+ }
+
+ /* NOTE(review): the return value of df_yy_scan_string() is used below
+ * without a NULL check. */
+ in_buffer = df_yy_scan_string(expanded_text, scanner);
+ /* Make dfs available to the scanner as its "extra" data. */
+ df_yyset_extra(dfs, scanner);
+
+#ifdef NDEBUG
+ if (dfs->flags & DF_DEBUG_FLEX || dfs->flags & DF_DEBUG_LEMON) {
+ ws_message("Compile Wireshark without NDEBUG to enable Flex and/or Lemon debug traces");
+ }
+#else
+ /* Enable/disable debugging for Flex. */
+ df_yyset_debug(dfs->flags & DF_DEBUG_FLEX, scanner);
+
+ /* Enable/disable debugging for Lemon. */
+ DfilterTrace(dfs->flags & DF_DEBUG_LEMON ? stderr : NULL, "lemon> ");
+#endif
+
+ while (1) {
+ token = df_yylex(scanner);
+
+ /* Check for scanner failure */
+ if (token == SCAN_FAILED) {
+ ws_noisy("Scanning failed");
+ ws_assert(dfs->error != NULL);
+ break;
+ }
+
+ /* Check for end-of-input */
+ if (token == 0) {
+ ws_noisy("Scanning finished");
+ break;
+ }
+
+ ws_noisy("(%u) Token %d %s %s",
+ ++token_count, token, tokenstr(token),
+ stnode_token(dfs->lval));
+
+ /* Give the token to the parser */
+ Dfilter(ParserObj, token, dfs->lval, dfs);
+ /* The parser has freed the lval for us. */
+ dfs->lval = NULL;
+
+ if (dfs->error) {
+ break;
+ }
+
+ } /* while (1) */
+
+ /* Tell the parser that we have reached the end of input; that
+ * way, it'll reset its state for the next compile. (We want
+ * to do that even if we got a syntax error, to make sure the
+ * parser state is cleaned up; we don't create a new parser
+ * object when we start a new parse, and don't destroy it when
+ * the parse finishes.) */
+ Dfilter(ParserObj, 0, NULL, dfs);
+
+ /* Free scanner state */
+ df_yy_delete_buffer(in_buffer, scanner);
+ df_yylex_destroy(scanner);
+
+ return dfs->error == NULL;
+}
+
+/*
+ * Turn the syntax tree held in 'dfw' into a compiled filter: run the
+ * semantic check, generate the bytecode, then move the results out of
+ * 'dfw' into a newly allocated dfilter_t.
+ *
+ * Every member that is moved (insns, expanded_text, references,
+ * raw_references, warnings) is set to NULL in 'dfw' so that dfwork_free()
+ * will not free it.
+ *
+ * Returns the new filter, or NULL if the semantic check fails.
+ */
+static dfilter_t *
+dfwork_build(dfwork_t *dfw)
+{
+ dfilter_t *dfilter;
+ char *tree_str;
+
+ log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree before semantic check", NULL);
+
+ /* Check semantics and do necessary type conversion*/
+ if (!dfw_semcheck(dfw))
+ return NULL;
+
+ /* Cache tree representation in tree_str. */
+ tree_str = NULL;
+ log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree after successful semantic check", &tree_str);
+
+ /* The logging call may have left tree_str NULL; build the string
+ * explicitly when the caller asked for the tree to be saved. */
+ if ((dfw->flags & DF_SAVE_TREE) && tree_str == NULL) {
+ tree_str = dump_syntax_tree_str(dfw->st_root);
+ }
+
+ /* Create bytecode */
+ dfw_gencode(dfw);
+
+ /* Tuck away the bytecode in the dfilter_t */
+ dfilter = dfilter_new(dfw->deprecated);
+ dfilter->insns = dfw->insns;
+ dfw->insns = NULL;
+ dfilter->interesting_fields = dfw_interesting_fields(dfw,
+ &dfilter->num_interesting_fields);
+ dfilter->expanded_text = dfw->expanded_text;
+ dfw->expanded_text = NULL;
+ dfilter->references = dfw->references;
+ dfw->references = NULL;
+ dfilter->raw_references = dfw->raw_references;
+ dfw->raw_references = NULL;
+ dfilter->warnings = dfw->warnings;
+ dfw->warnings = NULL;
+
+ /* Keep the tree string only if saving was requested; otherwise
+ * discard whatever the logging call produced. */
+ if (dfw->flags & DF_SAVE_TREE) {
+ ws_assert(tree_str);
+ dfilter->syntax_tree_str = tree_str;
+ tree_str = NULL;
+ }
+ else {
+ dfilter->syntax_tree_str = NULL;
+ g_free(tree_str);
+ tree_str = NULL;
+ }
+
+ /* Initialize run-time space */
+ dfilter->num_registers = dfw->next_register;
+ dfilter->registers = g_new0(df_cell_t, dfilter->num_registers);
+
+ return dfilter;
+}
+
+/*
+ * Parse and compile filter text on which macro expansion (if any) has
+ * already been performed.
+ *
+ * 'err_ptr' must not be NULL. The possible outcomes are:
+ *  - a compiled filter is returned; *err_ptr is not written;
+ *  - NULL is returned and *err_ptr is set to NULL: the text was an empty
+ *    filter, which counts as success;
+ *  - NULL is returned and *err_ptr points to an error: parsing or building
+ *    failed. Ownership of the error passes to the caller.
+ */
+static dfilter_t *
+compile_filter(const char *expanded_text, unsigned flags, df_error_t **err_ptr)
+{
+	dfsyntax_t *dfs = NULL;
+	dfwork_t *dfw = NULL;
+	dfilter_t *dfcode = NULL;
+	df_error_t *error = NULL;
+	bool ok;
+
+	dfs = dfsyntax_new(flags);
+
+	ok = dfwork_parse(expanded_text, dfs);
+	if (!ok) {
+		/* Take the error away from dfs so dfsyntax_free() keeps it. */
+		error = dfs->error;
+		dfs->error = NULL;
+		goto FAILURE;
+	}
+	else if (dfs->st_root == NULL) {
+		/* Is it an empty filter? If so set the dfcode to NULL and return success.
+		 * This can happen if the user clears the display filter toolbar in the UI.
+		 * In that case the compilation succeeds and the NULL dfcode clears the filter
+		 * (show all frames). */
+		dfsyntax_free(dfs);
+		*err_ptr = NULL;
+		return NULL;
+	}
+
+	/* Move the syntax tree from the syntax state to the work state. */
+	dfw = dfwork_new(expanded_text, dfs->flags);
+	dfw->st_root = dfs->st_root;
+	dfs->st_root = NULL;
+	if (dfs->deprecated)
+		dfw->deprecated = g_ptr_array_ref(dfs->deprecated);
+	dfsyntax_free(dfs);
+	dfs = NULL;
+
+	dfcode = dfwork_build(dfw);
+	if (dfcode == NULL) {
+		/* Take the error away from dfw so dfwork_free() keeps it. */
+		error = dfw->error;
+		dfw->error = NULL;
+		goto FAILURE;
+	}
+
+	/* SUCCESS */
+	dfwork_free(dfw);
+	return dfcode;
+
+FAILURE:
+	if (error == NULL || error->msg == NULL) {
+		/* We require an error message. */
+		ws_critical("Unknown error compiling filter: %s", expanded_text);
+		/* Release an error object that has no message before it is
+		 * replaced; otherwise it would be leaked. df_error_free()
+		 * does nothing when 'error' is NULL. */
+		df_error_free(&error);
+		error = df_error_new_msg("Unknown error compiling filter");
+	}
+
+	ws_assert(err_ptr && error);
+	*err_ptr = error;
+
+	if (dfs)
+		dfsyntax_free(dfs);
+	if (dfw)
+		dfwork_free(dfw);
+	return NULL;
+}
+
+/*
+ * Common tail for a failed compilation: log the error at debug level, then
+ * either hand it to the caller through 'err_ptr' or, when the caller
+ * passed no error pointer, free it. Always returns false so it can be
+ * used directly in a return statement.
+ */
+static inline bool
+compile_failure(df_error_t *error, df_error_t **err_ptr)
+{
+	ws_assert(error);
+	ws_debug("Error compiling filter: (%d) %s", error->code, error->msg);
+
+	if (err_ptr == NULL) {
+		df_error_free(&error);
+	}
+	else {
+		*err_ptr = error;
+	}
+
+	return false;
+}
+
+/*
+ * Compile the display filter 'text'.
+ *
+ * text     filter expression; NULL is treated as a caller bug and fails.
+ * dfp      receives the compiled filter; must not be NULL. It is set to
+ *          NULL first and stays NULL on failure. It is also NULL on
+ *          success when compile_filter() reports an empty filter.
+ * err_ptr  if not NULL, receives the error on failure.
+ * flags    DF_* flags; with DF_EXPAND_MACROS the text is run through
+ *          dfilter_macro_apply() before compilation, otherwise it is
+ *          compiled verbatim.
+ * caller   name used in log messages; NULL is shown as "(unknown)".
+ *
+ * Returns true on success, false on failure.
+ */
+bool
+dfilter_compile_full(const char *text, dfilter_t **dfp,
+ df_error_t **err_ptr, unsigned flags,
+ const char *caller)
+{
+ char *expanded_text;
+ dfilter_t *dfcode;
+ df_error_t *error = NULL;
+
+ ws_assert(dfp);
+ *dfp = NULL;
+ if (caller == NULL)
+ caller = "(unknown)";
+
+ if (text == NULL) {
+ /* This is a bug. */
+ ws_warning("Called from %s() with invalid NULL expression", caller);
+ if (err_ptr) {
+ *err_ptr = df_error_new_msg("BUG: NULL text argument is invalid");
+ }
+ return false;
+ }
+
+ ws_debug("Called from %s() with filter: %s", caller, text);
+
+ if (flags & DF_EXPAND_MACROS) {
+ expanded_text = dfilter_macro_apply(text, &error);
+ if (expanded_text == NULL) {
+ return compile_failure(error, err_ptr);
+ }
+ ws_noisy("Expanded text: %s", expanded_text);
+ }
+ else {
+ expanded_text = g_strdup(text);
+ ws_noisy("Verbatim text: %s", expanded_text);
+ }
+
+ /* The local copy of the text is no longer needed once
+ * compile_filter() has returned. */
+ dfcode = compile_filter(expanded_text, flags, &error);
+ g_free(expanded_text);
+ expanded_text = NULL;
+
+ if(error != NULL) {
+ return compile_failure(error, err_ptr);
+ }
+
+ *dfp = dfcode;
+ ws_log(WS_LOG_DOMAIN, LOG_LEVEL_INFO, "Compiled display filter: %s", text);
+ return true;
+}
+
+
+/* Apply the compiled filter 'df' to 'tree'; returns the result of
+ * dfvm_apply(). */
+bool
+dfilter_apply(dfilter_t *df, proto_tree *tree)
+{
+ return dfvm_apply(df, tree);
+}
+
+/* Apply the compiled filter 'df' to the protocol tree of the dissection
+ * 'edt' (edt->tree); returns the result of dfvm_apply(). */
+bool
+dfilter_apply_edt(dfilter_t *df, epan_dissect_t* edt)
+{
+ return dfvm_apply(df, edt->tree);
+}
+
+
+/*
+ * Prime 'tree' with every field ID listed in the filter's
+ * interesting_fields array.
+ */
+void
+dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree)
+{
+	for (int idx = 0; idx < df->num_interesting_fields; idx++) {
+		int hfid = df->interesting_fields[idx];
+
+		proto_tree_prime_with_hfid(tree, hfid);
+	}
+}
+
+/* Returns true if the filter lists at least one interesting field. */
+bool
+dfilter_has_interesting_fields(const dfilter_t *df)
+{
+ return (df->num_interesting_fields > 0);
+}
+
+/*
+ * Returns true if 'hfid' appears in the filter's interesting_fields
+ * array, false otherwise.
+ */
+bool
+dfilter_interested_in_field(const dfilter_t *df, int hfid)
+{
+	const int count = df->num_interesting_fields;
+	int idx = 0;
+
+	while (idx < count) {
+		if (df->interesting_fields[idx] == hfid)
+			return true;
+		idx++;
+	}
+	return false;
+}
+
+/*
+ * Returns true if the filter is interested in 'proto_id': either the
+ * protocol itself is one of the interesting fields, or one of the
+ * interesting fields has 'proto_id' as its parent.
+ */
+bool
+dfilter_interested_in_proto(const dfilter_t *df, int proto_id)
+{
+	for (int idx = 0; idx < df->num_interesting_fields; idx++) {
+		int df_hfid = df->interesting_fields[idx];
+		/* XXX: Should we go up to the parent of a pino?
+		 * We can tell if df_hfid is a PINO, but there's
+		 * no function to return the parent proto ID yet.
+		 */
+		int owner = proto_registrar_is_protocol(df_hfid)
+			? df_hfid
+			: proto_registrar_get_parent(df_hfid);
+
+		if (owner == proto_id) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * Returns true if the filter is interested in the protocol registered
+ * under the filter name "_ws.col". A NULL filter yields false.
+ * The protocol ID is looked up on first use and kept in a static variable.
+ */
+bool
+dfilter_requires_columns(const dfilter_t *df)
+{
+ if (df == NULL) {
+ return false;
+ }
+
+ /* XXX: Could cache this like packet_cache_proto_handles */
+ static int proto_cols = -1;
+ if (proto_cols == -1) {
+ proto_cols = proto_get_id_by_filter_name("_ws.col");
+ }
+ ws_assert(proto_cols != -1);
+
+ return dfilter_interested_in_proto(df, proto_cols);
+}
+
+/*
+ * Returns the array of deprecated tokens recorded for the filter, or NULL
+ * when there is no array or the array is empty. The array remains owned by
+ * the filter (no reference is added).
+ */
+GPtrArray *
+dfilter_deprecated_tokens(dfilter_t *df) {
+	GPtrArray *tokens = df->deprecated;
+
+	if (tokens == NULL || tokens->len == 0) {
+		return NULL;
+	}
+	return tokens;
+}
+
+/* Returns the filter's warning list as stored (no copy is made). */
+GSList *
+dfilter_get_warnings(dfilter_t *df)
+{
+ return df->warnings;
+}
+
+/* Print the filter to 'fp'; forwards all arguments to dfvm_dump(). */
+void
+dfilter_dump(FILE *fp, dfilter_t *df, uint16_t flags)
+{
+ dfvm_dump(fp, df, flags);
+}
+
+/* Returns the filter's expanded_text member; the string stays owned by
+ * the filter. */
+const char *
+dfilter_text(dfilter_t *df)
+{
+ return df->expanded_text;
+}
+
+/* Returns the saved text representation of the syntax tree. It is NULL
+ * unless the filter was compiled with DF_SAVE_TREE. The string stays owned
+ * by the filter. */
+const char *
+dfilter_syntax_tree(dfilter_t *df)
+{
+ return df->syntax_tree_str;
+}
+
+/*
+ * Write the filter text and a dump of the compiled filter to the log,
+ * optionally prefixed with 'msg'.
+ *
+ * Nothing is done when logging is not active for 'domain' and 'level'.
+ * A NULL 'df' is reported as "NULL display filter".
+ */
+void
+dfilter_log_full(const char *domain, enum ws_log_level level,
+ const char *file, long line, const char *func,
+ dfilter_t *df, const char *msg)
+{
+ if (!ws_log_msg_is_active(domain, level))
+ return;
+
+ if (df == NULL) {
+ ws_log_write_always_full(domain, level, file, line, func,
+ "%s: NULL display filter", msg ? msg : "?");
+ return;
+ }
+
+ /* The dump string is released with g_free() below. */
+ char *str = dfvm_dump_str(NULL, df, true);
+ if (G_UNLIKELY(msg == NULL))
+ ws_log_write_always_full(domain, level, file, line, func, "\nFilter:\n %s\n\n%s", dfilter_text(df), str);
+ else
+ ws_log_write_always_full(domain, level, file, line, func, "%s:\nFilter:\n %s\n\n%s", msg, dfilter_text(df), str);
+ g_free(str);
+}
+
+/*
+ * Comparison callback for g_ptr_array_sort(): orders df_reference_t
+ * entries by ascending proto_layer_num. Both arguments point at array
+ * slots, i.e. at pointers to references.
+ */
+static int
+compare_ref_layer(gconstpointer _a, gconstpointer _b)
+{
+	const df_reference_t *const *lhs = (const df_reference_t *const *)_a;
+	const df_reference_t *const *rhs = (const df_reference_t *const *)_b;
+
+	return (*lhs)->proto_layer_num - (*rhs)->proto_layer_num;
+}
+
+/*
+ * Refresh every reference array stored in 'table' from 'tree'.
+ *
+ * For each table entry the array is emptied, then filled with one new
+ * reference per field_info found in 'tree' for the key's field and for
+ * every field chained to it through same_name_next. 'raw' is passed on to
+ * reference_new(). Finally the array is sorted with compare_ref_layer().
+ */
+static void
+load_references(GHashTable *table, proto_tree *tree, bool raw)
+{
+	GHashTableIter iter;
+	header_field_info *hfinfo;
+	GPtrArray *refs;
+
+	if (g_hash_table_size(table) == 0) {
+		/* Nothing to do. */
+		return;
+	}
+
+	g_hash_table_iter_init(&iter, table);
+	while (g_hash_table_iter_next(&iter, (void **)&hfinfo, (void **)&refs)) {
+		/* If we have a previous array free the data */
+		g_ptr_array_set_size(refs, 0);
+
+		for (; hfinfo != NULL; hfinfo = hfinfo->same_name_next) {
+			GPtrArray *found = proto_find_finfo(tree, hfinfo->id);
+
+			if (found == NULL)
+				continue;
+
+			for (unsigned n = 0; n < found->len; n++) {
+				field_info *fi = g_ptr_array_index(found, n);
+				g_ptr_array_add(refs, reference_new(fi, raw));
+			}
+			g_ptr_array_free(found, true);
+		}
+
+		g_ptr_array_sort(refs, compare_ref_layer);
+	}
+}
+
+/* Refresh both reference tables of the filter from 'tree': first the
+ * regular references, then the raw references. */
+void
+dfilter_load_field_references(const dfilter_t *df, proto_tree *tree)
+{
+ load_references(df->references, tree, false);
+ load_references(df->raw_references, tree, true);
+}
+
+/* Same as dfilter_load_field_references(), using the protocol tree of the
+ * dissection 'edt' (edt->tree). */
+void
+dfilter_load_field_references_edt(const dfilter_t *df, epan_dissect_t *edt)
+{
+ dfilter_load_field_references(df, edt->tree);
+}
+
+/*
+ * Allocate a reference describing 'finfo'. The reference records the
+ * field's hfinfo and protocol layer number. Its value comes from
+ * dfvm_get_raw_fvalue() when 'raw' is true, otherwise it is a duplicate of
+ * the field's value. Free the result with reference_free().
+ */
+df_reference_t *
+reference_new(const field_info *finfo, bool raw)
+{
+	df_reference_t *ref = g_new(df_reference_t, 1);
+
+	ref->hfinfo = finfo->hfinfo;
+	ref->value = raw ? dfvm_get_raw_fvalue(finfo)
+			 : fvalue_dup(finfo->value);
+	ref->proto_layer_num = finfo->proto_layer_num;
+	return ref;
+}
+
+/* Free a reference created by reference_new(), including its value. */
+void
+reference_free(df_reference_t *ref)
+{
+ fvalue_free(ref->value);
+ g_free(ref);
+}
+
+/*
+ * Allocate an error object.
+ *
+ * 'msg' is stored as is (not copied); df_error_free() releases it with
+ * g_free(). When 'loc' is NULL the location is set to column -1 with
+ * length 0, otherwise the two location fields are copied from 'loc'.
+ */
+df_error_t *
+df_error_new(int code, char *msg, df_loc_t *loc)
+{
+	df_error_t *err = g_new(df_error_t, 1);
+
+	err->code = code;
+	err->msg = msg;
+	err->loc.col_start = (loc != NULL) ? loc->col_start : -1;
+	err->loc.col_len = (loc != NULL) ? loc->col_len : 0;
+	return err;
+}
+
+/*
+ * Allocate an error object whose message is formatted from 'fmt' and the
+ * va_list 'ap'. The location is handled as in df_error_new(): a NULL 'loc'
+ * yields column -1 with length 0.
+ */
+df_error_t *
+df_error_new_vprintf(int code, df_loc_t *loc, const char *fmt, va_list ap)
+{
+	char *text = ws_strdup_vprintf(fmt, ap);
+
+	/* df_error_new() stores the freshly allocated message and fills
+	 * in the location. */
+	return df_error_new(code, text, loc);
+}
+
+/* Variadic front end for df_error_new_vprintf(): builds an error whose
+ * message is formatted from 'fmt' and the arguments that follow it. */
+df_error_t *
+df_error_new_printf(int code, df_loc_t *loc, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ df_error_t *err = df_error_new_vprintf(code, loc, fmt, ap);
+ va_end(ap);
+ return err;
+}
+
+/*
+ * Free the error that '*ep' points to, including its message, and reset
+ * '*ep' to NULL. Does nothing when '*ep' is already NULL. 'ep' itself must
+ * not be NULL.
+ */
+void
+df_error_free(df_error_t **ep)
+{
+	df_error_t *err = *ep;
+
+	if (err != NULL) {
+		g_free(err->msg);
+		g_free(err);
+		*ep = NULL;
+	}
+}
+
+/* Append 'fv' to the cell's array. The cell must have been initialized
+ * (its array must not be NULL); this is asserted. */
+void
+df_cell_append(df_cell_t *rp, fvalue_t *fv)
+{
+ /* Assert cell has been initialized. */
+ ws_assert(rp->array != NULL);
+ g_ptr_array_add(rp->array, fv);
+}
+
+/* Returns a new reference to the cell's array, or NULL for a null cell. */
+GPtrArray *
+df_cell_ref(df_cell_t *rp)
+{
+	return (rp->array != NULL) ? g_ptr_array_ref(rp->array) : NULL;
+}
+
+/* Returns the number of values in the cell; 0 for a null cell. */
+size_t
+df_cell_size(const df_cell_t *rp)
+{
+	return (rp->array != NULL) ? rp->array->len : 0;
+}
+
+/* Returns the cell's values as a plain pointer array (the GPtrArray's
+ * pdata), or NULL for a null cell. */
+fvalue_t **
+df_cell_array(const df_cell_t *rp)
+{
+	return (rp->array != NULL) ? (fvalue_t **)rp->array->pdata : NULL;
+}
+
+/* Returns true if the cell is null or its array holds no values. */
+bool
+df_cell_is_empty(const df_cell_t *rp)
+{
+	return rp->array == NULL || rp->array->len == 0;
+}
+
+/* Returns true if the cell has no array at all (never initialized, or
+ * cleared with df_cell_clear()). */
+bool
+df_cell_is_null(const df_cell_t *rp)
+{
+ return rp->array == NULL;
+}
+
+/*
+ * (Re)initialize a cell with a fresh, empty array; any previous array is
+ * released first via df_cell_clear().
+ *
+ * When 'free_seg' is true the new array is created with fvalue_free() as
+ * its element free function; otherwise it has no element free function.
+ */
+void
+df_cell_init(df_cell_t *rp, bool free_seg)
+{
+ df_cell_clear(rp);
+ if (free_seg)
+ rp->array = g_ptr_array_new_with_free_func((GDestroyNotify)fvalue_free);
+ else
+ rp->array = g_ptr_array_new();
+}
+
+/* Drop the cell's reference on its array, if any, and make the cell null
+ * (array == NULL). */
+void
+df_cell_clear(df_cell_t *rp)
+{
+ if (rp->array)
+ g_ptr_array_unref(rp->array);
+ rp->array = NULL;
+}
+
+/* Point 'iter' at the start of the cell's array. The array pointer is
+ * copied as is (it is NULL for a null cell) and no reference is taken. */
+void
+df_cell_iter_init(df_cell_t *rp, df_cell_iter_t *iter)
+{
+ iter->ptr = rp->array;
+ iter->idx = 0;
+}
+
+/*
+ * Return the next value of the iteration and advance the iterator, or
+ * NULL once all values have been returned.
+ *
+ * An iterator initialized from a null cell has a NULL array pointer
+ * (df_cell_iter_init() copies rp->array as is); it is treated as an empty
+ * sequence, in line with the other df_cell_* accessors, instead of being
+ * dereferenced.
+ */
+fvalue_t *
+df_cell_iter_next(df_cell_iter_t *iter)
+{
+	if (iter->ptr == NULL)
+		return NULL;
+
+	if (iter->idx < iter->ptr->len) {
+		return iter->ptr->pdata[iter->idx++];
+	}
+	return NULL;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/dfilter.h b/epan/dfilter/dfilter.h
new file mode 100644
index 0000000..640e66e
--- /dev/null
+++ b/epan/dfilter/dfilter.h
@@ -0,0 +1,209 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DFILTER_H
+#define DFILTER_H
+
+#include <wireshark.h>
+
+#include "dfilter-loc.h"
+#include <epan/proto.h>
+
+/* Passed back to user */
+typedef struct epan_dfilter dfilter_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+struct epan_dissect;
+
+#define DF_ERROR_GENERIC -1
+#define DF_ERROR_UNEXPECTED_END -2
+
+typedef struct {
+ int code;
+ char *msg;
+ df_loc_t loc;
+} df_error_t;
+
+df_error_t *
+df_error_new(int code, char *msg, df_loc_t *loc);
+
+df_error_t *
+df_error_new_printf(int code, df_loc_t *loc, const char *fmt, ...)
+G_GNUC_PRINTF(3, 4);
+
+#define df_error_new_msg(msg) \
+ df_error_new_printf(DF_ERROR_GENERIC, NULL, "%s", msg)
+
+df_error_t *
+df_error_new_vprintf(int code, df_loc_t *loc, const char *fmt, va_list ap);
+
+WS_DLL_PUBLIC
+void
+df_error_free(df_error_t **ep);
+
+/* Module-level initialization */
+void
+dfilter_init(void);
+
+/* Module-level cleanup */
+void
+dfilter_cleanup(void);
+
+/* Perform macro expansion. */
+WS_DLL_PUBLIC
+char *
+dfilter_expand(const char *expr, df_error_t **err_ret);
+
+/* Save textual representation of syntax tree (for debugging purposes). */
+#define DF_SAVE_TREE (1U << 0)
+/* Perform macro substitution on filter text. */
+#define DF_EXPAND_MACROS (1U << 1)
+/* Do an optimization pass on the compiled filter. */
+#define DF_OPTIMIZE (1U << 2)
+/* Enable debug trace for flex. */
+#define DF_DEBUG_FLEX (1U << 3)
+/* Enable debug trace for lemon. */
+#define DF_DEBUG_LEMON (1U << 4)
+
+/* Compiles a string to a dfilter_t.
+ * On success, sets the dfilter* pointed to by dfp
+ * to either a NULL pointer (if the filter is a null
+ * filter, as generated by an all-blank string) or to
+ * a pointer to the newly-allocated dfilter_t
+ * structure.
+ *
+ * On failure, *errpp (if errpp is not NULL) is set to point
+ * to a newly-allocated df_error_t describing the error;
+ * it must be freed with df_error_free().
+ * The dfilter* will be set to NULL after a failure.
+ *
+ * Returns true on success, false on failure.
+ */
+WS_DLL_PUBLIC
+bool
+dfilter_compile_full(const char *text, dfilter_t **dfp,
+ df_error_t **errpp, unsigned flags,
+ const char *caller);
+
+#define dfilter_compile(text, dfp, errp) \
+ dfilter_compile_full(text, dfp, errp, \
+ DF_EXPAND_MACROS|DF_OPTIMIZE, \
+ __func__)
+
+/* Frees all memory used by dfilter, and frees
+ * the dfilter itself. */
+WS_DLL_PUBLIC
+void
+dfilter_free(dfilter_t *df);
+
+/* Apply compiled dfilter */
+WS_DLL_PUBLIC
+bool
+dfilter_apply_edt(dfilter_t *df, struct epan_dissect *edt);
+
+/* Apply compiled dfilter */
+bool
+dfilter_apply(dfilter_t *df, proto_tree *tree);
+
+/* Prime a proto_tree using the fields/protocols used in a dfilter. */
+void
+dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree);
+
+/* Refresh references in a compiled display filter. */
+WS_DLL_PUBLIC
+void
+dfilter_load_field_references(const dfilter_t *df, proto_tree *tree);
+
+/* Refresh references in a compiled display filter. */
+WS_DLL_PUBLIC
+void
+dfilter_load_field_references_edt(const dfilter_t *df, struct epan_dissect *edt);
+
+/* Check if dfilter has interesting fields */
+bool
+dfilter_has_interesting_fields(const dfilter_t *df);
+
+/* Check if dfilter is interested in a given field
+ *
+ * @param df The dfilter
+ * @param hfid The header field info ID to check
+ * @return true if the field is interesting to the dfilter
+ */
+bool
+dfilter_interested_in_field(const dfilter_t *df, int hfid);
+
+/* Check if dfilter is interested in a given protocol
+ *
+ * @param df The dfilter
+ * @param proto_id The protocol ID to check
+ * @return true if the dfilter is interested in a field whose
+ * parent is proto_id
+ */
+bool
+dfilter_interested_in_proto(const dfilter_t *df, int proto_id);
+
+WS_DLL_PUBLIC
+bool
+dfilter_requires_columns(const dfilter_t *df);
+
+WS_DLL_PUBLIC
+GPtrArray *
+dfilter_deprecated_tokens(dfilter_t *df);
+
+WS_DLL_PUBLIC
+GSList *
+dfilter_get_warnings(dfilter_t *df);
+
+#define DF_DUMP_REFERENCES (1U << 0)
+#define DF_DUMP_SHOW_FTYPE (1U << 1)
+
+/* Print bytecode of dfilter to fp */
+WS_DLL_PUBLIC
+void
+dfilter_dump(FILE *fp, dfilter_t *df, uint16_t flags);
+
+/* Text after macro expansion. */
+WS_DLL_PUBLIC
+const char *
+dfilter_text(dfilter_t *df);
+
+/* Text representation of syntax tree (if it was saved, NULL otherwise). */
+WS_DLL_PUBLIC
+const char *
+dfilter_syntax_tree(dfilter_t *df);
+
+/* Print bytecode of dfilter to log */
+WS_DLL_PUBLIC
+void
+dfilter_log_full(const char *domain, enum ws_log_level level,
+ const char *file, long line, const char *func,
+ dfilter_t *dfcode, const char *msg);
+
+#ifdef WS_DEBUG
+#define dfilter_log(dfcode, msg) \
+ dfilter_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_NOISY, \
+ __FILE__, __LINE__, __func__, \
+ dfcode, msg)
+#else
+#define dfilter_log(dfcode, msg) (void)0
+#endif
+
+#define DFILTER_DEBUG_HERE(dfcode) \
+ dfilter_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_ECHO, \
+ __FILE__, __LINE__, __func__, \
+ dfcode, #dfcode);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* DFILTER_H */
diff --git a/epan/dfilter/dfunctions.c b/epan/dfilter/dfunctions.c
new file mode 100644
index 0000000..be36424
--- /dev/null
+++ b/epan/dfilter/dfunctions.c
@@ -0,0 +1,520 @@
+/*
+ * Wireshark - Network traffic analyzer
+ *
+ * Copyright 2006 Gilbert Ramirez <gram@alumni.rice.edu>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include <glib.h>
+
+#include "dfilter-int.h"
+#include "dfunctions.h"
+#include "sttype-field.h"
+#include "semcheck.h"
+
+#include <string.h>
+
+#include <ftypes/ftypes.h>
+#include <epan/exceptions.h>
+#include <wsutil/ws_assert.h>
+
+/* Report a semantic-check failure for `node`: emit a noisy-level log line,
+ * then call dfilter_fail_throw() with DF_ERROR_GENERIC, the node's location
+ * and the printf-style message given in the variadic arguments.
+ * NOTE(review): callers in this file end with FAIL() and no return statement,
+ * so dfilter_fail_throw() presumably does not return - confirm. */
+#define FAIL(dfw, node, ...) \
+ do { \
+ ws_noisy("Semantic check failed here."); \
+ dfilter_fail_throw(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
+ } while (0)
+
+/*
+ * Apply conv_func to every byte of each string-typed value found in the
+ * single argument and append the converted copy to retval as a new
+ * FT_STRING. Values of any other type are skipped.
+ *
+ * Returns false when the argument cell is NULL, true otherwise.
+ */
+static bool
+string_walk(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval, char(*conv_func)(char))
+{
+    GPtrArray *values;
+
+    ws_assert(arg_count == 1);
+    values = stack->data;
+    if (values == NULL) {
+        return false;
+    }
+
+    for (unsigned idx = 0; idx < values->len; idx++) {
+        fvalue_t *in_fv = values->pdata[idx];
+
+        /* XXX - it would be nice to handle FT_TVBUFF, too */
+        if (!FT_IS_STRING(fvalue_type_ftenum(in_fv))) {
+            continue;
+        }
+
+        const wmem_strbuf_t *input = fvalue_get_strbuf(in_fv);
+        wmem_strbuf_t *output = wmem_strbuf_new_sized(NULL, input->len);
+        for (size_t pos = 0; pos < input->len; pos++) {
+            wmem_strbuf_append_c(output, conv_func(input->str[pos]));
+        }
+
+        fvalue_t *out_fv = fvalue_new(FT_STRING);
+        fvalue_set_strbuf(out_fv, output);
+        df_cell_append(retval, out_fv);
+    }
+
+    return true;
+}
+
+/* dfilter function: lower()
+ * Runs string_walk() over the argument with g_ascii_tolower as the
+ * per-character conversion and returns its result. */
+static bool
+df_func_lower(GSList *stack, uint32_t arg_count, df_cell_t *retval)
+{
+ return string_walk(stack, arg_count, retval, g_ascii_tolower);
+}
+
+/* dfilter function: upper()
+ * Runs string_walk() over the argument with g_ascii_toupper as the
+ * per-character conversion and returns its result. */
+static bool
+df_func_upper(GSList *stack, uint32_t arg_count, df_cell_t *retval)
+{
+ return string_walk(stack, arg_count, retval, g_ascii_toupper);
+}
+
+/*
+ * dfilter function: count()
+ *
+ * Appends a single FT_UINT32 to retval holding the number of values in the
+ * argument. Returns false when the argument cell is NULL.
+ */
+static bool
+df_func_count(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval)
+{
+    ws_assert(arg_count == 1);
+
+    GPtrArray *values = stack->data;
+    if (values == NULL) {
+        return false;
+    }
+
+    fvalue_t *count_fv = fvalue_new(FT_UINT32);
+    fvalue_set_uinteger(count_fv, values->len);
+    df_cell_append(retval, count_fv);
+
+    return true;
+}
+
+/* dfilter function: string()
+ *
+ * For each value in the single argument, appends to retval a new FT_STRING
+ * holding the value's FTREPR_DFILTER string representation.
+ * Returns false when the argument cell is NULL. */
+static bool
+df_func_string(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval)
+{
+ GPtrArray *arg1;
+ fvalue_t *arg_fvalue;
+ fvalue_t *new_ft_string;
+ char *s;
+
+ ws_assert(arg_count == 1);
+ arg1 = stack->data;
+ if (arg1 == NULL)
+ return false;
+
+ for (unsigned i = 0; i < arg1->len; i++) {
+ arg_fvalue = arg1->pdata[i];
+ /* Only the types listed here are converted. */
+ switch (fvalue_type_ftenum(arg_fvalue))
+ {
+ case FT_UINT8:
+ case FT_UINT16:
+ case FT_UINT24:
+ case FT_UINT32:
+ case FT_UINT40:
+ case FT_UINT48:
+ case FT_UINT56:
+ case FT_UINT64:
+ case FT_INT8:
+ case FT_INT16:
+ case FT_INT32:
+ case FT_INT40:
+ case FT_INT48:
+ case FT_INT56:
+ case FT_INT64:
+ case FT_IPv4:
+ case FT_IPv6:
+ case FT_FLOAT:
+ case FT_DOUBLE:
+ case FT_ETHER:
+ case FT_FRAMENUM:
+ case FT_AX25:
+ case FT_IPXNET:
+ case FT_GUID:
+ case FT_OID:
+ case FT_EUI64:
+ case FT_VINES:
+ case FT_REL_OID:
+ case FT_SYSTEM_ID:
+ case FT_FCWWN:
+ case FT_IEEE_11073_SFLOAT:
+ case FT_IEEE_11073_FLOAT:
+ s = fvalue_to_string_repr(NULL, arg_fvalue, FTREPR_DFILTER, BASE_NONE);
+ /* Ensure we have an allocated string here */
+ if (!s)
+ s = wmem_strdup(NULL, "");
+ break;
+ default:
+ /* A value of any other type ends the walk here: the function
+  * returns true with whatever was appended so far. */
+ return true;
+ }
+
+ new_ft_string = fvalue_new(FT_STRING);
+ fvalue_set_string(new_ft_string, s);
+ /* s is released right after fvalue_set_string(); presumably the
+  * fvalue keeps its own copy - confirm against fvalue_set_string(). */
+ wmem_free(NULL, s);
+ df_cell_append(retval, new_ft_string);
+ }
+
+ return true;
+}
+
+/*
+ * Shared implementation of max() and min().
+ *
+ * Scans every value of every argument and keeps the one for which
+ * fv_cmp(candidate, current_best) is true (the first value seen is the
+ * initial best). A duplicate of the winner is appended to retval.
+ *
+ * Arguments whose cell is NULL are skipped instead of being dereferenced,
+ * matching the NULL checks done by the other functions in this file, and
+ * the walk stops if the stack holds fewer than arg_count entries.
+ *
+ * Returns false when no value was found at all, true otherwise.
+ */
+static bool
+df_func_compare(GSList *stack, uint32_t arg_count, df_cell_t *retval,
+                    bool (*fv_cmp)(const fvalue_t *a, const fvalue_t *b))
+{
+    fvalue_t *fv_ret = NULL;
+    GSList *args;
+    GPtrArray *arg1;
+    fvalue_t *arg_fvalue;
+    uint32_t i;
+
+    for (args = stack, i = 0; args != NULL && i < arg_count; args = args->next, i++) {
+        arg1 = args->data;
+        if (arg1 == NULL) {
+            /* Nothing stored for this argument: it contributes no values. */
+            continue;
+        }
+        for (unsigned j = 0; j < arg1->len; j++) {
+            arg_fvalue = arg1->pdata[j];
+            if (fv_ret == NULL || fv_cmp(arg_fvalue, fv_ret)) {
+                fv_ret = arg_fvalue;
+            }
+        }
+    }
+
+    if (fv_ret == NULL)
+        return false;
+
+    df_cell_append(retval, fvalue_dup(fv_ret));
+
+    return true;
+}
+
+/* dfilter function: max()
+ * Find maximum value: delegates to df_func_compare() with fvalue_gt as the
+ * "candidate beats current best" predicate. */
+static bool
+df_func_max(GSList *stack, uint32_t arg_count, df_cell_t *retval)
+{
+ return df_func_compare(stack, arg_count, retval, fvalue_gt);
+}
+
+/* dfilter function: min()
+ * Find minimum value: delegates to df_func_compare() with fvalue_lt as the
+ * "candidate beats current best" predicate. */
+static bool
+df_func_min(GSList *stack, uint32_t arg_count, df_cell_t *retval)
+{
+ return df_func_compare(stack, arg_count, retval, fvalue_lt);
+}
+
+/*
+ * dfilter function: abs()
+ *
+ * For each value in the single argument, appends to retval either a
+ * duplicate of the value (non-negative input) or its unary negation
+ * (negative input).
+ *
+ * If the negation fails, the error is logged at debug level and that value
+ * is skipped; a NULL fvalue is never appended to retval.
+ *
+ * Returns false when the argument cell is NULL or when nothing ended up in
+ * retval, true otherwise.
+ */
+static bool
+df_func_abs(GSList *stack, uint32_t arg_count _U_, df_cell_t *retval)
+{
+    GPtrArray *arg1;
+    fvalue_t *fv_arg, *new_fv;
+    char *err_msg = NULL;
+
+    ws_assert(arg_count == 1);
+    arg1 = stack->data;
+    if (arg1 == NULL)
+        return false;
+
+    for (unsigned i = 0; i < arg1->len; i++) {
+        fv_arg = arg1->pdata[i];
+        if (fvalue_is_negative(fv_arg)) {
+            new_fv = fvalue_unary_minus(fv_arg, &err_msg);
+            if (new_fv == NULL) {
+                ws_debug("abs: %s", err_msg);
+                g_free(err_msg);
+                err_msg = NULL;
+                /* No result for this value: do not append NULL. */
+                continue;
+            }
+        }
+        else {
+            new_fv = fvalue_dup(fv_arg);
+        }
+        df_cell_append(retval, new_fv);
+    }
+
+    return !df_cell_is_empty(retval);
+}
+
+/*
+ * Semantic check used by upper() and lower(): the single parameter must be
+ * a field whose type satisfies FT_IS_STRING. On success the field counter
+ * is bumped and FT_STRING is returned; anything else is reported via FAIL.
+ */
+static ftenum_t
+ul_semcheck_is_field_string(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_,
+                            GSList *param_list, df_loc_t func_loc _U_)
+{
+    ws_assert(g_slist_length(param_list) == 1);
+    stnode_t *param = param_list->data;
+
+    if (stnode_type_id(param) == STTYPE_FIELD) {
+        header_field_info *field_info_hf;
+
+        dfw->field_count++;
+        field_info_hf = sttype_field_hfinfo(param);
+        if (FT_IS_STRING(field_info_hf->type)) {
+            return FT_STRING;
+        }
+    }
+
+    FAIL(dfw, param, "Only string type fields can be used as parameter for %s()", func_name);
+}
+
+/*
+ * Semantic check requiring the single parameter to be a field. On success
+ * the field counter is bumped and FT_UINT32 is returned; any other node
+ * type is reported via FAIL.
+ */
+static ftenum_t
+ul_semcheck_is_field(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_,
+                            GSList *param_list, df_loc_t func_loc _U_)
+{
+    ws_assert(g_slist_length(param_list) == 1);
+    stnode_t *param = param_list->data;
+    bool is_field = (stnode_type_id(param) == STTYPE_FIELD);
+
+    if (is_field) {
+        dfw->field_count++;
+        return FT_UINT32;
+    }
+
+    FAIL(dfw, param, "Only fields can be used as parameter for %s()", func_name);
+}
+
+/* Semantic check for len(): the single parameter must be a field (checked by
+ * ul_semcheck_is_field(), which also increments dfw->field_count) and its
+ * field type must satisfy ftype_can_length(). Returns FT_UINT32 on success;
+ * failures are reported through FAIL. */
+static ftenum_t
+ul_semcheck_can_length(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
+ GSList *param_list, df_loc_t func_loc)
+{
+ ws_assert(g_slist_length(param_list) == 1);
+ stnode_t *st_node = param_list->data;
+
+ /* Return value is ignored: the call is made for its field check. */
+ ul_semcheck_is_field(dfw, func_name, lhs_ftype, param_list, func_loc);
+ if (!ftype_can_length(sttype_field_ftenum(st_node))) {
+ FAIL(dfw, st_node, "Field %s does not support the %s() function", stnode_todisplay(st_node), func_name);
+ }
+ return FT_UINT32;
+}
+
+/* Semantic check for string(): the single parameter must be a field whose
+ * type is one of the types listed below (the same list df_func_string()
+ * converts at run time). Returns FT_STRING on success; an unsupported field
+ * type or a non-field parameter is reported through FAIL. */
+static ftenum_t
+ul_semcheck_string_param(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U_,
+ GSList *param_list, df_loc_t func_loc _U_)
+{
+ header_field_info *hfinfo;
+
+ ws_assert(g_slist_length(param_list) == 1);
+ stnode_t *st_node = param_list->data;
+
+ if (stnode_type_id(st_node) == STTYPE_FIELD) {
+ dfw->field_count++;
+ hfinfo = sttype_field_hfinfo(st_node);
+ switch (hfinfo->type) {
+ case FT_UINT8:
+ case FT_UINT16:
+ case FT_UINT24:
+ case FT_UINT32:
+ case FT_UINT40:
+ case FT_UINT48:
+ case FT_UINT56:
+ case FT_UINT64:
+ case FT_INT8:
+ case FT_INT16:
+ case FT_INT32:
+ case FT_INT40:
+ case FT_INT48:
+ case FT_INT56:
+ case FT_INT64:
+ case FT_IPv4:
+ case FT_IPv6:
+ case FT_FLOAT:
+ case FT_DOUBLE:
+ case FT_ETHER:
+ case FT_FRAMENUM:
+ case FT_AX25:
+ case FT_IPXNET:
+ case FT_GUID:
+ case FT_OID:
+ case FT_EUI64:
+ case FT_VINES:
+ case FT_REL_OID:
+ case FT_SYSTEM_ID:
+ case FT_FCWWN:
+ case FT_IEEE_11073_SFLOAT:
+ case FT_IEEE_11073_FLOAT:
+ return FT_STRING;
+ default:
+ break;
+ }
+ /* It is a field, but of a type string() does not convert. */
+ FAIL(dfw, st_node, "String conversion for field \"%s\" is not supported", hfinfo->abbrev);
+ }
+ FAIL(dfw, st_node, "Only fields can be used as parameter for %s()", func_name);
+}
+
+/* Check arguments are all the same type and they can be compared. */
+/*
+ Every STTYPE_LITERAL needs to be resolved to a STTYPE_FVALUE. If we don't
+ have type information (lhs_ftype is FT_NONE) and we have not seen an argument
+ with a definite type we defer resolving literals to values until we have examined
+ the entire list of function arguments. If we still cannot resolve to a definite
+ type after that (all arguments must have the same type) then we give up and
+ return FT_NONE.
+
+ Used as the semantic check for max() and min(). Returns the common type of
+ the arguments (initially lhs_ftype, otherwise the first definite argument
+ type seen). Invalid, incompatible or non-orderable arguments are reported
+ through FAIL.
+*/
+static ftenum_t
+ul_semcheck_compare(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
+ GSList *param_list, df_loc_t func_loc _U_)
+{
+ stnode_t *arg;
+ sttype_id_t type;
+ ftenum_t ftype, ft_arg;
+ GSList *l;
+ fvalue_t *fv;
+ wmem_list_t *literals = NULL;
+
+ ftype = lhs_ftype;
+
+ for (l = param_list; l != NULL; l = l->next) {
+ arg = l->data;
+ type = stnode_type_id(arg);
+
+ /* Determine this argument's type (ft_arg) according to its node kind. */
+ if (type == STTYPE_ARITHMETIC) {
+ ft_arg = check_arithmetic(dfw, arg, ftype);
+ }
+ else if (type == STTYPE_LITERAL) {
+ if (ftype != FT_NONE) {
+ /* Target type already known: resolve the literal right away. */
+ fv = dfilter_fvalue_from_literal(dfw, ftype, arg, false, NULL);
+ stnode_replace(arg, STTYPE_FVALUE, fv);
+ ft_arg = fvalue_type_ftenum(fv);
+ }
+ else {
+ /* No type yet: remember the literal and resolve it after the loop. */
+ if (literals == NULL) {
+ literals = wmem_list_new(dfw->dfw_scope);
+ }
+ wmem_list_append(literals, arg);
+ ft_arg = FT_NONE;
+ }
+ }
+ else if (type == STTYPE_FUNCTION) {
+ ft_arg = check_function(dfw, arg, ftype);
+ }
+ else if (type == STTYPE_FIELD) {
+ dfw->field_count++;
+ ft_arg = sttype_field_ftenum(arg);
+ }
+ else if (type == STTYPE_REFERENCE) {
+ /* Unlike STTYPE_FIELD, a reference does not bump field_count. */
+ ft_arg = sttype_field_ftenum(arg);
+ }
+ else {
+ FAIL(dfw, arg, "Argument '%s' is not valid for %s()",
+ stnode_todisplay(arg), func_name);
+ }
+
+ /* The first definite argument type becomes the expected type. */
+ if (ftype == FT_NONE) {
+ ftype = ft_arg;
+ }
+ if (ft_arg != FT_NONE && ftype != FT_NONE && !compatible_ftypes(ft_arg, ftype)) {
+ FAIL(dfw, arg, "Arguments to '%s' must be type compatible (expected %s, got %s)",
+ func_name, ftype_name(ftype), ftype_name(ft_arg));
+ }
+ if (ft_arg != FT_NONE && !ftype_can_cmp(ft_arg)) {
+ FAIL(dfw, arg, "Argument '%s' to '%s' cannot be ordered",
+ stnode_todisplay(arg), func_name);
+ }
+ }
+
+ /* Resolve the deferred literals now, if a type was eventually found. */
+ if (literals != NULL) {
+ if (ftype != FT_NONE) {
+ wmem_list_frame_t *fp;
+ stnode_t *st;
+ for (fp = wmem_list_head(literals); fp != NULL; fp = wmem_list_frame_next(fp)) {
+ st = wmem_list_frame_data(fp);
+ fv = dfilter_fvalue_from_literal(dfw, ftype, st, false, NULL);
+ stnode_replace(st, STTYPE_FVALUE, fv);
+ }
+ }
+ wmem_destroy_list(literals);
+ }
+
+ return ftype;
+}
+
+/*
+ * Semantic check for abs().
+ *
+ * Works out the type of the single parameter (arithmetic expression,
+ * literal, nested function call or field) and returns it. A literal is
+ * converted to an fvalue of lhs_ftype and needs lhs_ftype to be known.
+ * Any other node kind, an FT_NONE result, or a type for which
+ * ftype_can_is_negative() is false is reported through FAIL.
+ */
+static ftenum_t
+ul_semcheck_absolute_value(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
+                            GSList *param_list, df_loc_t func_loc _U_)
+{
+    ws_assert(g_slist_length(param_list) == 1);
+    stnode_t *param = param_list->data;
+    ftenum_t result;
+
+    switch (stnode_type_id(param)) {
+        case STTYPE_ARITHMETIC:
+            result = check_arithmetic(dfw, param, lhs_ftype);
+            break;
+
+        case STTYPE_LITERAL:
+            if (lhs_ftype == FT_NONE) {
+                FAIL(dfw, param, "Need a field or field-like value on the LHS.");
+            }
+            else {
+                /* Convert RHS literal to the same ftype as LHS. */
+                fvalue_t *literal_fv = dfilter_fvalue_from_literal(dfw, lhs_ftype, param, false, NULL);
+                stnode_replace(param, STTYPE_FVALUE, literal_fv);
+                result = fvalue_type_ftenum(literal_fv);
+            }
+            break;
+
+        case STTYPE_FUNCTION:
+            result = check_function(dfw, param, lhs_ftype);
+            break;
+
+        case STTYPE_FIELD:
+            dfw->field_count++;
+            result = sttype_field_ftenum(param);
+            break;
+
+        default:
+            result = FT_NONE;
+            break;
+    }
+
+    if (result == FT_NONE) {
+        FAIL(dfw, param, "Type %s is not valid for %s",
+                stnode_type_name(param), func_name);
+    }
+    if (!ftype_can_is_negative(result)) {
+        FAIL(dfw, param, "'%s' is not a valid argument to '%s'()",
+                stnode_todisplay(param), func_name);
+    }
+    return result;
+}
+
+/* The table of all display-filter functions.
+ * Columns: name, run-time implementation, minimum argument count, maximum
+ * argument count (0 for no limit), semantic-check callback.
+ * The all-NULL entry terminates the table; df_func_lookup() stops there. */
+static df_func_def_t
+df_functions[] = {
+ { "lower", df_func_lower, 1, 1, ul_semcheck_is_field_string },
+ { "upper", df_func_upper, 1, 1, ul_semcheck_is_field_string },
+ /* Length function is implemented as a DFVM instruction. */
+ { "len", NULL, 1, 1, ul_semcheck_can_length },
+ { "count", df_func_count, 1, 1, ul_semcheck_is_field },
+ { "string", df_func_string, 1, 1, ul_semcheck_string_param },
+ { "max", df_func_max, 1, 0, ul_semcheck_compare },
+ { "min", df_func_min, 1, 0, ul_semcheck_compare },
+ { "abs", df_func_abs, 1, 1, ul_semcheck_absolute_value },
+ { NULL, NULL, 0, 0, NULL }
+};
+
+/*
+ * Look up a display filter function record by name.
+ *
+ * Scans df_functions up to its NULL-named terminator and returns the first
+ * entry whose name matches exactly (strcmp), or NULL if there is none.
+ */
+df_func_def_t*
+df_func_lookup(const char *name)
+{
+    for (df_func_def_t *entry = df_functions; entry->name != NULL; entry++) {
+        if (strcmp(entry->name, name) == 0) {
+            return entry;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vi: set shiftwidth=4 tabstop=8 expandtab:
+ * :indentSize=4:tabSize=8:noTabs=true:
+ */
diff --git a/epan/dfilter/dfunctions.h b/epan/dfilter/dfunctions.h
new file mode 100644
index 0000000..25485e7
--- /dev/null
+++ b/epan/dfilter/dfunctions.h
@@ -0,0 +1,40 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ *
+ * Copyright 2006 Gilbert Ramirez <gram@alumni.rice.edu>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DFUNCTIONS_H
+#define DFUNCTIONS_H
+
+#include <glib.h>
+#include <ftypes/ftypes.h>
+#include "syntax-tree.h"
+#include "dfilter-int.h"
+
+/* Functions take any number of arguments and return 1. */
+
+/* The run-time logic of the dfilter function */
+typedef bool (*DFFuncType)(GSList *stack, uint32_t arg_count, df_cell_t *retval);
+
+/* The semantic check for the dfilter function */
+typedef ftenum_t (*DFSemCheckType)(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
+ GSList *param_list, df_loc_t func_loc);
+
+/* This is a "function definition" record, holding everything
+ * we need to know about a function */
+typedef struct {
+ /* Function name, as matched by df_func_lookup() */
+ const char *name;
+ /* Run-time implementation */
+ DFFuncType function;
+ /* Minimum number of arguments */
+ unsigned min_nargs;
+ unsigned max_nargs; /* 0 for no limit */
+ /* Semantic check applied to the function's parameters */
+ DFSemCheckType semcheck_param_function;
+} df_func_def_t;
+
+/* Return the function definition record for the function named "name" */
+df_func_def_t* df_func_lookup(const char *name);
+
+#endif
diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c
new file mode 100644
index 0000000..cae7336
--- /dev/null
+++ b/epan/dfilter/dfvm.c
@@ -0,0 +1,1747 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include "dfvm.h"
+
+#include <ftypes/ftypes.h>
+#include <wsutil/ws_assert.h>
+
+static void
+debug_register(GSList *reg, uint32_t num);
+
+/* Return the printable name of a DFVM opcode as a static string.
+ * An opcode with no case below yields the "(fix-opcode-string)" placeholder. */
+const char *
+dfvm_opcode_tostr(dfvm_opcode_t code)
+{
+ switch (code) {
+ case DFVM_IF_TRUE_GOTO: return "IF_TRUE_GOTO";
+ case DFVM_IF_FALSE_GOTO: return "IF_FALSE_GOTO";
+ case DFVM_CHECK_EXISTS: return "CHECK_EXISTS";
+ case DFVM_CHECK_EXISTS_R: return "CHECK_EXISTS_R";
+ case DFVM_NOT: return "NOT";
+ case DFVM_RETURN: return "RETURN";
+ case DFVM_READ_TREE: return "READ_TREE";
+ case DFVM_READ_TREE_R: return "READ_TREE_R";
+ case DFVM_READ_REFERENCE: return "READ_REFERENCE";
+ case DFVM_READ_REFERENCE_R: return "READ_REFERENCE_R";
+ case DFVM_PUT_FVALUE: return "PUT_FVALUE";
+ case DFVM_ALL_EQ: return "ALL_EQ";
+ case DFVM_ANY_EQ: return "ANY_EQ";
+ case DFVM_ALL_NE: return "ALL_NE";
+ case DFVM_ANY_NE: return "ANY_NE";
+ case DFVM_ALL_GT: return "ALL_GT";
+ case DFVM_ANY_GT: return "ANY_GT";
+ case DFVM_ALL_GE: return "ALL_GE";
+ case DFVM_ANY_GE: return "ANY_GE";
+ case DFVM_ALL_LT: return "ALL_LT";
+ case DFVM_ANY_LT: return "ANY_LT";
+ case DFVM_ALL_LE: return "ALL_LE";
+ case DFVM_ANY_LE: return "ANY_LE";
+ case DFVM_ALL_CONTAINS: return "ALL_CONTAINS";
+ case DFVM_ANY_CONTAINS: return "ANY_CONTAINS";
+ case DFVM_ALL_MATCHES: return "ALL_MATCHES";
+ case DFVM_ANY_MATCHES: return "ANY_MATCHES";
+ case DFVM_SET_ALL_IN: return "SET_ALL_IN";
+ case DFVM_SET_ANY_IN: return "SET_ANY_IN";
+ case DFVM_SET_ALL_NOT_IN: return "SET_ALL_NOT_IN";
+ case DFVM_SET_ANY_NOT_IN: return "SET_ANY_NOT_IN";
+ case DFVM_SET_ADD: return "SET_ADD";
+ case DFVM_SET_ADD_RANGE: return "SET_ADD_RANGE";
+ case DFVM_SET_CLEAR: return "SET_CLEAR";
+ case DFVM_SLICE: return "SLICE";
+ case DFVM_LENGTH: return "LENGTH";
+ case DFVM_BITWISE_AND: return "BITWISE_AND";
+ case DFVM_UNARY_MINUS: return "UNARY_MINUS";
+ case DFVM_ADD: return "ADD";
+ case DFVM_SUBTRACT: return "SUBTRACT";
+ case DFVM_MULTIPLY: return "MULTIPLY";
+ case DFVM_DIVIDE: return "DIVIDE";
+ case DFVM_MODULO: return "MODULO";
+ case DFVM_CALL_FUNCTION: return "CALL_FUNCTION";
+ case DFVM_STACK_PUSH: return "STACK_PUSH";
+ case DFVM_STACK_POP: return "STACK_POP";
+ case DFVM_NOT_ALL_ZERO: return "NOT_ALL_ZERO";
+ }
+ /* Reached only for an opcode missing from the switch above. */
+ return "(fix-opcode-string)";
+}
+
+/*
+ * Allocate a new DFVM instruction for opcode `op` with all three argument
+ * slots set to NULL. Release it with dfvm_insn_free().
+ */
+dfvm_insn_t*
+dfvm_insn_new(dfvm_opcode_t op)
+{
+	dfvm_insn_t *new_insn = g_new(dfvm_insn_t, 1);
+
+	new_insn->op = op;
+	new_insn->arg1 = NULL;
+	new_insn->arg2 = NULL;
+	new_insn->arg3 = NULL;
+
+	return new_insn;
+}
+
+/* Destroy a DFVM value: release the payload according to its type, then
+ * free the value itself. Called by dfvm_value_unref() when the reference
+ * count is no longer positive. */
+static void
+dfvm_value_free(dfvm_value_t *v)
+{
+ switch (v->type) {
+ case FVALUE:
+ /* Drops the array reference; the array was created with
+  * fvalue_free as its element destroy function. */
+ g_ptr_array_unref(v->value.fvalue_p);
+ break;
+ case DRANGE:
+ drange_free(v->value.drange);
+ break;
+ case PCRE:
+ ws_regex_free(v->value.pcre);
+ break;
+ /* The remaining types have nothing to release here. */
+ case EMPTY:
+ case HFINFO:
+ case RAW_HFINFO:
+ case INSN_NUMBER:
+ case REGISTER:
+ case INTEGER:
+ case FUNCTION_DEF:
+ break;
+ }
+ g_free(v);
+}
+
+/*
+ * Take a reference on a DFVM value. NULL is accepted and passed through
+ * unchanged; otherwise the reference count is incremented and the same
+ * pointer is returned.
+ */
+dfvm_value_t*
+dfvm_value_ref(dfvm_value_t *v)
+{
+	if (v != NULL) {
+		v->ref_count++;
+	}
+	return v;
+}
+
+/*
+ * Drop one reference on a DFVM value (which must not be NULL). The value is
+ * destroyed once the count is no longer positive.
+ */
+void
+dfvm_value_unref(dfvm_value_t *v)
+{
+	ws_assert(v);
+
+	v->ref_count--;
+	if (!(v->ref_count > 0)) {
+		dfvm_value_free(v);
+	}
+}
+
+/*
+ * Free a DFVM instruction: drop the reference held on each non-NULL
+ * argument (arg1, arg2, arg3, in that order), then free the instruction.
+ */
+void
+dfvm_insn_free(dfvm_insn_t *insn)
+{
+	dfvm_value_t *operands[3];
+
+	operands[0] = insn->arg1;
+	operands[1] = insn->arg2;
+	operands[2] = insn->arg3;
+
+	for (unsigned slot = 0; slot < 3; slot++) {
+		if (operands[slot]) {
+			dfvm_value_unref(operands[slot]);
+		}
+	}
+
+	g_free(insn);
+}
+
+
+/*
+ * Allocate a DFVM value of the given type with a reference count of zero.
+ * The payload union is left for the caller to fill in.
+ */
+dfvm_value_t*
+dfvm_value_new(dfvm_value_type_t type)
+{
+	dfvm_value_t *fresh = g_new(dfvm_value_t, 1);
+
+	fresh->ref_count = 0;
+	fresh->type = type;
+
+	return fresh;
+}
+
+/* Create an FVALUE-typed DFVM value holding fv as the only element of a new
+ * pointer array. The array is created with fvalue_free as its element
+ * destroy function. */
+dfvm_value_t*
+dfvm_value_new_fvalue(fvalue_t *fv)
+{
+ dfvm_value_t *v = dfvm_value_new(FVALUE);
+ v->value.fvalue_p = g_ptr_array_new_full(1, (GDestroyNotify)fvalue_free);
+ g_ptr_array_add(v->value.fvalue_p, fv);
+ return v;
+}
+
+/*
+ * Create a DFVM value pointing at a header field. The value type is
+ * RAW_HFINFO when `raw` is true and HFINFO otherwise.
+ */
+dfvm_value_t*
+dfvm_value_new_hfinfo(header_field_info *hfinfo, bool raw)
+{
+	dfvm_value_t *v = dfvm_value_new(raw ? RAW_HFINFO : HFINFO);
+
+	v->value.hfinfo = hfinfo;
+	return v;
+}
+
+/* Create a REGISTER-typed DFVM value whose numeric payload is reg. */
+dfvm_value_t*
+dfvm_value_new_register(int reg)
+{
+ dfvm_value_t *v = dfvm_value_new(REGISTER);
+ v->value.numeric = reg;
+ return v;
+}
+
+/* Create a DRANGE-typed DFVM value storing dr. The range is released with
+ * drange_free() when the value is destroyed (see dfvm_value_free()). */
+dfvm_value_t*
+dfvm_value_new_drange(drange_t *dr)
+{
+ dfvm_value_t *v = dfvm_value_new(DRANGE);
+ v->value.drange = dr;
+ return v;
+}
+
+/* Create a FUNCTION_DEF-typed DFVM value storing the funcdef pointer. The
+ * definition is not released when the value is destroyed. */
+dfvm_value_t*
+dfvm_value_new_funcdef(df_func_def_t *funcdef)
+{
+ dfvm_value_t *v = dfvm_value_new(FUNCTION_DEF);
+ v->value.funcdef = funcdef;
+ return v;
+}
+
+/* Create a PCRE-typed DFVM value storing re. The regex is released with
+ * ws_regex_free() when the value is destroyed (see dfvm_value_free()). */
+dfvm_value_t*
+dfvm_value_new_pcre(ws_regex_t *re)
+{
+ dfvm_value_t *v = dfvm_value_new(PCRE);
+ v->value.pcre = re;
+ return v;
+}
+
+/* Create an INTEGER-typed DFVM value whose numeric payload is num. */
+dfvm_value_t*
+dfvm_value_new_guint(unsigned num)
+{
+ dfvm_value_t *v = dfvm_value_new(INTEGER);
+ v->value.numeric = num;
+ return v;
+}
+
+/* Build a newly allocated, printable form of a DFVM value for the bytecode
+ * dump. Returns NULL when v is NULL. The caller in this file
+ * (append_op_args()) releases the result with g_free(). */
+static char *
+dfvm_value_tostr(dfvm_value_t *v)
+{
+ char *s;
+
+ if (!v)
+ return NULL;
+
+ switch (v->type) {
+ case HFINFO:
+ s = ws_strdup(v->value.hfinfo->abbrev);
+ break;
+ case RAW_HFINFO:
+ /* Raw fields are shown with a leading '@'. */
+ s = ws_strdup_printf("@%s", v->value.hfinfo->abbrev);
+ break;
+ case FVALUE:
+ s = fvalue_to_debug_repr(NULL, dfvm_value_get_fvalue(v));
+ break;
+ case DRANGE:
+ s = drange_tostr(v->value.drange);
+ break;
+ case PCRE:
+ s = ws_strdup(ws_regex_pattern(v->value.pcre));
+ break;
+ case REGISTER:
+ /* Registers are shown as R<number>. */
+ s = ws_strdup_printf("R%"G_GUINT32_FORMAT, v->value.numeric);
+ break;
+ case FUNCTION_DEF:
+ s = ws_strdup(v->value.funcdef->name);
+ break;
+ case INTEGER:
+ s = ws_strdup_printf("%"G_GUINT32_FORMAT, v->value.numeric);
+ break;
+ default:
+ /* Types without a dedicated representation. */
+ s = ws_strdup("FIXME");
+ }
+ return s;
+}
+
+/*
+ * Build the " <TYPE>" suffix shown after an operand in the bytecode dump.
+ *
+ * Returns a newly allocated empty string when v is NULL, when show_ftype is
+ * false, or when the value type is not HFINFO, RAW_HFINFO or FVALUE.
+ */
+static char *
+value_type_tostr(dfvm_value_t *v, bool show_ftype)
+{
+	const char *type_label;
+
+	if (v == NULL || !show_ftype) {
+		return ws_strdup("");
+	}
+
+	if (v->type == HFINFO) {
+		type_label = ftype_name(v->value.hfinfo->type);
+	}
+	else if (v->type == RAW_HFINFO) {
+		type_label = "FT_BYTES";
+	}
+	else if (v->type == FVALUE) {
+		type_label = fvalue_type_name(dfvm_value_get_fvalue(v));
+	}
+	else {
+		return ws_strdup("");
+	}
+
+	return ws_strdup_printf(" <%s>", type_label);
+}
+
+/* Push a copy of str onto the front of the dump's printable stack and
+ * return the new list head. The copy is freed by dump_str_stack_pop(). */
+static GSList *
+dump_str_stack_push(GSList *stack, const char *str)
+{
+ return g_slist_prepend(stack, g_strdup(str));
+}
+
+/*
+ * Remove up to `count` entries from the front of the dump's printable
+ * stack, freeing each stored string, and return the new list head. Stops
+ * early if the list runs out.
+ */
+static GSList *
+dump_str_stack_pop(GSList *stack, uint32_t count)
+{
+	for (; stack != NULL && count > 0; count--) {
+		g_free(stack->data);
+		stack = g_slist_delete_link(stack, stack);
+	}
+	return stack;
+}
+
+/* Append "func(arg, arg, ...)" to buf for a CALL_FUNCTION instruction.
+ * The argument texts are the first nargs entries of stack_print (fewer if
+ * the list is shorter). */
+static void
+append_call_function(wmem_strbuf_t *buf, const char *func, uint32_t nargs,
+ GSList *stack_print)
+{
+ uint32_t idx;
+ GString *gs;
+ GSList *l;
+ const char *sep = "";
+
+ wmem_strbuf_append_printf(buf, "%s(", func);
+ if (nargs > 0) {
+ gs = g_string_new(NULL);
+ /* Each entry is prepended, so the text ends up in the reverse of list
+  * order. Entries are pushed at the list head, so that is push order. */
+ for (l = stack_print, idx = 0; l != NULL && idx < nargs; idx++, l = l->next) {
+ g_string_prepend(gs, sep);
+ g_string_prepend(gs, l->data);
+ sep = ", ";
+ }
+ wmem_strbuf_append(buf, gs->str);
+ g_string_free(gs, true);
+ }
+ wmem_strbuf_append(buf, ")");
+}
+
+/*
+ * Pad buf with spaces so that the text written since position `start` is
+ * at least `offset` characters wide. Nothing is appended when it already
+ * is that wide or wider.
+ */
+static void
+indent(wmem_strbuf_t *buf, size_t offset, size_t start)
+{
+	size_t written = buf->len - start;
+
+	if (written < offset) {
+		wmem_strbuf_append_c_count(buf, ' ', offset - written);
+	}
+}
+/* Column widths used by the dump: indent1 is 24 wide, indent2 is 16 wide. */
+#define indent1(buf, start) indent(buf, 24, start)
+#define indent2(buf, start) indent(buf, 16, start)
+
+/* Append the " -> <reg>" destination marker used by the bytecode dump. */
+static void
+append_to_register(wmem_strbuf_t *buf, const char *reg)
+{
+ wmem_strbuf_append_printf(buf, " -> %s", reg);
+}
+
+/* Append the operands of one instruction to the bytecode dump in buf.
+ *
+ * Each of the three argument slots is rendered to text (and, when
+ * DF_DUMP_SHOW_FTYPE is set in flags, to a " <type>" suffix), then printed
+ * in an opcode-specific layout. Instructions that write a register get an
+ * aligned " -> Rn" suffix. STACK_PUSH and STACK_POP also update
+ * *stack_print, which CALL_FUNCTION reads to print its arguments.
+ *
+ * Must not be called for DFVM_NOT, DFVM_RETURN or DFVM_SET_CLEAR. */
+static void
+append_op_args(wmem_strbuf_t *buf, dfvm_insn_t *insn, GSList **stack_print,
+ uint16_t flags)
+{
+ dfvm_value_t *arg1, *arg2, *arg3;
+ char *arg1_str, *arg2_str, *arg3_str;
+ char *arg1_str_type, *arg2_str_type, *arg3_str_type;
+ size_t col_start;
+
+ arg1 = insn->arg1;
+ arg2 = insn->arg2;
+ arg3 = insn->arg3;
+ /* dfvm_value_tostr() returns NULL for a NULL argument slot. */
+ arg1_str = dfvm_value_tostr(arg1);
+ arg2_str = dfvm_value_tostr(arg2);
+ arg3_str = dfvm_value_tostr(arg3);
+ arg1_str_type = value_type_tostr(arg1, flags & DF_DUMP_SHOW_FTYPE);
+ arg2_str_type = value_type_tostr(arg2, flags & DF_DUMP_SHOW_FTYPE);
+ arg3_str_type = value_type_tostr(arg3, flags & DF_DUMP_SHOW_FTYPE);
+
+ /* Start of the operand column, used to align the " -> Rn" suffix. */
+ col_start = buf->len;
+
+ switch (insn->op) {
+ case DFVM_CHECK_EXISTS:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ break;
+
+ case DFVM_CHECK_EXISTS_R:
+ wmem_strbuf_append_printf(buf, "%s#[%s]%s",
+ arg1_str, arg2_str, arg1_str_type);
+ break;
+
+ case DFVM_READ_TREE:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_READ_TREE_R:
+ wmem_strbuf_append_printf(buf, "%s#[%s]%s",
+ arg1_str, arg3_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_READ_REFERENCE:
+ wmem_strbuf_append_printf(buf, "${%s}%s",
+ arg1_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_READ_REFERENCE_R:
+ wmem_strbuf_append_printf(buf, "${%s#[%s]}%s",
+ arg1_str, arg3_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_PUT_FVALUE:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_CALL_FUNCTION:
+ /* arg1 is the function, arg3 the argument count, arg2 the result. */
+ append_call_function(buf, arg1_str, arg3->value.numeric, *stack_print);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_STACK_PUSH:
+ wmem_strbuf_append_printf(buf, "%s", arg1_str);
+ *stack_print = dump_str_stack_push(*stack_print, arg1_str);
+ break;
+
+ case DFVM_STACK_POP:
+ wmem_strbuf_append_printf(buf, "%s", arg1_str);
+ *stack_print = dump_str_stack_pop(*stack_print, arg1->value.numeric);
+ break;
+
+ case DFVM_SLICE:
+ wmem_strbuf_append_printf(buf, "%s[%s]%s",
+ arg1_str, arg3_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_LENGTH:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ /* Equality: the ALL_ and ANY_ variants print different operators. */
+ case DFVM_ALL_EQ:
+ wmem_strbuf_append_printf(buf, "%s%s === %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ANY_EQ:
+ wmem_strbuf_append_printf(buf, "%s%s == %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ALL_NE:
+ wmem_strbuf_append_printf(buf, "%s%s != %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ANY_NE:
+ wmem_strbuf_append_printf(buf, "%s%s !== %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ /* Ordering, contains and matches: the ALL_ and ANY_ variants share
+  * one printed operator. */
+ case DFVM_ALL_GT:
+ case DFVM_ANY_GT:
+ wmem_strbuf_append_printf(buf, "%s%s > %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ALL_GE:
+ case DFVM_ANY_GE:
+ wmem_strbuf_append_printf(buf, "%s%s >= %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ALL_LT:
+ case DFVM_ANY_LT:
+ wmem_strbuf_append_printf(buf, "%s%s < %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ALL_LE:
+ case DFVM_ANY_LE:
+ wmem_strbuf_append_printf(buf, "%s%s <= %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_NOT_ALL_ZERO:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ break;
+
+ case DFVM_ALL_CONTAINS:
+ case DFVM_ANY_CONTAINS:
+ wmem_strbuf_append_printf(buf, "%s%s contains %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_ALL_MATCHES:
+ case DFVM_ANY_MATCHES:
+ wmem_strbuf_append_printf(buf, "%s%s matches %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ case DFVM_SET_ALL_IN:
+ case DFVM_SET_ANY_IN:
+ case DFVM_SET_ALL_NOT_IN:
+ case DFVM_SET_ANY_NOT_IN:
+ wmem_strbuf_append_printf(buf, "%s%s",
+ arg1_str, arg1_str_type);
+ break;
+
+ case DFVM_SET_ADD:
+ wmem_strbuf_append_printf(buf, "%s%s", arg1_str, arg1_str_type);
+ break;
+
+ case DFVM_SET_ADD_RANGE:
+ wmem_strbuf_append_printf(buf, "%s%s .. %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ break;
+
+ /* Binary arithmetic: operands are arg1 and arg2, destination is arg3. */
+ case DFVM_BITWISE_AND:
+ wmem_strbuf_append_printf(buf, "%s%s & %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_UNARY_MINUS:
+ wmem_strbuf_append_printf(buf, "-%s%s",
+ arg1_str, arg1_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg2_str);
+ break;
+
+ case DFVM_ADD:
+ wmem_strbuf_append_printf(buf, "%s%s + %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_SUBTRACT:
+ wmem_strbuf_append_printf(buf, "%s%s - %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_MULTIPLY:
+ wmem_strbuf_append_printf(buf, "%s%s * %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_DIVIDE:
+ wmem_strbuf_append_printf(buf, "%s%s / %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_MODULO:
+ wmem_strbuf_append_printf(buf, "%s%s %% %s%s",
+ arg1_str, arg1_str_type, arg2_str, arg2_str_type);
+ indent2(buf, col_start);
+ append_to_register(buf, arg3_str);
+ break;
+
+ case DFVM_IF_TRUE_GOTO:
+ case DFVM_IF_FALSE_GOTO:
+ /* The jump target is printed as a plain number. */
+ wmem_strbuf_append_printf(buf, "%u", arg1->value.numeric);
+ break;
+
+ case DFVM_NOT:
+ case DFVM_RETURN:
+ case DFVM_SET_CLEAR:
+ /* dfvm_dump_str() never calls this function for these opcodes. */
+ ws_assert_not_reached();
+ }
+
+ g_free(arg1_str);
+ g_free(arg2_str);
+ g_free(arg3_str);
+ g_free(arg1_str_type);
+ g_free(arg2_str_type);
+ g_free(arg3_str_type);
+}
+
+/*
+ * Append one line per entry of a references hash table to buf, in the form
+ * "${abbrev} = {value <type>, ...}" (with an '@' before the abbreviation
+ * when `raw` is true). Keys are header_field_info pointers; values are
+ * pointer arrays of df_reference_t.
+ */
+static void
+append_references(wmem_strbuf_t *buf, GHashTable *references, bool raw)
+{
+	GHashTableIter iter;
+	void *hf_key, *refs_value;
+
+	g_hash_table_iter_init(&iter, references);
+	while (g_hash_table_iter_next(&iter, &hf_key, &refs_value)) {
+		const header_field_info *field = hf_key;
+		GPtrArray *refs = refs_value;
+
+		if (raw) {
+			wmem_strbuf_append_printf(buf, " ${@%s} = {", field->abbrev);
+		}
+		else {
+			wmem_strbuf_append_printf(buf, " ${%s} = {", field->abbrev);
+		}
+
+		for (unsigned idx = 0; idx < refs->len; idx++) {
+			df_reference_t *ref = refs->pdata[idx];
+			char *repr;
+
+			if (idx > 0) {
+				wmem_strbuf_append(buf, ", ");
+			}
+			repr = fvalue_to_debug_repr(NULL, ref->value);
+			wmem_strbuf_append_printf(buf, "%s <%s>", repr, fvalue_type_name(ref->value));
+			g_free(repr);
+		}
+
+		wmem_strbuf_append(buf, "}\n");
+	}
+}
+
+/*
+ * Render a compiled display filter as text.
+ *
+ * With DF_DUMP_REFERENCES set in flags, the output starts with the
+ * "References" and "Raw references" sections. It always continues with an
+ * "Instructions:" listing, one line per instruction, numbered from 0000.
+ * DF_DUMP_SHOW_FTYPE adds a " <type>" suffix to operands.
+ *
+ * The returned string is allocated in `alloc` (finalized from a
+ * wmem_strbuf_t) and must be released accordingly by the caller.
+ */
+char *
+dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, uint16_t flags)
+{
+	int id, length;
+	dfvm_insn_t *insn;
+	wmem_strbuf_t *buf;
+	GSList *stack_print = NULL;
+	size_t col_start;
+
+	buf = wmem_strbuf_new(alloc, NULL);
+
+	if (flags & DF_DUMP_REFERENCES) {
+		if (g_hash_table_size(df->references) > 0) {
+			wmem_strbuf_append(buf, "References:\n");
+			append_references(buf, df->references, false);
+		}
+		else {
+			wmem_strbuf_append(buf, "References: (none)\n");
+		}
+		wmem_strbuf_append_c(buf, '\n');
+
+		if (g_hash_table_size(df->raw_references) > 0) {
+			wmem_strbuf_append(buf, "Raw references:\n");
+			append_references(buf, df->raw_references, true);
+		}
+		else {
+			wmem_strbuf_append(buf, "Raw references: (none)\n");
+		}
+		wmem_strbuf_append_c(buf, '\n');
+	}
+
+	wmem_strbuf_append(buf, "Instructions:");
+
+	length = df->insns->len;
+	for (id = 0; id < length; id++) {
+		insn = g_ptr_array_index(df->insns, id);
+		col_start = buf->len;
+		wmem_strbuf_append_printf(buf, "\n %04d %s", id, dfvm_opcode_tostr(insn->op));
+
+		switch (insn->op) {
+			case DFVM_NOT:
+			case DFVM_RETURN:
+			case DFVM_SET_CLEAR:
+				/* Nothing here */
+				break;
+			default:
+				indent1(buf, col_start);
+				append_op_args(buf, insn, &stack_print, flags);
+				break;
+		}
+	}
+
+	/*
+	 * STACK_PUSH entries are strdup'ed into stack_print and normally
+	 * removed by a later STACK_POP. Release whatever is left so that
+	 * unbalanced bytecode does not leak the list or its strings.
+	 */
+	g_slist_free_full(stack_print, g_free);
+
+	return wmem_strbuf_finalize(buf);
+}
+
+/* Write the textual bytecode dump of df to stream f, followed by a newline.
+ * The text comes from dfvm_dump_str() using the NULL allocator and is freed
+ * before returning. */
+void
+dfvm_dump(FILE *f, dfilter_t *df, uint16_t flags)
+{
+ char *str = dfvm_dump_str(NULL, df, flags);
+ fputs(str, f);
+ fputc('\n', f);
+ wmem_free(NULL, str);
+}
+
+/* Comparison callback for g_ptr_array_sort(): orders field_info entries by
+ * ascending proto_layer_num. Both arguments point at array elements, i.e.
+ * at field_info pointers, hence the extra dereference. */
+static int
+compare_finfo_layer(gconstpointer _a, gconstpointer _b)
+{
+ const field_info *a = *(const field_info **)_a;
+ const field_info *b = *(const field_info **)_b;
+ return a->proto_layer_num - b->proto_layer_num;
+}
+
+/*
+ * Tell whether layer number `num` is inside any node of the range `dr`.
+ *
+ * A negative start offset is counted from the end: `length + 1` is added to
+ * it. The upper bound comes from the node's ending kind (explicit length,
+ * explicit end offset, or open-ended). Both bounds are inclusive.
+ */
+static bool
+drange_contains_layer(drange_t *dr, int num, int length)
+{
+	for (GSList *iter = dr->range_list; iter != NULL; iter = g_slist_next(iter)) {
+		drange_node *node = iter->data;
+		int first = node->start_offset;
+		int last;
+
+		if (first < 0) {
+			first += length + 1;
+		}
+
+		switch (node->ending) {
+			case DRANGE_NODE_END_T_LENGTH:
+				last = first + node->length - 1;
+				break;
+			case DRANGE_NODE_END_T_OFFSET:
+				last = node->end_offset;
+				break;
+			case DRANGE_NODE_END_T_TO_THE_END:
+				last = INT_MAX;
+				break;
+			default:
+				ws_assert_not_reached();
+		}
+
+		if (first <= num && num <= last) { /* inclusive */
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Build a new FT_BYTES fvalue holding the raw bytes of a field, read from
+ * fi->ds_tvb starting at fi->start.
+ *
+ * Returns NULL when tvb_captured_length_remaining() reports a negative
+ * length for the field's start offset.
+ */
+fvalue_t *
+dfvm_get_raw_fvalue(const field_info *fi)
+{
+	/*
+	 * XXX - a field can have a length that runs past
+	 * the end of the tvbuff. Ideally, that should
+	 * be fixed when adding an item to the protocol
+	 * tree, but checking the length when doing
+	 * that could be expensive. Until we fix that,
+	 * we'll do the check here.
+	 */
+	int available = tvb_captured_length_remaining(fi->ds_tvb, fi->start);
+	if (available < 0) {
+		return NULL;
+	}
+
+	/* Clamp the field length to what the tvbuff actually holds. */
+	int nbytes = fi->length;
+	if (nbytes > available) {
+		nbytes = available;
+	}
+
+	GByteArray *raw = g_byte_array_new();
+	g_byte_array_append(raw, tvb_get_ptr(fi->ds_tvb, fi->start, nbytes), nbytes);
+
+	fvalue_t *raw_fv = fvalue_new(FT_BYTES);
+	fvalue_set_byte_array(raw_fv, raw);
+	return raw_fv;
+}
+
+/* Select the entries of `finfos` whose protocol layer number lies inside
+ * `range`. When `rp` is non-NULL, the value of each selected entry (its raw
+ * bytes if `raw`, otherwise the field's own value) is appended to it.
+ * Returns the number of selected entries.
+ *
+ * The array is sorted in place by layer number and must not be empty. */
+static size_t
+filter_finfo_fvalues(df_cell_t *rp, GPtrArray *finfos, drange_t *range, bool raw)
+{
+	size_t selected = 0;
+	int current_layer = -1;		/* layer the cached verdict belongs to */
+	bool layer_selected = false;	/* cached verdict for current_layer */
+	int max_layer;			/* layer numbers are sequential */
+	field_info *fi;
+	fvalue_t *fv;
+
+	g_ptr_array_sort(finfos, compare_finfo_layer);
+
+	/* Once sorted, the last entry carries the highest layer number. */
+	fi = finfos->pdata[finfos->len - 1];
+	max_layer = fi->proto_layer_num;
+
+	for (unsigned idx = 0; idx < finfos->len; idx++) {
+		fi = finfos->pdata[idx];
+
+		/* Only consult the range when the layer changes. */
+		if (fi->proto_layer_num != current_layer) {
+			current_layer = fi->proto_layer_num;
+			layer_selected = drange_contains_layer(range, current_layer, max_layer);
+		}
+		if (!layer_selected) {
+			continue;
+		}
+
+		if (rp != NULL) {
+			fv = raw ? dfvm_get_raw_fvalue(fi) : fi->value;
+			df_cell_append(rp, fv);
+		}
+		selected++;
+	}
+	return selected;
+}
+
+/* Append to `rp` the values of every occurrence of `hfinfo` found in `tree`,
+ * optionally restricted to the layers in `range`. With `raw` set, a bytes
+ * value built from the field's data is appended instead of the field value.
+ * Returns false when nothing was found (or nothing matched the range). */
+static bool
+read_tree_finfos(df_cell_t *rp, proto_tree *tree,
+		header_field_info *hfinfo, drange_t *range, bool raw)
+{
+	/* The array belongs to the tree: the caller should NOT free it. */
+	GPtrArray *found = proto_get_finfo_ptr_array(tree, hfinfo->id);
+
+	if (found == NULL || g_ptr_array_len(found) == 0) {
+		return false;
+	}
+
+	if (range) {
+		return filter_finfo_fvalues(rp, found, range, raw) > 0;
+	}
+
+	/* No layer restriction: take every occurrence. */
+	for (unsigned idx = 0; idx < found->len; idx++) {
+		field_info *fi = g_ptr_array_index(found, idx);
+		fvalue_t *fv = raw ? dfvm_get_raw_fvalue(fi) : fi->value;
+
+		df_cell_append(rp, fv);
+	}
+	return true;
+}
+
+/* Load the values of a field from the proto_tree into a register, unless
+ * that register was already loaded during this run of the filter.
+ *
+ * arg1: the field (hfinfo); a RAW_HFINFO type requests raw byte values.
+ * arg2: destination register number.
+ * arg3: optional layer range, may be NULL.
+ *
+ * Returns true when the register holds at least one value. */
+static bool
+read_tree(dfilter_t *df, proto_tree *tree,
+		dfvm_value_t *arg1, dfvm_value_t *arg2,
+		dfvm_value_t *arg3)
+{
+	header_field_info *hfinfo = arg1->value.hfinfo;
+	bool raw = (arg1->type == RAW_HFINFO);
+	df_cell_t *rp = &df->registers[arg2->value.numeric];
+	drange_t *range = arg3 ? arg3->value.drange : NULL;
+
+	/* Already loaded in this run of the dfilter? */
+	if (!df_cell_is_null(rp)) {
+		return !df_cell_is_empty(rp);
+	}
+
+	/* Raw values are created here and therefore owned by the register;
+	 * regular values are only referenced and must not be freed later. */
+	df_cell_init(rp, raw);
+
+	/* Collect the values of every field registered under the same name. */
+	for (; hfinfo != NULL; hfinfo = hfinfo->same_name_next) {
+		read_tree_finfos(rp, tree, hfinfo, range, raw);
+	}
+
+	return !df_cell_is_empty(rp);
+}
+
+/* Append the values of the references in `refs_array` to `rp`. With a NULL
+ * `range` every reference is taken; otherwise only those whose protocol
+ * layer number lies inside the range. A NULL or empty array is a no-op. */
+static void
+filter_refs_fvalues(df_cell_t *rp, GPtrArray *refs_array, drange_t *range)
+{
+	df_reference_t *ref;
+	int max_layer;			/* layer numbers are sequential */
+	int current_layer = -1;		/* layer the cached verdict belongs to */
+	bool layer_selected = false;
+
+	if (!refs_array || refs_array->len == 0) {
+		return;
+	}
+
+	if (range == NULL) {
+		for (unsigned idx = 0; idx < refs_array->len; idx++) {
+			ref = refs_array->pdata[idx];
+			df_cell_append(rp, ref->value);
+		}
+		return;
+	}
+
+	/* The refs array is sorted, so its last entry has the highest layer. */
+	ref = refs_array->pdata[refs_array->len - 1];
+	max_layer = ref->proto_layer_num;
+
+	for (unsigned idx = 0; idx < refs_array->len; idx++) {
+		ref = refs_array->pdata[idx];
+
+		/* Only consult the range when the layer changes. */
+		if (ref->proto_layer_num != current_layer) {
+			current_layer = ref->proto_layer_num;
+			layer_selected = drange_contains_layer(range, current_layer, max_layer);
+		}
+		if (layer_selected) {
+			df_cell_append(rp, ref->value);
+		}
+	}
+}
+
+/* Load the stored reference values of a field into a register, unless that
+ * register was already loaded during this run of the filter.
+ *
+ * arg1: the field (hfinfo); a RAW_HFINFO type selects the raw references.
+ * arg2: destination register number.
+ * arg3: optional layer range, may be NULL.
+ *
+ * Returns true when the register holds at least one value. */
+static bool
+read_reference(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
+		dfvm_value_t *arg3)
+{
+	df_cell_t *rp;
+	GPtrArray *refs;
+	drange_t *range = NULL;
+	bool raw;
+
+	header_field_info *hfinfo = arg1->value.hfinfo;
+	raw = arg1->type == RAW_HFINFO;
+
+	int reg = arg2->value.numeric;
+
+	if (arg3) {
+		range = arg3->value.drange;
+	}
+
+	rp = &df->registers[reg];
+
+	/* Already loaded in this run of the dfilter? */
+	if (!df_cell_is_null(rp)) {
+		return !df_cell_is_empty(rp);
+	}
+
+	refs = g_hash_table_lookup(raw ? df->raw_references : df->references, hfinfo);
+	if (refs == NULL || refs->len == 0) {
+		return false;
+	}
+
+	// These values are referenced only, do not try to free it later.
+	df_cell_init(rp, false);
+	filter_refs_fvalues(rp, refs, range);
+
+	/* The range may have filtered out every reference. Report that the
+	 * same way a cached re-read (above) and read_tree() do, so that an
+	 * empty register is never handed to the tests that follow. */
+	return !df_cell_is_empty(rp);
+}
+
+/* Quantifier applied when a test runs over several values. */
+enum match_how {
+	MATCH_ANY,	/* true as soon as one test succeeds */
+	MATCH_ALL	/* false as soon as one test fails */
+};
+
+/* Binary comparison between two fvalues. */
+typedef ft_bool_t (*DFVMCompareFunc)(const fvalue_t*, const fvalue_t*);
+/* Unary predicate on a single fvalue. */
+typedef ft_bool_t (*DFVMTestFunc)(const fvalue_t*);
+
+/* Apply `match_func` to every pair drawn from `fv1` x `fv2`.
+ * MATCH_ALL: false on the first FT_FALSE result, true otherwise (including
+ * when there are no pairs). MATCH_ANY: true on the first FT_TRUE result,
+ * false otherwise. */
+static bool
+cmp_test_internal(enum match_how how, DFVMCompareFunc match_func,
+			GPtrArray *fv1, GPtrArray *fv2)
+{
+	const bool need_all = (how == MATCH_ALL);
+	const bool need_any = (how == MATCH_ANY);
+
+	for (size_t i = 0; i < fv1->len; i++) {
+		for (size_t j = 0; j < fv2->len; j++) {
+			ft_bool_t outcome = match_func(fv1->pdata[i], fv2->pdata[j]);
+
+			if (need_all) {
+				if (outcome == FT_FALSE) {
+					return false;
+				}
+			}
+			else if (need_any && outcome == FT_TRUE) {
+				return true;
+			}
+		}
+	}
+	/* No early exit: "all" held, or "any" found nothing. */
+	return need_all;
+}
+
+/* Apply `test_func` to each of the `fv_count` values in `fv_ptr`.
+ * MATCH_ALL: false on the first FT_FALSE result, true otherwise (including
+ * for an empty list). MATCH_ANY: true on the first FT_TRUE result, false
+ * otherwise. */
+static bool
+cmp_test_unary(enum match_how how, DFVMTestFunc test_func,
+			const fvalue_t **fv_ptr, size_t fv_count)
+{
+	const bool need_all = (how == MATCH_ALL);
+	const bool need_any = (how == MATCH_ANY);
+
+	for (size_t i = 0; i < fv_count; i++) {
+		ft_bool_t outcome = test_func(fv_ptr[i]);
+
+		if (need_all) {
+			if (outcome == FT_FALSE) {
+				return false;
+			}
+		}
+		else if (need_any && outcome == FT_TRUE) {
+			return true;
+		}
+	}
+	/* No early exit: "all" held, or "any" found nothing. */
+	return need_all;
+}
+
+/* True when `func` holds for every value in the register named by `arg1`
+ * (which must be of type REGISTER). */
+static bool
+all_test_unary(dfilter_t *df, DFVMTestFunc func, dfvm_value_t *arg1)
+{
+	ws_assert(arg1->type == REGISTER);
+
+	df_cell_t *cell = &df->registers[arg1->value.numeric];
+	const fvalue_t **values = (const fvalue_t **)df_cell_array(cell);
+	size_t nvalues = df_cell_size(cell);
+
+	return cmp_test_unary(MATCH_ALL, func, values, nvalues);
+}
+
+/* Resolve both operands to value arrays (a register's contents or a
+ * constant fvalue array) and run `cmp` over every pair with the `how`
+ * quantifier. Any other operand type is a programming error. */
+static bool
+cmp_test(dfilter_t *df, DFVMCompareFunc cmp,
+			dfvm_value_t *arg1, dfvm_value_t *arg2,
+			enum match_how how)
+{
+	GPtrArray *lhs, *rhs;
+
+	switch (arg1->type) {
+	case REGISTER:
+		lhs = df_cell_ptr(&df->registers[arg1->value.numeric]);
+		break;
+	case FVALUE:
+		lhs = arg1->value.fvalue_p;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	switch (arg2->type) {
+	case REGISTER:
+		rhs = df_cell_ptr(&df->registers[arg2->value.numeric]);
+		break;
+	case FVALUE:
+		rhs = arg2->value.fvalue_p;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	return cmp_test_internal(how, cmp, lhs, rhs);
+}
+
+/* cmp(A) <=> cmp(a1) OR cmp(a2) OR cmp(a3) OR ...
+ * Thin wrapper: runs cmp_test() with the MATCH_ANY quantifier. */
+static inline bool
+any_test(dfilter_t *df, DFVMCompareFunc cmp,
+				dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	return cmp_test(df, cmp, arg1, arg2, MATCH_ANY);
+}
+
+/* cmp(A) <=> cmp(a1) AND cmp(a2) AND cmp(a3) AND ...
+ * Thin wrapper: runs cmp_test() with the MATCH_ALL quantifier. */
+static bool
+all_test(dfilter_t *df, DFVMCompareFunc cmp,
+				dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	return cmp_test(df, cmp, arg1, arg2, MATCH_ALL);
+}
+
+/* True when at least one value in the register `arg1` matches the regular
+ * expression carried by `arg2`. */
+static bool
+any_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	df_cell_t *cell = &df->registers[arg1->value.numeric];
+	ws_regex_t *regex = arg2->value.pcre;
+	const fvalue_t **values = (const fvalue_t **)df_cell_array(cell);
+	size_t i = 0;
+
+	while (i < df_cell_size(cell)) {
+		if (fvalue_matches(values[i], regex) == FT_TRUE) {
+			return true;
+		}
+		i++;
+	}
+	return false;
+}
+
+/* True unless some value in the register `arg1` yields FT_FALSE when
+ * matched against the regular expression carried by `arg2`. */
+static bool
+all_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	df_cell_t *cell = &df->registers[arg1->value.numeric];
+	ws_regex_t *regex = arg2->value.pcre;
+	const fvalue_t **values = (const fvalue_t **)df_cell_array(cell);
+	size_t i = 0;
+
+	while (i < df_cell_size(cell)) {
+		if (fvalue_matches(values[i], regex) == FT_FALSE) {
+			return false;
+		}
+		i++;
+	}
+	return true;
+}
+
+/* Test one value against one set element.
+ * range[0] is the low bound (or the single element), range[1] the high
+ * bound or NULL. For a range, `fv` must be <= some high value and >= some
+ * low value; for a single element, `fv` must equal some low value. */
+static bool
+test_in_internal(fvalue_t *fv, GPtrArray *range[2])
+{
+	GPtrArray *low = range[0];
+	GPtrArray *high = range[1];
+	bool within_high = false;
+
+	if (!high) {
+		/* single element */
+		for (unsigned i = 0; i < low->len; i++) {
+			if (fvalue_eq(fv, low->pdata[i]) == FT_TRUE) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/* range: check the upper bound first */
+	for (unsigned i = 0; i < high->len; i++) {
+		if (fvalue_le(fv, high->pdata[i]) == FT_TRUE) {
+			within_high = true;
+			break;
+		}
+	}
+	if (!within_high) {
+		return false;
+	}
+
+	ws_assert(low);
+	for (unsigned i = 0; i < low->len; i++) {
+		if (fvalue_ge(fv, low->pdata[i]) == FT_TRUE) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* True when at least one value in the register `arg1` is a member of the
+ * set currently held on df->set_stack. */
+static bool
+any_in(dfilter_t *df, dfvm_value_t *arg1)
+{
+	df_cell_t *cell = &df->registers[arg1->value.numeric];
+	GPtrArray *values;
+
+	/* If the read failed we jump over the membership test. */
+	ws_assert(!df_cell_is_empty(cell));
+	values = df_cell_ptr(cell);
+
+	for (size_t i = 0; i < values->len; i++) {
+		for (GSList *elem = df->set_stack; elem != NULL; elem = elem->next) {
+			if (test_in_internal(values->pdata[i], elem->data)) {
+				/* One member is enough. */
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+/* True when every value in the register `arg1` is a member of the set
+ * currently held on df->set_stack. */
+static bool
+all_in(dfilter_t *df, dfvm_value_t *arg1)
+{
+	df_cell_t *cell = &df->registers[arg1->value.numeric];
+	GPtrArray *values;
+
+	/* If the read failed we jump over the membership test. */
+	ws_assert(!df_cell_is_empty(cell));
+	values = df_cell_ptr(cell);
+
+	for (size_t i = 0; i < values->len; i++) {
+		bool is_member = false;
+
+		for (GSList *elem = df->set_stack; elem != NULL; elem = elem->next) {
+			if (test_in_internal(values->pdata[i], elem->data)) {
+				is_member = true;
+				break;
+			}
+		}
+		if (!is_member) {
+			/* One outsider is enough to fail. */
+			return false;
+		}
+	}
+	return true;
+}
+
+/* Reset every register that may have been populated while evaluating the
+ * filter. Values that the registers own are freed along the way. */
+static void
+free_register_overhead(dfilter_t* df)
+{
+	unsigned reg = 0;
+
+	while (reg < df->num_registers) {
+		df_cell_clear(&df->registers[reg]);
+		reg++;
+	}
+}
+
+/* For each fvalue_t in the source register, use fvalue_slice() to build the
+ * byte-slice described by `drange_arg` and store the resulting list in the
+ * destination register, which owns the new values. */
+static void
+mk_slice(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg,
+						dfvm_value_t *drange_arg)
+{
+	df_cell_t *dst = &df->registers[to_arg->value.numeric];
+	df_cell_t *src;
+	df_cell_iter_t it;
+	fvalue_t *value;
+
+	df_cell_init(dst, true);
+	src = &df->registers[from_arg->value.numeric];
+	drange_t *drange = drange_arg->value.drange;
+
+	for (df_cell_iter_init(src, &it);
+			(value = df_cell_iter_next(&it)) != NULL; ) {
+		fvalue_t *slice = fvalue_slice(value, drange);
+		/* semcheck.c should already have rejected every case
+		 * in which a slice cannot be made, hence the assert. */
+		ws_assert(slice);
+		df_cell_append(dst, slice);
+	}
+}
+
+/* For each fvalue_t in the source register, store its length (as given by
+ * fvalue_length2()) as a new FT_UINT32 value in the destination register,
+ * which owns the new values. */
+static void
+mk_length(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg)
+{
+	df_cell_t *dst = &df->registers[to_arg->value.numeric];
+	df_cell_t *src;
+	df_cell_iter_t it;
+	fvalue_t *value;
+
+	df_cell_init(dst, true);
+	src = &df->registers[from_arg->value.numeric];
+
+	for (df_cell_iter_init(src, &it);
+			(value = df_cell_iter_next(&it)) != NULL; ) {
+		fvalue_t *len_fv = fvalue_new(FT_UINT32);
+
+		fvalue_set_uinteger(len_fv, (uint32_t)fvalue_length2(value));
+		df_cell_append(dst, len_fv);
+	}
+}
+
+/* Invoke a display filter function.
+ *
+ * arg1: the function definition.
+ * arg2: register receiving the return values.
+ * arg3: number of arguments, taken from df->function_stack.
+ *
+ * Returns whatever the function implementation returns. */
+static bool
+call_function(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
+							dfvm_value_t *arg3)
+{
+	df_func_def_t *def = arg1->value.funcdef;
+	df_cell_t *result_cell = &df->registers[arg2->value.numeric];
+	uint32_t nargs = arg3->value.numeric;
+
+	// Functions create a new value, so own it.
+	df_cell_init(result_cell, true);
+
+	return def->function(df->function_stack, nargs, result_cell);
+}
+
+/* Log, at "noisy" level, a failed binary operation: both operands in their
+ * debug representation, the operator text and the error message. */
+static void debug_op_error(const fvalue_t *v1, const fvalue_t *v2, const char *op, const char *msg)
+{
+	char *lhs_repr = fvalue_to_debug_repr(NULL, v1);
+	char *rhs_repr = fvalue_to_debug_repr(NULL, v2);
+
+	ws_noisy("Error: %s %s %s: %s", lhs_repr, op, rhs_repr, msg);
+
+	g_free(lhs_repr);
+	g_free(rhs_repr);
+}
+
+/* Temporary debugging aid only; do not leave calls in production code (at
+ * the very least WS_DEBUG_HERE must be replaced by another log level).
+ * Logs the contents of a register given as a list of fvalues. */
+static void _U_
+debug_register(GSList *reg, uint32_t num)
+{
+	wmem_strbuf_t *out = wmem_strbuf_new(NULL, NULL);
+
+	wmem_strbuf_append_printf(out, "Reg#%"G_GUINT32_FORMAT" = { ", num);
+
+	GSList *node = reg;
+	while (node != NULL) {
+		char *repr = fvalue_to_debug_repr(NULL, node->data);
+
+		wmem_strbuf_append_printf(out, "%s <%s>", repr, fvalue_type_name(node->data));
+		g_free(repr);
+
+		/* Separator between entries, none after the last one. */
+		if (node->next != NULL) {
+			wmem_strbuf_append(out, ", ");
+		}
+		node = node->next;
+	}
+
+	wmem_strbuf_append_c(out, '}');
+	WS_DEBUG_HERE("%s", wmem_strbuf_get_str(out));
+	wmem_strbuf_destroy(out);
+}
+
+
+typedef fvalue_t* (*DFVMBinaryFunc)(const fvalue_t*, const fvalue_t*, char **);
+
+/* Apply the binary operation `func` to every pair drawn from `fv1` x `fv2`
+ * and append each successful result to `retval`. A pair for which `func`
+ * returns NULL is logged and skipped. */
+static void
+mk_binary_internal(DFVMBinaryFunc func, GPtrArray *fv1, GPtrArray *fv2, df_cell_t *retval)
+{
+	fvalue_t *result;
+	char *err_msg = NULL;
+
+	for (size_t i = 0; i < fv1->len; i++) {
+		for (size_t j = 0; j < fv2->len; j++) {
+			result = func(fv1->pdata[i], fv2->pdata[j], &err_msg);
+			if (result == NULL) {
+				/* Report the operands that actually failed:
+				 * the right-hand one is indexed by j, not i
+				 * (i may lie beyond the end of fv2). */
+				debug_op_error(fv1->pdata[i], fv2->pdata[j], "&", err_msg);
+				g_free(err_msg);
+				err_msg = NULL;
+			}
+			else {
+				df_cell_append(retval, result);
+			}
+		}
+	}
+}
+
+/* Evaluate a binary arithmetic/bitwise operation. Each operand is either a
+ * register or a constant fvalue array; the results go to the register named
+ * by `to_arg`, which owns them. */
+static void
+mk_binary(dfilter_t *df, DFVMBinaryFunc func,
+		dfvm_value_t *arg1, dfvm_value_t *arg2, dfvm_value_t *to_arg)
+{
+	GPtrArray *lhs, *rhs;
+	df_cell_t *dst;
+
+	switch (arg1->type) {
+	case REGISTER:
+		lhs = df_cell_ptr(&df->registers[arg1->value.numeric]);
+		break;
+	case FVALUE:
+		lhs = arg1->value.fvalue_p;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	switch (arg2->type) {
+	case REGISTER:
+		rhs = df_cell_ptr(&df->registers[arg2->value.numeric]);
+		break;
+	case FVALUE:
+		rhs = arg2->value.fvalue_p;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	dst = &df->registers[to_arg->value.numeric];
+	df_cell_init(dst, true);
+
+	mk_binary_internal(func, lhs, rhs, dst);
+}
+
+/* Negate every value of `fv` and append the results to `retval`. A value
+ * that cannot be negated is logged and skipped. */
+static void
+mk_minus_internal(GPtrArray *fv, df_cell_t *retval)
+{
+	char *err_msg = NULL;
+
+	for (size_t i = 0; i < fv->len; i++) {
+		fvalue_t *negated = fvalue_unary_minus(fv->pdata[i], &err_msg);
+
+		if (negated != NULL) {
+			df_cell_append(retval, negated);
+			continue;
+		}
+
+		ws_noisy("unary_minus: %s", err_msg);
+		g_free(err_msg);
+		err_msg = NULL;
+	}
+}
+
+/* Evaluate unary minus on a register or constant operand; the results go
+ * to the register named by `to_arg`, which owns them. */
+static void
+mk_minus(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
+{
+	GPtrArray *operand;
+	df_cell_t *dst;
+
+	switch (arg1->type) {
+	case REGISTER:
+		operand = df_cell_ptr(&df->registers[arg1->value.numeric]);
+		break;
+	case FVALUE:
+		operand = arg1->value.fvalue_p;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	dst = &df->registers[to_arg->value.numeric];
+	df_cell_init(dst, true);
+
+	mk_minus_internal(operand, dst);
+}
+
+/* Load the constant carried by `arg1` into the register named by `to_arg`. */
+static void
+put_fvalue(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
+{
+	df_cell_t *dst = &df->registers[to_arg->value.numeric];
+
+	/* The dfvm_value_t keeps ownership of the memory, so the register
+	 * only references it. */
+	df_cell_init(dst, false);
+	df_cell_append(dst, dfvm_value_get_fvalue(arg1));
+}
+
+/* Push a function argument (a constant or a register's contents) on
+ * df->function_stack, taking a reference on the underlying array. */
+static void
+stack_push(dfilter_t *df, dfvm_value_t *arg1)
+{
+	GPtrArray *pushed;
+
+	switch (arg1->type) {
+	case FVALUE:
+		pushed = g_ptr_array_ref(arg1->value.fvalue_p);
+		break;
+	case REGISTER:
+		pushed = df_cell_ref(&df->registers[arg1->value.numeric]);
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	df->function_stack = g_slist_prepend(df->function_stack, pushed);
+}
+
+/* Pop `arg1` (a count) entries from df->function_stack, dropping the
+ * reference that stack_push() took on each of them. */
+static void
+stack_pop(dfilter_t *df, dfvm_value_t *arg1)
+{
+	unsigned remaining = arg1->value.numeric;
+
+	while (remaining > 0) {
+		GSList *top = df->function_stack;
+
+		/* Release the data, then unlink the top of the stack. */
+		g_ptr_array_unref(top->data);
+		df->function_stack = g_slist_delete_link(df->function_stack, top);
+		remaining--;
+	}
+}
+
+/* Push one set element on df->set_stack: a single value when `arg2` is
+ * NULL, otherwise the range arg1 .. arg2. Each element is a freshly
+ * allocated two-slot array {low, high}; high stays NULL for single values. */
+static void
+set_push(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	/* We don't need to use reference counting because the lifetime of
+	 * each arg is guaranteed to outlive the set stack. */
+	GPtrArray **bounds = g_new0(GPtrArray *, 2);
+
+	switch (arg1->type) {
+	case FVALUE:
+		bounds[0] = arg1->value.fvalue_p;
+		break;
+	case REGISTER:
+		bounds[0] = df_cell_ptr(&df->registers[arg1->value.numeric]);
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	if (arg2) {
+		switch (arg2->type) {
+		case FVALUE:
+			bounds[1] = arg2->value.fvalue_p;
+			break;
+		case REGISTER:
+			bounds[1] = df_cell_ptr(&df->registers[arg2->value.numeric]);
+			break;
+		default:
+			ws_assert_not_reached();
+		}
+	}
+
+	df->set_stack = g_slist_prepend(df->set_stack, bounds);
+}
+
+/* Empty df->set_stack, freeing each element array with g_free(). */
+static void
+set_clear(dfilter_t *df)
+{
+	g_slist_free_full(df->set_stack, g_free);
+	df->set_stack = NULL;
+}
+
+/* True when `tree` contains at least one occurrence of `hfinfo` and, if
+ * `range` is given, at least one of them lies in a selected layer. */
+static bool
+check_exists_finfos(proto_tree *tree, header_field_info *hfinfo, drange_t *range)
+{
+	GPtrArray *found = proto_get_finfo_ptr_array(tree, hfinfo->id);
+
+	if (found == NULL || g_ptr_array_len(found) == 0) {
+		return false;
+	}
+	if (range != NULL) {
+		/* Count only; no register is filled. */
+		return filter_finfo_fvalues(NULL, found, range, false) > 0;
+	}
+	return true;
+}
+
+/* True when the field carried by `arg1`, or any field registered under the
+ * same name, is present in `tree`. `arg2` optionally carries a layer range
+ * and may be NULL. */
+static bool
+check_exists(proto_tree *tree, dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	drange_t *range = arg2 ? arg2->value.drange : NULL;
+
+	for (header_field_info *hf = arg1->value.hfinfo; hf != NULL;
+			hf = hf->same_name_next) {
+		if (check_exists_finfos(tree, hf, range)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Run the compiled filter program `df` against the protocol tree `tree`.
+ *
+ * Instructions are executed in order. `accum` holds the boolean result of
+ * the most recent test or read; DFVM_NOT inverts it and the conditional
+ * GOTO instructions branch on it. Execution ends at DFVM_RETURN, which
+ * clears the registers and returns `accum`. Falling off the end of the
+ * program without a DFVM_RETURN is treated as a programming error. */
+bool
+dfvm_apply(dfilter_t *df, proto_tree *tree)
+{
+	int id, length;
+	bool accum = true;
+	dfvm_insn_t *insn;
+	dfvm_value_t *arg1;
+	dfvm_value_t *arg2;
+	dfvm_value_t *arg3 = NULL;
+
+	ws_assert(tree);
+
+	length = df->insns->len;
+
+	for (id = 0; id < length; id++) {
+
+		/* Jump target: a taken GOTO sets `id` and re-enters here,
+		 * bypassing the loop increment and the loop bound check. */
+		AGAIN:
+		insn = g_ptr_array_index(df->insns, id);
+		arg1 = insn->arg1;
+		arg2 = insn->arg2;
+		arg3 = insn->arg3;
+
+		switch (insn->op) {
+			case DFVM_CHECK_EXISTS:
+				accum = check_exists(tree, arg1, NULL);
+				break;
+
+			case DFVM_CHECK_EXISTS_R:
+				accum = check_exists(tree, arg1, arg2);
+				break;
+
+			case DFVM_READ_TREE:
+				accum = read_tree(df, tree, arg1, arg2, NULL);
+				break;
+
+			case DFVM_READ_TREE_R:
+				accum = read_tree(df, tree, arg1, arg2, arg3);
+				break;
+
+			case DFVM_READ_REFERENCE:
+				accum = read_reference(df, arg1, arg2, NULL);
+				break;
+
+			case DFVM_READ_REFERENCE_R:
+				accum = read_reference(df, arg1, arg2, arg3);
+				break;
+
+			case DFVM_PUT_FVALUE:
+				put_fvalue(df, arg1, arg2);
+				break;
+
+			case DFVM_CALL_FUNCTION:
+				accum = call_function(df, arg1, arg2, arg3);
+				break;
+
+			case DFVM_STACK_PUSH:
+				stack_push(df, arg1);
+				break;
+
+			case DFVM_STACK_POP:
+				stack_pop(df, arg1);
+				break;
+
+			case DFVM_SLICE:
+				mk_slice(df, arg1, arg2, arg3);
+				break;
+
+			case DFVM_LENGTH:
+				mk_length(df, arg1, arg2);
+				break;
+
+			case DFVM_ALL_EQ:
+				accum = all_test(df, fvalue_eq, arg1, arg2);
+				break;
+
+			case DFVM_ANY_EQ:
+				accum = any_test(df, fvalue_eq, arg1, arg2);
+				break;
+
+			case DFVM_ALL_NE:
+				accum = all_test(df, fvalue_ne, arg1, arg2);
+				break;
+
+			case DFVM_ANY_NE:
+				accum = any_test(df, fvalue_ne, arg1, arg2);
+				break;
+
+			case DFVM_ALL_GT:
+				accum = all_test(df, fvalue_gt, arg1, arg2);
+				break;
+
+			case DFVM_ANY_GT:
+				accum = any_test(df, fvalue_gt, arg1, arg2);
+				break;
+
+			case DFVM_ALL_GE:
+				accum = all_test(df, fvalue_ge, arg1, arg2);
+				break;
+
+			case DFVM_ANY_GE:
+				accum = any_test(df, fvalue_ge, arg1, arg2);
+				break;
+
+			case DFVM_ALL_LT:
+				accum = all_test(df, fvalue_lt, arg1, arg2);
+				break;
+
+			case DFVM_ANY_LT:
+				accum = any_test(df, fvalue_lt, arg1, arg2);
+				break;
+
+			case DFVM_ALL_LE:
+				accum = all_test(df, fvalue_le, arg1, arg2);
+				break;
+
+			case DFVM_ANY_LE:
+				accum = any_test(df, fvalue_le, arg1, arg2);
+				break;
+
+			case DFVM_BITWISE_AND:
+				mk_binary(df, fvalue_bitwise_and, arg1, arg2, arg3);
+				break;
+
+			case DFVM_ADD:
+				mk_binary(df, fvalue_add, arg1, arg2, arg3);
+				break;
+
+			case DFVM_SUBTRACT:
+				mk_binary(df, fvalue_subtract, arg1, arg2, arg3);
+				break;
+
+			case DFVM_MULTIPLY:
+				mk_binary(df, fvalue_multiply, arg1, arg2, arg3);
+				break;
+
+			case DFVM_DIVIDE:
+				mk_binary(df, fvalue_divide, arg1, arg2, arg3);
+				break;
+
+			case DFVM_MODULO:
+				mk_binary(df, fvalue_modulo, arg1, arg2, arg3);
+				break;
+
+			case DFVM_NOT_ALL_ZERO:
+				accum = !all_test_unary(df, fvalue_is_zero, arg1);
+				break;
+
+			case DFVM_ALL_CONTAINS:
+				accum = all_test(df, fvalue_contains, arg1, arg2);
+				break;
+
+			case DFVM_ANY_CONTAINS:
+				accum = any_test(df, fvalue_contains, arg1, arg2);
+				break;
+
+			case DFVM_ALL_MATCHES:
+				accum = all_matches(df, arg1, arg2);
+				break;
+
+			case DFVM_ANY_MATCHES:
+				accum = any_matches(df, arg1, arg2);
+				break;
+
+			case DFVM_SET_ADD:
+				set_push(df, arg1, NULL);
+				break;
+
+			case DFVM_SET_ADD_RANGE:
+				set_push(df, arg1, arg2);
+				break;
+
+			case DFVM_SET_ALL_IN:
+				accum = all_in(df, arg1);
+				break;
+
+			case DFVM_SET_ANY_IN:
+				accum = any_in(df, arg1);
+				break;
+
+			case DFVM_SET_ALL_NOT_IN:
+				accum = !all_in(df, arg1);
+				break;
+
+			case DFVM_SET_ANY_NOT_IN:
+				accum = !any_in(df, arg1);
+				break;
+
+			case DFVM_SET_CLEAR:
+				set_clear(df);
+				break;
+
+			case DFVM_UNARY_MINUS:
+				mk_minus(df, arg1, arg2);
+				break;
+
+			case DFVM_NOT:
+				accum = !accum;
+				break;
+
+			case DFVM_RETURN:
+				/* Normal exit: release per-run register state. */
+				free_register_overhead(df);
+				return accum;
+
+			case DFVM_IF_TRUE_GOTO:
+				if (accum) {
+					id = arg1->value.numeric;
+					goto AGAIN;
+				}
+				break;
+
+			case DFVM_IF_FALSE_GOTO:
+				if (!accum) {
+					id = arg1->value.numeric;
+					goto AGAIN;
+				}
+				break;
+		}
+	}
+
+	/* Every program is expected to end with DFVM_RETURN. */
+	ws_assert_not_reached();
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h
new file mode 100644
index 0000000..4e2a0ad
--- /dev/null
+++ b/epan/dfilter/dfvm.h
@@ -0,0 +1,160 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DFVM_H
+#define DFVM_H
+
+#include <wsutil/regex.h>
+#include <epan/proto.h>
+#include "dfilter-int.h"
+#include "syntax-tree.h"
+#include "drange.h"
+#include "dfunctions.h"
+
+/* Kind of payload carried by a dfvm_value_t. */
+typedef enum {
+	EMPTY,
+	FVALUE,		/* constant: value.fvalue_p */
+	HFINFO,		/* field: value.hfinfo */
+	RAW_HFINFO,	/* field read as raw bytes: value.hfinfo */
+	INSN_NUMBER,
+	REGISTER,	/* register index: value.numeric */
+	INTEGER,
+	DRANGE,		/* range: value.drange */
+	FUNCTION_DEF,	/* function definition: value.funcdef */
+	PCRE		/* compiled regular expression: value.pcre */
+} dfvm_value_type_t;
+
+/* An instruction operand: a tagged union plus a reference count
+ * (see dfvm_value_ref() / dfvm_value_unref()). */
+typedef struct {
+	dfvm_value_type_t	type;	/* selects the meaningful union member */
+
+	union {
+		GPtrArray		*fvalue_p; /* Always has length == 1 */
+		uint32_t		numeric;
+		drange_t		*drange;
+		header_field_info	*hfinfo;
+		df_func_def_t		*funcdef;
+		ws_regex_t		*pcre;
+	} value;
+
+	int ref_count;
+} dfvm_value_t;
+
+#define dfvm_value_get_fvalue(val) ((val)->value.fvalue_p->pdata[0])
+
+/* Opcodes of the display filter virtual machine; dfvm_apply() implements
+ * each of them. The ALL_ and ANY_ prefixes select the quantifier used when
+ * an operand holds several values. */
+typedef enum {
+
+	DFVM_IF_TRUE_GOTO,
+	DFVM_IF_FALSE_GOTO,
+	DFVM_CHECK_EXISTS,
+	DFVM_CHECK_EXISTS_R,
+	DFVM_NOT,
+	DFVM_RETURN,
+	DFVM_READ_TREE,
+	DFVM_READ_TREE_R,
+	DFVM_READ_REFERENCE,
+	DFVM_READ_REFERENCE_R,
+	DFVM_PUT_FVALUE,
+	DFVM_ALL_EQ,
+	DFVM_ANY_EQ,
+	DFVM_ALL_NE,
+	DFVM_ANY_NE,
+	DFVM_ALL_GT,
+	DFVM_ANY_GT,
+	DFVM_ALL_GE,
+	DFVM_ANY_GE,
+	DFVM_ALL_LT,
+	DFVM_ANY_LT,
+	DFVM_ALL_LE,
+	DFVM_ANY_LE,
+	DFVM_ALL_CONTAINS,
+	DFVM_ANY_CONTAINS,
+	DFVM_ALL_MATCHES,
+	DFVM_ANY_MATCHES,
+	DFVM_SET_ALL_IN,
+	DFVM_SET_ANY_IN,
+	DFVM_SET_ALL_NOT_IN,
+	DFVM_SET_ANY_NOT_IN,
+	DFVM_SET_ADD,
+	DFVM_SET_ADD_RANGE,
+	DFVM_SET_CLEAR,
+	DFVM_SLICE,
+	DFVM_LENGTH,
+	DFVM_BITWISE_AND,
+	DFVM_UNARY_MINUS,
+	DFVM_ADD,
+	DFVM_SUBTRACT,
+	DFVM_MULTIPLY,
+	DFVM_DIVIDE,
+	DFVM_MODULO,
+	DFVM_CALL_FUNCTION,
+	DFVM_STACK_PUSH,
+	DFVM_STACK_POP,
+	DFVM_NOT_ALL_ZERO,
+} dfvm_opcode_t;
+
+const char *
+dfvm_opcode_tostr(dfvm_opcode_t code);
+
+/* One instruction: an opcode and up to three operands. Which operands are
+ * used depends on the opcode. */
+typedef struct {
+	int		id;
+	dfvm_opcode_t	op;
+	dfvm_value_t	*arg1;
+	dfvm_value_t	*arg2;
+	dfvm_value_t	*arg3;
+} dfvm_insn_t;
+
+dfvm_insn_t*
+dfvm_insn_new(dfvm_opcode_t op);
+
+void
+dfvm_insn_free(dfvm_insn_t *insn);
+
+dfvm_value_t*
+dfvm_value_new(dfvm_value_type_t type);
+
+dfvm_value_t*
+dfvm_value_ref(dfvm_value_t *v);
+
+void
+dfvm_value_unref(dfvm_value_t *v);
+
+dfvm_value_t*
+dfvm_value_new_fvalue(fvalue_t *fv);
+
+dfvm_value_t*
+dfvm_value_new_hfinfo(header_field_info *hfinfo, bool raw);
+
+dfvm_value_t*
+dfvm_value_new_register(int reg);
+
+dfvm_value_t*
+dfvm_value_new_drange(drange_t *dr);
+
+dfvm_value_t*
+dfvm_value_new_funcdef(df_func_def_t *funcdef);
+
+dfvm_value_t*
+dfvm_value_new_pcre(ws_regex_t *re);
+
+dfvm_value_t*
+dfvm_value_new_guint(unsigned num);
+
+void
+dfvm_dump(FILE *f, dfilter_t *df, uint16_t flags);
+
+char *
+dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, uint16_t flags);
+
+bool
+dfvm_apply(dfilter_t *df, proto_tree *tree);
+
+fvalue_t *
+dfvm_get_raw_fvalue(const field_info *fi);
+
+#endif
diff --git a/epan/dfilter/drange.c b/epan/dfilter/drange.c
new file mode 100644
index 0000000..e0aada4
--- /dev/null
+++ b/epan/dfilter/drange.c
@@ -0,0 +1,406 @@
+/* drange.c
+ * Routines for providing general range support to the dfilter library
+ *
+ * Copyright (c) 2000 by Ed Warnicke <hagbard@physics.rutgers.edu>
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs
+ * Copyright 1999 Gerald Combs
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#include "drange.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+
+/* drange_node constructor: returns a zeroed node whose ending is still
+ * DRANGE_NODE_END_T_UNINITIALIZED. Release it with drange_node_free(). */
+drange_node*
+drange_node_new(void)
+{
+    drange_node *node = g_new(drange_node,1);
+
+    node->ending = DRANGE_NODE_END_T_UNINITIALIZED;
+    node->end_offset = 0;
+    node->length = 0;
+    node->start_offset = 0;
+
+    return node;
+}
+
+/* Parse a leading integer (any base strtol() accepts with base 0) from `s`
+ * into `*pint`. `*endptr` receives the position after the number. On
+ * failure, returns false and stores a newly allocated message in
+ * `*err_ptr`. */
+static bool
+drange_str_to_gint32(const char *s, int32_t *pint, char **endptr, char **err_ptr)
+{
+    long parsed;
+
+    errno = 0;
+    parsed = strtol(s, endptr, 0);
+
+    /* No digits were consumed, or the base was rejected. */
+    if (errno == EINVAL || *endptr == s) {
+        *err_ptr = ws_strdup_printf("\"%s\" is not a valid number.", s);
+        return false;
+    }
+
+    /* Overflows a long, or fits a long but not an int32_t. */
+    if (errno == ERANGE || parsed > INT32_MAX || parsed < INT32_MIN) {
+        *err_ptr = ws_strdup_printf("\"%s\" causes an integer overflow.", s);
+        return false;
+    }
+
+    *pint = (int32_t)parsed;
+    return true;
+}
+
+/* drange_node constructor from string.
+ *
+ * Parses one range specification (without the surrounding brackets) and
+ * returns a new node, or NULL on error with a newly allocated message
+ * stored in `*err_ptr`. */
+drange_node*
+drange_node_from_str(const char *range_str, char **err_ptr)
+{
+    const char *str;
+    char *endptr;
+    char *num_err = NULL;   /* detail from a failed second number, discarded */
+    int32_t lower, upper = 0;
+    drange_node_end_t end = DRANGE_NODE_END_T_UNINITIALIZED;
+    drange_node *dn;
+    bool ok;
+
+    /*
+     * The following syntax governs slices:
+     * [i:j]    i = start_offset, j = length
+     * [i-j]    i = start_offset, j = end_offset, inclusive.
+     * [i]      i = start_offset, length = 1
+     * [:j]     start_offset = 0, length = j
+     * [i:]     start_offset = i, end_offset = end_of_field
+     */
+
+    str = range_str;
+    if (*str == ':') {
+        lower = 0;
+        /* Do not advance 'str' here. */
+    }
+    else {
+        if (!drange_str_to_gint32(str, &lower, &endptr, err_ptr))
+            return NULL;
+        str = endptr;
+    }
+
+    while (*str != '\0' && g_ascii_isspace(*str))
+        str++;
+
+    /* A failure to parse the second number is reported below with the
+     * generic "not a valid range" message; collect the helper's own
+     * message separately so that it can be freed instead of leaked. */
+    if (*str == '-') {
+        str++;
+        end = DRANGE_NODE_END_T_OFFSET;
+        ok = drange_str_to_gint32(str, &upper, &endptr, &num_err);
+        str = endptr;
+    }
+    else if (*str == ':') {
+        str++;
+        if (*str == '\0') {
+            end = DRANGE_NODE_END_T_TO_THE_END;
+            ok = true;
+        }
+        else {
+            end = DRANGE_NODE_END_T_LENGTH;
+            ok = drange_str_to_gint32(str, &upper, &endptr, &num_err);
+            str = endptr;
+        }
+    }
+    else if (*str == '\0') {
+        end = DRANGE_NODE_END_T_LENGTH;
+        upper = 1;
+        ok = true;
+    }
+    else {
+        ok = false;
+    }
+
+    while (*str != '\0' && g_ascii_isspace(*str))
+        str++;
+
+    if (!ok || *str != '\0') {
+        g_free(num_err);
+        *err_ptr = ws_strdup_printf("\"%s\" is not a valid range.", range_str);
+        return NULL;
+    }
+
+    dn = drange_node_new();
+    drange_node_set_start_offset(dn, lower);
+    switch (end) {
+        case DRANGE_NODE_END_T_LENGTH:
+            if (upper <= 0) {
+                *err_ptr = ws_strdup_printf("Range %s isn't valid "
+                                        "because length %d isn't positive",
+                                        range_str, upper);
+                drange_node_free(dn);
+                return NULL;
+            }
+            drange_node_set_length(dn, upper);
+            break;
+        case DRANGE_NODE_END_T_OFFSET:
+            if ((lower < 0 && upper > 0) || (lower > 0 && upper < 0)) {
+                *err_ptr = ws_strdup_printf("Range %s isn't valid "
+                                        "because %d and %d have different signs",
+                                        range_str, lower, upper);
+                drange_node_free(dn);
+                return NULL;
+            }
+            if (upper <= lower) {
+                *err_ptr = ws_strdup_printf("Range %s isn't valid "
+                                        "because %d is greater or equal than %d",
+                                        range_str, lower, upper);
+                drange_node_free(dn);
+                return NULL;
+            }
+            drange_node_set_end_offset(dn, upper);
+            break;
+        case DRANGE_NODE_END_T_TO_THE_END:
+            drange_node_set_to_the_end(dn);
+            break;
+        default:
+            ws_assert_not_reached();
+            break;
+    }
+
+    return dn;
+}
+
+/* Return a newly allocated field-by-field copy of `org`, or NULL when
+ * `org` is NULL. */
+static drange_node*
+drange_node_dup(drange_node *org)
+{
+    drange_node *copy;
+
+    if (org == NULL) {
+        return NULL;
+    }
+
+    copy = g_new(drange_node,1);
+    copy->ending = org->ending;
+    copy->end_offset = org->end_offset;
+    copy->length = org->length;
+    copy->start_offset = org->start_offset;
+
+    return copy;
+}
+
+/* drange_node destructor. The node is a single allocation, so g_free()
+ * is all that is needed. */
+void
+drange_node_free(drange_node* drnode)
+{
+    g_free(drnode);
+}
+
+/* drange_node accessors.
+ * Each getter asserts that the node is in a state where the requested
+ * field is meaningful. */
+
+/* Start offset; the node's ending must have been set. */
+int
+drange_node_get_start_offset(drange_node* drnode)
+{
+    ws_assert(drnode->ending != DRANGE_NODE_END_T_UNINITIALIZED);
+    return drnode->start_offset;
+}
+
+/* Length; only valid for nodes that end by length. */
+int
+drange_node_get_length(drange_node* drnode)
+{
+    ws_assert(drnode->ending == DRANGE_NODE_END_T_LENGTH);
+    return drnode->length;
+}
+
+/* End offset; only valid for nodes that end by offset. */
+int
+drange_node_get_end_offset(drange_node* drnode)
+{
+    ws_assert(drnode->ending == DRANGE_NODE_END_T_OFFSET);
+    return drnode->end_offset;
+}
+
+/* Kind of ending; the node's ending must have been set. */
+drange_node_end_t
+drange_node_get_ending(drange_node* drnode)
+{
+    ws_assert(drnode->ending != DRANGE_NODE_END_T_UNINITIALIZED);
+    return drnode->ending;
+}
+
+/* drange_node mutators.
+ * The three "end" setters also record which kind of ending the node uses;
+ * the last one called wins. */
+
+/* Set the start offset; leaves the ending untouched. */
+void
+drange_node_set_start_offset(drange_node* drnode, int offset)
+{
+    drnode->start_offset = offset;
+}
+
+/* End the node by length. */
+void
+drange_node_set_length(drange_node* drnode, int length)
+{
+    drnode->length = length;
+    drnode->ending = DRANGE_NODE_END_T_LENGTH;
+}
+
+/* End the node by end offset. */
+void
+drange_node_set_end_offset(drange_node* drnode, int offset)
+{
+    drnode->end_offset = offset;
+    drnode->ending = DRANGE_NODE_END_T_OFFSET;
+}
+
+
+/* Make the node extend to the end. */
+void
+drange_node_set_to_the_end(drange_node* drnode)
+{
+    drnode->ending = DRANGE_NODE_END_T_TO_THE_END;
+}
+
+/* drange constructor: creates an empty range and, when `drnode` is not
+ * NULL, appends it as the first node. The min/max start offsets begin at
+ * INT_MAX / INT_MIN so that the first appended node sets both. */
+drange_t *
+drange_new(drange_node* drnode)
+{
+    drange_t *dr = g_new(drange_t,1);
+
+    dr->range_list = NULL;
+    dr->total_length = 0;
+    dr->has_total_length = true;
+    dr->max_start_offset = INT_MIN;
+    dr->min_start_offset = INT_MAX;
+
+    if (drnode != NULL) {
+        drange_append_drange_node(dr, drnode);
+    }
+
+    return dr;
+}
+
+/* GFunc adapter: appends the list item `data` (a drange_node) to the
+ * drange passed as `user_data`. */
+static void
+drange_append_wrapper(void *data, void *user_data)
+{
+    drange_append_drange_node((drange_t *)user_data, (drange_node *)data);
+}
+
+/* Build a drange from a list of drange_node pointers, preserving order.
+ * The nodes themselves (not copies) are appended to the new range. */
+drange_t *
+drange_new_from_list(GSList *list)
+{
+    drange_t *dr = drange_new(NULL);
+
+    g_slist_foreach(list, drange_append_wrapper, dr);
+
+    return dr;
+}
+
+/* Return a deep copy of `org` (every node is duplicated), or NULL when
+ * `org` is NULL. */
+drange_t *
+drange_dup(drange_t *org)
+{
+    drange_t *copy;
+
+    if (org == NULL) {
+        return NULL;
+    }
+
+    copy = drange_new(NULL);
+
+    GSList *item = org->range_list;
+    while (item != NULL) {
+        drange_append_drange_node(copy, drange_node_dup((drange_node *)item->data));
+        item = item->next;
+    }
+
+    return copy;
+}
+
+
+/* drange destructor: frees every node in the range, then the range
+ * itself. `dr` is dereferenced, so it must not be NULL. */
+void
+drange_free(drange_t * dr)
+{
+    drange_node_free_list(dr->range_list);
+    g_free(dr);
+}
+
+/* Free a list of drange_nodes: both the list links and each node
+ * (g_free() is what drange_node_free() does). */
+void
+drange_node_free_list(GSList* list)
+{
+    g_slist_free_full(list, g_free);
+}
+
+/* drange accessors: plain reads of the bookkeeping fields that
+ * update_drange_with_node() maintains as nodes are added. */
+bool drange_has_total_length(drange_t * dr) { return dr->has_total_length; }
+int drange_get_total_length(drange_t * dr) { return dr->total_length; }
+int drange_get_min_start_offset(drange_t * dr) { return dr->min_start_offset; }
+int drange_get_max_start_offset(drange_t * dr) { return dr->max_start_offset; }
+
+/* Fold a newly added node into the range's bookkeeping: total length
+ * (dropped for good once a to-the-end node is seen) and the smallest and
+ * largest start offsets.
+ * NOTE(review): drnode->length is added for offset-ended nodes as well;
+ * nothing visible here sets their length, so confirm that is intended. */
+static void
+update_drange_with_node(drange_t *dr, drange_node *drnode)
+{
+    const int start = drnode->start_offset;
+
+    if (drnode->ending == DRANGE_NODE_END_T_TO_THE_END) {
+        dr->has_total_length = false;
+    }
+    else if (dr->has_total_length) {
+        dr->total_length += drnode->length;
+    }
+
+    if (start < dr->min_start_offset) {
+        dr->min_start_offset = start;
+    }
+    if (start > dr->max_start_offset) {
+        dr->max_start_offset = start;
+    }
+}
+
+/* drange mutators */
+
+/* Insert `drnode` at the front of the range and update the bookkeeping.
+ * A NULL node is ignored. */
+void
+drange_prepend_drange_node(drange_t * dr, drange_node* drnode)
+{
+    if (drnode == NULL) {
+        return;
+    }
+
+    dr->range_list = g_slist_prepend(dr->range_list,drnode);
+    update_drange_with_node(dr, drnode);
+}
+
+/* Add `drnode` at the end of the range and update the bookkeeping.
+ * A NULL node is ignored. */
+void
+drange_append_drange_node(drange_t * dr, drange_node* drnode)
+{
+    if (drnode == NULL) {
+        return;
+    }
+
+    dr->range_list = g_slist_append(dr->range_list,drnode);
+    update_drange_with_node(dr, drnode);
+}
+
+/* Call `func(node, funcdata)` for every node of the range, in list order. */
+void
+drange_foreach_drange_node(drange_t * dr, GFunc func, void *funcdata)
+{
+    g_slist_foreach(dr->range_list,func,funcdata);
+}
+
+/* Return a newly allocated textual form of the node:
+ * "start:" (to the end), "start-end" (by offset), "start:length" (by
+ * length), or a "start/length/end/U" debugging form for a node whose
+ * ending was never set. The caller frees the string. */
+char *
+drange_node_tostr(const drange_node *rn)
+{
+    switch (rn->ending) {
+        case DRANGE_NODE_END_T_TO_THE_END:
+            return ws_strdup_printf("%d:", rn->start_offset);
+        case DRANGE_NODE_END_T_OFFSET:
+            return ws_strdup_printf("%d-%d", rn->start_offset, rn->end_offset);
+        case DRANGE_NODE_END_T_LENGTH:
+            return ws_strdup_printf("%d:%d", rn->start_offset, rn->length);
+        default:
+            return ws_strdup_printf("%d/%d/%d/U", rn->start_offset, rn->length, rn->end_offset);
+    }
+}
+
+/* Return a newly allocated, comma-separated list of the textual forms of
+ * all nodes in the range (an empty string for an empty range). The caller
+ * frees the string. */
+char *
+drange_tostr(const drange_t *dr)
+{
+    GString *out = g_string_new("");
+
+    for (GSList *item = dr->range_list; item != NULL; item = g_slist_next(item)) {
+        char *node_text = drange_node_tostr(item->data);
+
+        g_string_append(out, node_text);
+        g_free(node_text);
+
+        /* Separator between nodes, none after the last one. */
+        if (g_slist_next(item) != NULL) {
+            g_string_append_c(out, ',');
+        }
+    }
+
+    /* Hand the character data to the caller; free only the wrapper. */
+    return g_string_free(out, false);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vi: set shiftwidth=4 tabstop=8 expandtab:
+ * :indentSize=4:tabSize=8:noTabs=true:
+ */
diff --git a/epan/dfilter/drange.h b/epan/dfilter/drange.h
new file mode 100644
index 0000000..8162afd
--- /dev/null
+++ b/epan/dfilter/drange.h
@@ -0,0 +1,97 @@
+/** @file
+ *
+ * Routines for providing general range support to the dfilter library
+ *
+ * Copyright (c) 2000 by Ed Warnicke <hagbard@physics.rutgers.edu>
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs
+ * Copyright 1999 Gerald Combs
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef __DRANGE_H__
+#define __DRANGE_H__
+
+#include <wireshark.h>
+
+/* Please don't directly manipulate these structs. Please use
+ * the methods provided. If you REALLY can't do what you need to
+ * do with the methods provided please write new methods that do
+ * what you need, put them into the drange object here, and limit
+ * your direct manipulation of the drange and drange_node structs to
+ * here.
+ */
+
+/* How the end of a range node is expressed. */
+typedef enum {
+ DRANGE_NODE_END_T_UNINITIALIZED,
+ DRANGE_NODE_END_T_LENGTH,
+ DRANGE_NODE_END_T_OFFSET,
+ DRANGE_NODE_END_T_TO_THE_END
+} drange_node_end_t;
+
+/* One element of a range list. 'ending' selects which of 'length' and
+ * 'end_offset' accompanies 'start_offset' (drange_node_tostr() prints
+ * start:length, start-end or start: accordingly). */
+typedef struct _drange_node {
+ int start_offset;
+ int length;
+ int end_offset;
+ drange_node_end_t ending;
+} drange_node;
+
+/* A list of drange_node elements plus summary fields maintained as nodes
+ * are added (min/max_start_offset track the nodes' start_offset values). */
+typedef struct _drange {
+ GSList* range_list;
+ bool has_total_length;
+ int total_length;
+ int min_start_offset;
+ int max_start_offset;
+} drange_t;
+
+/* drange_node constructor */
+drange_node* drange_node_new(void);
+
+/* drange_node constructor */
+drange_node* drange_node_from_str(const char *range_str, char **err_ptr);
+
+/* drange_node destructor */
+void drange_node_free(drange_node* drnode);
+
+/* Call drange_node destructor on all list items */
+void drange_node_free_list(GSList* list);
+
+/* drange_node accessors */
+int drange_node_get_start_offset(drange_node* drnode);
+int drange_node_get_length(drange_node* drnode);
+int drange_node_get_end_offset(drange_node* drnode);
+drange_node_end_t drange_node_get_ending(drange_node* drnode);
+
+/* drange_node mutators */
+void drange_node_set_start_offset(drange_node* drnode, int offset);
+void drange_node_set_length(drange_node* drnode, int length);
+void drange_node_set_end_offset(drange_node* drnode, int offset);
+void drange_node_set_to_the_end(drange_node* drnode);
+
+/* drange constructor */
+drange_t * drange_new(drange_node* drnode);
+drange_t * drange_new_from_list(GSList *list);
+drange_t * drange_dup(drange_t *org);
+
+/* drange destructor, only use this if you used drange_new() to create
+ * the drange
+ */
+void drange_free(drange_t* dr);
+
+/* drange accessors */
+bool drange_has_total_length(drange_t* dr);
+int drange_get_total_length(drange_t* dr);
+int drange_get_min_start_offset(drange_t* dr);
+int drange_get_max_start_offset(drange_t* dr);
+
+/* drange mutators */
+void drange_append_drange_node(drange_t* dr, drange_node* drnode);
+void drange_prepend_drange_node(drange_t* dr, drange_node* drnode);
+void drange_foreach_drange_node(drange_t* dr, GFunc func, void *funcdata);
+
+char *drange_node_tostr(const drange_node *rn);
+
+char *drange_tostr(const drange_t *dr);
+
+#endif /* ! __DRANGE_H__ */
diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c
new file mode 100644
index 0000000..6c4da95
--- /dev/null
+++ b/epan/dfilter/gencode.c
@@ -0,0 +1,897 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#include "gencode.h"
+#include "dfvm.h"
+#include "syntax-tree.h"
+#include "sttype-field.h"
+#include "sttype-slice.h"
+#include "sttype-op.h"
+#include "sttype-set.h"
+#include "sttype-function.h"
+#include "ftypes/ftypes.h"
+#include <wsutil/ws_assert.h>
+
+static void
+fixup_jumps(void *data, void *user_data);
+
+static void
+gencode(dfwork_t *dfw, stnode_t *st_node);
+
+static dfvm_value_t *
+gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr);
+
+/*
+ * Pick the ALL or ANY flavour of a comparison/set opcode according to the
+ * match quantifier 'how'.
+ *
+ * With STNODE_MATCH_DEF the opcode is returned untouched. Otherwise an
+ * ALL opcode is kept for STNODE_MATCH_ALL and incremented by one for any
+ * other quantifier, while an ANY opcode is kept for STNODE_MATCH_ANY and
+ * decremented by one for any other quantifier.
+ *
+ * NOTE: the +1/-1 arithmetic is only correct if every DFVM_ALL_x /
+ * DFVM_SET_ALL_x enumerator is immediately followed by its ANY
+ * counterpart in dfvm_opcode_t (declared elsewhere); keep the two in sync.
+ *
+ * Passing any opcode not listed below is a programming error.
+ */
+static dfvm_opcode_t
+select_opcode(dfvm_opcode_t op, stmatch_t how)
+{
+ if (how == STNODE_MATCH_DEF)
+ return op;
+
+ switch (op) {
+ case DFVM_ALL_EQ:
+ case DFVM_ALL_NE:
+ case DFVM_ALL_GT:
+ case DFVM_ALL_GE:
+ case DFVM_ALL_LT:
+ case DFVM_ALL_LE:
+ case DFVM_ALL_CONTAINS:
+ case DFVM_ALL_MATCHES:
+ case DFVM_SET_ALL_IN:
+ case DFVM_SET_ALL_NOT_IN:
+ /* ALL opcode: switch to the adjacent ANY opcode if needed. */
+ return how == STNODE_MATCH_ALL ? op : op + 1;
+ case DFVM_ANY_EQ:
+ case DFVM_ANY_NE:
+ case DFVM_ANY_GT:
+ case DFVM_ANY_GE:
+ case DFVM_ANY_LT:
+ case DFVM_ANY_LE:
+ case DFVM_ANY_CONTAINS:
+ case DFVM_ANY_MATCHES:
+ case DFVM_SET_ANY_IN:
+ case DFVM_SET_ANY_NOT_IN:
+ /* ANY opcode: switch to the adjacent ALL opcode if needed. */
+ return how == STNODE_MATCH_ANY ? op : op - 1;
+ default:
+ break;
+ }
+ ws_assert_not_reached();
+}
+
+/* Stamp insn with the next sequential instruction id and add it to the
+ * end of the program being assembled in dfw. */
+static void
+dfw_append_insn(dfwork_t *dfw, dfvm_insn_t *insn)
+{
+	insn->id = dfw->next_insn_id++;
+	g_ptr_array_add(dfw->insns, insn);
+}
+
+/* Emit a DFVM_STACK_PUSH instruction whose operand is arg1. */
+static void
+dfw_append_stack_push(dfwork_t *dfw, dfvm_value_t *arg1)
+{
+	dfvm_insn_t *push = dfvm_insn_new(DFVM_STACK_PUSH);
+
+	push->arg1 = dfvm_value_ref(arg1);
+	dfw_append_insn(dfw, push);
+}
+
+/* Emit a DFVM_STACK_POP instruction whose operand is the number of
+ * entries to pop. */
+static void
+dfw_append_stack_pop(dfwork_t *dfw, unsigned count)
+{
+	dfvm_insn_t *pop = dfvm_insn_new(DFVM_STACK_POP);
+	dfvm_value_t *how_many = dfvm_value_new_guint(count);
+
+	pop->arg1 = dfvm_value_ref(how_many);
+	dfw_append_insn(dfw, pop);
+}
+
+/* Emit a DFVM_SET_ADD_RANGE instruction; arg1 and arg2 are the two
+ * bounds of the range element being added to the set. */
+static void
+dfw_append_set_add_range(dfwork_t *dfw, dfvm_value_t *arg1, dfvm_value_t *arg2)
+{
+	dfvm_insn_t *add_range = dfvm_insn_new(DFVM_SET_ADD_RANGE);
+
+	add_range->arg1 = dfvm_value_ref(arg1);
+	add_range->arg2 = dfvm_value_ref(arg2);
+	dfw_append_insn(dfw, add_range);
+}
+
+/* Emit a DFVM_SET_ADD instruction adding the single value arg1 to the
+ * set. */
+static void
+dfw_append_set_add(dfwork_t *dfw, dfvm_value_t *arg1)
+{
+	dfvm_insn_t *add = dfvm_insn_new(DFVM_SET_ADD);
+
+	add->arg1 = dfvm_value_ref(arg1);
+	dfw_append_insn(dfw, add);
+}
+
+/* Emit a DFVM_IF_FALSE_GOTO whose destination is not known yet.
+ * Returns the INSN_NUMBER value used as the jump operand so that the
+ * caller can fill in the destination later (see fixup_jumps()). */
+static dfvm_value_t *
+dfw_append_jump(dfwork_t *dfw)
+{
+	dfvm_insn_t *branch = dfvm_insn_new(DFVM_IF_FALSE_GOTO);
+	dfvm_value_t *target = dfvm_value_new(INSN_NUMBER);
+
+	branch->arg1 = dfvm_value_ref(target);
+	dfw_append_insn(dfw, branch);
+
+	return target;
+}
+
+/*
+ * Emit a DFVM_READ_TREE instruction (DFVM_READ_TREE_R when 'range' is
+ * given) for hfinfo and return the register value that receives the
+ * result.
+ *
+ * Registers are remembered per field, separately for raw and non-raw
+ * reads, so that a later unranged read of the same field reuses the same
+ * register. The first time a field is seen, it and all same-named fields
+ * are recorded in dfw->interesting_fields.
+ */
+static dfvm_value_t *
+dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo,
+ drange_t *range,
+ bool raw)
+{
+ dfvm_insn_t *insn;
+ int reg = -1;
+ dfvm_value_t *reg_val, *val1, *val3;
+ bool added_new_hfinfo = false;
+ GHashTable *loaded_fields;
+ void *loaded_key;
+
+ /* Rewind to find the first field of this name. */
+ while (hfinfo->same_name_prev_id != -1) {
+ hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
+ }
+
+ if (raw)
+ loaded_fields = dfw->loaded_raw_fields;
+ else
+ loaded_fields = dfw->loaded_fields;
+
+ /* Keep track of which registers
+ * were used for which hfinfo's so that we
+ * can re-use registers. */
+ /* Re-use only if we are not using a range (layer filter). */
+ loaded_key = g_hash_table_lookup(loaded_fields, hfinfo);
+ if (loaded_key != NULL) {
+ if (range == NULL) {
+ /*
+ * Reg's are stored in hash as reg+1, so
+ * that the non-existence of a hfinfo in
+ * the hash, or 0, can be differentiated from
+ * a hfinfo being loaded into register #0.
+ */
+ reg = GPOINTER_TO_INT(loaded_key) - 1;
+ }
+ else {
+ /* Ranged read of a known field: private register, not recorded. */
+ reg = dfw->next_register++;
+ }
+ }
+ else {
+ /* NOTE(review): this branch records the register even when 'range'
+ * is non-NULL, so a later unranged read of the same field will
+ * reuse a register filled by a ranged read -- confirm intended. */
+ reg = dfw->next_register++;
+ g_hash_table_insert(loaded_fields,
+ hfinfo, GINT_TO_POINTER(reg + 1));
+
+ added_new_hfinfo = true;
+ }
+
+ val1 = dfvm_value_new_hfinfo(hfinfo, raw);
+ reg_val = dfvm_value_new_register(reg);
+ if (range) {
+ val3 = dfvm_value_new_drange(range);
+ insn = dfvm_insn_new(DFVM_READ_TREE_R);
+ }
+ else {
+ val3 = NULL;
+ insn = dfvm_insn_new(DFVM_READ_TREE);
+ }
+ insn->arg1 = dfvm_value_ref(val1);
+ insn->arg2 = dfvm_value_ref(reg_val);
+ /* val3 is NULL for the unranged form. */
+ insn->arg3 = dfvm_value_ref(val3);
+ dfw_append_insn(dfw, insn);
+
+ if (added_new_hfinfo) {
+ while (hfinfo) {
+ /* Record the FIELD_ID in hash of interesting fields. */
+ g_hash_table_add(dfw->interesting_fields, &hfinfo->id);
+ hfinfo = hfinfo->same_name_next;
+ }
+ }
+
+ return reg_val;
+}
+
+/*
+ * Emit a DFVM_READ_REFERENCE instruction (DFVM_READ_REFERENCE_R when
+ * 'range' is given) for hfinfo and return the freshly allocated register
+ * value that receives the result.
+ *
+ * A new, empty reference array is stored for the field in
+ * dfw->raw_references or dfw->references, and the field plus all
+ * same-named fields are recorded in dfw->interesting_fields.
+ */
+static dfvm_value_t *
+dfw_append_read_reference(dfwork_t *dfw, header_field_info *hfinfo,
+ drange_t *range,
+ bool raw)
+{
+ dfvm_insn_t *insn;
+ dfvm_value_t *reg_val, *val1, *val3;
+ GPtrArray *refs_array;
+
+ /* Rewind to find the first field of this name. */
+ while (hfinfo->same_name_prev_id != -1) {
+ hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
+ }
+
+ /* We can't reuse registers with a filter so just skip
+ * that optimization and don't reuse them at all. */
+ val1 = dfvm_value_new_hfinfo(hfinfo, raw);
+ reg_val = dfvm_value_new_register(dfw->next_register++);
+ if (range) {
+ val3 = dfvm_value_new_drange(range);
+ insn = dfvm_insn_new(DFVM_READ_REFERENCE_R);
+ }
+ else {
+ val3 = NULL;
+ insn = dfvm_insn_new(DFVM_READ_REFERENCE);
+ }
+ insn->arg1 = dfvm_value_ref(val1);
+ insn->arg2 = dfvm_value_ref(reg_val);
+ /* val3 is NULL for the unranged form. */
+ insn->arg3 = dfvm_value_ref(val3);
+ dfw_append_insn(dfw, insn);
+
+ /* NOTE(review): the insert below replaces any array already stored
+ * under this hfinfo; whether the old array is released depends on how
+ * the table was created (not visible here) -- confirm. */
+ refs_array = g_ptr_array_new_with_free_func((GDestroyNotify)reference_free);
+ if (raw)
+ g_hash_table_insert(dfw->raw_references, hfinfo, refs_array);
+ else
+ g_hash_table_insert(dfw->references, hfinfo, refs_array);
+
+ /* Record the FIELD_ID of every same-named field in the hash of
+ * interesting fields. */
+ while (hfinfo) {
+ g_hash_table_add(dfw->interesting_fields, &hfinfo->id);
+ hfinfo = hfinfo->same_name_next;
+ }
+
+ return reg_val;
+}
+
+/*
+ * Emit a DFVM_SLICE instruction for a slice node and return the freshly
+ * allocated register value that receives the result.
+ *
+ * The sliced entity is generated first (so its instructions precede the
+ * slice), and jumps_ptr is passed on to that generator. The node's drange
+ * is taken over by the instruction's third operand.
+ */
+static dfvm_value_t *
+dfw_append_mk_slice(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr)
+{
+ stnode_t *entity;
+ dfvm_insn_t *insn;
+ dfvm_value_t *reg_val, *val1, *val3;
+
+ entity = sttype_slice_entity(node);
+
+ insn = dfvm_insn_new(DFVM_SLICE);
+ val1 = gen_entity(dfw, entity, jumps_ptr);
+ insn->arg1 = dfvm_value_ref(val1);
+ /* Destination register is allocated after the entity's own registers. */
+ reg_val = dfvm_value_new_register(dfw->next_register++);
+ insn->arg2 = dfvm_value_ref(reg_val);
+ val3 = dfvm_value_new_drange(sttype_slice_drange_steal(node));
+ insn->arg3 = dfvm_value_ref(val3);
+ /* NOTE(review): called right after the steal above; presumably clears
+ * the node's drange pointer -- confirm against sttype-slice.c. */
+ sttype_slice_remove_drange(node);
+ dfw_append_insn(dfw, insn);
+
+ return reg_val;
+}
+
+/* Emit a DFVM_PUT_FVALUE instruction that stores the constant fv into a
+ * freshly allocated register; returns that register value.
+ * Currently unused, hence the _U_ marker. */
+_U_ static dfvm_value_t *
+dfw_append_put_fvalue(dfwork_t *dfw, fvalue_t *fv)
+{
+	dfvm_insn_t *put = dfvm_insn_new(DFVM_PUT_FVALUE);
+	dfvm_value_t *constant = dfvm_value_new_fvalue(fv);
+	dfvm_value_t *dest = dfvm_value_new_register(dfw->next_register++);
+
+	put->arg1 = dfvm_value_ref(constant);
+	put->arg2 = dfvm_value_ref(dest);
+	dfw_append_insn(dfw, put);
+
+	return dest;
+}
+
+/* Emit a DFVM_LENGTH instruction for a one-argument function node and
+ * return the register value that will hold the length. The argument is
+ * generated first and jumps_ptr is passed on to its generator. */
+static dfvm_value_t *
+dfw_append_length(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr)
+{
+	GSList *args = sttype_function_params(node);
+	dfvm_insn_t *len_insn;
+	dfvm_value_t *src, *dest;
+
+	/* Exactly one parameter is expected. */
+	ws_assert(args);
+	ws_assert(g_slist_length(args) == 1);
+
+	/* Source operand first, then the destination register. */
+	src = gen_entity(dfw, args->data, jumps_ptr);
+	dest = dfvm_value_new_register(dfw->next_register++);
+
+	len_insn = dfvm_insn_new(DFVM_LENGTH);
+	len_insn->arg1 = dfvm_value_ref(src);
+	len_insn->arg2 = dfvm_value_ref(dest);
+	dfw_append_insn(dfw, len_insn);
+
+	return dest;
+}
+
+/*
+ * Generate a function call and return the register value that will hold
+ * the function's result.
+ *
+ * "len" is special-cased into a DFVM_LENGTH instruction. For every other
+ * function each parameter is generated and pushed on the stack, followed
+ * by DFVM_CALL_FUNCTION, a DFVM_STACK_POP of the pushed parameters and a
+ * conditional jump whose unresolved target is prepended to *jumps_ptr.
+ */
+static dfvm_value_t *
+dfw_append_function(dfwork_t *dfw, stnode_t *node, GSList **jumps_ptr)
+{
+	GSList *params;
+	dfvm_insn_t *call;
+	dfvm_value_t *result, *funcdef, *argc, *arg;
+	unsigned count;
+
+	if (strcmp(sttype_function_name(node), "len") == 0) {
+		/* Replace len() function call with DFVM_LENGTH instruction. */
+		return dfw_append_length(dfw, node, jumps_ptr);
+	}
+
+	/* Build the call instruction up front: the result register must be
+	 * allocated before any register used by the parameters. */
+	call = dfvm_insn_new(DFVM_CALL_FUNCTION);
+	funcdef = dfvm_value_new_funcdef(sttype_function_funcdef(node));
+	call->arg1 = dfvm_value_ref(funcdef);
+	result = dfvm_value_new_register(dfw->next_register++);
+	call->arg2 = dfvm_value_ref(result);
+
+	/* Generate and push the arguments, one at a time. */
+	params = sttype_function_params(node);
+	ws_assert(params);
+	for (count = 0; params != NULL; params = params->next, count++) {
+		GSList *arg_jumps = NULL;
+
+		arg = gen_entity(dfw, params->data, &arg_jumps);
+		/* If a parameter fails to generate, jump here.
+		 * Note: stack_push NULL register is valid. */
+		g_slist_foreach(arg_jumps, fixup_jumps, dfw);
+		g_slist_free(arg_jumps);
+		dfw_append_stack_push(dfw, arg);
+	}
+
+	argc = dfvm_value_new_guint(count);
+	call->arg3 = dfvm_value_ref(argc);
+	dfw_append_insn(dfw, call);
+	dfw_append_stack_pop(dfw, count);
+
+	/* We need another instruction to jump to another exit
+	 * place, if the call() of our function failed for some reason */
+	*jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw));
+
+	return result;
+}
+
+/**
+ * Emits a single three-operand instruction with opcode op. The operands
+ * are expected to be already generated; each one is referenced by the new
+ * instruction. Unused operands are passed as NULL by callers.
+ */
+static void
+gen_relation_insn(dfwork_t *dfw, dfvm_opcode_t op,
+			dfvm_value_t *arg1, dfvm_value_t *arg2,
+			dfvm_value_t *arg3)
+{
+	dfvm_insn_t *rel = dfvm_insn_new(op);
+
+	rel->arg1 = dfvm_value_ref(arg1);
+	rel->arg2 = dfvm_value_ref(arg2);
+	rel->arg3 = dfvm_value_ref(arg3);
+	dfw_append_insn(dfw, rel);
+}
+
+/* Generate code for a binary relation: both operands first, then the
+ * comparison instruction (ALL/ANY flavour chosen by 'how'), and finally
+ * resolve every pending operand jump to the instruction that follows. */
+static void
+gen_relation(dfwork_t *dfw, dfvm_opcode_t op, stmatch_t how,
+					stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	GSList *pending = NULL;
+	dfvm_value_t *lhs, *rhs;
+
+	/* Operand code, left before right. */
+	lhs = gen_entity(dfw, st_arg1, &pending);
+	rhs = gen_entity(dfw, st_arg2, &pending);
+
+	/* The comparison itself. */
+	gen_relation_insn(dfw, select_opcode(op, how), lhs, rhs, NULL);
+
+	/* Operands that failed to load skip past the comparison. */
+	g_slist_foreach(pending, fixup_jumps, dfw);
+	g_slist_free(pending);
+}
+
+/* GFunc callback: point the pending jump 'data' at the id the next
+ * emitted instruction will get. 'user_data' is the dfwork_t being built.
+ * NULL list entries are skipped. */
+static void
+fixup_jumps(void *data, void *user_data)
+{
+	dfvm_value_t *target = (dfvm_value_t*)data;
+	dfwork_t *work = (dfwork_t*)user_data;
+
+	if (target == NULL)
+		return;
+
+	target->value.numeric = work->next_insn_id;
+}
+
+/* Generate the code for the in operator. Pushes set values into a stack
+ * and then evaluates membership in a single instruction.
+ *
+ * The node list stolen from st_arg2 is consumed two entries at a time:
+ * the first entry of a pair is the element (or lower bound) and the
+ * second is the upper bound for a range element, or NULL for a plain
+ * element. The list must therefore hold an even number of entries; this
+ * is asserted instead of dereferencing a NULL link.
+ *
+ * Jumps raised while generating one set element are resolved to the next
+ * element, so a missing item is simply skipped. Jumps raised by the LHS
+ * are resolved past the final DFVM_SET_CLEAR. */
+static void
+gen_relation_in(dfwork_t *dfw, dfvm_opcode_t op, stmatch_t how,
+					stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	dfvm_insn_t *insn;
+	GSList *jumps = NULL;
+	GSList *node_jumps = NULL;
+	dfvm_value_t *val1, *val2, *val3;
+	stnode_t *node1, *node2;
+	GSList *nodelist_head, *nodelist;
+
+	/* Create code for the LHS of the relation */
+	val1 = gen_entity(dfw, st_arg1, &jumps);
+
+	/* Create code to populate the set stack */
+	nodelist_head = nodelist = stnode_steal_data(st_arg2);
+	while (nodelist) {
+		node1 = nodelist->data;
+		nodelist = g_slist_next(nodelist);
+		/* Every element must be followed by its (possibly NULL) pair. */
+		ws_assert(nodelist != NULL);
+		node2 = nodelist->data;
+		nodelist = g_slist_next(nodelist);
+
+		if (node2) {
+			/* Range element. */
+			val2 = gen_entity(dfw, node1, &node_jumps);
+			val3 = gen_entity(dfw, node2, &node_jumps);
+			dfw_append_set_add_range(dfw, val2, val3);
+		} else {
+			/* Normal element. */
+			val2 = gen_entity(dfw, node1, &node_jumps);
+			dfw_append_set_add(dfw, val2);
+		}
+
+		/* If an item is not present, just jump to the next item */
+		g_slist_foreach(node_jumps, fixup_jumps, dfw);
+		g_slist_free(node_jumps);
+		node_jumps = NULL;
+	}
+	set_nodelist_free(nodelist_head);
+
+	/* Create code for the set on the RHS of the relation */
+	insn = dfvm_insn_new(select_opcode(op, how));
+	insn->arg1 = dfvm_value_ref(val1);
+	dfw_append_insn(dfw, insn);
+
+	/* Add instruction to clear the whole stack */
+	insn = dfvm_insn_new(DFVM_SET_CLEAR);
+	dfw_append_insn(dfw, insn);
+
+	/* Jump here if the LHS entity was not present */
+	g_slist_foreach(jumps, fixup_jumps, dfw);
+	g_slist_free(jumps);
+}
+
+/* Generate code for an arithmetic node and return the freshly allocated
+ * register value that receives the result.
+ *
+ * The operator is mapped to its DFVM opcode; the left operand (and the
+ * right one, when present) is generated with jumps_ptr passed on. A node
+ * without a right operand yields a two-operand instruction (source,
+ * destination); otherwise a three-operand one (left, right, destination). */
+static dfvm_value_t *
+gen_arithmetic(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr)
+{
+	stnode_t *lhs_node, *rhs_node;
+	stnode_op_t st_op;
+	dfvm_value_t *dest, *lhs, *rhs;
+	dfvm_opcode_t op;
+
+	sttype_oper_get(st_arg, &st_op, &lhs_node, &rhs_node);
+
+	switch (st_op) {
+	case STNODE_OP_UNARY_MINUS:
+		op = DFVM_UNARY_MINUS;
+		break;
+	case STNODE_OP_ADD:
+		op = DFVM_ADD;
+		break;
+	case STNODE_OP_SUBTRACT:
+		op = DFVM_SUBTRACT;
+		break;
+	case STNODE_OP_MULTIPLY:
+		op = DFVM_MULTIPLY;
+		break;
+	case STNODE_OP_DIVIDE:
+		op = DFVM_DIVIDE;
+		break;
+	case STNODE_OP_MODULO:
+		op = DFVM_MODULO;
+		break;
+	case STNODE_OP_BITWISE_AND:
+		op = DFVM_BITWISE_AND;
+		break;
+	default:
+		ws_assert_not_reached();
+	}
+
+	lhs = gen_entity(dfw, lhs_node, jumps_ptr);
+
+	if (rhs_node != NULL) {
+		/* Binary form: left, right, destination. */
+		rhs = gen_entity(dfw, rhs_node, jumps_ptr);
+		dest = dfvm_value_new_register(dfw->next_register++);
+		gen_relation_insn(dfw, op, lhs, rhs, dest);
+		return dest;
+	}
+
+	/* Unary form: source, destination. */
+	dest = dfvm_value_new_register(dfw->next_register++);
+	gen_relation_insn(dfw, op, lhs, dest, NULL);
+	return dest;
+}
+
+/* Generate code for one syntax-tree entity and return the DFVM value
+ * (register or constant) that represents it.
+ *
+ * Fields and field references are read from the tree; when jumps_ptr is
+ * not NULL a conditional jump is emitted right after the read and its
+ * still-unresolved target is prepended to *jumps_ptr, so that the caller
+ * can decide where execution continues if the load fails (see
+ * fixup_jumps()). Slices, functions and arithmetic hand jumps_ptr to
+ * their own generators. Constants (fvalue, pcre) emit no instruction.
+ * Any other node type is a fatal error. */
+static dfvm_value_t *
+gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr)
+{
+	sttype_id_t e_type = stnode_type_id(st_arg);
+	dfvm_value_t *val;
+	header_field_info *hfinfo;
+	drange_t *range;
+	bool raw;
+
+	switch (e_type) {
+	case STTYPE_FIELD:
+	case STTYPE_REFERENCE:
+		hfinfo = sttype_field_hfinfo(st_arg);
+		range = sttype_field_drange_steal(st_arg);
+		raw = sttype_field_raw(st_arg);
+		if (e_type == STTYPE_FIELD)
+			val = dfw_append_read_tree(dfw, hfinfo, range, raw);
+		else
+			val = dfw_append_read_reference(dfw, hfinfo, range, raw);
+		if (jumps_ptr != NULL) {
+			*jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw));
+		}
+		break;
+	case STTYPE_FVALUE:
+		val = dfvm_value_new_fvalue(stnode_steal_data(st_arg));
+		break;
+	case STTYPE_SLICE:
+		val = dfw_append_mk_slice(dfw, st_arg, jumps_ptr);
+		break;
+	case STTYPE_FUNCTION:
+		val = dfw_append_function(dfw, st_arg, jumps_ptr);
+		break;
+	case STTYPE_PCRE:
+		val = dfvm_value_new_pcre(stnode_steal_data(st_arg));
+		break;
+	case STTYPE_ARITHMETIC:
+		val = gen_arithmetic(dfw, st_arg, jumps_ptr);
+		break;
+	default:
+		ws_error("Invalid sttype: %s", stnode_type_name(st_arg));
+	}
+
+	return val;
+}
+
+/* Generate an existence test for a field node: DFVM_CHECK_EXISTS, or
+ * DFVM_CHECK_EXISTS_R with the node's drange as second operand when one
+ * is attached. The field and all same-named fields are recorded in
+ * dfw->interesting_fields. */
+static void
+gen_exists(dfwork_t *dfw, stnode_t *st_node)
+{
+	dfvm_insn_t *check;
+	dfvm_value_t *field_val, *range_val = NULL;
+	header_field_info *hfinfo = sttype_field_hfinfo(st_node);
+	header_field_info *iter;
+	drange_t *range = sttype_field_drange_steal(st_node);
+
+	/* Rewind to find the first field of this name. */
+	while (hfinfo->same_name_prev_id != -1) {
+		hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
+	}
+
+	/* Ignore "rawness" for existence tests. */
+	field_val = dfvm_value_new_hfinfo(hfinfo, false);
+	if (range) {
+		range_val = dfvm_value_new_drange(range);
+	}
+
+	check = dfvm_insn_new(range_val ? DFVM_CHECK_EXISTS_R : DFVM_CHECK_EXISTS);
+	check->arg1 = dfvm_value_ref(field_val);
+	if (range_val) {
+		check->arg2 = dfvm_value_ref(range_val);
+	}
+	dfw_append_insn(dfw, check);
+
+	/* Record the FIELD_ID in hash of interesting fields. */
+	for (iter = hfinfo; iter != NULL; iter = iter->same_name_next) {
+		g_hash_table_add(dfw->interesting_fields, &iter->id);
+	}
+}
+
+/* Generate a truth test for an arithmetic node: evaluate it, then emit
+ * DFVM_NOT_ALL_ZERO on the result. Jumps raised while evaluating the
+ * expression are resolved to the instruction after the test. */
+static void
+gen_notzero(dfwork_t *dfw, stnode_t *st_node)
+{
+	GSList *pending = NULL;
+	dfvm_value_t *result = gen_arithmetic(dfw, st_node, &pending);
+	dfvm_insn_t *test = dfvm_insn_new(DFVM_NOT_ALL_ZERO);
+
+	test->arg1 = dfvm_value_ref(result);
+	dfw_append_insn(dfw, test);
+
+	g_slist_foreach(pending, fixup_jumps, dfw);
+	g_slist_free(pending);
+}
+
+/* Generate a truth test for a slice node: evaluate the slice, take its
+ * length with DFVM_LENGTH and test that length with DFVM_NOT_ALL_ZERO.
+ * Jumps raised while evaluating the slice are resolved to the
+ * instruction after the test. */
+static void
+gen_exists_slice(dfwork_t *dfw, stnode_t *st_node)
+{
+	GSList *pending = NULL;
+	dfvm_insn_t *len_insn, *test_insn;
+	dfvm_value_t *slice_val, *len_val;
+
+	slice_val = gen_entity(dfw, st_node, &pending);
+
+	/* Compute length. */
+	len_val = dfvm_value_new_register(dfw->next_register++);
+	len_insn = dfvm_insn_new(DFVM_LENGTH);
+	len_insn->arg1 = dfvm_value_ref(slice_val);
+	len_insn->arg2 = dfvm_value_ref(len_val);
+	dfw_append_insn(dfw, len_insn);
+
+	/* Check length is not zero. */
+	test_insn = dfvm_insn_new(DFVM_NOT_ALL_ZERO);
+	test_insn->arg1 = dfvm_value_ref(len_val);
+	dfw_append_insn(dfw, test_insn);
+
+	/* Fixup jumps. */
+	g_slist_foreach(pending, fixup_jumps, dfw);
+	g_slist_free(pending);
+}
+
+/*
+ * Generate code for a test node (logical operator or relation).
+ *
+ * NOT emits its operand followed by DFVM_NOT. AND/OR emit the first
+ * operand, a conditional jump (IF_FALSE_GOTO for AND, IF_TRUE_GOTO for
+ * OR), then the second operand; the jump is patched to land just after
+ * the second operand, which short-circuits the evaluation. Relations are
+ * delegated to gen_relation()/gen_relation_in() with a default opcode
+ * that the node's match quantifier may switch between ALL and ANY.
+ * Arithmetic operators are never valid here.
+ */
+static void
+gen_test(dfwork_t *dfw, stnode_t *st_node)
+{
+ stnode_op_t st_op;
+ stmatch_t st_how;
+ stnode_t *st_arg1, *st_arg2;
+ dfvm_insn_t *insn;
+ dfvm_value_t *jmp;
+
+
+ sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2);
+ st_how = sttype_test_get_match(st_node);
+
+ switch (st_op) {
+ case STNODE_OP_UNINITIALIZED:
+ ws_assert_not_reached();
+ break;
+
+ case STNODE_OP_NOT:
+ gencode(dfw, st_arg1);
+ insn = dfvm_insn_new(DFVM_NOT);
+ dfw_append_insn(dfw, insn);
+ break;
+
+ case STNODE_OP_AND:
+ gencode(dfw, st_arg1);
+
+ /* Skip the second operand when the first one is false. */
+ insn = dfvm_insn_new(DFVM_IF_FALSE_GOTO);
+ jmp = dfvm_value_new(INSN_NUMBER);
+ insn->arg1 = dfvm_value_ref(jmp);
+ dfw_append_insn(dfw, insn);
+
+ gencode(dfw, st_arg2);
+ /* Patch the jump now that the end of the second operand is known. */
+ jmp->value.numeric = dfw->next_insn_id;
+ break;
+
+ case STNODE_OP_OR:
+ gencode(dfw, st_arg1);
+
+ /* Skip the second operand when the first one is true. */
+ insn = dfvm_insn_new(DFVM_IF_TRUE_GOTO);
+ jmp = dfvm_value_new(INSN_NUMBER);
+ insn->arg1 = dfvm_value_ref(jmp);
+ dfw_append_insn(dfw, insn);
+
+ gencode(dfw, st_arg2);
+ /* Patch the jump now that the end of the second operand is known. */
+ jmp->value.numeric = dfw->next_insn_id;
+ break;
+
+ case STNODE_OP_ALL_EQ:
+ gen_relation(dfw, DFVM_ALL_EQ, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_ANY_EQ:
+ gen_relation(dfw, DFVM_ANY_EQ, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_ALL_NE:
+ gen_relation(dfw, DFVM_ALL_NE, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_ANY_NE:
+ gen_relation(dfw, DFVM_ANY_NE, st_how, st_arg1, st_arg2);
+ break;
+
+ /* The remaining relations default to the ANY flavour. */
+ case STNODE_OP_GT:
+ gen_relation(dfw, DFVM_ANY_GT, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_GE:
+ gen_relation(dfw, DFVM_ANY_GE, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_LT:
+ gen_relation(dfw, DFVM_ANY_LT, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_LE:
+ gen_relation(dfw, DFVM_ANY_LE, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_CONTAINS:
+ gen_relation(dfw, DFVM_ANY_CONTAINS, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_MATCHES:
+ gen_relation(dfw, DFVM_ANY_MATCHES, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_IN:
+ gen_relation_in(dfw, DFVM_SET_ANY_IN, st_how, st_arg1, st_arg2);
+ break;
+
+ case STNODE_OP_NOT_IN:
+ gen_relation_in(dfw, DFVM_SET_ANY_NOT_IN, st_how, st_arg1, st_arg2);
+ break;
+
+ /* Arithmetic operators belong to STTYPE_ARITHMETIC nodes, not tests. */
+ case STNODE_OP_BITWISE_AND:
+ case STNODE_OP_UNARY_MINUS:
+ case STNODE_OP_ADD:
+ case STNODE_OP_SUBTRACT:
+ case STNODE_OP_MULTIPLY:
+ case STNODE_OP_DIVIDE:
+ case STNODE_OP_MODULO:
+ ws_assert_not_reached();
+ break;
+ }
+}
+
+/* Generate code for a node used as a boolean expression, dispatching on
+ * the node type: tests, bare fields (existence), arithmetic (non-zero)
+ * and slices (non-empty). Any other node type is a programming error. */
+static void
+gencode(dfwork_t *dfw, stnode_t *st_node)
+{
+	sttype_id_t node_type = stnode_type_id(st_node);
+
+	if (node_type == STTYPE_TEST) {
+		gen_test(dfw, st_node);
+	}
+	else if (node_type == STTYPE_FIELD) {
+		gen_exists(dfw, st_node);
+	}
+	else if (node_type == STTYPE_ARITHMETIC) {
+		gen_notzero(dfw, st_node);
+	}
+	else if (node_type == STTYPE_SLICE) {
+		gen_exists_slice(dfw, st_node);
+	}
+	else {
+		ws_assert_not_reached();
+	}
+}
+
+
+/*
+ * Jump-threading pass over the generated program.
+ *
+ * For every conditional jump, follow its destination forward while the
+ * instruction found there cannot change the outcome, then rewrite the
+ * jump to go straight to the final destination:
+ *  - a conditional jump of the opposite sense is stepped over;
+ *  - a DFVM_READ_TREE into the same register as the DFVM_READ_TREE that
+ *    immediately precedes the jump is stepped over;
+ *  - a conditional jump of the same sense is followed to its own target.
+ *
+ * Instruction ids are used directly as indexes into dfw->insns.
+ */
+static void
+optimize(dfwork_t *dfw)
+{
+ int id, id1, length;
+ dfvm_insn_t *insn, *insn1, *prev;
+ dfvm_value_t *arg1;
+
+ length = dfw->insns->len;
+
+ /* 'prev' trails 'insn' by one instruction. */
+ for (id = 0, prev = NULL; id < length; prev = insn, id++) {
+ insn = (dfvm_insn_t *)g_ptr_array_index(dfw->insns, id);
+ arg1 = insn->arg1;
+ if (insn->op == DFVM_IF_TRUE_GOTO || insn->op == DFVM_IF_FALSE_GOTO) {
+ /* Try to optimize branch jumps */
+ dfvm_opcode_t revert = (insn->op == DFVM_IF_FALSE_GOTO) ? DFVM_IF_TRUE_GOTO : DFVM_IF_FALSE_GOTO;
+ id1 = arg1->value.numeric;
+ /* NOTE(review): id1 is not range-checked; this relies on every
+ * jump target being a valid instruction index -- confirm. */
+ for (;;) {
+ insn1 = (dfvm_insn_t*)g_ptr_array_index(dfw->insns, id1);
+ if (insn1->op == revert) {
+ /* Skip this one; it is always false and the branch is not taken */
+ id1 = id1 +1;
+ continue;
+ }
+ if (insn1->op == DFVM_READ_TREE && prev && prev->op == DFVM_READ_TREE &&
+ prev->arg2->value.numeric == insn1->arg2->value.numeric) {
+ /* Skip this one; hack if it's the same register it's the same field
+ * and it returns the same value */
+ id1 = id1 +1;
+ continue;
+ }
+ if (insn1->op == insn->op) {
+ /* The branch jumps to the same branch instruction so
+ * coalesce the jumps */
+ arg1 = insn1->arg1;
+ id1 = arg1->value.numeric;
+ continue;
+ }
+ /* Finished */
+ arg1 = insn->arg1;
+ arg1->value.numeric = id1;
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Entry point of the code generator: allocate the instruction array and
+ * the bookkeeping hash tables in dfw, generate code for the syntax tree
+ * rooted at dfw->st_root, terminate the program with DFVM_RETURN and,
+ * when DF_OPTIMIZE is set in dfw->flags, run the jump optimizer.
+ */
+void
+dfw_gencode(dfwork_t *dfw)
+{
+ dfw->insns = g_ptr_array_new();
+ /* Keyed by hfinfo pointer; values are register number + 1. */
+ dfw->loaded_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
+ dfw->loaded_raw_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
+ /* Keyed by pointer to the field id (an int). */
+ dfw->interesting_fields = g_hash_table_new(g_int_hash, g_int_equal);
+ gencode(dfw, dfw->st_root);
+ dfw_append_insn(dfw, dfvm_insn_new(DFVM_RETURN));
+ if (dfw->flags & DF_OPTIMIZE) {
+ optimize(dfw);
+ }
+}
+
+
+/* Cursor used while copying hash keys into a flat int array. */
+typedef struct {
+	int i;		/* next free slot in fields[] */
+	int *fields;	/* destination array */
+} hash_key_iterator;
+
+/* GHFunc callback: 'key' points to an int field id; store it in the next
+ * free slot of the hash_key_iterator passed as user_data. */
+static void
+get_hash_key(void *key, void *value _U_, void *user_data)
+{
+	hash_key_iterator *cursor = (hash_key_iterator *)user_data;
+
+	cursor->fields[cursor->i] = *(int *)key;
+	cursor->i++;
+}
+
+/* Return a newly allocated array holding the ids collected in
+ * dfw->interesting_fields and store its length in *caller_num_fields.
+ * Returns NULL (and a length of 0) when no field was recorded. The array
+ * is allocated with g_new(). */
+int*
+dfw_interesting_fields(dfwork_t *dfw, int *caller_num_fields)
+{
+	hash_key_iterator cursor;
+	int count = g_hash_table_size(dfw->interesting_fields);
+
+	*caller_num_fields = count;
+	if (count == 0) {
+		return NULL;
+	}
+
+	cursor.i = 0;
+	cursor.fields = g_new(int, count);
+	g_hash_table_foreach(dfw->interesting_fields, get_hash_key, &cursor);
+
+	return cursor.fields;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/gencode.h b/epan/dfilter/gencode.h
new file mode 100644
index 0000000..94191b6
--- /dev/null
+++ b/epan/dfilter/gencode.h
@@ -0,0 +1,21 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GENCODE_H
+#define GENCODE_H
+
+#include "dfilter-int.h"
+
+void
+dfw_gencode(dfwork_t *dfw);
+
+int*
+dfw_interesting_fields(dfwork_t *dfw, int *caller_num_fields);
+
+#endif
diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon
new file mode 100644
index 0000000..af997ca
--- /dev/null
+++ b/epan/dfilter/grammar.lemon
@@ -0,0 +1,559 @@
+
+%include {
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include <assert.h>
+
+#include "dfilter-int.h"
+#include "syntax-tree.h"
+#include "sttype-field.h"
+#include "sttype-slice.h"
+#include "sttype-op.h"
+#include "sttype-function.h"
+#include "sttype-set.h"
+#include "drange.h"
+
+#include "grammar.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4671)
+#endif
+
+static stnode_t *
+new_function(dfsyntax_t *dfs, stnode_t *node);
+
+#define FAIL(dfs, node, ...) /* log the failure site, then record a generic error at the location of node */ \
+ do { \
+ ws_noisy("Parsing failed here."); \
+ dfilter_fail(dfs, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
+ } while (0)
+
+DIAG_OFF_LEMON()
+} /* end of %include */
+
+%code {
+DIAG_ON_LEMON()
+}
+
+/* Parser Information */
+%name Dfilter
+%token_prefix TOKEN_
+%extra_argument {dfsyntax_t *dfs}
+
+/* Terminal and Non-Terminal types and destructors */
+%token_type {stnode_t*}
+%token_destructor {
+ (void)dfs;
+ stnode_free($$);
+}
+
+%default_type {stnode_t*}
+%default_destructor {stnode_free($$);}
+
+%type range_node_list {GSList*}
+%destructor range_node_list {drange_node_free_list($$);}
+
+%type func_params_list {GSList*}
+%destructor func_params_list {st_funcparams_free($$);}
+
+%type set_list {GSList*}
+%destructor set_list {set_nodelist_free($$);}
+
+%type set_element_list {GSList*}
+%destructor set_element_list {set_nodelist_free($$);}
+
+/* This is called as soon as a syntax error happens. After that,
+any "error" symbols are shifted, if possible. */
+%syntax_error {
+ if (!TOKEN) { /* no offending token available: reported as an unexpected end, with an empty location */
+ dfilter_fail(dfs, DF_ERROR_UNEXPECTED_END, DFILTER_LOC_EMPTY, "Unexpected end of filter expression.");
+ return;
+ }
+ FAIL(dfs, TOKEN, "\"%s\" was unexpected in this context.", stnode_token(TOKEN)); /* otherwise the error is located at the offending token */
+}
+
+/* ----------------- The grammar -------------- */
+
+/* Associativity and precedence. Lemon gives later declarations higher precedence, so TEST_OR binds loosest. */
+%left TEST_OR.
+%left TEST_XOR.
+%left TEST_AND.
+%right TEST_NOT.
+%nonassoc TEST_ALL_EQ TEST_ANY_EQ TEST_ALL_NE TEST_ANY_NE TEST_LT TEST_LE TEST_GT TEST_GE
+ TEST_CONTAINS TEST_MATCHES.
+%left BITWISE_AND.
+%left PLUS MINUS.
+%left STAR RSLASH PERCENT.
+%nonassoc UNARY_PLUS UNARY_MINUS. /* referenced below as the [UNARY_PLUS] / [UNARY_MINUS] rule precedence */
+
+/* Top-level targets */
+sentence ::= expr(X). { dfs->st_root = X; /* the whole expression becomes the syntax tree root */ }
+sentence ::= . { dfs->st_root = NULL; /* nothing was parsed: no syntax tree */ }
+
+expr(X) ::= relation(R). { X = R; }
+expr(X) ::= arithmetic_expr(E). { X = E; }
+
+/* Logical tests */
+expr(X) ::= expr(Y) TEST_AND(T) expr(Z).
+{ /* logical AND: the operator token node is reused as the result, with both sub-expressions as operands */
+ X = T;
+ sttype_oper_set2(X, STNODE_OP_AND, Y, Z);
+ stnode_merge_location(X, Y, Z); /* NOTE(review): presumably makes the result span both operands - confirm in syntax-tree.c */
+}
+
+expr(X) ::= expr(Y) TEST_OR(T) expr(Z).
+{ /* logical OR: same shape as the AND rule, with STNODE_OP_OR */
+ X = T;
+ sttype_oper_set2(X, STNODE_OP_OR, Y, Z);
+ stnode_merge_location(X, Y, Z);
+}
+
+expr(X) ::= expr(Y) TEST_XOR(T) expr(Z).
+{ /* XOR is rewritten as (left OR right) AND NOT (left AND right) */
+ stnode_t *A = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY);
+ sttype_oper_set2(A, STNODE_OP_OR, stnode_dup(Y), stnode_dup(Z)); /* the OR branch gets duplicates of both operands */
+
+ stnode_t *B = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY);
+ sttype_oper_set2(B, STNODE_OP_AND, Y, Z); /* the AND branch takes the original operand nodes */
+
+ stnode_t *C = stnode_new(STTYPE_TEST, NULL, NULL, DFILTER_LOC_EMPTY);
+ sttype_oper_set1(C, STNODE_OP_NOT, B);
+
+ X = T; /* the operator token node becomes the outer AND */
+ sttype_oper_set2(X, STNODE_OP_AND, A, C);
+ stnode_merge_location(X, Y, Z);
+}
+
+expr(X) ::= TEST_NOT(T) expr(Y).
+{ /* logical negation: the operator token node is reused as the result */
+ X = T;
+ sttype_oper_set1(X, STNODE_OP_NOT, Y);
+ stnode_merge_location(X, T, Y); /* location derived from the operator token and its operand */
+}
+
+/* Any expression inside parens is simply that expression */
+expr(X) ::= LPAREN(L) expr(Y) RPAREN(R).
+{
+ X = Y;
+ stnode_merge_location(X, L, R); /* location derived from the two parenthesis tokens */
+ stnode_free(L); /* the parenthesis nodes do not become part of the tree, so they are freed here */
+ stnode_free(R);
+}
+
+/* Entities, or things that can be compared/tested/checked. Each atom passes the scanner's token node through unchanged. */
+atom(A) ::= STRING(S). { A = S; }
+atom(A) ::= CHARCONST(N). { A = N; }
+atom(A) ::= LITERAL(S). { A = S; }
+atom(A) ::= CONSTANT(C). { A = C; }
+
+named_field(X) ::= FIELD(F).
+{ /* the token is already a field node: pass it through */
+ X = F;
+}
+
+named_field(X) ::= IDENTIFIER(U).
+{ /* look the identifier up: an unknown name becomes a literal, a known one becomes a field */
+ X = U;
+ const char *name = stnode_token(U);
+ header_field_info *hfinfo = dfilter_resolve_unparsed(dfs, name);
+ if (hfinfo == NULL) {
+ stnode_replace(X, STTYPE_LITERAL, g_strdup(name)); /* the literal carries its own copy of the name */
+ }
+ else {
+ stnode_replace(X, STTYPE_FIELD, hfinfo);
+ }
+}
+
+layered_field(X) ::= named_field(F).
+{ /* no '#' suffix: the named field is used as is */
+ X = F;
+}
+
+layered_field(X) ::= named_field(F) HASH LBRACKET range_node_list(L) RBRACKET(R).
+{ /* field#[...]: attach the bracketed range list to a field node */
+	X = F;
+	if (stnode_type_id(X) != STTYPE_FIELD) {
+		FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X));
+		/* Nothing took the range nodes, so release them together with
+		 * the list, using the same disposer the grammar registers as
+		 * the destructor for this symbol. */
+		drange_node_free_list(L);
+	}
+	else {
+		sttype_field_set_range(X, L);
+		/* Delete the list, but not the drange_nodes that
+		 * the list contains. */
+		g_slist_free(L);
+	}
+	stnode_merge_location(X, F, R);
+	stnode_free(R);
+}
+
+layered_field(X) ::= named_field(F) HASH INTEGER(N).
+{ /* field#<integer>: attach a single range node parsed from the integer token */
+ X = F;
+ if (stnode_type_id(X) != STTYPE_FIELD) {
+ FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X));
+ }
+ else {
+ char *err_msg = NULL;
+ drange_node *range = drange_node_from_str(stnode_token(N), &err_msg);
+ if (err_msg != NULL) {
+ FAIL(dfs, N, "%s", err_msg);
+ g_free(err_msg);
+ }
+ sttype_field_set_range1(X, range); /* NOTE(review): also reached after a conversion failure - confirm the callee tolerates the value returned in that case */
+ }
+ stnode_merge_location(X, F, N);
+ stnode_free(N); /* the integer token node is not kept */
+}
+
+rawable_field(X) ::= layered_field(F).
+{ /* no '@' prefix: pass the node through */
+ X = F;
+}
+
+rawable_field(X) ::= ATSIGN(A) layered_field(F).
+{ /* '@' prefix: mark a field node as raw; anything that is not a field is an error */
+ X = F;
+ if (stnode_type_id(X) != STTYPE_FIELD) {
+ FAIL(dfs, X, "\"%s\" is not a valid protocol or protocol field.", stnode_token(X));
+ }
+ else {
+ sttype_field_set_raw(X, true);
+ }
+ stnode_merge_location(X, A, F);
+ stnode_free(A); /* the '@' token node is not kept */
+}
+
+reference(X) ::= DOLLAR(D) LBRACE rawable_field(F) RBRACE(R).
+{
+ /* convert field to reference */
+ if (stnode_type_id(F) != STTYPE_FIELD) {
+ X = stnode_new(STTYPE_REFERENCE, NULL, NULL, stnode_location(F)); /* a node is still produced after the failure, with no field data */
+ FAIL(dfs, F, "\"%s\" is not a valid protocol or protocol field.", stnode_token(F));
+ }
+ else {
+ X = stnode_new(STTYPE_REFERENCE, sttype_field_hfinfo(F), NULL, stnode_location(F));
+ sttype_field_set_drange(X, sttype_field_drange_steal(F)); /* NOTE(review): the "_steal" name suggests the range is moved out of the field node rather than copied - confirm */
+ sttype_field_set_raw(X, sttype_field_raw(F));
+ }
+ stnode_merge_location(X, D, R);
+ stnode_free(F); /* the field node and both delimiter tokens are released once the reference exists */
+ stnode_free(D);
+ stnode_free(R);
+}
+
+entity(E) ::= atom(A). { E = A; /* every entity alternative passes its node through unchanged */ }
+entity(E) ::= slice(R). { E = R; }
+entity(E) ::= function(F). { E = F; }
+entity(E) ::= rawable_field(F). { E = F; }
+entity(E) ::= reference(R). { E = R; }
+
+arithmetic_expr(T) ::= entity(N).
+{ /* an entity on its own is the simplest arithmetic expression */
+ T = N;
+}
+
+arithmetic_expr(T) ::= PLUS(P) arithmetic_expr(N). [UNARY_PLUS]
+{ /* unary plus adds no operator node: the operand itself is the result */
+ T = N;
+ stnode_merge_location(T, P, N);
+ stnode_free(P); /* the sign token node is not kept */
+}
+
+arithmetic_expr(T) ::= MINUS(M) arithmetic_expr(N). [UNARY_MINUS]
+{ /* unary minus: the sign token node becomes a one-operand operator node */
+ T = M;
+ sttype_oper_set1(T, STNODE_OP_UNARY_MINUS, N);
+ stnode_merge_location(T, M, N);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) BITWISE_AND(O) arithmetic_expr(M).
+{ /* all binary operators below share this shape: the operator token node is reused and given both operands */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_BITWISE_AND, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) PLUS(O) arithmetic_expr(M).
+{ /* addition */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_ADD, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) MINUS(O) arithmetic_expr(M).
+{ /* subtraction */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_SUBTRACT, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) STAR(O) arithmetic_expr(M).
+{ /* multiplication */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_MULTIPLY, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) RSLASH(O) arithmetic_expr(M).
+{ /* division */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_DIVIDE, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= arithmetic_expr(F) PERCENT(O) arithmetic_expr(M).
+{ /* modulo */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_MODULO, F, M);
+ stnode_merge_location(T, F, M);
+}
+
+arithmetic_expr(T) ::= LBRACE(L) arithmetic_expr(F) RBRACE(R).
+{ /* braces group an arithmetic expression; the inner node is the result */
+ T = F;
+ stnode_merge_location(T, L, R);
+ stnode_free(L); /* the brace token nodes are not kept */
+ stnode_free(R);
+}
+
+/* Relational tests. Each cmp_op rule reuses the token node and only sets its operator code; operands are attached by comparison_test. */
+cmp_op(O) ::= TEST_ALL_EQ(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_ALL_EQ);
+}
+
+cmp_op(O) ::= TEST_ANY_EQ(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_ANY_EQ);
+}
+
+cmp_op(O) ::= TEST_ALL_NE(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_ALL_NE);
+}
+
+cmp_op(O) ::= TEST_ANY_NE(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_ANY_NE);
+}
+
+cmp_op(O) ::= TEST_GT(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_GT);
+}
+
+cmp_op(O) ::= TEST_GE(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_GE);
+}
+
+cmp_op(O) ::= TEST_LT(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_LT);
+}
+
+cmp_op(O) ::= TEST_LE(L).
+{
+ O = L;
+ sttype_oper_set_op(O, STNODE_OP_LE);
+}
+
+comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) arithmetic_expr(F).
+{ /* simple comparison: the operator node (its code already set by cmp_op) receives both operands */
+ T = O;
+ sttype_oper_set2_args(O, E, F);
+ stnode_merge_location(T, E, F);
+}
+
+/* 'a == b == c' or 'a < b <= c <= d < e' */
+comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) comparison_test(R).
+{ /* a chain is expanded to (first comparison) AND (rest of the chain) */
+ stnode_t *L, *F;
+
+ F = R;
+ while (stnode_type_id(F) == STTYPE_TEST) { /* descend through nested test nodes until a non-test operand is reached */
+ sttype_oper_get(F, NULL, &F, NULL); /* NOTE(review): the second output is presumably the left operand - confirm in sttype-op.c */
+ }
+
+ L = O;
+ sttype_oper_set2_args(L, E, stnode_dup(F)); /* the shared operand is duplicated; the original stays inside the rest of the chain */
+
+ T = stnode_new_empty(STTYPE_TEST);
+ sttype_oper_set2(T, STNODE_OP_AND, L, R);
+ stnode_merge_location(T, E, R);
+}
+
+relation_test(T) ::= comparison_test(C). { T = C; }
+
+relation_test(T) ::= entity(E) TEST_CONTAINS(L) entity(F).
+{ /* "contains": operands are plain entities, not arithmetic expressions */
+ T = L;
+ sttype_oper_set2(T, STNODE_OP_CONTAINS, E, F);
+ stnode_merge_location(T, E, F);
+}
+
+relation_test(T) ::= entity(E) TEST_MATCHES(L) entity(F).
+{ /* "matches" / "~" */
+ T = L;
+ sttype_oper_set2(T, STNODE_OP_MATCHES, E, F);
+ stnode_merge_location(T, E, F);
+}
+
+relation_test(T) ::= entity(E) TEST_IN(O) set(S).
+{ /* set membership */
+ T = O;
+ sttype_oper_set2(T, STNODE_OP_IN, E, S);
+ stnode_merge_location(T, E, S);
+}
+
+relation_test(T) ::= entity(E) TEST_NOT TEST_IN(O) set(S).
+{ /* "not in": built on the "in" token node; the unlabeled "not" token is left to the token destructor */
+ T = O;
+ sttype_oper_set2(O, STNODE_OP_NOT_IN, E, S);
+ stnode_merge_location(T, E, S);
+}
+
+relation(R) ::= relation_test(T). { R = T; }
+
+relation(R) ::= ANY(A) relation_test(T).
+{ /* "any" prefix: tag the test with STNODE_MATCH_ANY */
+ R = T;
+ sttype_test_set_match(R, STNODE_MATCH_ANY);
+ stnode_merge_location(R, A, T);
+ stnode_free(A); /* the quantifier token node is not kept */
+}
+
+relation(R) ::= ALL(A) relation_test(T).
+{ /* "all" prefix: tag the test with STNODE_MATCH_ALL */
+ R = T;
+ sttype_test_set_match(R, STNODE_MATCH_ALL);
+ stnode_merge_location(R, A, T);
+ stnode_free(A);
+}
+
+/* Sets. Every element occupies two consecutive list cells: a value followed by NULL, or the two ends of a range. */
+
+set_element_list(N) ::= arithmetic_expr(X).
+{
+ N = g_slist_append(NULL, X);
+ N = g_slist_append(N, NULL); /* second cell is NULL for a single value */
+}
+
+set_element_list(N) ::= arithmetic_expr(X) DOTDOT arithmetic_expr(Y).
+{
+ N = g_slist_append(NULL, X);
+ N = g_slist_append(N, Y); /* second cell holds the expression after ".." */
+}
+
+set_list(L) ::= set_element_list(N).
+{
+ L = g_slist_concat(NULL, N);
+}
+
+set_list(L) ::= set_list(P) COMMA set_element_list(N).
+{
+ L = g_slist_concat(P, N); /* the new pair of cells is appended to the list built so far */
+}
+
+set(S) ::= LBRACE(LB) set_list(L) RBRACE(RB).
+{
+ S = stnode_new(STTYPE_SET, L, NULL, DFILTER_LOC_EMPTY); /* the list itself is handed to the set node as its data */
+ stnode_merge_location(S, LB, RB);
+ stnode_free(LB);
+ stnode_free(RB);
+}
+
+/* Slices */
+
+slice(R) ::= entity(E) LBRACKET range_node_list(L) RBRACKET.
+{ /* entity[ranges]: wrap the entity and its range list in a new slice node (created with an empty location) */
+ R = stnode_new(STTYPE_SLICE, NULL, NULL, DFILTER_LOC_EMPTY);
+ sttype_slice_set(R, E, L);
+
+ /* Delete the list, but not the drange_nodes that
+ * the list contains. */
+ g_slist_free(L);
+}
+
+range_node_list(L) ::= RANGE_NODE(N).
+{ /* first range: convert the raw token text into a drange_node and start the list */
+ char *err_msg = NULL;
+ drange_node *rn = drange_node_from_str(stnode_token(N), &err_msg);
+ if (err_msg != NULL) {
+ FAIL(dfs, N, "%s", err_msg);
+ g_free(err_msg);
+ }
+ L = g_slist_append(NULL, rn); /* NOTE(review): appended even after a conversion failure - confirm what is returned in that case */
+ stnode_free(N); /* only the converted range is kept, not the token node */
+}
+
+range_node_list(L) ::= range_node_list(P) COMMA RANGE_NODE(N).
+{ /* further ranges: same conversion, appended to the list built so far */
+ char *err_msg = NULL;
+ drange_node *rn = drange_node_from_str(stnode_token(N), &err_msg);
+ if (err_msg != NULL) {
+ FAIL(dfs, N, "%s", err_msg);
+ g_free(err_msg);
+ }
+ L = g_slist_append(P, rn);
+ stnode_free(N);
+}
+
+/* Functions */
+
+%code {
+	/*
+	 * Turn an identifier node into a function node. The function definition
+	 * is looked up by the node's token text; a failed lookup is reported
+	 * through FAIL, and the node is converted and returned either way.
+	 */
+	static stnode_t *
+	new_function(dfsyntax_t *dfs, stnode_t *node)
+	{
+		const char *func_name = stnode_token(node);
+		df_func_def_t *func_def = df_func_lookup(func_name);
+
+		if (func_def == NULL)
+			FAIL(dfs, node, "Function '%s' does not exist", func_name);
+
+		stnode_replace(node, STTYPE_FUNCTION, func_def);
+		return node;
+	}
+}
+
+/* A function can have one or more parameters */
+function(F) ::= IDENTIFIER(U) LPAREN func_params_list(P) RPAREN(R).
+{
+ F = new_function(dfs, U); /* converts the identifier node in place */
+ sttype_function_set_params(F, P);
+ stnode_merge_location(F, U, R);
+ stnode_free(R);
+}
+
+function ::= CONSTANT(U) LPAREN func_params_list RPAREN.
+{ /* the scanner emits CONSTANT for a name that is neither a known field nor a known function, so a call on it is an error */
+ FAIL(dfs, U, "Function '%s' does not exist", stnode_token(U));
+}
+
+/* A function can have zero parameters. */
+function(F) ::= IDENTIFIER(U) LPAREN RPAREN(R).
+{
+ F = new_function(dfs, U);
+ stnode_merge_location(F, U, R);
+ stnode_free(R);
+}
+
+function ::= CONSTANT(U) LPAREN RPAREN.
+{ /* same error as above, for an empty parameter list */
+ FAIL(dfs, U, "Function '%s' does not exist", stnode_token(U));
+}
+
+func_params_list(P) ::= arithmetic_expr(E).
+{ /* first parameter starts a new list */
+ P = g_slist_append(NULL, E);
+}
+
+func_params_list(P) ::= func_params_list(L) COMMA arithmetic_expr(E).
+{ /* later parameters are appended in source order */
+ P = g_slist_append(L, E);
+}
diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l
new file mode 100644
index 0000000..e5565d2
--- /dev/null
+++ b/epan/dfilter/scanner.l
@@ -0,0 +1,968 @@
+%top {
+/* Include this before everything else, for various large-file definitions */
+#include "config.h"
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+#include <wireshark.h>
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include <wsutil/str_util.h>
+
+#include "dfilter-int.h"
+#include "syntax-tree.h"
+#include "grammar.h"
+#include "dfunctions.h"
+}
+
+/*
+ * Always generate warnings.
+ */
+%option warn
+
+/*
+ * We want a reentrant scanner.
+ */
+%option reentrant
+
+/*
+ * We don't use input, so don't generate code for it.
+ */
+%option noinput
+
+/*
+ * We don't use unput, so don't generate code for it.
+ */
+%option nounput
+
+/*
+ * We don't read interactively from the terminal.
+ */
+%option never-interactive
+
+/*
+ * Prefix scanner routines with "df_yy" rather than "yy", so this scanner
+ * can coexist with other scanners.
+ */
+%option prefix="df_yy"
+
+/*
+ * We're reading from a string, so we don't need yywrap.
+ */
+%option noyywrap
+
+/*
+ * The type for the dfs we keep for a scanner.
+ */
+%option extra-type="dfsyntax_t *"
+
+%{
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Disable diagnostics in the code generated by Flex.
+ */
+DIAG_OFF_FLEX()
+
+WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id);
+#define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED)) /* advance the location, then emit an untyped node for the matched text */
+#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST)) /* same, with the node typed STTYPE_TEST */
+#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC)) /* same, with the node typed STTYPE_ARITHMETIC */
+
+WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_constant(dfsyntax_t *dfs, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value);
+
+WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value);
+WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string);
+WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string);
+
+static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c);
+static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn);
+static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep);
+
+static void update_location(dfsyntax_t *dfs, const char *text);
+static void update_string_loc(dfsyntax_t *dfs, const char *text);
+
+#define FAIL(...) /* log the failure site, then record a generic error at the scanner's current location; uses yyextra, so only valid inside rule actions */ \
+ do { \
+ ws_noisy("Scanning failed here."); \
+ dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
+ } while (0)
+
+%}
+
+FunctionIdentifier [[:alpha:]_][[:alnum:]_]*
+
+/*
+ * Cannot start with '-'. Some protocol names can contain '-', for example "mac-lte".
+ * Note that some protocol names start with a number, for example "9p". This is
+ * handled as a special case for numeric patterns.
+ * Some protocol names contain dots, e.g.: _ws.expert
+ * A protocol or protocol field cannot contain DOTDOT anywhere.
+ */
+VarIdentifier [[:alnum:]_][[:alnum:]_-]*
+ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})*
+
+hex2 [[:xdigit:]]{2}
+ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
+HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
+DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
+
+hex4 [[:xdigit:]]{4}
+DotQuadMacAddress {hex4}\.{hex4}\.{hex4}
+
+ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
+HyphenBytes {hex2}(-{hex2})+
+DotBytes {hex2}(\.{hex2})+
+
+DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
+IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
+
+h16 [0-9A-Fa-f]{1,4}
+ls32 {h16}:{h16}|{IPv4Address}
+IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
+
+V4CidrPrefix \/[[:digit:]]{1,2}
+V6CidrPrefix \/[[:digit:]]{1,3}
+
+/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
+StartAlphabet [[:alnum:]_:]
+Alphabet [[:alnum:]_:/-]
+LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
+
+Exponent ([eE][+-]?[[:digit:]]+)
+HexExponent ([pP][+-]?[[:digit:]]+)
+
+%x RANGE
+%x LAYER
+%x DQUOTE
+%x SQUOTE
+
+%%
+
+[[:blank:]\n\r]+ {
+ update_location(yyextra, yytext); /* whitespace produces no token but still advances the tracked location */
+}
+
+"(" return simple(TOKEN_LPAREN); /* punctuation and the any/all keywords: untyped nodes carrying only the token text */
+")" return simple(TOKEN_RPAREN);
+"," return simple(TOKEN_COMMA);
+"{" return simple(TOKEN_LBRACE);
+".." return simple(TOKEN_DOTDOT);
+"}" return simple(TOKEN_RBRACE);
+"$" return simple(TOKEN_DOLLAR);
+"@" return simple(TOKEN_ATSIGN);
+"any" return simple(TOKEN_ANY);
+"all" return simple(TOKEN_ALL);
+
+"==" return test(TOKEN_TEST_ANY_EQ); /* "==" and "eq" map to the ANY_EQ token; "===" below maps to ALL_EQ */
+"eq" return test(TOKEN_TEST_ANY_EQ);
+"any_eq" return test(TOKEN_TEST_ANY_EQ);
+"!=" return test(TOKEN_TEST_ALL_NE); /* "!=" and "ne" map to the ALL_NE token; "!==" below maps to ANY_NE */
+"ne" return test(TOKEN_TEST_ALL_NE);
+"all_ne" return test(TOKEN_TEST_ALL_NE);
+"===" return test(TOKEN_TEST_ALL_EQ);
+"all_eq" return test(TOKEN_TEST_ALL_EQ);
+"!==" return test(TOKEN_TEST_ANY_NE);
+"any_ne" return test(TOKEN_TEST_ANY_NE);
+">" return test(TOKEN_TEST_GT);
+"gt" return test(TOKEN_TEST_GT);
+">=" return test(TOKEN_TEST_GE);
+"ge" return test(TOKEN_TEST_GE);
+"<" return test(TOKEN_TEST_LT);
+"lt" return test(TOKEN_TEST_LT);
+"<=" return test(TOKEN_TEST_LE);
+"le" return test(TOKEN_TEST_LE);
+"contains" return test(TOKEN_TEST_CONTAINS);
+"~" return test(TOKEN_TEST_MATCHES);
+"matches" return test(TOKEN_TEST_MATCHES);
+"!" return test(TOKEN_TEST_NOT);
+"not" return test(TOKEN_TEST_NOT);
+"&&" return test(TOKEN_TEST_AND);
+"and" return test(TOKEN_TEST_AND);
+"||" return test(TOKEN_TEST_OR);
+"or" return test(TOKEN_TEST_OR);
+"^^" return test(TOKEN_TEST_XOR);
+"xor" return test(TOKEN_TEST_XOR);
+"in" return test(TOKEN_TEST_IN);
+
+"+" return math(TOKEN_PLUS); /* arithmetic operators: nodes typed STTYPE_ARITHMETIC */
+"-" return math(TOKEN_MINUS);
+"*" return math(TOKEN_STAR);
+"/" return math(TOKEN_RSLASH);
+"%" return math(TOKEN_PERCENT);
+"&" return math(TOKEN_BITWISE_AND);
+"bitwise_and" return math(TOKEN_BITWISE_AND);
+
+"#" {
+ BEGIN(LAYER); /* what follows '#' is scanned in the LAYER state: digits or '[' */
+ return simple(TOKEN_HASH);
+}
+
+<LAYER>[[:digit:]]+ {
+ BEGIN(INITIAL); /* a plain number ends the LAYER state */
+ update_location(yyextra, yytext);
+ return set_lval_simple(yyextra, TOKEN_INTEGER, yytext, STTYPE_UNINITIALIZED);
+}
+
+<LAYER>[^[:digit:][] {
+ update_location(yyextra, yytext); /* any other character after '#' aborts the scan */
+ FAIL("Expected digit or \"[\", not \"%s\"", yytext);
+ return SCAN_FAILED;
+}
+
+<INITIAL,LAYER>"[" {
+ BEGIN(RANGE); /* bracket contents are scanned in the RANGE state until ']' */
+ return simple(TOKEN_LBRACKET);
+}
+
+<RANGE>[^],]+ {
+ update_location(yyextra, yytext); /* everything up to the next ',' or ']' is one raw range node; the grammar converts the text later */
+ return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
+}
+
+<RANGE>"," {
+ return simple(TOKEN_COMMA);
+}
+
+<RANGE>"]" {
+ BEGIN(INITIAL); /* the closing bracket ends the RANGE state */
+ return simple(TOKEN_RBRACKET);
+}
+
+<RANGE><<EOF>> {
+ update_location(yyextra, yytext); /* input ended while still inside the brackets */
+ FAIL("The right bracket was missing from a slice.");
+ return SCAN_FAILED;
+}
+
+[rR]{0,1}\042 {
+ /* start quote of a quoted string */
+ /*
+ * The example of how to scan for strings was taken from
+ * the flex manual, from the section "Start Conditions".
+ * See: https://westes.github.io/flex/manual/Start-Conditions.html
+ */
+ BEGIN(DQUOTE);
+ update_location(yyextra, yytext);
+ yyextra->string_loc = yyextra->location; /* the string's location starts at the opening quote and is extended by update_string_loc() */
+
+ yyextra->quoted_string = g_string_new(NULL); /* accumulates the string contents across the DQUOTE rules */
+
+ if (yytext[0] == 'r' || yytext[0] == 'R') {
+ /*
+ * This is a raw string (like in Python). Rules: 1) The two
+ * escape sequences are \\ and \". 2) Backslashes are
+ * preserved. 3) Double quotes in the string must be escaped.
+ * Corollary: Strings cannot end with an odd number of
+ * backslashes.
+ * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
+ * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
+ */
+ yyextra->raw_string = true;
+ }
+ else {
+ yyextra->raw_string = false;
+ }
+}
+
+<DQUOTE><<EOF>> {
+ /* unterminated string */
+ update_string_loc(yyextra, yytext);
+ g_string_free(yyextra->quoted_string, true); /* discard the partial string before failing */
+ yyextra->quoted_string = NULL;
+ FAIL("The final quote was missing from a quoted string.");
+ return SCAN_FAILED;
+}
+
+<DQUOTE>\042 {
+ /* end quote */
+ BEGIN(INITIAL);
+ update_string_loc(yyextra, yytext);
+ int token = set_lval_quoted_string(yyextra, yyextra->quoted_string); /* the accumulated GString is passed on to the new node */
+ yyextra->quoted_string = NULL; /* so the scanner state no longer refers to it */
+ yyextra->string_loc.col_start = -1;
+ return token;
+}
+
+<DQUOTE>\\[0-7]{1,3} {
+ /* octal sequence */
+ update_string_loc(yyextra, yytext);
+ if (yyextra->raw_string) {
+ g_string_append(yyextra->quoted_string, yytext); /* raw strings keep the escape text verbatim */
+ }
+ else {
+ unsigned long result;
+ result = strtoul(yytext + 1, NULL, 8); /* skip the backslash */
+ if (result > 0xff) { /* three octal digits can exceed one byte */
+ g_string_free(yyextra->quoted_string, true);
+ yyextra->quoted_string = NULL;
+ FAIL("%s is larger than 255.", yytext);
+ return SCAN_FAILED;
+ }
+ g_string_append_c(yyextra->quoted_string, (char) result);
+ }
+}
+
+<DQUOTE>\\x[[:xdigit:]]{1,2} {
+ /* hex sequence */
+ /*
+ * C standard does not place a limit on the number of hex
+ * digits after \x... but we do. \xNN can have 1 or two Ns, not more.
+ */
+ update_string_loc(yyextra, yytext);
+ if (yyextra->raw_string) {
+ g_string_append(yyextra->quoted_string, yytext); /* raw strings keep the escape text verbatim */
+ }
+ else {
+ unsigned long result;
+ result = strtoul(yytext + 2, NULL, 16); /* skip the backslash and the 'x'; at most two digits, so the value fits in a byte */
+ g_string_append_c(yyextra->quoted_string, (char) result);
+ }
+}
+
+<DQUOTE>\\u[[:xdigit:]]{0,4} {
+ /* universal character name */
+ update_string_loc(yyextra, yytext); /* the pattern also matches fewer than four digits; the helper decides whether the sequence is acceptable */
+ if (yyextra->raw_string) {
+ g_string_append(yyextra->quoted_string, yytext);
+ }
+ else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
+ g_string_free(yyextra->quoted_string, true); /* NOTE(review): no FAIL here, so the helper presumably reports the error itself - confirm */
+ yyextra->quoted_string = NULL;
+ return SCAN_FAILED;
+ }
+}
+
+<DQUOTE>\\U[[:xdigit:]]{0,8} {
+ /* universal character name */
+ update_string_loc(yyextra, yytext); /* eight-digit form; otherwise identical to the rule above */
+ if (yyextra->raw_string) {
+ g_string_append(yyextra->quoted_string, yytext);
+ }
+ else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
+ g_string_free(yyextra->quoted_string, true);
+ yyextra->quoted_string = NULL;
+ return SCAN_FAILED;
+ }
+}
+
+
+<DQUOTE>\\. {
+ /* escaped character */
+ update_string_loc(yyextra, yytext);
+ if (yyextra->raw_string) {
+ g_string_append(yyextra->quoted_string, yytext); /* raw strings keep the backslash and the character */
+ }
+ else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) { /* the helper records the error for an unknown escape */
+ g_string_free(yyextra->quoted_string, true);
+ yyextra->quoted_string = NULL;
+ return SCAN_FAILED;
+ }
+}
+
+<DQUOTE>[^\\\042]+ {
+ /* non-escaped string */
+ update_string_loc(yyextra, yytext);
+ g_string_append(yyextra->quoted_string, yytext); /* a run without backslash or double quote is copied as is */
+}
+
+
+\047 {
+ /* start quote of a quoted character value */
+ BEGIN(SQUOTE);
+ update_location(yyextra, yytext);
+ yyextra->string_loc = yyextra->location;
+
+ yyextra->quoted_string = g_string_new("'"); /* unlike double-quoted strings, the collected text includes the quotes */
+}
+
+<SQUOTE><<EOF>> {
+ /* unterminated character value */
+ update_string_loc(yyextra, yytext);
+ g_string_free(yyextra->quoted_string, true);
+ yyextra->quoted_string = NULL;
+ FAIL("The final quote was missing from a character constant.");
+ return SCAN_FAILED;
+}
+
+<SQUOTE>\047 {
+ /* end quote */
+ BEGIN(INITIAL);
+ update_string_loc(yyextra, yytext);
+ g_string_append_c(yyextra->quoted_string, '\'');
+ int token = set_lval_charconst(yyextra, yyextra->quoted_string); /* the helper consumes the GString and parses the constant */
+ yyextra->quoted_string = NULL;
+ yyextra->string_loc.col_start = -1;
+ return token;
+}
+
+<SQUOTE>\\. {
+ /* escaped character */
+ update_string_loc(yyextra, yytext);
+ g_string_append(yyextra->quoted_string, yytext); /* kept undecoded here; the text is parsed when the closing quote is seen */
+}
+
+<SQUOTE>[^\\\047]+ {
+ /* non-escaped string */
+ update_string_loc(yyextra, yytext);
+ g_string_append(yyextra->quoted_string, yytext);
+}
+
+ /* NOTE: None of the patterns below can match ".." anywhere in the token string. */
+
+ /* MAC address. */
+
+{ColonMacAddress}|{HyphenMacAddress} {
+ /* MAC Address. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext); /* always a literal */
+}
+
+{DotMacAddress}|{DotQuadMacAddress} {
+ /* MAC Address, can also be a field. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext); /* becomes a field if the text resolves to one, otherwise a literal */
+}
+
+ /* IP address. */
+
+{IPv4Address}{V4CidrPrefix}? {
+ /* IPv4 with or without prefix. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+{IPv6Address}{V6CidrPrefix}? {
+ /* IPv6 with or without prefix. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+ /* Integer */
+
+[[:digit:]][[:digit:]]* {
+ /* Numeric or field. */
+ update_location(yyextra, yytext);
+ /* Check if we have a protocol or protocol field, otherwise assume a literal. */
+ /* It is only reasonable to assume a literal here, instead of a
+ * (possibly non-existent) protocol field, because protocol field filter names
+ * should not start with a digit (the lexical syntax for numbers). */
+ header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext);
+ if (hfinfo != NULL) {
+ return set_lval_field(yyextra, hfinfo, yytext);
+ }
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+0[bBxX]?[[:xdigit:]]+ {
+ /* Binary or octal or hexadecimal. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext); /* never looked up as a field */
+}
+
+ /* Floating point. */
+
+[[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}? {
+ /* Decimal float with optional exponent. */
+ /* Significand cannot have any side omitted. */
+ update_location(yyextra, yytext);
+ /* Check if we have a protocol or protocol field, otherwise assume a literal. */
+ /* It is only reasonable to assume a literal here, instead of a
+ * (possibly non-existent) protocol field, because protocol field filter names
+ * should not start with a digit (the lexical syntax for numbers). */
+ header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext);
+ if (hfinfo != NULL) {
+ return set_lval_field(yyextra, hfinfo, yytext);
+ }
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? {
+ /* Hexadecimal float with optional exponent. Can't be a field because
+ * a field cannot begin with 0x. */
+ /* Significand cannot have any side omitted. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+:[[:xdigit:]]+ {
+ /* Numeric prefixed with ':'. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext + 1, yytext); /* the value drops the leading ':' while the token text keeps it */
+}
+
+[[:xdigit:]]+ {
+ /* Numeric or field. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext); /* becomes a field if the text resolves to one, otherwise a literal */
+}
+
+ /* Bytes. */
+
+:?{ColonBytes} {
+ /* Bytes. */
+ update_location(yyextra, yytext);
+ if (yytext[0] == ':') /* an optional leading ':' is removed from the value but kept in the token text */
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+:?{HyphenBytes} {
+ /* Bytes. */
+ update_location(yyextra, yytext);
+ if (yytext[0] == ':')
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+:?{DotBytes} {
+ /* DotBytes, can be a field without ':' prefix. */
+ update_location(yyextra, yytext);
+ if (yytext[0] == ':') /* with the prefix the text is always a literal */
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
+}
+
+ /* Identifier (protocol/field/function name). */
+
+ /* This must come before ProtoFieldIdentifier to match function names. */
+{FunctionIdentifier} {
+ /* Identifier (field or function) or constant (bytes without separator). */
+ /* We use CONSTANT instead of LITERAL because the difference is significant
+ * in the syntactical grammar. */
+ update_location(yyextra, yytext);
+ header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, yytext);
+ if (hfinfo != NULL) {
+ return set_lval_identifier(yyextra, yytext, yytext); /* known field name */
+ }
+ df_func_def_t *def = df_func_lookup(yytext);
+ if (def != NULL) {
+ return set_lval_identifier(yyextra, yytext, yytext); /* known function name */
+ }
+ return set_lval_constant(yyextra, yytext, yytext); /* neither: emitted as CONSTANT */
+}
+
+\.{ProtoFieldIdentifier} {
+ /* Identifier, prefixed with a '.'. */
+ update_location(yyextra, yytext);
+ const char *name = yytext + 1; /* the name is looked up without the leading '.' */
+ header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra, name);
+ if (hfinfo == NULL) { /* with the prefix the text must be a field; there is no literal fallback */
+ FAIL("\"%s\" is not a valid protocol or protocol field.", name);
+ return SCAN_FAILED;
+ }
+ return set_lval_field(yyextra, hfinfo, yytext);
+}
+
+{ProtoFieldIdentifier} {
+ /* Catch-all for protocol values. Can also be a literal. */
+ update_location(yyextra, yytext);
+ return set_lval_identifier(yyextra, yytext, yytext); /* the grammar resolves IDENTIFIER tokens to a field or a literal */
+}
+
+{LiteralValue} {
+ /* Catch-all for semantic values. */
+ update_location(yyextra, yytext);
+ /* We use literal here because identifiers (using unparsed) should have
+ * matched one of the previous rules. */
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+. {
+ /* Default */
+ update_location(yyextra, yytext);
+ if (isprint_string(yytext)) /* only echo the character in the message when it is printable */
+ FAIL("\"%s\" was unexpected in this context.", yytext);
+ else
+ FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
+ return SCAN_FAILED;
+}
+
+%%
+
+/*
+ * Turn diagnostics back on, so we check the code that we've written.
+ */
+DIAG_ON_FLEX()
+
+static void
+_update_location(dfsyntax_t *dfs, size_t len)
+{
+	/* Step past the previous token, then record `len` as the width of the current one. */
+	long prev_width = (long)dfs->location.col_len;
+
+	dfs->location.col_start = dfs->location.col_start + prev_width;
+	dfs->location.col_len = len;
+}
+
+static void
+update_location(dfsyntax_t *dfs, const char *text)
+{
+	/* Advance the tracked location by the length of the matched text. */
+	size_t text_len = strlen(text);
+
+	_update_location(dfs, text_len);
+}
+
+static void
+update_string_loc(dfsyntax_t *dfs, const char *text)
+{
+	/* Advance the tracked location and widen the span of the string being collected. */
+	size_t chunk_len = strlen(text);
+
+	_update_location(dfs, chunk_len);
+	dfs->string_loc.col_len = dfs->string_loc.col_len + chunk_len;
+}
+
+static int
+set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id)
+{
+	/* Node without a data payload: only a copy of the token text and the location are recorded. */
+	char *token_copy = g_strdup(token_value);
+
+	dfs->lval = stnode_new(type_id, NULL, token_copy, dfs->location);
+	return token;
+}
+
+static int
+set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value)
+{
+	/* Literal node carrying private copies of the value and of the token text. */
+	char *value_copy = g_strdup(value);
+	char *token_copy = g_strdup(token_value);
+
+	dfs->lval = stnode_new(STTYPE_LITERAL, value_copy, token_copy, dfs->location);
+	return TOKEN_LITERAL;
+}
+
+static int
+set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value)
+{
+	/* Built as a literal node, but reported to the parser as TOKEN_IDENTIFIER. */
+	char *value_copy = g_strdup(value);
+	char *token_copy = g_strdup(token_value);
+
+	dfs->lval = stnode_new(STTYPE_LITERAL, value_copy, token_copy, dfs->location);
+	return TOKEN_IDENTIFIER;
+}
+
+static int
+set_lval_constant(dfsyntax_t *dfs, const char *value, const char *token_value)
+{
+	/* Built as a literal node, but reported to the parser as TOKEN_CONSTANT. */
+	char *value_copy = g_strdup(value);
+	char *token_copy = g_strdup(token_value);
+
+	dfs->lval = stnode_new(STTYPE_LITERAL, value_copy, token_copy, dfs->location);
+	return TOKEN_CONSTANT;
+}
+
+static int
+set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value)
+{
+	/*
+	 * The text may name a field or be a plain value: emit a field node when
+	 * the lookup succeeds and a literal otherwise, and flag the node as
+	 * unparsed in both cases.
+	 */
+	const header_field_info *hfinfo = dfilter_resolve_unparsed(dfs, value);
+	int token = (hfinfo != NULL)
+			? set_lval_field(dfs, hfinfo, token_value)
+			: set_lval_literal(dfs, value, token_value);
+
+	stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
+	return token;
+}
+
+static int
+set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value)
+{
+	/* The hfinfo pointer is stored as is (const cast away); only the token text is copied. */
+	void *field_data = (void *)hfinfo;
+	char *token_copy = g_strdup(token_value);
+
+	dfs->lval = stnode_new(STTYPE_FIELD, field_data, token_copy, dfs->location);
+	return TOKEN_FIELD;
+}
+
+/*
+ * Store a STTYPE_STRING node carrying 'quoted_string' itself as its data.
+ * The token text is an escaped rendering of the string contents, and the
+ * node is located at the accumulated string location.
+ */
+static int
+set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string)
+{
+	char *escaped = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true);
+
+	dfs->lval = stnode_new(STTYPE_STRING, quoted_string, escaped, dfs->string_loc);
+	return TOKEN_STRING;
+}
+
+/*
+ * Turn the accumulated character constant text into a STTYPE_CHARCONST node
+ * whose data is a heap copy of the parsed numeric value.  Returns
+ * SCAN_FAILED (after releasing the text) if the constant is malformed.
+ */
+static int
+set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string)
+{
+	unsigned long number;
+	/* Take over the character buffer; only the GString wrapper is freed. */
+	char *token_value = g_string_free(quoted_string, false);
+
+	if (!parse_charconst(dfs, token_value, &number)) {
+		g_free(token_value);
+		return SCAN_FAILED;
+	}
+
+	dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc);
+	return TOKEN_CHARCONST;
+}
+
+/*
+ * Append to 'str' the character denoted by the escape sequence "\c".
+ * Returns false, with an error recorded in 'dfs', if 'c' does not name a
+ * supported single-character escape.
+ */
+static bool
+append_escaped_char(dfsyntax_t *dfs, GString *str, char c)
+{
+	char decoded;
+
+	switch (c) {
+		case 'a':  decoded = '\a'; break;
+		case 'b':  decoded = '\b'; break;
+		case 'f':  decoded = '\f'; break;
+		case 'n':  decoded = '\n'; break;
+		case 'r':  decoded = '\r'; break;
+		case 't':  decoded = '\t'; break;
+		case 'v':  decoded = '\v'; break;
+		case '\\':
+		case '\'':
+		case '\"':
+			/* These escapes stand for themselves. */
+			decoded = c;
+			break;
+		default:
+			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location,
+					"\\%c is not a valid character escape sequence", c);
+			return false;
+	}
+
+	g_string_append_c(str, decoded);
+	return true;
+}
+
+/*
+ * Parse a C universal character name ("\uXXXX" or "\UXXXXXXXX") at 'str'.
+ *
+ * Exactly 4 (for 'u') or 8 (for 'U') hexadecimal digits are consumed; any
+ * characters that follow, even further hex digits, are left for the caller.
+ *
+ * On success the code point is stored in '*valuep', '*ret_endptr' (when
+ * 'ret_endptr' is not NULL) is set to the first character after the last
+ * digit, and true is returned.  Returns false, without reporting an error
+ * itself, if the text is not a well-formed universal character name or
+ * names a code point that is not permitted.
+ */
+static bool
+parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep)
+{
+	uint64_t val = 0;
+	int ndigits;
+
+	if (str[0] != '\\')
+		return false;
+
+	if (str[1] == 'u')
+		ndigits = 4;
+	else if (str[1] == 'U')
+		ndigits = 8;
+	else
+		return false;
+
+	/*
+	 * Accumulate exactly 'ndigits' hex digits.  A NUL terminator is not a
+	 * hex digit, so this never reads past the end of the string.
+	 */
+	for (int i = 2; i < ndigits + 2; i++) {
+		int digit = g_ascii_xdigit_value(str[i]);
+
+		if (digit < 0)
+			return false;
+		val = (val << 4) | (uint64_t)digit;
+	}
+
+	/*
+	 * Ref: https://en.cppreference.com/w/c/language/escape
+	 * Range of universal character names
+	 *
+	 * If a universal character name corresponds to a code point that is
+	 * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a
+	 * surrogate code point (the range 0xD800-0xDFFF, inclusive), or
+	 * greater than 0x10FFFF, i.e. not a Unicode code point (since C23),
+	 * the program is ill-formed. In other words, members of basic source
+	 * character set and control characters (in ranges 0x0-0x1F and
+	 * 0x7F-0x9F) cannot be expressed in universal character names.
+	 */
+	if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60)
+		return false;
+	else if (val >= 0xD800 && val <= 0xDFFF)
+		return false;
+	else if (val > 0x10FFFF)
+		return false;
+
+	*valuep = (gunichar)val;
+	if (ret_endptr)
+		*ret_endptr = (char *)str + 2 + ndigits;
+	return true;
+}
+
+/*
+ * Append to 'str' the character named by the universal character name 'ucn'.
+ * Returns false, with an error recorded in 'dfs', if 'ucn' is not valid.
+ */
+static bool
+append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn)
+{
+	gunichar code_point;
+
+	if (parse_universal_character_name(dfs, ucn, NULL, &code_point)) {
+		g_string_append_unichar(str, code_point);
+		return true;
+	}
+
+	dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn);
+	return false;
+}
+
+/*
+ * Parse the text of a character constant, including its enclosing single
+ * quotes (e.g. 'a', '\n', '\x41', '\101', '\u00e9'), into a number.
+ *
+ * 's' must start with the opening quote.  On success the value is stored in
+ * '*valuep' and true is returned.  On failure an error located at the
+ * string location is recorded in 'dfs' and false is returned.
+ */
+static bool
+parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep)
+{
+	const char *cp;
+	unsigned long value;
+	gunichar unival;
+	char *endptr;
+
+	cp = s + 1;	/* skip the leading ' */
+	if (*cp == '\'') {
+		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant.");
+		return false;
+	}
+
+	if (*cp == '\\') {
+		/*
+		 * C escape sequence.
+		 * An escape sequence is an octal number \NNN,
+		 * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
+		 * that stands for the byte value of the equivalent
+		 * C-escape in ASCII encoding.
+		 *
+		 * Every case must leave 'cp' pointing at the closing quote.
+		 */
+		cp++;
+		switch (*cp) {
+
+		case '\0':
+			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+			return false;
+
+		case 'a':
+			value = '\a';
+			cp++;
+			break;
+
+		case 'b':
+			value = '\b';
+			cp++;
+			break;
+
+		case 'f':
+			value = '\f';
+			cp++;
+			break;
+
+		case 'n':
+			value = '\n';
+			/* Step over the 'n' like every other one-letter escape. */
+			cp++;
+			break;
+
+		case 'r':
+			value = '\r';
+			cp++;
+			break;
+
+		case 't':
+			value = '\t';
+			cp++;
+			break;
+
+		case 'v':
+			value = '\v';
+			cp++;
+			break;
+
+		case '\'':
+			value = '\'';
+			cp++;
+			break;
+
+		case '\\':
+			value = '\\';
+			cp++;
+			break;
+
+		case '"':
+			value = '"';
+			cp++;
+			break;
+
+		case 'x':
+			/* One or two hexadecimal digits. */
+			cp++;
+			if (*cp >= '0' && *cp <= '9')
+				value = *cp - '0';
+			else if (*cp >= 'A' && *cp <= 'F')
+				value = 10 + (*cp - 'A');
+			else if (*cp >= 'a' && *cp <= 'f')
+				value = 10 + (*cp - 'a');
+			else {
+				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+				return false;
+			}
+			cp++;
+			if (*cp != '\'') {
+				value <<= 4;
+				if (*cp >= '0' && *cp <= '9')
+					value |= *cp - '0';
+				else if (*cp >= 'A' && *cp <= 'F')
+					value |= 10 + (*cp - 'A');
+				else if (*cp >= 'a' && *cp <= 'f')
+					value |= 10 + (*cp - 'a');
+				else {
+					dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+					return false;
+				}
+				/*
+				 * Only step over a second digit if there was
+				 * one; with a single digit 'cp' is already at
+				 * the closing quote.
+				 */
+				cp++;
+			}
+			break;
+
+		case 'u':
+		case 'U':
+			/* s+1 is the backslash that starts the name. */
+			if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) {
+				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s);
+				return false;
+			}
+			value = (unsigned long)unival;
+			cp = endptr;
+			break;
+
+		default:
+			/* Octal: one to three digits, value at most 0xFF. */
+			if (*cp >= '0' && *cp <= '7')
+				value = *cp - '0';
+			else {
+				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+				return false;
+			}
+			if (*(cp + 1) != '\'') {
+				cp++;
+				value <<= 3;
+				if (*cp >= '0' && *cp <= '7')
+					value |= *cp - '0';
+				else {
+					dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+					return false;
+				}
+				if (*(cp + 1) != '\'') {
+					cp++;
+					value <<= 3;
+					if (*cp >= '0' && *cp <= '7')
+						value |= *cp - '0';
+					else {
+						dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
+						return false;
+					}
+				}
+			}
+			if (value > 0xFF) {
+				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s);
+				return false;
+			}
+			cp++;
+		}
+	} else {
+		value = *cp++;
+		if (!g_ascii_isprint(value)) {
+			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value);
+			return false;
+		}
+	}
+
+	/* Exactly the closing quote must remain. */
+	if ((*cp != '\'') || (*(cp + 1) != '\0')){
+		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s);
+		return false;
+	}
+
+	*valuep = value;
+	return true;
+}
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c
new file mode 100644
index 0000000..fec84a9
--- /dev/null
+++ b/epan/dfilter/semcheck.c
@@ -0,0 +1,1599 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include <string.h>
+
+#include "dfilter-int.h"
+#include "semcheck.h"
+#include "syntax-tree.h"
+#include "sttype-field.h"
+#include "sttype-slice.h"
+#include "sttype-op.h"
+#include "sttype-set.h"
+#include "sttype-function.h"
+#include "sttype-pointer.h"
+
+#include <epan/exceptions.h>
+#include <epan/packet.h>
+
+#include <wsutil/ws_assert.h>
+#include <wsutil/wslog.h>
+
+#include <ftypes/ftypes.h>
+
+
+#define FAIL(dfw, node, ...) /* Log, then report a formatted error located at 'node' through dfilter_fail_throw(). */ \
+ do { \
+ ws_noisy("Semantic check failed here."); \
+ dfilter_fail_throw(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
+ } while (0)
+
+#define FAIL_HERE(dfw) /* Log, then throw TypeError; the 'dfw' argument is not used by the expansion. */ \
+ do { \
+ ws_noisy("Semantic check failed here."); \
+ THROW(TypeError); \
+ } while (0)
+
+typedef bool (*FtypeCanFunc)(enum ftenum);
+
+static ftenum_t
+check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op,
+ stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
+ ftenum_t lhs_ftype);
+
+static void
+check_relation(dfwork_t *dfw, stnode_op_t st_op,
+ FtypeCanFunc can_func, bool allow_partial_value,
+ stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2);
+
+static void
+semcheck(dfwork_t *dfw, stnode_t *st_node);
+
+static fvalue_t *
+mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s,
+ df_loc_t loc);
+
+/* Compares two ftenum_t's and decides whether they are compatible,
+ * i.e. whether they belong to the same basic type family. */
+bool
+compatible_ftypes(ftenum_t a, ftenum_t b)
+{
+	switch (a) {
+		/* Types that are only compatible with themselves. */
+		case FT_NONE:
+		case FT_BOOLEAN:
+		case FT_PROTOCOL:
+		case FT_ABSOLUTE_TIME:
+		case FT_RELATIVE_TIME:
+		case FT_IEEE_11073_SFLOAT:
+		case FT_IEEE_11073_FLOAT:
+		case FT_IPv4:
+		case FT_IPv6:
+			return a == b;
+
+		/* Floating point family. */
+		case FT_FLOAT:		/* XXX - should be able to compare with INT */
+		case FT_DOUBLE:		/* XXX - should be able to compare with INT */
+			return b == FT_FLOAT || b == FT_DOUBLE;
+
+		/* Byte sequence family. */
+		case FT_ETHER:
+		case FT_BYTES:
+		case FT_UINT_BYTES:
+		case FT_GUID:
+		case FT_OID:
+		case FT_AX25:
+		case FT_VINES:
+		case FT_FCWWN:
+		case FT_REL_OID:
+		case FT_SYSTEM_ID:
+			switch (b) {
+				case FT_ETHER:
+				case FT_BYTES:
+				case FT_UINT_BYTES:
+				case FT_GUID:
+				case FT_OID:
+				case FT_AX25:
+				case FT_VINES:
+				case FT_FCWWN:
+				case FT_REL_OID:
+				case FT_SYSTEM_ID:
+					return true;
+				default:
+					return false;
+			}
+
+		/* Integer families defer to the ftype conversion predicates. */
+		case FT_UINT8:
+		case FT_UINT16:
+		case FT_UINT24:
+		case FT_UINT32:
+		case FT_CHAR:
+		case FT_FRAMENUM:
+		case FT_IPXNET:
+			return ftype_can_val_to_uinteger(b);
+
+		case FT_UINT40:
+		case FT_UINT48:
+		case FT_UINT56:
+		case FT_UINT64:
+		case FT_EUI64:
+			return ftype_can_val_to_uinteger64(b);
+
+		case FT_INT8:
+		case FT_INT16:
+		case FT_INT24:
+		case FT_INT32:
+			return ftype_can_val_to_sinteger(b);
+
+		case FT_INT40:
+		case FT_INT48:
+		case FT_INT56:
+		case FT_INT64:
+			return ftype_can_val_to_sinteger64(b);
+
+		/* String family. */
+		case FT_STRING:
+		case FT_STRINGZ:
+		case FT_UINT_STRING:
+		case FT_STRINGZPAD:
+		case FT_STRINGZTRUNC:
+			return b == FT_STRING ||
+				b == FT_STRINGZ ||
+				b == FT_UINT_STRING ||
+				b == FT_STRINGZPAD ||
+				b == FT_STRINGZTRUNC;
+
+		case FT_NUM_TYPES:
+			ws_assert_not_reached();
+	}
+
+	ws_assert_not_reached();
+	return false;
+}
+
+/* Don't set the error message if it's already set.
+ * When 'str' is non-NULL and 'dfw' has no error yet, 'str' is passed to
+ * df_error_new() to become a generic error; in every other case 'str' is
+ * handed to g_free(). 'str' and 'dfw' are expanded more than once. */
+#define SET_ERROR(dfw, str) \
+ do { \
+ if ((str) != NULL && (dfw)->error == NULL) { \
+ (dfw)->error = df_error_new(DF_ERROR_GENERIC, str, NULL); \
+ } \
+ else { \
+ g_free(str); \
+ } \
+ } while (0)
+
+/* Builds an fvalue of type 'ftype' from the literal text of node 'st'.
+ * If that fails and 'hfinfo_value_string' is given, the text is looked up
+ * among that field's value strings instead (with a deprecation warning).
+ * Never returns NULL: on failure the error is set and an exception thrown. */
+WS_RETNONNULL
+fvalue_t*
+dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
+		bool allow_partial_value, header_field_info *hfinfo_value_string)
+{
+	const char *literal = stnode_data(st);
+	char *err_msg = NULL;
+	fvalue_t *result;
+
+	result = fvalue_from_literal(ftype, literal, allow_partial_value, &err_msg);
+	SET_ERROR(dfw, err_msg);
+
+	if (result == NULL && hfinfo_value_string) {
+		/* Fall back to the field's value_string table. */
+		result = mk_fvalue_from_val_string(dfw, hfinfo_value_string, literal, stnode_location(st));
+		if (result != NULL) {
+			/* The lookup worked, so the earlier error no longer applies. */
+			df_error_free(&dfw->error);
+			add_compile_warning(dfw, "Interpreting the symbol \u2039%s\u203A as a %s value string. "
+				"Writing value strings without double quotes is deprecated. "
+				"Please use \"%s\" instead",
+				literal, ftype_pretty_name(hfinfo_value_string->type), literal);
+		}
+	}
+
+	if (result == NULL) {
+		dfw_set_error_location(dfw, stnode_location(st));
+		FAIL_HERE(dfw);
+	}
+
+	return result;
+}
+
+/* Builds an fvalue of type 'ftype' from the quoted string held by node 'st'.
+ * If that fails and 'hfinfo_value_string' is given, the string is looked up
+ * among that field's value strings instead.
+ * Never returns NULL: on failure the error is set and an exception thrown. */
+WS_RETNONNULL
+fvalue_t *
+dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
+		header_field_info *hfinfo_value_string)
+{
+	const GString *text = stnode_string(st);
+	char *err_msg = NULL;
+	fvalue_t *result;
+
+	result = fvalue_from_string(ftype, text->str, text->len, &err_msg);
+	SET_ERROR(dfw, err_msg);
+
+	if (result == NULL && hfinfo_value_string) {
+		result = mk_fvalue_from_val_string(dfw, hfinfo_value_string, text->str, stnode_location(st));
+		if (result != NULL) {
+			/* The lookup worked, so the earlier error no longer applies. */
+			df_error_free(&dfw->error);
+		}
+	}
+
+	if (result == NULL) {
+		dfw_set_error_location(dfw, stnode_location(st));
+		FAIL_HERE(dfw);
+	}
+
+	return result;
+}
+
+/* Allocates a new FT_UINT32 fvalue holding 'val'. */
+static fvalue_t*
+mk_uint32_fvalue(uint32_t val)
+{
+	fvalue_t *result = fvalue_new(FT_UINT32);
+
+	fvalue_set_uinteger(result, val);
+	return result;
+}
+
+/* Allocates a new FT_UINT64 fvalue holding 'val'. */
+static fvalue_t*
+mk_uint64_fvalue(uint64_t val)
+{
+	fvalue_t *result = fvalue_new(FT_UINT64);
+
+	fvalue_set_uinteger64(result, val);
+	return result;
+}
+
+/* Allocates a new FT_BOOLEAN fvalue holding 'val' (set through the 64-bit
+ * unsigned integer setter). */
+static fvalue_t*
+mk_boolean_fvalue(bool val)
+{
+	fvalue_t *result = fvalue_new(FT_BOOLEAN);
+
+	fvalue_set_uinteger64(result, val);
+	return result;
+}
+
+/* Try to turn the string 's' into an fvalue by looking it up (ignoring ASCII
+ * case) in the value_string, val64_string or true_false_string attached to
+ * 'hfinfo'. Only boolean and integer field types are considered.
+ * Returns the new fvalue_t*, or NULL if no value could be produced; for
+ * eligible field types an error located at 'loc' is recorded in that case. */
+static fvalue_t*
+mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s,
+		df_loc_t loc)
+{
+	/* Field types that can never carry value strings bail out silently. */
+	switch (hfinfo->type) {
+		case FT_NONE:
+		case FT_PROTOCOL:
+		case FT_FLOAT:
+		case FT_DOUBLE:
+		case FT_IEEE_11073_SFLOAT:
+		case FT_IEEE_11073_FLOAT:
+		case FT_ABSOLUTE_TIME:
+		case FT_RELATIVE_TIME:
+		case FT_IPv4:
+		case FT_IPv6:
+		case FT_IPXNET:
+		case FT_AX25:
+		case FT_VINES:
+		case FT_FCWWN:
+		case FT_ETHER:
+		case FT_BYTES:
+		case FT_UINT_BYTES:
+		case FT_STRING:
+		case FT_STRINGZ:
+		case FT_UINT_STRING:
+		case FT_STRINGZPAD:
+		case FT_STRINGZTRUNC:
+		case FT_EUI64:
+		case FT_GUID:
+		case FT_OID:
+		case FT_REL_OID:
+		case FT_SYSTEM_ID:
+		case FT_FRAMENUM: /* hfinfo->strings contains ft_framenum_type_t, not strings */
+			return NULL;
+
+		case FT_BOOLEAN:
+		case FT_CHAR:
+		case FT_UINT8:
+		case FT_UINT16:
+		case FT_UINT24:
+		case FT_UINT32:
+		case FT_UINT40:
+		case FT_UINT48:
+		case FT_UINT56:
+		case FT_UINT64:
+		case FT_INT8:
+		case FT_INT16:
+		case FT_INT24:
+		case FT_INT32:
+		case FT_INT40:
+		case FT_INT48:
+		case FT_INT56:
+		case FT_INT64:
+			break;
+
+		case FT_NUM_TYPES:
+			ws_assert_not_reached();
+	}
+
+	/* Without a strings table there is nothing to look up. */
+	if (!hfinfo->strings) {
+		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "%s cannot accept strings as values.",
+						hfinfo->abbrev);
+		return NULL;
+	}
+
+	/* Drop any earlier error: whatever happens below yields either a
+	 * value or a more specific message. */
+	df_error_free(&dfw->error);
+
+	if (hfinfo->type == FT_BOOLEAN) {
+		const true_false_string *tfs = (const true_false_string *)hfinfo->strings;
+
+		if (g_ascii_strcasecmp(s, tfs->true_string) == 0)
+			return mk_boolean_fvalue(true);
+		if (g_ascii_strcasecmp(s, tfs->false_string) == 0)
+			return mk_boolean_fvalue(false);
+	}
+	else if (hfinfo->display & BASE_RANGE_STRING) {
+		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [range] strings as values.",
+				hfinfo->abbrev);
+		return NULL;
+	}
+	else if (hfinfo->display & BASE_VAL64_STRING) {
+		const val64_string *entry64;
+
+		for (entry64 = (const val64_string *)hfinfo->strings; entry64->strptr != NULL; entry64++) {
+			if (g_ascii_strcasecmp(s, entry64->strptr) == 0)
+				return mk_uint64_fvalue(entry64->value);
+		}
+	}
+	else if (hfinfo->display == BASE_CUSTOM) {
+		/* If a user wants to match against a custom string, we would
+		 * somehow have to have the integer value here to pass it in
+		 * to the custom-display function. But we don't have an
+		 * integer, we have the string they're trying to match.
+		 * -><-
+		 */
+		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [custom] strings as values.",
+				hfinfo->abbrev);
+		return NULL;
+	}
+	else {
+		const value_string *entry = (const value_string *)hfinfo->strings;
+
+		if (hfinfo->display & BASE_EXT_STRING)
+			entry = VALUE_STRING_EXT_VS_P((const value_string_ext *) entry);
+
+		for (; entry->strptr != NULL; entry++) {
+			if (g_ascii_strcasecmp(s, entry->strptr) == 0)
+				return mk_uint32_fvalue(entry->value);
+		}
+	}
+
+	/* Every lookup above came up empty. */
+	dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.",
+			s, hfinfo->abbrev);
+	return NULL;
+}
+
+/* Tells whether 'type' is one of the field types this file treats as a raw
+ * byte sequence. Every enumerator is listed explicitly. */
+static bool
+is_bytes_type(enum ftenum type)
+{
+	switch(type) {
+		/* Everything that is not a byte sequence. */
+		case FT_NONE:
+		case FT_PROTOCOL:
+		case FT_BOOLEAN:
+		case FT_FRAMENUM:
+		case FT_CHAR:
+		case FT_UINT8:
+		case FT_UINT16:
+		case FT_UINT24:
+		case FT_UINT32:
+		case FT_UINT40:
+		case FT_UINT48:
+		case FT_UINT56:
+		case FT_UINT64:
+		case FT_INT8:
+		case FT_INT16:
+		case FT_INT24:
+		case FT_INT32:
+		case FT_INT40:
+		case FT_INT48:
+		case FT_INT56:
+		case FT_INT64:
+		case FT_EUI64:
+		case FT_FLOAT:
+		case FT_DOUBLE:
+		case FT_IEEE_11073_SFLOAT:
+		case FT_IEEE_11073_FLOAT:
+		case FT_ABSOLUTE_TIME:
+		case FT_RELATIVE_TIME:
+		case FT_IPv4:
+		case FT_IPXNET:
+		case FT_STRING:
+		case FT_STRINGZ:
+		case FT_UINT_STRING:
+		case FT_STRINGZPAD:
+		case FT_STRINGZTRUNC:
+			return false;
+
+		/* Byte sequences. */
+		case FT_ETHER:
+		case FT_BYTES:
+		case FT_UINT_BYTES:
+		case FT_IPv6:
+		case FT_GUID:
+		case FT_OID:
+		case FT_REL_OID:
+		case FT_SYSTEM_ID:
+		case FT_AX25:
+		case FT_VINES:
+		case FT_FCWWN:
+			return true;
+
+		case FT_NUM_TYPES:
+			ws_assert_not_reached();
+	}
+
+	ws_assert_not_reached();
+	return false;
+}
+
+/* Check the semantics of an existence test.
+ * Fields and field references may be tested for existence; only real
+ * fields are added to dfw->field_count. Strings, literals, character
+ * constants and functions are rejected with an error located at the node.
+ * Any other node type is not expected here and triggers an assertion. */
+static void
+check_exists(dfwork_t *dfw, stnode_t *st_arg1)
+{
+	LOG_NODE(st_arg1);
+
+	switch (stnode_type_id(st_arg1)) {
+		case STTYPE_FIELD:
+			/* This is OK */
+			dfw->field_count++;
+			break;
+		case STTYPE_REFERENCE:
+			/* A reference is a field entity too, so this is OK;
+			 * it is just not counted as a field. */
+			break;
+		case STTYPE_STRING:
+		case STTYPE_LITERAL:
+		case STTYPE_CHARCONST:
+			FAIL(dfw, st_arg1, "\"%s\" is neither a field nor a protocol name.",
+					stnode_todisplay(st_arg1));
+			break;
+
+		case STTYPE_FUNCTION:
+			/* XXX - Maybe we should change functions so they can return fields,
+			 * in which case the 'exist' should be fine. */
+			FAIL(dfw, st_arg1, "You cannot test whether a function is present.");
+			break;
+
+		case STTYPE_SET:
+		case STTYPE_UNINITIALIZED:
+		case STTYPE_NUM_TYPES:
+		case STTYPE_TEST:
+		case STTYPE_FVALUE:
+		case STTYPE_PCRE:
+		case STTYPE_ARITHMETIC:
+		case STTYPE_SLICE:
+			ws_assert_not_reached();
+	}
+}
+
+/* Check the entity being sliced by slice node 'st' (a field, a function call
+ * or another slice) and return the type of the slice itself: FT_STRING when
+ * the entity is a string type, FT_BYTES otherwise. 'lhs_ftype' is forwarded
+ * to nested function and slice checks. Errors are thrown through FAIL(). */
+ftenum_t
+check_slice(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype)
+{
+	stnode_t *entity;
+	header_field_info *entity_hfinfo;
+	ftenum_t entity_ftype;
+
+	LOG_NODE(st);
+
+	entity = sttype_slice_entity(st);
+	ws_assert(entity);
+
+	switch (stnode_type_id(entity)) {
+		case STTYPE_FIELD:
+			dfw->field_count++;
+			entity_hfinfo = sttype_field_hfinfo(entity);
+			entity_ftype = sttype_field_ftenum(entity);
+
+			if (!ftype_can_slice(entity_ftype)) {
+				FAIL(dfw, entity, "\"%s\" is a %s and cannot be sliced into a sequence of bytes.",
+						entity_hfinfo->abbrev, ftype_pretty_name(entity_ftype));
+			}
+			break;
+
+		case STTYPE_FUNCTION:
+			entity_ftype = check_function(dfw, entity, lhs_ftype);
+
+			if (!ftype_can_slice(entity_ftype)) {
+				FAIL(dfw, entity, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
+						sttype_function_name(entity), ftype_pretty_name(entity_ftype));
+			}
+			break;
+
+		case STTYPE_SLICE:
+			/* A slice of a slice: check the inner one recursively. */
+			entity_ftype = check_slice(dfw, entity, lhs_ftype);
+			break;
+
+		default:
+			FAIL(dfw, entity, "Range is not supported for entity %s",
+						stnode_todisplay(entity));
+	}
+
+	return FT_IS_STRING(entity_ftype) ? FT_STRING : FT_BYTES;
+}
+
+#define IS_FIELD_ENTITY(ft) /* True when the node type id 'ft' is STTYPE_FIELD or STTYPE_REFERENCE. */ \
+ ((ft) == STTYPE_FIELD || \
+ (ft) == STTYPE_REFERENCE)
+
+/* Rewrite node 'arg' in place so that it becomes a slice covering its former
+ * self from offset 0 to the end. */
+static void
+convert_to_bytes(stnode_t *arg)
+{
+	drange_node *whole_range;
+	stnode_t *inner;
+
+	/* A range that starts at offset 0 and runs to the end. */
+	whole_range = drange_node_new();
+	drange_node_set_start_offset(whole_range, 0);
+	drange_node_set_to_the_end(whole_range);
+
+	/* Duplicate the node before replacing it, so the copy can become
+	 * the entity of the new slice. */
+	inner = stnode_dup(arg);
+	stnode_replace(arg, STTYPE_SLICE, NULL);
+	sttype_slice_set1(arg, inner, whole_range);
+}
+
+/* Check a function call node: verify the number of arguments against the
+ * function definition, then delegate to the function's own parameter check
+ * and return the type it reports. A max_nargs of 0 means no upper limit is
+ * enforced. Errors are thrown through FAIL(). */
+ftenum_t
+check_function(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
+{
+	df_func_def_t *def;
+	GSList *args;
+	unsigned arg_count;
+
+	LOG_NODE(st_node);
+
+	def = sttype_function_funcdef(st_node);
+	args = sttype_function_params(st_node);
+	arg_count = g_slist_length(args);
+
+	if (arg_count < def->min_nargs) {
+		FAIL(dfw, st_node, "Function %s needs at least %u arguments.",
+			def->name, def->min_nargs);
+	}
+	if (def->max_nargs > 0 && arg_count > def->max_nargs) {
+		FAIL(dfw, st_node, "Function %s can only accept %u arguments.",
+			def->name, def->max_nargs);
+	}
+
+	return def->semcheck_param_function(dfw, def->name, lhs_ftype, args,
+					stnode_location(st_node));
+}
+
+/* Builds an fvalue of type 'ftype' from the number stored in the character
+ * constant node 'st'.
+ * Never returns NULL: on failure the error is set and an exception thrown. */
+WS_RETNONNULL
+fvalue_t *
+dfilter_fvalue_from_charconst(dfwork_t *dfw, ftenum_t ftype, stnode_t *st)
+{
+	const unsigned long *number = stnode_data(st);
+	char *err_msg = NULL;
+	fvalue_t *result;
+
+	result = fvalue_from_charconst(ftype, *number, &err_msg);
+	SET_ERROR(dfw, err_msg);
+
+	if (result == NULL) {
+		dfw_set_error_location(dfw, stnode_location(st));
+		FAIL_HERE(dfw);
+	}
+
+	return result;
+}
+
+/* Semantic check of a relation whose LHS is a field (or field reference).
+ * Depending on the RHS node type this verifies type compatibility, converts
+ * RHS strings/literals/character constants into fvalues, or wraps the LHS
+ * into a whole-field slice when it is compared against a slice. */
+static void
+check_relation_LHS_FIELD(dfwork_t *dfw, stnode_op_t st_op _U_,
+		FtypeCanFunc can_func, bool allow_partial_value,
+		stnode_t *st_node,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t rhs_type;
+	header_field_info *lhs_hfinfo;
+	ftenum_t lhs_ftype, rhs_ftype;
+	fvalue_t *rhs_value;
+	bool rhs_is_string;
+
+	LOG_NODE(st_node);
+
+	if (stnode_type_id(st_arg1) == STTYPE_FIELD)
+		dfw->field_count++;
+
+	lhs_hfinfo = sttype_field_hfinfo(st_arg1);
+	lhs_ftype = sttype_field_ftenum(st_arg1);
+	if (!can_func(lhs_ftype)) {
+		FAIL(dfw, st_arg1, "%s (type=%s) cannot participate in %s comparison.",
+				lhs_hfinfo->abbrev, ftype_pretty_name(lhs_ftype),
+				stnode_todisplay(st_node));
+	}
+
+	rhs_type = stnode_type_id(st_arg2);
+
+	switch (rhs_type) {
+		case STTYPE_FIELD:
+		case STTYPE_REFERENCE:
+			rhs_ftype = sttype_field_ftenum(st_arg2);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+			}
+			/* Do this check even though you'd think that if
+			 * they're compatible, then can_func() would pass. */
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			if (rhs_type == STTYPE_FIELD) {
+				dfw->field_count++;
+			}
+			break;
+
+		case STTYPE_STRING:
+		case STTYPE_LITERAL:
+			/* Skip incompatible fields: walk back through the fields
+			 * registered under the same name while "RHS is a quoted
+			 * string" and "field is FT_STRING/FT_STRINGZ" disagree. */
+			rhs_is_string = (rhs_type == STTYPE_STRING);
+			while (lhs_hfinfo->same_name_prev_id != -1 &&
+					rhs_is_string != (lhs_ftype == FT_STRING || lhs_ftype == FT_STRINGZ)) {
+				lhs_hfinfo = proto_registrar_get_nth(lhs_hfinfo->same_name_prev_id);
+				lhs_ftype = lhs_hfinfo->type;
+			}
+
+			if (rhs_is_string) {
+				rhs_value = dfilter_fvalue_from_string(dfw, lhs_ftype, st_arg2, lhs_hfinfo);
+			}
+			else {
+				rhs_value = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_arg2, allow_partial_value, lhs_hfinfo);
+			}
+			stnode_replace(st_arg2, STTYPE_FVALUE, rhs_value);
+			break;
+
+		case STTYPE_CHARCONST:
+			rhs_value = dfilter_fvalue_from_charconst(dfw, lhs_ftype, st_arg2);
+			stnode_replace(st_arg2, STTYPE_FVALUE, rhs_value);
+			break;
+
+		case STTYPE_SLICE:
+			rhs_ftype = check_slice(dfw, st_arg2, lhs_ftype);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+			}
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+
+			if (!is_bytes_type(lhs_ftype)) {
+				if (!ftype_can_slice(lhs_ftype)) {
+					FAIL(dfw, st_arg1, "\"%s\" is a %s and cannot be converted into a sequence of bytes.",
+							lhs_hfinfo->abbrev,
+							ftype_pretty_name(lhs_ftype));
+				}
+
+				/* Convert entire field to bytes */
+				convert_to_bytes(st_arg1);
+			}
+			break;
+
+		case STTYPE_FUNCTION:
+			rhs_ftype = check_function(dfw, st_arg2, lhs_ftype);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) and return value of %s() (type=%s) are not of compatible types.",
+						lhs_hfinfo->abbrev, ftype_pretty_name(lhs_ftype),
+						sttype_function_name(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.",
+						sttype_function_name(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		case STTYPE_PCRE:
+			ws_assert(st_op == STNODE_OP_MATCHES);
+			break;
+
+		case STTYPE_ARITHMETIC:
+			rhs_ftype = check_arithmetic(dfw, st_arg2, lhs_ftype);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+			}
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		default:
+			ws_assert_not_reached();
+	}
+}
+
+/* Semantic check of a relation whose LHS is a constant (string, literal or
+ * character constant). The RHS decides the type: once it has been checked,
+ * the LHS is converted into an fvalue of the RHS type. A constant on both
+ * sides is rejected. */
+static void
+check_relation_LHS_FVALUE(dfwork_t *dfw, stnode_op_t st_op _U_,
+		FtypeCanFunc can_func, bool allow_partial_value,
+		stnode_t *st_node,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t lhs_type, rhs_type;
+	header_field_info *rhs_hfinfo = NULL;
+	ftenum_t rhs_ftype;
+	fvalue_t *lhs_value;
+
+	LOG_NODE(st_node);
+
+	rhs_type = stnode_type_id(st_arg2);
+
+	switch (rhs_type) {
+		case STTYPE_FIELD:
+		case STTYPE_REFERENCE:
+			rhs_hfinfo = sttype_field_hfinfo(st_arg2);
+			rhs_ftype = sttype_field_ftenum(st_arg2);
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			if (rhs_type == STTYPE_FIELD) {
+				dfw->field_count++;
+			}
+			break;
+
+		case STTYPE_STRING:
+		case STTYPE_LITERAL:
+		case STTYPE_CHARCONST:
+		case STTYPE_PCRE:
+			/* Constant on both sides. */
+			FAIL(dfw, st_node, "Constant expression is invalid.");
+			break;
+
+		case STTYPE_SLICE:
+			rhs_ftype = check_slice(dfw, st_arg2, FT_NONE);
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		case STTYPE_FUNCTION:
+			rhs_ftype = check_function(dfw, st_arg2, FT_NONE);
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.",
+						sttype_function_name(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		case STTYPE_ARITHMETIC:
+			rhs_ftype = check_arithmetic(dfw, st_arg2, FT_NONE);
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		default:
+			ws_assert_not_reached();
+	}
+
+	/* Now that the RHS type is known, convert the LHS constant to it. */
+	lhs_type = stnode_type_id(st_arg1);
+	switch (lhs_type) {
+		case STTYPE_STRING:
+			lhs_value = dfilter_fvalue_from_string(dfw, rhs_ftype, st_arg1, rhs_hfinfo);
+			break;
+		case STTYPE_LITERAL:
+			lhs_value = dfilter_fvalue_from_literal(dfw, rhs_ftype, st_arg1, allow_partial_value, rhs_hfinfo);
+			break;
+		case STTYPE_CHARCONST:
+			lhs_value = dfilter_fvalue_from_charconst(dfw, rhs_ftype, st_arg1);
+			break;
+		default:
+			ws_assert_not_reached();
+	}
+	stnode_replace(st_arg1, STTYPE_FVALUE, lhs_value);
+}
+
+/* Semantic check of a relation whose LHS is a slice. The slice is checked
+ * first; then, depending on the RHS node type, the RHS is converted into an
+ * fvalue of the slice type, wrapped into a whole-value slice, or checked for
+ * type compatibility with the LHS. */
+static void
+check_relation_LHS_SLICE(dfwork_t *dfw, stnode_op_t st_op _U_,
+		FtypeCanFunc can_func _U_,
+		bool allow_partial_value,
+		stnode_t *st_node _U_,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t rhs_type;
+	ftenum_t lhs_ftype, rhs_ftype;
+	fvalue_t *rhs_value;
+
+	LOG_NODE(st_node);
+
+	lhs_ftype = check_slice(dfw, st_arg1, FT_NONE);
+	if (!can_func(lhs_ftype)) {
+		FAIL(dfw, st_arg1, "%s cannot participate in %s comparison.",
+				stnode_todisplay(st_arg1), stnode_todisplay(st_node));
+	}
+
+	rhs_type = stnode_type_id(st_arg2);
+
+	switch (rhs_type) {
+		case STTYPE_FIELD:
+		case STTYPE_REFERENCE:
+			rhs_ftype = sttype_field_ftenum(st_arg2);
+
+			if (!is_bytes_type(rhs_ftype)) {
+				if (!ftype_can_slice(rhs_ftype)) {
+					FAIL(dfw, st_arg2, "\"%s\" is a %s and cannot be converted into a sequence of bytes.",
+							stnode_todisplay(st_arg2),
+							ftype_pretty_name(rhs_ftype));
+				}
+
+				/* Convert entire field to bytes */
+				convert_to_bytes(st_arg2);
+			}
+			if (rhs_type == STTYPE_FIELD) {
+				dfw->field_count++;
+			}
+			break;
+
+		case STTYPE_STRING:
+			rhs_value = dfilter_fvalue_from_string(dfw, lhs_ftype, st_arg2, NULL);
+			stnode_replace(st_arg2, STTYPE_FVALUE, rhs_value);
+			break;
+
+		case STTYPE_LITERAL:
+			rhs_value = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_arg2, allow_partial_value, NULL);
+			stnode_replace(st_arg2, STTYPE_FVALUE, rhs_value);
+			break;
+
+		case STTYPE_CHARCONST:
+			rhs_value = dfilter_fvalue_from_charconst(dfw, lhs_ftype, st_arg2);
+			stnode_replace(st_arg2, STTYPE_FVALUE, rhs_value);
+			break;
+
+		case STTYPE_SLICE:
+			rhs_ftype = check_slice(dfw, st_arg2, lhs_ftype);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+			}
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		case STTYPE_FUNCTION:
+			rhs_ftype = check_function(dfw, st_arg2, lhs_ftype);
+
+			if (!is_bytes_type(rhs_ftype)) {
+				if (!ftype_can_slice(rhs_ftype)) {
+					FAIL(dfw, st_arg2, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
+							sttype_function_name(st_arg2),
+							ftype_pretty_name(rhs_ftype));
+				}
+
+				/* Convert function result to bytes */
+				convert_to_bytes(st_arg2);
+			}
+			break;
+
+		case STTYPE_PCRE:
+			ws_assert(st_op == STNODE_OP_MATCHES);
+			break;
+
+		case STTYPE_ARITHMETIC:
+			rhs_ftype = check_arithmetic(dfw, st_arg2, lhs_ftype);
+
+			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+			}
+
+			if (!can_func(rhs_ftype)) {
+				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
+			}
+			break;
+
+		default:
+			ws_assert_not_reached();
+	}
+}
+
+/*
+ * Semantic check for a relation whose left-hand side is a FUNCTION node.
+ *
+ * dfw                 - work area of the filter being compiled.
+ * st_op               - relation operator; only consulted by the assertion
+ *                       in the PCRE branch.
+ * can_func            - predicate telling whether a field type may take part
+ *                       in this kind of relation.
+ * allow_partial_value - forwarded to dfilter_fvalue_from_literal().
+ * st_node             - the relation node (used for logging/error text).
+ * st_arg1             - the LHS function node.
+ * st_arg2             - the RHS node; STRING, LITERAL and CHARCONST nodes
+ *                       are replaced in place by an FVALUE of the function's
+ *                       return type.
+ *
+ * Problems are reported through FAIL(); per the comment in dfw_semcheck()
+ * the semantic checker signals errors by throwing, so code after a FAIL()
+ * is presumably not executed.
+ */
+static void
+check_relation_LHS_FUNCTION(dfwork_t *dfw, stnode_op_t st_op _U_,
+		FtypeCanFunc can_func, bool allow_partial_value,
+		stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t type2;
+	ftenum_t ftype1, ftype2;
+	fvalue_t *fvalue;
+
+	LOG_NODE(st_node);
+
+	/* The LHS has no sibling to infer a type from, hence FT_NONE. */
+	ftype1 = check_function(dfw, st_arg1, FT_NONE);
+	if (ftype1 == FT_NONE) {
+		FAIL(dfw, st_arg1, "Constant expression is invalid on the LHS.");
+	}
+	if (!can_func(ftype1)) {
+		FAIL(dfw, st_arg1, "Function %s (type=%s) cannot participate in %s comparison.",
+				sttype_function_name(st_arg1), ftype_pretty_name(ftype1),
+				stnode_todisplay(st_node));
+	}
+
+	type2 = stnode_type_id(st_arg2);
+
+	if (IS_FIELD_ENTITY(type2)) {
+		ftype2 = sttype_field_ftenum(st_arg2);
+
+		if (!compatible_ftypes(ftype1, ftype2)) {
+			/* The function is st_arg1; st_arg2 is a field here and
+			 * must not be handed to sttype_function_name(). */
+			FAIL(dfw, st_arg2, "Function %s and %s are not of compatible types.",
+					sttype_function_name(st_arg1), stnode_todisplay(st_arg2));
+		}
+		/* Do this check even though you'd think that if
+		 * they're compatible, then can_func() would pass. */
+		if (!can_func(ftype2)) {
+			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+		}
+		if (type2 == STTYPE_FIELD) {
+			dfw->field_count++;
+		}
+	}
+	else if (type2 == STTYPE_STRING) {
+		fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
+		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+	}
+	else if (type2 == STTYPE_LITERAL) {
+		fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL);
+		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+	}
+	else if (type2 == STTYPE_CHARCONST) {
+		fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2);
+		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+	}
+	else if (type2 == STTYPE_SLICE) {
+		ftype2 = check_slice(dfw, st_arg2, ftype1);
+
+		if (!compatible_ftypes(ftype1, ftype2)) {
+			FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+					stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+		}
+		if (!can_func(ftype2)) {
+			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+		}
+
+		if (!is_bytes_type(ftype1)) {
+			if (!ftype_can_slice(ftype1)) {
+				FAIL(dfw, st_arg1, "Function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
+						sttype_function_name(st_arg1),
+						ftype_pretty_name(ftype1));
+			}
+
+			/* Convert function result to bytes */
+			convert_to_bytes(st_arg1);
+		}
+	}
+	else if (type2 == STTYPE_FUNCTION) {
+		ftype2 = check_function(dfw, st_arg2, ftype1);
+
+		if (!compatible_ftypes(ftype1, ftype2)) {
+			/* Name both functions: LHS first, then RHS. */
+			FAIL(dfw, st_arg2, "Return values of function %s (type=%s) and function %s (type=%s) are not of compatible types.",
+				sttype_function_name(st_arg1), ftype_pretty_name(ftype1), sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
+		}
+
+		/* Do this check even though you'd think that if
+		 * they're compatible, then can_func() would pass. */
+		if (!can_func(ftype2)) {
+			FAIL(dfw, st_arg2, "Return value of %s (type=%s) cannot participate in specified comparison.",
+				sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
+		}
+	}
+	else if (type2 == STTYPE_PCRE) {
+		/* A compiled regex can only be the RHS of "matches". */
+		ws_assert(st_op == STNODE_OP_MATCHES);
+	}
+	else if (type2 == STTYPE_ARITHMETIC) {
+		ftype2 = check_arithmetic(dfw, st_arg2, ftype1);
+
+		if (!compatible_ftypes(ftype1, ftype2)) {
+			FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+					stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+		}
+
+		if (!can_func(ftype2)) {
+			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+		}
+	}
+	else {
+		ws_assert_not_reached();
+	}
+}
+
+/*
+ * Semantic check for a relation whose left-hand side is an ARITHMETIC
+ * expression. The LHS type is computed first (with no type hint), then the
+ * RHS is validated against it according to its syntax-tree type. STRING,
+ * LITERAL and CHARCONST right-hand sides are replaced in place by an FVALUE
+ * of the LHS type. Errors are reported through FAIL().
+ * st_op is only consulted by the assertion in the PCRE branch.
+ */
+static void
+check_relation_LHS_ARITHMETIC(dfwork_t *dfw, stnode_op_t st_op _U_,
+ FtypeCanFunc can_func, bool allow_partial_value,
+ stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2)
+{
+ sttype_id_t type2;
+ ftenum_t ftype1, ftype2;
+ fvalue_t *fvalue;
+
+ LOG_NODE(st_node);
+
+ /* FT_NONE: there is no sibling type to infer the LHS type from. */
+ ftype1 = check_arithmetic(dfw, st_arg1, FT_NONE);
+ if (ftype1 == FT_NONE) {
+ FAIL(dfw, st_arg1, "Constant expression is invalid on the LHS.");
+ }
+ if (!can_func(ftype1)) {
+ FAIL(dfw, st_arg1, "Result with type %s cannot participate in %s comparison.",
+ ftype_pretty_name(ftype1),
+ stnode_todisplay(st_node));
+ }
+
+ type2 = stnode_type_id(st_arg2);
+
+ if (IS_FIELD_ENTITY(type2)) {
+ ftype2 = sttype_field_ftenum(st_arg2);
+
+ if (!compatible_ftypes(ftype1, ftype2)) {
+ FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+ stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+ }
+ if (!can_func(ftype2)) {
+ FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+ stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+ }
+ /* Only STTYPE_FIELD nodes are counted in field_count. */
+ if (type2 == STTYPE_FIELD) {
+ dfw->field_count++;
+ }
+ }
+ else if (type2 == STTYPE_STRING) {
+ fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+ }
+ else if (type2 == STTYPE_LITERAL) {
+ fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+ }
+ else if (type2 == STTYPE_CHARCONST) {
+ fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+ }
+ else if (type2 == STTYPE_SLICE) {
+ ftype2 = check_slice(dfw, st_arg2, ftype1);
+
+ if (!compatible_ftypes(ftype1, ftype2)) {
+ FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+ stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+ }
+ if (!can_func(ftype2)) {
+ FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+ stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+ }
+
+ /* A non-bytes LHS is converted to bytes when compared with a slice. */
+ if (!is_bytes_type(ftype1)) {
+ if (!ftype_can_slice(ftype1)) {
+ FAIL(dfw, st_arg1, "Result is a %s and cannot be converted into a sequence of bytes.",
+ ftype_pretty_name(ftype1));
+ }
+
+ /* Convert expression result to bytes */
+ convert_to_bytes(st_arg1);
+ }
+ }
+ else if (type2 == STTYPE_FUNCTION) {
+ ftype2 = check_function(dfw, st_arg2, ftype1);
+
+ if (!compatible_ftypes(ftype1, ftype2)) {
+ FAIL(dfw, st_arg2, "Result (type=%s) and return value of %s() (type=%s) are not of compatible types.",
+ ftype_pretty_name(ftype1),
+ sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
+ }
+ if (!can_func(ftype2)) {
+ FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.",
+ sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
+ }
+ }
+ else if (type2 == STTYPE_PCRE) {
+ /* A compiled regex is only expected as the RHS of "matches". */
+ ws_assert(st_op == STNODE_OP_MATCHES);
+ }
+ else if (type2 == STTYPE_ARITHMETIC) {
+ ftype2 = check_arithmetic(dfw, st_arg2, ftype1);
+
+ if (!compatible_ftypes(ftype1, ftype2)) {
+ FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
+ stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
+ }
+ if (!can_func(ftype2)) {
+ FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
+ stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
+ }
+ }
+ else {
+ ws_assert_not_reached();
+ }
+}
+
+/*
+ * Semantic check of a generic relational test: pick the LHS-specific
+ * checker from the syntax-tree type of the left operand and forward all
+ * arguments to it unchanged.
+ */
+static void
+check_relation(dfwork_t *dfw, stnode_op_t st_op,
+		FtypeCanFunc can_func, bool allow_partial_value,
+		stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t lhs_type;
+
+	LOG_NODE(st_node);
+
+	lhs_type = stnode_type_id(st_arg1);
+
+	if (lhs_type == STTYPE_FIELD || lhs_type == STTYPE_REFERENCE) {
+		check_relation_LHS_FIELD(dfw, st_op, can_func,
+				allow_partial_value, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_SLICE) {
+		check_relation_LHS_SLICE(dfw, st_op, can_func,
+				allow_partial_value, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_FUNCTION) {
+		check_relation_LHS_FUNCTION(dfw, st_op, can_func,
+				allow_partial_value, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_ARITHMETIC) {
+		check_relation_LHS_ARITHMETIC(dfw, st_op, can_func,
+				allow_partial_value, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_LITERAL ||
+			lhs_type == STTYPE_STRING ||
+			lhs_type == STTYPE_CHARCONST) {
+		check_relation_LHS_FVALUE(dfw, st_op, can_func,
+				allow_partial_value, st_node, st_arg1, st_arg2);
+	}
+	else {
+		/* Should not happen. */
+		FAIL(dfw, st_arg1, "(FIXME) Syntax node type \"%s\" is invalid for relation \"%s\".",
+				stnode_type_name(st_arg1), stnode_todisplay(st_node));
+	}
+}
+
+/*
+ * Emit a compile warning when the RHS of "contains" was resolved to a field
+ * although its token would also parse as a byte-string literal. The warning
+ * shows both unambiguous spellings (the bytes representation and the
+ * dot-prefixed field abbreviation).
+ */
+static void
+check_warning_contains_RHS_FIELD(dfwork_t *dfw, stnode_t *st_node _U_,
+		stnode_t *st_arg1 _U_, stnode_t *st_arg2)
+{
+	const char *rhs_token = stnode_token(st_arg2);
+	header_field_info *rhs_hfinfo = sttype_field_hfinfo(st_arg2);
+	fvalue_t *as_bytes;
+	char *bytes_repr;
+
+	/* Nothing to warn about unless the token is also valid as bytes. */
+	as_bytes = fvalue_from_literal(FT_BYTES, rhs_token, true, NULL);
+	if (as_bytes == NULL)
+		return;
+
+	bytes_repr = fvalue_to_string_repr(dfw->dfw_scope, as_bytes, FTREPR_DFILTER, 0);
+	add_compile_warning(dfw, "Interpreting \"%s\" as %s instead of %s. "
+			"Consider writing \"%s\" or \".%s\" to remove this warning",
+			rhs_token, rhs_hfinfo->name, ftype_pretty_name(FT_BYTES),
+			bytes_repr, rhs_hfinfo->abbrev);
+	fvalue_free(as_bytes);
+}
+
+/*
+ * Semantic check of a "contains" test. Warns about an ambiguous unparsed
+ * field on the RHS, then delegates to the checker for the LHS node type.
+ * Only fields, references, functions and slices are accepted on the left.
+ */
+static void
+check_relation_contains(dfwork_t *dfw, stnode_t *st_node,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	sttype_id_t lhs_type;
+
+	LOG_NODE(st_node);
+
+	if (stnode_type_id(st_arg2) == STTYPE_FIELD && stnode_get_flags(st_arg2, STFLAG_UNPARSED)) {
+		check_warning_contains_RHS_FIELD(dfw, st_node, st_arg1, st_arg2);
+	}
+
+	lhs_type = stnode_type_id(st_arg1);
+
+	if (lhs_type == STTYPE_FIELD || lhs_type == STTYPE_REFERENCE) {
+		check_relation_LHS_FIELD(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_FUNCTION) {
+		check_relation_LHS_FUNCTION(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_SLICE) {
+		check_relation_LHS_SLICE(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else {
+		FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.",
+				stnode_todisplay(st_node), stnode_todisplay(st_arg1));
+	}
+}
+
+
+/*
+ * Semantic check of a "matches" test. The RHS must be a double-quoted
+ * string; it is compiled (case-insensitive, never-UTF) and the RHS node is
+ * replaced by the compiled regex before the LHS-specific checker runs.
+ * A regex compilation error is reported and raised as TypeError.
+ */
+static void
+check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	GString *pattern;
+	ws_regex_t *regex;
+	char *compile_err = NULL;
+	sttype_id_t lhs_type;
+
+	LOG_NODE(st_node);
+
+	if (stnode_type_id(st_arg2) != STTYPE_STRING) {
+		FAIL(dfw, st_arg2, "Matches requires a double quoted string on the right side.");
+	}
+
+	pattern = stnode_string(st_arg2);
+	ws_debug("Compile regex pattern: %s", stnode_token(st_arg2));
+
+	regex = ws_regex_compile_ex(pattern->str, pattern->len, &compile_err, WS_REGEX_CASELESS|WS_REGEX_NEVER_UTF);
+	if (compile_err) {
+		dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg2), "Regex compilation error: %s.", compile_err);
+		g_free(compile_err);
+		ws_noisy("Semantic check failed here with a regex syntax error");
+		THROW(TypeError);
+	}
+
+	/* From here on the RHS node carries the compiled regex. */
+	stnode_replace(st_arg2, STTYPE_PCRE, regex);
+
+	lhs_type = stnode_type_id(st_arg1);
+
+	if (lhs_type == STTYPE_FIELD || lhs_type == STTYPE_REFERENCE) {
+		check_relation_LHS_FIELD(dfw, STNODE_OP_MATCHES, ftype_can_matches,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_FUNCTION) {
+		check_relation_LHS_FUNCTION(dfw, STNODE_OP_MATCHES, ftype_can_matches,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else if (lhs_type == STTYPE_SLICE) {
+		check_relation_LHS_SLICE(dfw, STNODE_OP_MATCHES, ftype_can_matches,
+				true, st_node, st_arg1, st_arg2);
+	}
+	else {
+		FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.",
+				stnode_todisplay(st_node), stnode_todisplay(st_arg1));
+	}
+}
+
+/*
+ * Semantic check of a set-membership test ("in" / "not in").
+ * The LHS must be a plain field; the RHS is a SET whose list stores every
+ * element as a pair of entries: (value, NULL) for a single value, or
+ * (lower, upper) for a range. Each element is checked as if it were an
+ * equality test (single value) or a >= / <= pair (range) against the field.
+ */
+static void
+check_relation_in(dfwork_t *dfw, stnode_t *st_node _U_,
+		stnode_t *st_arg1, stnode_t *st_arg2)
+{
+	GSList *item;
+	stnode_t *lower, *upper;
+
+	LOG_NODE(st_node);
+
+	if (stnode_type_id(st_arg1) != STTYPE_FIELD) {
+		FAIL(dfw, st_arg1, "Only a field may be tested for membership in a set.");
+	}
+	/* Checked in the grammar parser. */
+	ws_assert(stnode_type_id(st_arg2) == STTYPE_SET);
+
+	/* Walk the list two entries at a time: the loop increment skips the
+	 * second entry of the pair, the body steps over the first. */
+	for (item = stnode_data(st_arg2); item != NULL; item = g_slist_next(item)) {
+		lower = item->data;
+
+		/* Don't let a range on the RHS affect the LHS field. */
+		if (stnode_type_id(lower) == STTYPE_SLICE) {
+			FAIL(dfw, lower, "A slice may not appear inside a set.");
+			break;
+		}
+
+		item = g_slist_next(item);
+		ws_assert(item);
+		upper = item->data;
+
+		if (upper == NULL) {
+			/* Single value. */
+			check_relation_LHS_FIELD(dfw, STNODE_OP_ANY_EQ, ftype_can_eq,
+					false, st_node, st_arg1, lower);
+		} else {
+			/* Range: lower <= field <= upper. */
+			check_relation_LHS_FIELD(dfw, STNODE_OP_GE, ftype_can_cmp,
+					false, st_node, st_arg1, lower);
+			check_relation_LHS_FIELD(dfw, STNODE_OP_LE, ftype_can_cmp,
+					false, st_node, st_arg1, upper);
+		}
+	}
+}
+
+/*
+ * Check the semantics of any type of TEST node.
+ * Logical operators recurse into semcheck() for each operand; relational
+ * operators are forwarded to the matching check_relation*() helper together
+ * with the type predicate for that operator family. Arithmetic operators
+ * are not expected inside a TEST node and trip an assertion.
+ */
+static void
+check_test(dfwork_t *dfw, stnode_t *st_node)
+{
+ stnode_op_t st_op;
+ stnode_t *st_arg1, *st_arg2;
+
+ LOG_NODE(st_node);
+
+ sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2);
+
+ /* No default label: every stnode_op_t value is listed explicitly. */
+ switch (st_op) {
+ case STNODE_OP_NOT:
+ semcheck(dfw, st_arg1);
+ break;
+ case STNODE_OP_AND:
+ case STNODE_OP_OR:
+ semcheck(dfw, st_arg1);
+ semcheck(dfw, st_arg2);
+ break;
+ /* Equality family: operand types must satisfy ftype_can_eq. */
+ case STNODE_OP_ALL_EQ:
+ case STNODE_OP_ANY_EQ:
+ case STNODE_OP_ALL_NE:
+ case STNODE_OP_ANY_NE:
+ check_relation(dfw, st_op, ftype_can_eq, false, st_node, st_arg1, st_arg2);
+ break;
+ /* Ordering family: operand types must satisfy ftype_can_cmp. */
+ case STNODE_OP_GT:
+ case STNODE_OP_GE:
+ case STNODE_OP_LT:
+ case STNODE_OP_LE:
+ check_relation(dfw, st_op, ftype_can_cmp, false, st_node, st_arg1, st_arg2);
+ break;
+ case STNODE_OP_CONTAINS:
+ check_relation_contains(dfw, st_node, st_arg1, st_arg2);
+ break;
+ case STNODE_OP_MATCHES:
+ check_relation_matches(dfw, st_node, st_arg1, st_arg2);
+ break;
+ case STNODE_OP_IN:
+ case STNODE_OP_NOT_IN:
+ check_relation_in(dfw, st_node, st_arg1, st_arg2);
+ break;
+
+ /* Not valid operators for a TEST node. */
+ case STNODE_OP_UNINITIALIZED:
+ case STNODE_OP_UNARY_MINUS:
+ case STNODE_OP_BITWISE_AND:
+ case STNODE_OP_ADD:
+ case STNODE_OP_SUBTRACT:
+ case STNODE_OP_MULTIPLY:
+ case STNODE_OP_DIVIDE:
+ case STNODE_OP_MODULO:
+ ws_assert_not_reached();
+ }
+}
+
+/*
+ * Semantic check of a bare ARITHMETIC or SLICE expression used as a filter
+ * on its own. The expression is type-checked without a type hint; if no
+ * type can be determined it is rejected as a constant expression.
+ */
+static void
+check_nonzero(dfwork_t *dfw, stnode_t *st_node)
+{
+	ftenum_t result_type = FT_NONE;
+	sttype_id_t node_type;
+
+	LOG_NODE(st_node);
+
+	node_type = stnode_type_id(st_node);
+	if (node_type == STTYPE_ARITHMETIC) {
+		result_type = check_arithmetic(dfw, st_node, FT_NONE);
+	}
+	else if (node_type == STTYPE_SLICE) {
+		result_type = check_slice(dfw, st_node, FT_NONE);
+	}
+	else {
+		/* The caller only dispatches the two types above here. */
+		ws_assert_not_reached();
+	}
+
+	if (result_type == FT_NONE) {
+		FAIL(dfw, st_node, "Constant expression is invalid.");
+	}
+}
+
+/*
+ * Map an arithmetic operator to the predicate phrase used in
+ * "<type> <phrase>." error messages. Operators without a dedicated
+ * phrase yield the placeholder "cannot FIXME".
+ */
+static const char *
+op_to_error_msg(stnode_op_t st_op)
+{
+	const char *phrase = "cannot FIXME";
+
+	switch (st_op) {
+		case STNODE_OP_UNARY_MINUS:
+			phrase = "cannot be negated";
+			break;
+		case STNODE_OP_ADD:
+			phrase = "cannot be added";
+			break;
+		case STNODE_OP_SUBTRACT:
+			phrase = "cannot be subtracted";
+			break;
+		case STNODE_OP_MULTIPLY:
+			phrase = "cannot be multiplied";
+			break;
+		case STNODE_OP_DIVIDE:
+			phrase = "cannot be divided";
+			break;
+		case STNODE_OP_MODULO:
+			phrase = "does not support modulo operation";
+			break;
+		case STNODE_OP_BITWISE_AND:
+			phrase = "does not support bitwise AND";
+			break;
+		default:
+			break;
+	}
+
+	return phrase;
+}
+
+/*
+ * Semantic check of one arithmetic operator node.
+ *
+ * dfw       - work area of the filter being compiled.
+ * st_op     - the arithmetic operator of st_node.
+ * st_node   - the operator node itself; for a unary minus applied to a
+ *             constant it is replaced in place by the negated FVALUE.
+ * st_arg1   - left (or only) operand.
+ * st_arg2   - right operand; not used for unary minus.
+ * lhs_ftype - type hint from the other side of the enclosing expression,
+ *             or FT_NONE when there is none.
+ *
+ * Returns the type of the left operand. For unary minus, FT_NONE is
+ * returned when the operand's type cannot be determined. Errors are
+ * reported through FAIL() / FAIL_HERE().
+ */
+static ftenum_t
+check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op,
+			stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
+			ftenum_t lhs_ftype)
+{
+	ftenum_t ftype1, ftype2;
+	FtypeCanFunc can_func = NULL;
+
+	LOG_NODE(st_node);
+
+	if (st_op == STNODE_OP_UNARY_MINUS) {
+		ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype);
+		if (ftype1 == FT_NONE)
+			return FT_NONE;
+		if (!ftype_can_unary_minus(ftype1)) {
+			FAIL(dfw, st_arg1, "%s %s.",
+				ftype_name(ftype1), op_to_error_msg(st_op));
+		}
+		if (stnode_type_id(st_arg1) == STTYPE_FVALUE) {
+			/* Pre-compute constant unary minus result */
+			/* Start from NULL so the error path never formats or
+			 * frees an indeterminate pointer if the callee fails
+			 * without providing a message. */
+			char *err_msg = NULL;
+			fvalue_t *new_fv = fvalue_unary_minus(stnode_data(st_arg1), &err_msg);
+			if (new_fv == NULL) {
+				dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg1),
+							"%s: %s", stnode_todisplay(st_arg1),
+							err_msg ? err_msg : "(no error message)");
+				g_free(err_msg);
+				FAIL_HERE(dfw);
+			}
+			/* Replaces unary operator with result */
+			stnode_replace(st_node, STTYPE_FVALUE, new_fv);
+		}
+		return ftype1;
+	}
+
+	/* Binary operators: select the type predicate for this operator. */
+	switch (st_op) {
+		case STNODE_OP_ADD:
+			can_func = ftype_can_add;
+			break;
+		case STNODE_OP_SUBTRACT:
+			can_func = ftype_can_subtract;
+			break;
+		case STNODE_OP_MULTIPLY:
+			can_func = ftype_can_multiply;
+			break;
+		case STNODE_OP_DIVIDE:
+			can_func = ftype_can_divide;
+			break;
+		case STNODE_OP_MODULO:
+			can_func = ftype_can_modulo;
+			break;
+		case STNODE_OP_BITWISE_AND:
+			can_func = ftype_can_bitwise_and;
+			break;
+		default:
+			ws_assert_not_reached();
+	}
+
+	ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype);
+	if (ftype1 == FT_NONE) {
+		FAIL(dfw, st_arg1, "Unknown type for left side of %s", stnode_todisplay(st_node));
+	}
+	if (!can_func(ftype1)) {
+		FAIL(dfw, st_arg1, "%s %s.",
+			ftype_name(ftype1), op_to_error_msg(st_op));
+	}
+
+	/* The right operand is typed using the left operand as the hint. */
+	ftype2 = check_arithmetic(dfw, st_arg2, ftype1);
+	if (!can_func(ftype2)) {
+		FAIL(dfw, st_arg2, "%s %s.",
+			ftype_name(ftype2), op_to_error_msg(st_op));
+	}
+
+	if (!compatible_ftypes(ftype1, ftype2)) {
+		FAIL(dfw, st_node, "%s and %s are not compatible.",
+			ftype_name(ftype1), ftype_name(ftype2));
+	}
+
+	return ftype1;
+}
+
+/*
+ * Determine (and validate) the field type of an arithmetic operand.
+ *
+ * st_node   - operand node; a LITERAL is converted in place to an FVALUE
+ *             when a type hint is available.
+ * lhs_ftype - type hint taken from the other operand, or FT_NONE.
+ *
+ * Returns the operand's type, or FT_NONE for a LITERAL with no type hint.
+ * Node types other than those listed are rejected through FAIL().
+ */
+ftenum_t
+check_arithmetic(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
+{
+ sttype_id_t type;
+ stnode_op_t st_op;
+ stnode_t *st_arg1, *st_arg2;
+ ftenum_t ftype;
+
+ LOG_NODE(st_node);
+
+ type = stnode_type_id(st_node);
+
+ switch (type) {
+ case STTYPE_LITERAL:
+ /* A literal has no type of its own: borrow the hint if any. */
+ if (lhs_ftype != FT_NONE) {
+ fvalue_t *fvalue = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_node, false, NULL);
+ stnode_replace(st_node, STTYPE_FVALUE, fvalue);
+ ftype = fvalue_type_ftenum(fvalue);
+ }
+ else {
+ ftype = FT_NONE;
+ }
+ break;
+
+ case STTYPE_FIELD:
+ /* Only plain fields are counted; references share the type lookup below. */
+ dfw->field_count++;
+ /* fall-through */
+ case STTYPE_REFERENCE:
+ ftype = sttype_field_ftenum(st_node);
+ break;
+
+ case STTYPE_FUNCTION:
+ ftype = check_function(dfw, st_node, lhs_ftype);
+ break;
+
+ case STTYPE_SLICE:
+ ftype = check_slice(dfw, st_node, lhs_ftype);
+ break;
+
+ case STTYPE_FVALUE:
+ ftype = fvalue_type_ftenum(stnode_data(st_node));
+ break;
+
+ case STTYPE_ARITHMETIC:
+ /* Nested operator: recurse through check_arithmetic_LHS(). */
+ sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2);
+ ftype = check_arithmetic_LHS(dfw, st_op, st_node, st_arg1, st_arg2, lhs_ftype);
+ break;
+
+ default:
+ /* NOTE(review): 'ftype' is not assigned on this path; this relies on
+ * FAIL() not returning (its definition is outside this chunk). */
+ FAIL(dfw, st_node, "%s is not a valid arithmetic operation.",
+ stnode_todisplay(st_node));
+ }
+
+ return ftype;
+}
+
+
+/*
+ * Check a whole (sub)tree. The field counter is reset, the node is handed
+ * to the checker for its syntax-tree type, and the expression is rejected
+ * as constant if no field was counted while checking it.
+ */
+static void
+semcheck(dfwork_t *dfw, stnode_t *st_node)
+{
+	sttype_id_t node_type;
+
+	LOG_NODE(st_node);
+
+	dfw->field_count = 0;
+
+	node_type = stnode_type_id(st_node);
+	if (node_type == STTYPE_TEST) {
+		check_test(dfw, st_node);
+	}
+	else if (node_type == STTYPE_ARITHMETIC || node_type == STTYPE_SLICE) {
+		check_nonzero(dfw, st_node);
+	}
+	else {
+		check_exists(dfw, st_node);
+	}
+
+	if (dfw->field_count == 0) {
+		FAIL(dfw, st_node, "Constant expression is invalid.");
+	}
+}
+
+
+/* Check the syntax tree for semantic errors, and convert
+ * some of the nodes into the form they need to be in order to
+ * later generate the DFVM bytecode.
+ *
+ * Returns true when the check of dfw->st_root completed without a
+ * TypeError being raised, false otherwise. */
+bool
+dfw_semcheck(dfwork_t *dfw)
+{
+ /* Written inside CATCH and read after ENDTRY.
+ * NOTE(review): 'volatile' presumably protects the value across the
+ * non-local jump performed by the exception macros - confirm. */
+ volatile bool ok_filter = true;
+
+ ws_debug("Starting semantic check (dfw = %p)", dfw);
+
+ /* Instead of having to check for errors at every stage of
+ * the semantic-checking, the semantic-checking code will
+ * throw an exception if a problem is found. */
+ TRY {
+ semcheck(dfw, dfw->st_root);
+ }
+ CATCH(TypeError) {
+ ok_filter = false;
+ }
+ ENDTRY;
+
+ ws_debug("Semantic check (dfw = %p) returns %s",
+ dfw, ok_filter ? "TRUE" : "FALSE");
+
+ return ok_filter;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/semcheck.h b/epan/dfilter/semcheck.h
new file mode 100644
index 0000000..261bdc6
--- /dev/null
+++ b/epan/dfilter/semcheck.h
@@ -0,0 +1,31 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SEMCHECK_H
+#define SEMCHECK_H
+
+#include "dfilter-int.h"
+
+bool
+dfw_semcheck(dfwork_t *dfw);
+
+ftenum_t
+check_arithmetic(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype);
+
+ftenum_t
+check_function(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype);
+
+ftenum_t
+check_slice(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype);
+
+bool
+compatible_ftypes(ftenum_t a, ftenum_t b);
+
+#endif
diff --git a/epan/dfilter/sttype-field.c b/epan/dfilter/sttype-field.c
new file mode 100644
index 0000000..8df1c84
--- /dev/null
+++ b/epan/dfilter/sttype-field.c
@@ -0,0 +1,235 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/* The ideas in this code came from Ed Warnicke's original implementation
+ * of dranges for the old display filter code (Ethereal 0.8.15 and before).
+ * The code is different, but definitely inspired by his code.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+
+#include <epan/proto.h>
+#include "sttype-field.h"
+#include <wsutil/ws_assert.h>
+
+/* Payload of a FIELD or REFERENCE syntax-tree node. */
+typedef struct {
+ /* Set to FIELD_MAGIC by field_new(); verified with ws_assert_magic(). */
+ uint32_t magic;
+ /* Registered header field this node refers to. */
+ header_field_info *hfinfo;
+ /* Optional range attached to the field; NULL when none is set. */
+ drange_t *drange;
+ /* When true the field is typed as FT_BYTES and printed with a '@' prefix. */
+ bool raw;
+} field_t;
+
+/* Type tag stored in field_t.magic. */
+#define FIELD_MAGIC 0xfc2002cf
+
+/*
+ * Allocate the payload for a FIELD/REFERENCE node referring to 'hfinfo'.
+ * The new payload has no range and is not raw.
+ */
+static void *
+field_new(void *hfinfo)
+{
+	field_t *self = g_new(field_t, 1);
+
+	self->magic = FIELD_MAGIC;
+	self->hfinfo = hfinfo;
+	self->raw = false;
+	self->drange = NULL;
+
+	return self;
+}
+
+/*
+ * Copy a field payload. The header field pointer is shared, the range is
+ * duplicated with drange_dup(), and the raw flag is carried over.
+ */
+static void *
+field_dup(gconstpointer data)
+{
+	const field_t *src = data;
+	field_t *copy;
+
+	ws_assert_magic(src, FIELD_MAGIC);
+
+	copy = field_new(src->hfinfo);
+	copy->drange = drange_dup(src->drange);
+	copy->raw = src->raw;
+
+	return copy;
+}
+
+/* Release a field payload together with its range, if one is attached. */
+static void
+field_free(void *data)
+{
+	field_t *self = data;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	if (self->drange != NULL) {
+		drange_free(self->drange);
+	}
+	g_free(self);
+}
+
+/*
+ * Build the textual representation of a field payload:
+ *   ['@'] abbrev ['#[' range ']'] ' <' type '>'
+ * A raw field is prefixed with '@' and always shown as FT_BYTES.
+ * The 'pretty' flag does not change the output.
+ */
+static char *
+field_tostr(const void *data, bool pretty _U_)
+{
+	const field_t *self = data;
+	wmem_strbuf_t *out;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	out = wmem_strbuf_new(NULL, NULL);
+
+	if (self->raw) {
+		wmem_strbuf_append_c(out, '@');
+	}
+
+	wmem_strbuf_append(out, self->hfinfo->abbrev);
+
+	if (self->drange != NULL) {
+		char *range_text = drange_tostr(self->drange);
+
+		wmem_strbuf_append_printf(out, "#[%s]", range_text);
+		g_free(range_text);
+	}
+
+	if (!self->raw) {
+		wmem_strbuf_append_printf(out, " <%s>",
+				ftype_name(self->hfinfo->type));
+	}
+	else {
+		wmem_strbuf_append(out, " <FT_BYTES>");
+	}
+
+	return wmem_strbuf_finalize(out);
+}
+
+/* Return the header field info referenced by a FIELD/REFERENCE node. */
+header_field_info *
+sttype_field_hfinfo(stnode_t *node)
+{
+	const field_t *self = node->data;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	return self->hfinfo;
+}
+
+/*
+ * Return the effective type of a FIELD/REFERENCE node: FT_BYTES when the
+ * node is flagged raw, otherwise the type of its header field.
+ */
+ftenum_t
+sttype_field_ftenum(stnode_t *node)
+{
+	const field_t *self = node->data;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	return self->raw ? FT_BYTES : self->hfinfo->type;
+}
+
+/* Return the range attached to the node (NULL if none); the node keeps
+ * ownership. */
+drange_t *
+sttype_field_drange(stnode_t *node)
+{
+	const field_t *self = node->data;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	return self->drange;
+}
+
+/* Tell whether the node is flagged as a raw field. */
+bool
+sttype_field_raw(stnode_t *node)
+{
+	const field_t *self = node->data;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	return self->raw;
+}
+
+/*
+ * Detach and return the node's range. Afterwards the node has no range,
+ * so field_free() will not release the returned object.
+ */
+drange_t *
+sttype_field_drange_steal(stnode_t *node)
+{
+	field_t *self = stnode_data(node);
+	drange_t *stolen;
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	stolen = self->drange;
+	self->drange = NULL;
+
+	return stolen;
+}
+
+/* Set a range, built from a list of range nodes. The node must not
+ * already have a range. */
+void
+sttype_field_set_range(stnode_t *node, GSList* drange_list)
+{
+	field_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FIELD_MAGIC);
+	ws_assert(self->drange == NULL);
+
+	self->drange = drange_new_from_list(drange_list);
+}
+
+/* Set a range consisting of the single range node 'rn'. The node must not
+ * already have a range. */
+void
+sttype_field_set_range1(stnode_t *node, drange_node *rn)
+{
+	field_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FIELD_MAGIC);
+	ws_assert(self->drange == NULL);
+
+	self->drange = drange_new(rn);
+}
+
+/* Attach an already built range to the node, which stores the pointer
+ * as-is. The node must not already have a range. */
+void
+sttype_field_set_drange(stnode_t *node, drange_t *dr)
+{
+	field_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FIELD_MAGIC);
+	ws_assert(self->drange == NULL);
+
+	self->drange = dr;
+}
+
+/* Set or clear the raw flag of a FIELD/REFERENCE node. */
+void
+sttype_field_set_raw(stnode_t *node, bool raw)
+{
+	field_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FIELD_MAGIC);
+
+	self->raw = raw;
+}
+
+/*
+ * Parse 'number_str' into a range node and attach it to the field as a
+ * single-node range.
+ * Returns NULL on success; otherwise the error message produced by
+ * drange_node_from_str(), and the node is left untouched.
+ */
+char *
+sttype_field_set_number(stnode_t *node, const char *number_str)
+{
+	char *parse_err = NULL;
+	drange_node *parsed = drange_node_from_str(number_str, &parse_err);
+
+	if (parse_err == NULL) {
+		sttype_field_set_range1(node, parsed);
+	}
+
+	return parse_err;
+}
+
+/*
+ * Register the FIELD and REFERENCE syntax-tree types. Both share the same
+ * payload callbacks (field_new / field_free / field_dup / field_tostr) and
+ * differ only in type id and name.
+ */
+void
+sttype_register_field(void)
+{
+ /* Initialisers are positional: id, name, new, free, dup, tostr. */
+ static sttype_t field_type = {
+ STTYPE_FIELD,
+ "FIELD",
+ field_new,
+ field_free,
+ field_dup,
+ field_tostr
+ };
+ static sttype_t reference_type = {
+ STTYPE_REFERENCE,
+ "REFERENCE",
+ field_new,
+ field_free,
+ field_dup,
+ field_tostr
+ };
+
+ sttype_register(&field_type);
+ sttype_register(&reference_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-field.h b/epan/dfilter/sttype-field.h
new file mode 100644
index 0000000..b890b84
--- /dev/null
+++ b/epan/dfilter/sttype-field.h
@@ -0,0 +1,54 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_FIELD_H
+#define STTYPE_FIELD_H
+
+#include "dfilter-int.h"
+#include "drange.h"
+
+
+header_field_info *
+sttype_field_hfinfo(stnode_t *node);
+
+ftenum_t
+sttype_field_ftenum(stnode_t *node);
+
+drange_t *
+sttype_field_drange(stnode_t *node);
+
+drange_t *
+sttype_field_drange_steal(stnode_t *node);
+
+bool
+sttype_field_raw(stnode_t *node);
+
+/* Set a range */
+void
+sttype_field_set_range(stnode_t *node, GSList* drange_list);
+
+void
+sttype_field_set_range1(stnode_t *node, drange_node *rn);
+
+void
+sttype_field_set_drange(stnode_t *node, drange_t *dr);
+
+void
+sttype_field_set_raw(stnode_t *node, bool raw);
+
+char *
+sttype_field_set_number(stnode_t *node, const char *number_str);
+
+/* Clear the 'drange' variable to remove responsibility for
+ * freeing it.
+ *
+ * NOTE(review): sttype-field.c does not define a function with this name;
+ * sttype_field_drange_steal() is the function there that clears 'drange'
+ * and returns it. This prototype looks stale - confirm before using it. */
+void
+sttype_field_remove_drange(stnode_t *node);
+
+#endif
diff --git a/epan/dfilter/sttype-function.c b/epan/dfilter/sttype-function.c
new file mode 100644
index 0000000..4bdd854
--- /dev/null
+++ b/epan/dfilter/sttype-function.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2006 by Gilbert Ramirez <gram@alumni.rice.edu>
+ *
+ * Wireshark - Network traffic analyzer
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#include "syntax-tree.h"
+#include "sttype-function.h"
+#include <wsutil/ws_assert.h>
+
+/* Payload of a FUNCTION syntax-tree node. */
+typedef struct {
+ /* Set to FUNCTION_MAGIC by function_new(); verified with ws_assert_magic(). */
+ uint32_t magic;
+ /* Definition record of the called function (not freed by function_free()). */
+ df_func_def_t *funcdef;
+ /* List of stnode_t* arguments; released by function_free(). */
+ GSList *params;
+} function_t;
+
+/* Type tag stored in function_t.magic. */
+#define FUNCTION_MAGIC 0xe10f0f99
+
+/*
+ * Allocate the payload for a FUNCTION node calling 'funcdef'.
+ * The parameter list starts out empty.
+ */
+static void *
+function_new(void *funcdef)
+{
+	function_t *self = g_new(function_t, 1);
+
+	self->magic = FUNCTION_MAGIC;
+	self->params = NULL;
+	self->funcdef = funcdef;
+
+	return self;
+}
+
+/*
+ * Copy a function payload. The function definition pointer is shared;
+ * every parameter node is duplicated with stnode_dup(), preserving the
+ * original order.
+ */
+static void *
+function_dup(gconstpointer data)
+{
+	const function_t *org = data;
+	function_t *stfuncrec;
+	GSList *copied = NULL;
+	GSList *p;
+
+	/* Same sanity check the other payload callbacks perform. */
+	ws_assert_magic(org, FUNCTION_MAGIC);
+
+	stfuncrec = function_new(org->funcdef);
+
+	/* Prepend + one final reverse keeps the copy linear in the number
+	 * of parameters; appending would rescan the list on every step. */
+	for (p = org->params; p; p = p->next) {
+		const stnode_t *param = p->data;
+		copied = g_slist_prepend(copied, stnode_dup(param));
+	}
+	stfuncrec->params = g_slist_reverse(copied);
+
+	return stfuncrec;
+}
+
+/*
+ * Build the textual representation of a function payload.
+ * pretty:  "name(arg1, arg2, ...)" using each argument's own representation.
+ * compact: "name#N" where N is the number of arguments.
+ * The returned string is the buffer released from the GString.
+ */
+static char *
+function_tostr(const void *data, bool pretty)
+{
+	const function_t *self = data;
+	const df_func_def_t *def = self->funcdef;
+	GSList *arg = self->params;
+	GString *out = g_string_new("");
+
+	ws_assert(def);
+
+	if (!pretty) {
+		g_string_printf(out, "%s#%u", def->name, g_slist_length(arg));
+		return g_string_free(out, false);
+	}
+
+	g_string_printf(out, "%s(", def->name);
+	for (; arg != NULL; arg = arg->next) {
+		ws_assert(arg->data);
+		g_string_append(out, stnode_tostr(arg->data, pretty));
+		/* Separator between arguments, none after the last one. */
+		if (arg->next != NULL) {
+			g_string_append(out, ", ");
+		}
+	}
+	g_string_append_c(out, ')');
+
+	return g_string_free(out, false);
+}
+
+/* Per-element destructor handed to g_slist_free_full(): frees the
+ * syntax-tree node stored in one list element. */
+static void
+slist_stnode_free(void *data)
+{
+ stnode_free(data);
+}
+
+/* Free a function parameter list: every stnode_t element and the list
+ * cells themselves. */
+void
+st_funcparams_free(GSList *params)
+{
+ g_slist_free_full(params, slist_stnode_free);
+}
+
+/* Release a function payload and its parameter nodes. The function
+ * definition record is not touched. */
+static void
+function_free(void *value)
+{
+	function_t *self = value;
+
+	ws_assert_magic(self, FUNCTION_MAGIC);
+
+	st_funcparams_free(self->params);
+	g_free(self);
+}
+
+
+/* Set the parameters for a function stnode_t. The list pointer is stored
+ * as-is; a previously stored list is overwritten, not freed. */
+void
+sttype_function_set_params(stnode_t *node, GSList *params)
+{
+	function_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FUNCTION_MAGIC);
+
+	self->params = params;
+}
+
+/* Get the function-definition record for a function stnode_t. */
+df_func_def_t*
+sttype_function_funcdef(stnode_t *node)
+{
+	function_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FUNCTION_MAGIC);
+
+	return self->funcdef;
+}
+
+/* Get the name of the function called by a function stnode_t, as stored
+ * in its definition record. */
+const char *
+sttype_function_name(stnode_t *node)
+{
+	function_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FUNCTION_MAGIC);
+
+	return self->funcdef->name;
+}
+
+/* Get the parameters for a function stnode_t. The node keeps ownership
+ * of the returned list. */
+GSList*
+sttype_function_params(stnode_t *node)
+{
+	function_t *self = stnode_data(node);
+
+	ws_assert_magic(self, FUNCTION_MAGIC);
+
+	return self->params;
+}
+
+
+/* Register the FUNCTION syntax-tree type with its payload callbacks. */
+void
+sttype_register_function(void)
+{
+ /* Initialisers are positional: id, name, new, free, dup, tostr. */
+ static sttype_t function_type = {
+ STTYPE_FUNCTION,
+ "FUNCTION",
+ function_new,
+ function_free,
+ function_dup,
+ function_tostr
+ };
+
+ sttype_register(&function_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-function.h b/epan/dfilter/sttype-function.h
new file mode 100644
index 0000000..6f1cb3e
--- /dev/null
+++ b/epan/dfilter/sttype-function.h
@@ -0,0 +1,32 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_FUNCTION_H
+#define STTYPE_FUNCTION_H
+
+#include "dfilter-int.h"
+#include "dfunctions.h"
+
+/* Set the parameters for a function stnode_t. */
+void
+sttype_function_set_params(stnode_t *node, GSList *params);
+
+/* Get the function-definition record for a function stnode_t. */
+df_func_def_t* sttype_function_funcdef(stnode_t *node);
+
+const char *sttype_function_name(stnode_t *node);
+
+/* Get the parameters for a function stnode_t. */
+GSList* sttype_function_params(stnode_t *node);
+
+/* Free the memory of a param list */
+void st_funcparams_free(GSList *params);
+
+#endif
diff --git a/epan/dfilter/sttype-op.c b/epan/dfilter/sttype-op.c
new file mode 100644
index 0000000..2f83f45
--- /dev/null
+++ b/epan/dfilter/sttype-op.c
@@ -0,0 +1,409 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "syntax-tree.h"
+#include "sttype-op.h"
+
+/* Payload of the TEST and ARITHMETIC syntax tree node types. */
+typedef struct {
+ uint32_t magic; /* OPER_MAGIC, set by oper_new() */
+ stnode_op_t op; /* STNODE_OP_UNINITIALIZED until an operator is set */
+ stmatch_t how; /* match mode, STNODE_MATCH_DEF by default */
+ stnode_t *val1; /* first operand; freed by oper_free() when non-NULL */
+ stnode_t *val2; /* second operand; NULL for one-operand operators */
+} oper_t;
+
+/* Value of oper_t.magic, verified with ws_assert_magic(). */
+#define OPER_MAGIC 0xab9009ba
+
+/* Allocate an operator record with no operator and no operands.
+ * The argument is not used and is asserted to be NULL. */
+static void *
+oper_new(void *junk _U_)
+{
+	oper_t *rec;
+
+	ws_assert(junk == NULL);
+
+	rec = g_new(oper_t, 1);
+	rec->magic = OPER_MAGIC;
+	rec->val1 = NULL;
+	rec->val2 = NULL;
+	rec->how = STNODE_MATCH_DEF;
+	rec->op = STNODE_OP_UNINITIALIZED;
+
+	return rec;
+}
+
+/* Deep-copy an operator record.
+ * The operator and match mode are copied and every operand that is present
+ * is duplicated with stnode_dup().  Returns a newly allocated oper_t. */
+static void *
+oper_dup(gconstpointer data)
+{
+	const oper_t *org = data;
+	oper_t *oper;
+
+	ws_assert_magic(org, OPER_MAGIC);
+
+	/* oper_new() leaves both operands NULL. */
+	oper = oper_new(NULL);
+	oper->op = org->op;
+	oper->how = org->how;
+	/* One-operand operators keep val2 == NULL, and stnode_dup() asserts
+	 * on (and dereferences) its argument, so only copy operands that
+	 * actually exist. */
+	if (org->val1)
+		oper->val1 = stnode_dup(org->val1);
+	if (org->val2)
+		oper->val2 = stnode_dup(org->val2);
+
+	return oper;
+}
+
+/* Free an operator record together with the operands it still holds. */
+static void
+oper_free(void *value)
+{
+	oper_t *rec = value;
+
+	ws_assert_magic(rec, OPER_MAGIC);
+
+	if (rec->val1 != NULL) {
+		stnode_free(rec->val1);
+	}
+	if (rec->val2 != NULL) {
+		stnode_free(rec->val2);
+	}
+
+	g_free(rec);
+}
+
+/* Return a newly allocated string with the display symbol of the record's
+ * operator.  Unary minus and subtraction share the "-" symbol; a value
+ * that matches no case yields "<notset>". */
+static char *
+oper_todisplay(const oper_t *oper)
+{
+	const char *sym = "<notset>";
+
+	switch (oper->op) {
+	case STNODE_OP_UNINITIALIZED: sym = "<uninitialized>"; break;
+	/* Logical operators */
+	case STNODE_OP_NOT: sym = "!"; break;
+	case STNODE_OP_AND: sym = "&&"; break;
+	case STNODE_OP_OR: sym = "||"; break;
+	/* Comparisons */
+	case STNODE_OP_ALL_EQ: sym = "==="; break;
+	case STNODE_OP_ANY_EQ: sym = "=="; break;
+	case STNODE_OP_ALL_NE: sym = "!="; break;
+	case STNODE_OP_ANY_NE: sym = "~="; break;
+	case STNODE_OP_GT: sym = ">"; break;
+	case STNODE_OP_GE: sym = ">="; break;
+	case STNODE_OP_LT: sym = "<"; break;
+	case STNODE_OP_LE: sym = "<="; break;
+	/* Arithmetic */
+	case STNODE_OP_BITWISE_AND: sym = "&"; break;
+	case STNODE_OP_ADD: sym = "+"; break;
+	case STNODE_OP_UNARY_MINUS: sym = "-"; break;
+	case STNODE_OP_SUBTRACT: sym = "-"; break;
+	case STNODE_OP_MULTIPLY: sym = "*"; break;
+	case STNODE_OP_DIVIDE: sym = "/"; break;
+	case STNODE_OP_MODULO: sym = "%"; break;
+	/* Keyword operators */
+	case STNODE_OP_CONTAINS: sym = "contains"; break;
+	case STNODE_OP_MATCHES: sym = "matches"; break;
+	case STNODE_OP_IN: sym = "in"; break;
+	case STNODE_OP_NOT_IN: sym = "not in"; break;
+	}
+
+	return g_strdup(sym);
+}
+
+/* Return a newly allocated string with the debug name of the record's
+ * operator, prefixed with "ALL " or "ANY " when the match mode is
+ * STNODE_MATCH_ALL or STNODE_MATCH_ANY. */
+static char *
+oper_todebug(const oper_t *oper)
+{
+	const char *name = "<notset>";
+
+	switch (oper->op) {
+	case STNODE_OP_UNINITIALIZED: name = "<uninitialized>"; break;
+	/* Tests */
+	case STNODE_OP_NOT: name = "TEST_NOT"; break;
+	case STNODE_OP_AND: name = "TEST_AND"; break;
+	case STNODE_OP_OR: name = "TEST_OR"; break;
+	case STNODE_OP_ALL_EQ: name = "TEST_ALL_EQ"; break;
+	case STNODE_OP_ANY_EQ: name = "TEST_ANY_EQ"; break;
+	case STNODE_OP_ALL_NE: name = "TEST_ALL_NE"; break;
+	case STNODE_OP_ANY_NE: name = "TEST_ANY_NE"; break;
+	case STNODE_OP_GT: name = "TEST_GT"; break;
+	case STNODE_OP_GE: name = "TEST_GE"; break;
+	case STNODE_OP_LT: name = "TEST_LT"; break;
+	case STNODE_OP_LE: name = "TEST_LE"; break;
+	case STNODE_OP_CONTAINS: name = "TEST_CONTAINS"; break;
+	case STNODE_OP_MATCHES: name = "TEST_MATCHES"; break;
+	case STNODE_OP_IN: name = "TEST_IN"; break;
+	case STNODE_OP_NOT_IN: name = "TEST_NOT_IN"; break;
+	/* Arithmetic */
+	case STNODE_OP_BITWISE_AND: name = "OP_BITWISE_AND"; break;
+	case STNODE_OP_UNARY_MINUS: name = "OP_UNARY_MINUS"; break;
+	case STNODE_OP_ADD: name = "OP_ADD"; break;
+	case STNODE_OP_SUBTRACT: name = "OP_SUBTRACT"; break;
+	case STNODE_OP_MULTIPLY: name = "OP_MULTIPLY"; break;
+	case STNODE_OP_DIVIDE: name = "OP_DIVIDE"; break;
+	case STNODE_OP_MODULO: name = "OP_MODULO"; break;
+	}
+
+	switch (oper->how) {
+	case STNODE_MATCH_ALL:
+		return g_strdup_printf("ALL %s", name);
+	case STNODE_MATCH_ANY:
+		return g_strdup_printf("ANY %s", name);
+	default:
+		return g_strdup(name);
+	}
+}
+
+/* String callback of the operator types: the display symbol when `pretty`
+ * is set, the debug name otherwise.  Returns a newly allocated string. */
+static char *
+oper_tostr(const void *value, bool pretty)
+{
+	const oper_t *rec = value;
+
+	ws_assert_magic(rec, OPER_MAGIC);
+
+	return pretty ? oper_todisplay(rec) : oper_todebug(rec);
+}
+
+/* Return how many operands `op` takes (1 or 2).
+ * STNODE_OP_UNINITIALIZED, or a value outside the enum, reaches
+ * ws_assert_not_reached(). */
+static int
+num_operands(stnode_op_t op)
+{
+	switch (op) {
+	/* One operand */
+	case STNODE_OP_NOT:
+	case STNODE_OP_UNARY_MINUS:
+		return 1;
+
+	/* Two operands */
+	case STNODE_OP_AND:
+	case STNODE_OP_OR:
+	case STNODE_OP_ALL_EQ:
+	case STNODE_OP_ANY_EQ:
+	case STNODE_OP_ALL_NE:
+	case STNODE_OP_ANY_NE:
+	case STNODE_OP_GT:
+	case STNODE_OP_GE:
+	case STNODE_OP_LT:
+	case STNODE_OP_LE:
+	case STNODE_OP_CONTAINS:
+	case STNODE_OP_MATCHES:
+	case STNODE_OP_IN:
+	case STNODE_OP_NOT_IN:
+	case STNODE_OP_BITWISE_AND:
+	case STNODE_OP_ADD:
+	case STNODE_OP_SUBTRACT:
+	case STNODE_OP_MULTIPLY:
+	case STNODE_OP_DIVIDE:
+	case STNODE_OP_MODULO:
+		return 2;
+
+	/* No operand count is defined for an unset operator. */
+	case STNODE_OP_UNINITIALIZED:
+		break;
+	}
+
+	ws_assert_not_reached();
+	return -1;
+}
+
+
+/* Store a one-operand operator and its operand in an operator node.
+ * Asserts that `op` takes exactly one operand; the second operand slot is
+ * set to NULL. */
+void
+sttype_oper_set1(stnode_t *node, stnode_op_t op, stnode_t *val1)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+	ws_assert(num_operands(op) == 1);
+
+	rec->op = op;
+	rec->val2 = NULL;
+	rec->val1 = val1;
+}
+
+/* Store a two-operand operator and both operands in an operator node.
+ * Asserts that `op` takes exactly two operands. */
+void
+sttype_oper_set2(stnode_t *node, stnode_op_t op, stnode_t *val1, stnode_t *val2)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+	ws_assert(num_operands(op) == 2);
+
+	rec->op = op;
+	rec->val2 = val2;
+	rec->val1 = val1;
+}
+
+/* Set the operand of a node whose operator is already stored.
+ * Asserts that the stored operator takes one operand; the second operand
+ * slot is set to NULL. */
+void
+sttype_oper_set1_args(stnode_t *node, stnode_t *val1)
+{
+	oper_t *rec = (oper_t *)stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+	ws_assert(num_operands(rec->op) == 1);
+
+	rec->val2 = NULL;
+	rec->val1 = val1;
+}
+
+/* Set both operands of a node whose operator is already stored.
+ * Asserts that the stored operator takes two operands. */
+void
+sttype_oper_set2_args(stnode_t *node, stnode_t *val1, stnode_t *val2)
+{
+	oper_t *rec = (oper_t *)stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+	ws_assert(num_operands(rec->op) == 2);
+
+	rec->val2 = val2;
+	rec->val1 = val1;
+}
+
+/* Set the operator of an operator node.  Asserts that no operator has been
+ * stored yet; the operands are left untouched. */
+void
+sttype_oper_set_op(stnode_t *node, stnode_op_t op)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+	ws_assert(rec->op == STNODE_OP_UNINITIALIZED);
+
+	rec->op = op;
+}
+
+/* Return the operator stored in an operator node. */
+stnode_op_t
+sttype_oper_get_op(stnode_t *node)
+{
+	/* The oper_t record is the node's payload, not the node itself:
+	 * fetch it with stnode_data() (as every other accessor here does)
+	 * before checking the magic number and reading the operator. */
+	oper_t *oper = stnode_data(node);
+	ws_assert_magic(oper, OPER_MAGIC);
+	return oper->op;
+}
+
+/* Read the operator and operands of an operator node.
+ * Each output pointer may be NULL, in which case that value is skipped.
+ * The operands are returned as stored; no copy is made. */
+void
+sttype_oper_get(stnode_t *node, stnode_op_t *p_op, stnode_t **p_val1, stnode_t **p_val2)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+
+	if (p_op != NULL) {
+		*p_op = rec->op;
+	}
+	if (p_val1 != NULL) {
+		*p_val1 = rec->val1;
+	}
+	if (p_val2 != NULL) {
+		*p_val2 = rec->val2;
+	}
+}
+
+/* Set the match mode of an operator node. */
+void
+sttype_test_set_match(stnode_t *node, stmatch_t how)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+
+	rec->how = how;
+}
+
+/* Return the match mode of an operator node. */
+stmatch_t
+sttype_test_get_match(stnode_t *node)
+{
+	oper_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, OPER_MAGIC);
+
+	return rec->how;
+}
+
+/* Register the TEST and ARITHMETIC syntax tree types.  Both use the same
+ * oper_t callbacks and differ only in id and name. */
+void
+sttype_register_opers(void)
+{
+	static sttype_t test_type = {
+		.id = STTYPE_TEST,
+		.name = "TEST",
+		.func_new = oper_new,
+		.func_free = oper_free,
+		.func_dup = oper_dup,
+		.func_tostr = oper_tostr,
+	};
+	static sttype_t arithmetic_type = {
+		.id = STTYPE_ARITHMETIC,
+		.name = "ARITHMETIC",
+		.func_new = oper_new,
+		.func_free = oper_free,
+		.func_dup = oper_dup,
+		.func_tostr = oper_tostr,
+	};
+
+	sttype_register(&test_type);
+	sttype_register(&arithmetic_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-op.h b/epan/dfilter/sttype-op.h
new file mode 100644
index 0000000..ca99981
--- /dev/null
+++ b/epan/dfilter/sttype-op.h
@@ -0,0 +1,43 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_TEST_H
+#define STTYPE_TEST_H
+
+#include "syntax-tree.h"
+
+void
+sttype_oper_set1(stnode_t *node, stnode_op_t op, stnode_t *val1);
+
+void
+sttype_oper_set2(stnode_t *node, stnode_op_t op, stnode_t *val1, stnode_t *val2);
+
+void
+sttype_oper_set1_args(stnode_t *node, stnode_t *val1);
+
+void
+sttype_oper_set2_args(stnode_t *node, stnode_t *val1, stnode_t *val2);
+
+void
+sttype_oper_set_op(stnode_t *node, stnode_op_t op);
+
+stnode_op_t
+sttype_oper_get_op(stnode_t *node);
+
+void
+sttype_oper_get(stnode_t *node, stnode_op_t *p_op, stnode_t **p_val1, stnode_t **p_val2);
+
+void
+sttype_test_set_match(stnode_t *node, stmatch_t how);
+
+stmatch_t
+sttype_test_get_match(stnode_t *node);
+
+#endif
diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c
new file mode 100644
index 0000000..2a29287
--- /dev/null
+++ b/epan/dfilter/sttype-pointer.c
@@ -0,0 +1,149 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+#include "sttype-pointer.h"
+
+#include "ftypes/ftypes.h"
+#include "syntax-tree.h"
+#include <epan/proto.h> // For BASE_NONE
+
+/* Free callback of the FVALUE type.  A NULL pointer (for instance after
+ * the value was claimed with stnode_steal_data()) is ignored. */
+static void
+sttype_fvalue_free(void *value)
+{
+	fvalue_t *fv = value;
+
+	if (fv == NULL)
+		return;
+
+	fvalue_free(fv);
+}
+
+/* Free callback of the PCRE type.  A NULL pointer (for instance after the
+ * regex was claimed with stnode_steal_data()) is ignored. */
+static void
+pcre_free(void *value)
+{
+	ws_regex_t *re = value;
+
+	if (re == NULL)
+		return;
+
+	ws_regex_free(re);
+}
+
+/* String callback of the FVALUE type.
+ * Pretty mode returns the display-filter representation of the value;
+ * debug mode appends the value's type name in angle brackets.
+ * Returns a newly allocated string. */
+static char *
+sttype_fvalue_tostr(const void *data, bool pretty)
+{
+	const fvalue_t *fv = data;
+	char *value_str;
+	char *result;
+
+	value_str = fvalue_to_string_repr(NULL, fv, FTREPR_DFILTER, BASE_NONE);
+	result = pretty
+		? g_strdup(value_str)
+		: ws_strdup_printf("%s <%s>", value_str, fvalue_type_name(fv));
+	g_free(value_str);
+
+	return result;
+}
+
+/* String callback of the PCRE type: a newly allocated copy of the regex
+ * pattern.  The `pretty` flag makes no difference. */
+static char *
+pcre_tostr(const void *data, bool pretty _U_)
+{
+	const char *pattern = ws_regex_pattern(data);
+
+	return g_strdup(pattern);
+}
+
+/* String callback of the CHARCONST type.
+ * The payload is read as an unsigned long.  Values up to 0x7f are written
+ * as a quoted C escape sequence when one is listed below, or as the quoted
+ * character when it is printable ASCII; every other value is written as a
+ * quoted hexadecimal escape.  Returns a newly allocated string. */
+static char *
+charconst_tostr(const void *data, bool pretty _U_)
+{
+	const unsigned long *value_ptr = data;
+	unsigned long code = *value_ptr;
+
+	if (code <= 0x7f) {
+		switch (code) {
+		case 0:
+			return g_strdup("'\\0'");
+		case '\a':
+			return g_strdup("'\\a'");
+		case '\b':
+			return g_strdup("'\\b'");
+		case '\f':
+			return g_strdup("'\\f'");
+		case '\n':
+			return g_strdup("'\\n'");
+		case '\r':
+			return g_strdup("'\\r'");
+		case '\t':
+			return g_strdup("'\\t'");
+		case '\v':
+			return g_strdup("'\\v'");
+		case '\'':
+			return g_strdup("'\\''");
+		case '\\':
+			return g_strdup("'\\\\'");
+		default:
+			break;
+		}
+
+		if (g_ascii_isprint(code))
+			return ws_strdup_printf("'%c'", (int)code);
+	}
+
+	/* Non-ASCII or non-printable value. */
+	return ws_strdup_printf("'\\x%02lx'", code);
+}
+
+/* Return the field type associated with a node.
+ * FIELD and REFERENCE nodes have their payload read as a
+ * header_field_info, FVALUE nodes report the type of their value, and
+ * every other node type yields FT_NONE. */
+ftenum_t
+sttype_pointer_ftenum(stnode_t *node)
+{
+	sttype_id_t id = node->type->id;
+
+	if (id == STTYPE_FIELD || id == STTYPE_REFERENCE) {
+		header_field_info *hfinfo = node->data;
+		return hfinfo->type;
+	}
+	if (id == STTYPE_FVALUE) {
+		return fvalue_type_ftenum(node->data);
+	}
+
+	return FT_NONE;
+}
+
+/* Register the FVALUE, PCRE and CHARCONST syntax tree types.
+ * None of them has a constructor or a dup callback, so nodes of these
+ * types hold the data pointer they were given. */
+void
+sttype_register_pointer(void)
+{
+	static sttype_t fvalue_type = {
+		.id = STTYPE_FVALUE,
+		.name = "FVALUE",
+		.func_new = NULL,
+		.func_free = sttype_fvalue_free,
+		.func_dup = NULL,
+		.func_tostr = sttype_fvalue_tostr,
+	};
+	static sttype_t pcre_type = {
+		.id = STTYPE_PCRE,
+		.name = "PCRE",
+		.func_new = NULL,
+		.func_free = pcre_free,
+		.func_dup = NULL,
+		.func_tostr = pcre_tostr,
+	};
+	static sttype_t charconst_type = {
+		.id = STTYPE_CHARCONST,
+		.name = "CHARCONST",
+		.func_new = NULL,
+		.func_free = g_free,
+		.func_dup = NULL,
+		.func_tostr = charconst_tostr,
+	};
+
+	sttype_register(&fvalue_type);
+	sttype_register(&pcre_type);
+	sttype_register(&charconst_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-pointer.h b/epan/dfilter/sttype-pointer.h
new file mode 100644
index 0000000..54aa28e
--- /dev/null
+++ b/epan/dfilter/sttype-pointer.h
@@ -0,0 +1,20 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_POINTER_H
+#define STTYPE_POINTER_H
+
+#include "dfilter-int.h"
+#include <epan/ftypes/ftypes.h>
+
+ftenum_t
+sttype_pointer_ftenum(stnode_t *node);
+
+#endif
diff --git a/epan/dfilter/sttype-set.c b/epan/dfilter/sttype-set.c
new file mode 100644
index 0000000..35b2114
--- /dev/null
+++ b/epan/dfilter/sttype-set.c
@@ -0,0 +1,99 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#include "syntax-tree.h"
+#include "sttype-set.h"
+#include <wsutil/ws_assert.h>
+
+/*
+ * The GSList stores a list of elements of the set. Each element is represented
+ * by two list items: (lower, upper) in case of a value range or (value, NULL)
+ * if the element is not a range value.
+ */
+
+/* List element destructor: free one node of a set list.  NULL elements
+ * (the missing upper bound of a non-range element) are skipped. */
+static void
+slist_stnode_free(void *data)
+{
+	if (data == NULL)
+		return;
+
+	stnode_free(data);
+}
+
+/* Free a set node list: every non-NULL node in it and the list itself. */
+void
+set_nodelist_free(GSList *params)
+{
+	GDestroyNotify free_element = slist_stnode_free;
+
+	g_slist_free_full(params, free_element);
+}
+
+/* Free callback of the SET type.  A NULL list (for instance after it was
+ * claimed with stnode_steal_data()) is ignored. */
+static void
+sttype_set_free(void *value)
+{
+	GSList *nodelist = value;
+
+	if (nodelist == NULL)
+		return;
+
+	set_nodelist_free(nodelist);
+}
+
+/* String callback of the SET type.
+ * The list is walked two items at a time (lower, upper).  Each element is
+ * written as "lower" or, when upper is not NULL, "lower..upper"; elements
+ * are separated by one space.  Returns a newly allocated string. */
+static char *
+sttype_set_tostr(const void *data, bool pretty)
+{
+	GString *out = g_string_new("");
+	const GSList *item = data;
+
+	while (item != NULL) {
+		stnode_t *lo = item->data;
+		stnode_t *hi;
+
+		g_string_append(out, stnode_tostr(lo, pretty));
+
+		/* The second item of the pair must exist, but may hold NULL. */
+		item = g_slist_next(item);
+		ws_assert(item);
+		hi = item->data;
+		if (hi != NULL) {
+			g_string_append(out, "..");
+			g_string_append(out, stnode_tostr(hi, pretty));
+		}
+
+		item = g_slist_next(item);
+		if (item != NULL) {
+			g_string_append_c(out, ' ');
+		}
+	}
+
+	return g_string_free(out, false);
+}
+
+/* Register the SET syntax tree type.  It has no constructor and no dup
+ * callback. */
+void
+sttype_register_set(void)
+{
+	static sttype_t set_type = {
+		.id = STTYPE_SET,
+		.name = "SET",
+		.func_new = NULL,
+		.func_free = sttype_set_free,
+		.func_dup = NULL,
+		.func_tostr = sttype_set_tostr,
+	};
+
+	sttype_register(&set_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-set.h b/epan/dfilter/sttype-set.h
new file mode 100644
index 0000000..7b2670b
--- /dev/null
+++ b/epan/dfilter/sttype-set.h
@@ -0,0 +1,24 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_SET_H
+#define STTYPE_SET_H
+
+#include <wireshark.h>
+
+#include "syntax-tree.h"
+
+bool
+sttype_set_convert_to_range(stnode_t **node_left, stnode_t **node_right);
+
+void
+set_nodelist_free(GSList *params);
+
+#endif
diff --git a/epan/dfilter/sttype-slice.c b/epan/dfilter/sttype-slice.c
new file mode 100644
index 0000000..a2bff76
--- /dev/null
+++ b/epan/dfilter/sttype-slice.c
@@ -0,0 +1,193 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/* The ideas in this code came from Ed Warnicke's original implementation
+ * of dranges for the old display filter code (Ethereal 0.8.15 and before).
+ * The code is different, but definitely inspired by his code.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+
+#include <epan/proto.h>
+#include "drange.h"
+#include "sttype-slice.h"
+#include <wsutil/ws_assert.h>
+
+/* Payload of the SLICE syntax tree node type. */
+typedef struct {
+ uint32_t magic; /* SLICE_MAGIC, set by slice_new() */
+ stnode_t *entity; /* node being sliced; freed by slice_free() when non-NULL */
+ drange_t *drange; /* ranges of the slice; freed by slice_free() when non-NULL */
+} slice_t;
+
+/* Value of slice_t.magic, verified with ws_assert_magic(). */
+#define SLICE_MAGIC 0xec0990ce
+
+/* Allocate a slice record with no entity and no range.
+ * The argument is not used and is asserted to be NULL. */
+static void *
+slice_new(void *junk _U_)
+{
+	slice_t *rec;
+
+	ws_assert(junk == NULL);
+
+	rec = g_new(slice_t, 1);
+	rec->magic = SLICE_MAGIC;
+	rec->drange = NULL;
+	rec->entity = NULL;
+
+	return rec;
+}
+
+/* Deep-copy a slice record.
+ * The entity node and the range are duplicated when present.
+ * Returns a newly allocated slice_t. */
+static void *
+slice_dup(gconstpointer data)
+{
+	const slice_t *org = data;
+	slice_t *slice;
+
+	ws_assert_magic(org, SLICE_MAGIC);
+
+	/* slice_new() leaves both members NULL. */
+	slice = slice_new(NULL);
+	/* Either member may legitimately be NULL: a fresh record has neither,
+	 * and sttype_slice_remove_drange()/sttype_slice_drange_steal() clear
+	 * the range.  stnode_dup() asserts on a NULL node, so only copy what
+	 * is actually there. */
+	if (org->entity)
+		slice->entity = stnode_dup(org->entity);
+	if (org->drange)
+		slice->drange = drange_dup(org->drange);
+
+	return slice;
+}
+
+/* Free a slice record together with the range and entity it still holds. */
+static void
+slice_free(void *value)
+{
+	slice_t *rec = value;
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	if (rec->drange != NULL) {
+		drange_free(rec->drange);
+	}
+	if (rec->entity != NULL) {
+		stnode_free(rec->entity);
+	}
+
+	g_free(rec);
+}
+
+/* String callback of the SLICE type: "<entity>[<range>]".
+ * Returns a newly allocated string. */
+static char *
+slice_tostr(const void *data, bool pretty)
+{
+	const slice_t *rec = data;
+	char *range_text;
+	char *result;
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	range_text = drange_tostr(rec->drange);
+	result = ws_strdup_printf("%s[%s]",
+			stnode_tostr(rec->entity, pretty),
+			range_text);
+	g_free(range_text);
+
+	return result;
+}
+
+/* Set the slice's range pointer to NULL without freeing the range, so the
+ * slice no longer frees it. */
+void
+sttype_slice_remove_drange(stnode_t *node)
+{
+	slice_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	rec->drange = NULL;
+}
+
+/* Detach and return the slice's range.  The slice's range pointer is set
+ * to NULL, so slice_free() will no longer free the returned range. */
+drange_t *
+sttype_slice_drange_steal(stnode_t *node)
+{
+	slice_t *rec = stnode_data(node);
+	drange_t *taken;
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	taken = rec->drange;
+	rec->drange = NULL;
+
+	return taken;
+}
+
+/* Set the entity of a slice node and build its range from a list of range
+ * nodes with drange_new_from_list().  Previously stored members are
+ * overwritten without being freed. */
+void
+sttype_slice_set(stnode_t *node, stnode_t *entity, GSList* drange_list)
+{
+	slice_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	rec->entity = entity;
+	rec->drange = drange_new_from_list(drange_list);
+}
+
+/* Convenience form of sttype_slice_set() for a single range node: wraps
+ * `rn` in a temporary one-element list, which is freed afterwards (the
+ * list cells only, not `rn`). */
+void
+sttype_slice_set1(stnode_t *node, stnode_t *entity, drange_node *rn)
+{
+	GSList *single = NULL;
+
+	single = g_slist_append(single, rn);
+	sttype_slice_set(node, entity, single);
+	g_slist_free(single);
+}
+
+/* Set the entity and an already built range of a slice node.  The range
+ * pointer is stored as is; slice_free() frees it later. */
+void
+sttype_slice_set_drange(stnode_t *node, stnode_t *field, drange_t *dr)
+{
+	slice_t *rec = stnode_data(node);
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	rec->drange = dr;
+	rec->entity = field;
+}
+
+/* Return the entity node of a slice node (no copy is made). */
+stnode_t *
+sttype_slice_entity(stnode_t *node)
+{
+	slice_t *rec = node->data;
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	return rec->entity;
+}
+
+/* Return the range of a slice node (no copy is made). */
+drange_t *
+sttype_slice_drange(stnode_t *node)
+{
+	slice_t *rec = node->data;
+
+	ws_assert_magic(rec, SLICE_MAGIC);
+
+	return rec->drange;
+}
+
+/* Register the SLICE syntax tree type and its callbacks. */
+void
+sttype_register_slice(void)
+{
+	static sttype_t slice_type = {
+		.id = STTYPE_SLICE,
+		.name = "SLICE",
+		.func_new = slice_new,
+		.func_free = slice_free,
+		.func_dup = slice_dup,
+		.func_tostr = slice_tostr,
+	};
+
+	sttype_register(&slice_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/sttype-slice.h b/epan/dfilter/sttype-slice.h
new file mode 100644
index 0000000..005675d
--- /dev/null
+++ b/epan/dfilter/sttype-slice.h
@@ -0,0 +1,42 @@
+/** @file
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STTYPE_SLICE_H
+#define STTYPE_SLICE_H
+
+#include "syntax-tree.h"
+#include "drange.h"
+
+
+stnode_t *
+sttype_slice_entity(stnode_t *node);
+
+drange_t *
+sttype_slice_drange(stnode_t *node);
+
+drange_t *
+sttype_slice_drange_steal(stnode_t *node);
+
+/* Set the entity and the ranges of a slice node. */
+void
+sttype_slice_set(stnode_t *node, stnode_t *field, GSList* drange_list);
+
+void
+sttype_slice_set1(stnode_t *node, stnode_t *field, drange_node *rn);
+
+void
+sttype_slice_set_drange(stnode_t *node, stnode_t *field, drange_t *dr);
+
+/* Clear the 'drange' variable to remove responsibility for
+ * freeing it. */
+void
+sttype_slice_remove_drange(stnode_t *node);
+
+#endif
diff --git a/epan/dfilter/sttype-string.c b/epan/dfilter/sttype-string.c
new file mode 100644
index 0000000..62fe203
--- /dev/null
+++ b/epan/dfilter/sttype-string.c
@@ -0,0 +1,88 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "syntax-tree.h"
+#include <wsutil/str_util.h>
+
+/* Dup callback of the LITERAL type: copy the NUL-terminated string. */
+static void *
+string_dup(gconstpointer string)
+{
+	const char *src = string;
+	char *copy = g_strdup(src);
+
+	return copy;
+}
+
+/* Free callback of the LITERAL type. */
+static void
+string_free(void *value)
+{
+	char *str = value;
+
+	g_free(str);
+}
+
+/* String callback of the LITERAL type: a newly allocated copy of the
+ * string.  The `pretty` flag makes no difference. */
+static char *
+string_tostr(const void *data, bool pretty _U_)
+{
+	const char *src = data;
+
+	return g_strdup(src);
+}
+
+/* Dup callback of the STRING type: create a new GString from the source's
+ * buffer and its recorded length. */
+static void *
+gstring_dup(gconstpointer value)
+{
+	const GString *src = value;
+	GString *copy = g_string_new_len(src->str, src->len);
+
+	return copy;
+}
+
+/* Free callback of the STRING type: release the GString together with its
+ * character buffer. */
+static void
+gstring_free(void *value)
+{
+	GString *gs = value;
+
+	g_string_free(gs, true);
+}
+
+/* String callback of the STRING type: the GString's contents passed
+ * through ws_escape_string_len().  The `pretty` flag makes no difference. */
+static char *
+gstring_tostr(const void *value, bool pretty _U_)
+{
+	const GString *src = value;
+	char *escaped = ws_escape_string_len(NULL, src->str, src->len, false);
+
+	return escaped;
+}
+
+
+/* Register the STRING type (GString payload) and the LITERAL type (plain
+ * C string payload).  Neither has a constructor. */
+void
+sttype_register_string(void)
+{
+	static sttype_t string_type = {
+		.id = STTYPE_STRING,
+		.name = "STRING",
+		.func_new = NULL,
+		.func_free = gstring_free,
+		.func_dup = gstring_dup,
+		.func_tostr = gstring_tostr,
+	};
+	static sttype_t literal_type = {
+		.id = STTYPE_LITERAL,
+		.name = "LITERAL",
+		.func_new = NULL,
+		.func_free = string_free,
+		.func_dup = string_dup,
+		.func_tostr = string_tostr,
+	};
+
+	sttype_register(&string_type);
+	sttype_register(&literal_type);
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c
new file mode 100644
index 0000000..b16d63b
--- /dev/null
+++ b/epan/dfilter/syntax-tree.c
@@ -0,0 +1,533 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+
+#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
+
+#include "syntax-tree.h"
+#include <wsutil/wmem/wmem.h>
+#include <wsutil/str_util.h>
+#include <wsutil/glib-compat.h>
+#include "sttype-op.h"
+#include "sttype-function.h"
+#include "dfilter-int.h"
+
+/* Keep track of sttype_t's via their sttype_id_t number.
+ * Slots are filled by sttype_register() and read by sttype_lookup(). */
+static sttype_t* type_list[STTYPE_NUM_TYPES];
+
+
+/* Magic number stored in stnode_t.magic by stnode_new() and stnode_dup()
+ * and verified with ws_assert_magic(). */
+#define STNODE_MAGIC 0xe9b00b9e
+
+
+/* Register all syntax tree node types by running every per-type
+ * registration routine once, in the order listed. */
+void
+sttype_init(void)
+{
+	static void (*const registrars[])(void) = {
+		sttype_register_field,
+		sttype_register_function,
+		sttype_register_pointer,
+		sttype_register_set,
+		sttype_register_slice,
+		sttype_register_string,
+		sttype_register_opers,
+	};
+
+	for (size_t i = 0; i < G_N_ELEMENTS(registrars); i++) {
+		registrars[i]();
+	}
+}
+
+/* Counterpart of sttype_init().  The registered types are static objects,
+ * so there is nothing to release. */
+void
+sttype_cleanup(void)
+{
+	/* intentionally empty */
+}
+
+
+/* Add a type description to the registry under its own id.
+ * Asserts that the id is in range and that the slot is still empty. */
+void
+sttype_register(sttype_t *type)
+{
+	const sttype_id_t id = type->id;
+
+	/* The id must be a valid index into the registry. */
+	ws_assert(id < STTYPE_NUM_TYPES);
+	/* Each id may be registered only once. */
+	ws_assert(type_list[id] == NULL);
+
+	type_list[id] = type;
+}
+
+/* Return the registered type description for an id.
+ * Asserts that the id is in range and that a type was registered for it. */
+static sttype_t*
+sttype_lookup(sttype_id_t type_id)
+{
+	sttype_t *found;
+
+	ws_assert(type_id < STTYPE_NUM_TYPES);
+
+	found = type_list[type_id];
+	ws_assert(found != NULL);
+
+	return found;
+}
+
+/* Reset a node to the uninitialized state without freeing the node itself.
+ * The payload is released through the type's func_free callback (when both
+ * exist), the token and cached representation strings are freed, and the
+ * location and flags are reset. */
+void
+stnode_clear(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	if (node->type == NULL) {
+		/* An untyped node must not carry a payload. */
+		ws_assert(!node->data);
+	}
+	else if (node->type->func_free != NULL && node->data != NULL) {
+		node->type->func_free(node->data);
+	}
+	node->type = NULL;
+	node->data = NULL;
+
+	g_clear_pointer(&node->repr_display, g_free);
+	g_clear_pointer(&node->repr_debug, g_free);
+	g_clear_pointer(&node->repr_token, g_free);
+
+	node->location.col_start = -1;
+	node->location.col_len = 0;
+	node->flags = 0;
+}
+
+/* Initialize a node that currently has no type and no payload.
+ * The node stores the `token` pointer itself (stnode_clear() frees it
+ * later).  For STTYPE_UNINITIALIZED the node stays untyped; otherwise the
+ * payload is whatever the type's func_new callback returns for `data`, or
+ * `data` itself when the type has no constructor. */
+void
+stnode_init(stnode_t *node, sttype_id_t type_id, void *data, char *token, df_loc_t loc)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+	ws_assert(!node->type);
+	ws_assert(!node->data);
+
+	node->flags = 0;
+	node->location = loc;
+	node->repr_token = token;
+	node->repr_debug = NULL;
+	node->repr_display = NULL;
+
+	if (type_id == STTYPE_UNINITIALIZED) {
+		node->type = NULL;
+		node->data = NULL;
+		return;
+	}
+
+	/* Creating an initialized node with a NULL data pointer is allowed
+	 * and needs to be safe. The parser relies on that. */
+	sttype_t *type = sttype_lookup(type_id);
+	ws_assert(type);
+	node->type = type;
+	node->data = type->func_new ? type->func_new(data) : data;
+}
+
+/* Give a node a new type and payload while keeping its token text,
+ * location and flags.  The old payload is released by stnode_clear(). */
+void
+stnode_replace(stnode_t *node, sttype_id_t type_id, void *data)
+{
+	/* stnode_clear() frees the token and resets location and flags, so
+	 * save them first. */
+	uint16_t saved_flags = node->flags;
+	df_loc_t saved_loc = node->location;
+	char *saved_token = g_strdup(node->repr_token);
+
+	stnode_clear(node);
+	stnode_init(node, type_id, data, saved_token, saved_loc);
+	node->flags = saved_flags;
+}
+
+/* Allocate a zeroed node, stamp its magic number and initialize it with
+ * stnode_init().  Free the result with stnode_free(). */
+stnode_t*
+stnode_new(sttype_id_t type_id, void *data, char *token, df_loc_t loc)
+{
+	stnode_t *fresh = g_new0(stnode_t, 1);
+
+	fresh->magic = STNODE_MAGIC;
+	stnode_init(fresh, type_id, data, token, loc);
+
+	return fresh;
+}
+
+/* Create a node of the given type with NULL data, no token and the
+ * location {-1, 0}. */
+stnode_t*
+stnode_new_empty(sttype_id_t type_id)
+{
+	df_loc_t no_location = {-1, 0};
+	stnode_t *fresh = stnode_new(type_id, NULL, NULL, no_location);
+
+	return fresh;
+}
+
+/* Create a copy of a node.
+ * The token string is duplicated, the location and flags are copied, and
+ * the cached representations start out empty.  The payload is copied with
+ * the type's func_dup callback when there is one; otherwise the data
+ * pointer itself is copied, so both nodes refer to the same payload.
+ * NOTE(review): for types that register func_free but no func_dup this
+ * looks like it could free the shared payload twice -- confirm that such
+ * nodes are never duplicated. */
+stnode_t*
+stnode_dup(const stnode_t *node)
+{
+	stnode_t *copy;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	copy = g_new(stnode_t, 1);
+	copy->magic = STNODE_MAGIC;
+	copy->type = node->type;
+	copy->flags = node->flags;
+	copy->location = node->location;
+	copy->repr_token = g_strdup(node->repr_token);
+	copy->repr_debug = NULL;
+	copy->repr_display = NULL;
+
+	if (node->type == NULL) {
+		copy->data = NULL;
+	}
+	else {
+		copy->data = node->type->func_dup
+			? node->type->func_dup(node->data)
+			: node->data;
+	}
+
+	return copy;
+}
+
+/* Release everything a node holds (via stnode_clear()) and then the node
+ * itself. */
+void
+stnode_free(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	stnode_clear(node);
+	g_free(node);
+}
+
+/* Return the name of the node's type, or "UNINITIALIZED" when the node has
+ * no type. */
+const char*
+stnode_type_name(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	return node->type ? node->type->name : "UNINITIALIZED";
+}
+
+/* Return the id of the node's type, or STTYPE_UNINITIALIZED when the node
+ * has no type. */
+sttype_id_t
+stnode_type_id(stnode_t *node)
+{
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	return node->type ? node->type->id : STTYPE_UNINITIALIZED;
+}
+
+/* Return the node's payload pointer after checking the node's magic
+ * number.  The node keeps the payload. */
+void *
+stnode_data(stnode_t *node)
+{
+	void *payload;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+	payload = node->data;
+
+	return payload;
+}
+
+/* Return the GString payload of a node; asserts that the node is of type
+ * STTYPE_STRING. */
+GString *
+stnode_string(stnode_t *node)
+{
+	GString *payload;
+
+	ws_assert(stnode_type_id(node) == STTYPE_STRING);
+	payload = stnode_data(node);
+
+	return payload;
+}
+
+/* Detach and return the node's payload.  Asserts that there is one.  The
+ * node's data pointer is set to NULL, so stnode_clear() will not free the
+ * returned payload. */
+void *
+stnode_steal_data(stnode_t *node)
+{
+	void *taken;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	taken = node->data;
+	ws_assert(taken);
+	node->data = NULL;
+
+	return taken;
+}
+
+/* Return the token text stored in the node (may be NULL).  The node keeps
+ * the string. */
+const char *
+stnode_token(stnode_t *node)
+{
+	const char *token = node->repr_token;
+
+	return token;
+}
+
+/* Return a copy of the node's location. */
+df_loc_t
+stnode_location(stnode_t *node)
+{
+	df_loc_t where = node->location;
+
+	return where;
+}
+
+/* Overwrite the node's location with `loc`. */
+void
+stnode_set_location(stnode_t *node, df_loc_t loc)
+{
+	node->location.col_start = loc.col_start;
+	node->location.col_len = loc.col_len;
+}
+
+/* Return true when at least one of the bits in `flags` is set on the
+ * node. */
+bool
+stnode_get_flags(stnode_t *node, uint16_t flags)
+{
+	return (node->flags & flags) != 0;
+}
+
+/* Set the bits in `flags` on the node; bits already set are kept. */
+void
+stnode_set_flags(stnode_t *node, uint16_t flags)
+{
+	node->flags = node->flags | flags;
+}
+
+/* Set dst's location to a span built from the locations of n1 and n2.
+ * The span always starts at n1's col_start.  It ends where n2 ends when
+ * n2 has a valid (non-negative) col_start that lies after n1's, and where
+ * n1 ends otherwise. */
+void
+stnode_merge_location(stnode_t *dst, stnode_t *n1, stnode_t *n2)
+{
+	df_loc_t head = stnode_location(n1);
+	df_loc_t tail = head;
+	df_loc_t other = stnode_location(n2);
+
+	if (other.col_start >= 0 && other.col_start > head.col_start) {
+		tail = other;
+	}
+
+	dst->location.col_start = head.col_start;
+	dst->location.col_len = tail.col_start - head.col_start + tail.col_len;
+}
+
+/* True when the node is of type STTYPE_TEST or STTYPE_ARITHMETIC.
+ * The argument is evaluated twice. */
+#define IS_OPERATOR(node) \
+ (stnode_type_id(node) == STTYPE_TEST || \
+ stnode_type_id(node) == STTYPE_ARITHMETIC)
+
+/* Build a newly allocated string representation of a node.
+ * In pretty mode the result is what the type's func_tostr callback
+ * returns.  In debug mode operator nodes use the callback's result as is,
+ * while every other node is rendered as "<TYPE NAME>(<callback result>)".
+ * Types without a func_tostr callback contribute "FIXME". */
+static char *
+_node_tostr(stnode_t *node, bool pretty)
+{
+	char *s, *repr;
+
+	/* Nodes without a type are a supported state (stnode_type_name() and
+	 * stnode_type_id() handle them), so do not dereference a NULL type;
+	 * use the placeholder type name instead. */
+	if (node->type == NULL)
+		return g_strdup(stnode_type_name(node));
+
+	if (node->type->func_tostr == NULL)
+		s = g_strdup("FIXME");
+	else
+		s = node->type->func_tostr(node->data, pretty);
+
+	if (pretty)
+		return s;
+
+	if (IS_OPERATOR(node)) {
+		repr = s;
+	}
+	else {
+		repr = ws_strdup_printf("%s(%s)", stnode_type_name(node), s);
+		g_free(s);
+	}
+
+	return repr;
+}
+
+/* Return a string representation of a node: display form when `pretty` is
+ * set, debug form otherwise.
+ * The string is stored in the node (repr_display or repr_debug), replacing
+ * the one stored by the previous call for the same form, so the caller
+ * must not free it. */
+const char *
+stnode_tostr(stnode_t *node, bool pretty)
+{
+	char **cache;
+	char *text;
+
+	ws_assert_magic(node, STNODE_MAGIC);
+
+	cache = pretty ? &node->repr_display : &node->repr_debug;
+
+	if (pretty && IS_OPERATOR(node) && node->repr_token != NULL) {
+		/* Some operators can have synonyms, like "or" and "||".
+		 * Show the user the same representation as was typed. */
+		text = g_strdup(node->repr_token);
+	}
+	else {
+		text = _node_tostr(node, pretty);
+	}
+
+	g_free(*cache);
+	*cache = text;
+
+	return text;
+}
+
+/* Format a one-line debug summary of a node: magic number, type name,
+ * debug representation and location.  Returns a newly allocated string
+ * (the callers in this file release it with g_free()). */
+static char *
+sprint_node(stnode_t *node)
+{
+	wmem_strbuf_t *out = wmem_strbuf_new(NULL, NULL);
+
+	wmem_strbuf_append(out, "{ ");
+	wmem_strbuf_append_printf(out, "magic = 0x%"PRIx32", ", node->magic);
+	wmem_strbuf_append_printf(out, "type = %s, ", stnode_type_name(node));
+	wmem_strbuf_append_printf(out, "data = %s, ", stnode_todebug(node));
+	wmem_strbuf_append_printf(out, "location = %ld:%zu",
+			node->location.col_start, node->location.col_len);
+	wmem_strbuf_append(out, " }");
+
+	return wmem_strbuf_finalize(out);
+}
+
+/* Log "<msg> = <node summary>", or "<msg> is NULL" for a NULL node, at the
+ * given level with the given source position.  Nothing is formatted when
+ * the level is not active for the log domain. */
+void
+log_node_full(enum ws_log_level level,
+		const char *file, int line, const char *func,
+		stnode_t *node, const char *msg)
+{
+	char *summary;
+
+	if (!ws_log_msg_is_active(WS_LOG_DOMAIN, level))
+		return;
+
+	if (node != NULL) {
+		summary = sprint_node(node);
+		ws_log_write_always_full(WS_LOG_DOMAIN, level, file, line, func,
+				"%s = %s", msg, summary);
+		g_free(summary);
+		return;
+	}
+
+	ws_log_write_always_full(WS_LOG_DOMAIN, level,
+			file, line, func, "%s is NULL", msg);
+}
+
+/* Log an operator node together with summaries of its operands ("NULL"
+ * for a missing operand), or "<msg> is NULL" for a NULL node.  Nothing is
+ * formatted when the level is not active for the log domain. */
+void
+log_test_full(enum ws_log_level level,
+		const char *file, int line, const char *func,
+		stnode_t *node, const char *msg)
+{
+	stnode_t *lhs_node = NULL;
+	stnode_t *rhs_node = NULL;
+	char *lhs_text;
+	char *rhs_text;
+
+	if (!ws_log_msg_is_active(WS_LOG_DOMAIN, level))
+		return;
+
+	if (node == NULL) {
+		ws_log_write_always_full(WS_LOG_DOMAIN, level,
+				file, line, func, "%s is NULL", msg);
+		return;
+	}
+
+	/* Only the operands are needed; the operator itself is not. */
+	sttype_oper_get(node, NULL, &lhs_node, &rhs_node);
+	lhs_text = lhs_node ? sprint_node(lhs_node) : NULL;
+	rhs_text = rhs_node ? sprint_node(rhs_node) : NULL;
+
+	ws_log_write_always_full(WS_LOG_DOMAIN, level, file, line, func,
+			"%s:\n LHS = %s\n RHS = %s",
+			stnode_todebug(node),
+			lhs_text ? lhs_text : "NULL",
+			rhs_text ? rhs_text : "NULL");
+
+	g_free(lhs_text);
+	g_free(rhs_text);
+}
+
+/* Start a tree dump line: two spaces per nesting level, followed by the
+ * level number and a space. */
+static void
+indent(wmem_strbuf_t *buf, int level)
+{
+	int remaining = level * 2;
+
+	while (remaining-- > 0) {
+		wmem_strbuf_append_c(buf, ' ');
+	}
+	wmem_strbuf_append_printf(buf, "% 2d ", level);
+}
+
+/* Append a debug rendering of `node` and its children to `buf`.
+ * Operator and function nodes write a header line followed by one line
+ * per operand or parameter; set nodes write one line per element.  Every
+ * child line starts with indent() at `level + 1`; operands and parameters
+ * are visited recursively.  All other nodes write their debug string.
+ * No newline is written after the last line. */
+static void
+visit_tree(wmem_strbuf_t *buf, stnode_t *node, int level)
+{
+	const int child_level = level + 1;
+
+	switch (stnode_type_id(node)) {
+	case STTYPE_TEST:
+	case STTYPE_ARITHMETIC: {
+		stnode_t *lhs, *rhs;
+
+		wmem_strbuf_append_printf(buf, "%s:\n", stnode_todebug(node));
+		sttype_oper_get(node, NULL, &lhs, &rhs);
+		if (lhs == NULL) {
+			/* A right operand without a left one is invalid. */
+			if (rhs != NULL) {
+				ws_assert_not_reached();
+			}
+			break;
+		}
+		indent(buf, child_level);
+		visit_tree(buf, lhs, child_level);
+		if (rhs != NULL) {
+			wmem_strbuf_append_c(buf, '\n');
+			indent(buf, child_level);
+			visit_tree(buf, rhs, child_level);
+		}
+		break;
+	}
+	case STTYPE_SET: {
+		GSList *item = stnode_data(node);
+
+		wmem_strbuf_append_printf(buf, "SET(#%u):\n", g_slist_length(item) / 2);
+		while (item != NULL) {
+			stnode_t *lo, *hi;
+
+			indent(buf, child_level);
+			lo = item->data;
+			wmem_strbuf_append(buf, stnode_tostr(lo, false));
+			/* Elements come in pairs; the second item may hold NULL. */
+			item = g_slist_next(item);
+			ws_assert(item);
+			hi = item->data;
+			if (hi != NULL) {
+				wmem_strbuf_append(buf, " .. ");
+				wmem_strbuf_append(buf, stnode_tostr(hi, false));
+			}
+			item = g_slist_next(item);
+			if (item != NULL) {
+				wmem_strbuf_append_c(buf, '\n');
+			}
+		}
+		break;
+	}
+	case STTYPE_FUNCTION: {
+		GSList *param;
+
+		wmem_strbuf_append_printf(buf, "%s:\n", stnode_todebug(node));
+		for (param = sttype_function_params(node); param != NULL; param = param->next) {
+			indent(buf, child_level);
+			visit_tree(buf, param->data, child_level);
+			if (param->next != NULL) {
+				wmem_strbuf_append_c(buf, '\n');
+			}
+		}
+		break;
+	}
+	default:
+		wmem_strbuf_append(buf, stnode_todebug(node));
+		break;
+	}
+}
+
+/* Render the syntax tree rooted at `root` as a multi-line string, starting
+ * at nesting level 0.  Returns a newly allocated string (log_syntax_tree()
+ * releases it with g_free()). */
+char *
+dump_syntax_tree_str(stnode_t *root)
+{
+	wmem_strbuf_t *out = wmem_strbuf_new(NULL, NULL);
+
+	indent(out, 0);
+	visit_tree(out, root, 0);
+
+	return wmem_strbuf_finalize(out);
+}
+
+/* Log "<msg>:" followed by a dump of the syntax tree.
+ * When `cache_ptr` is not NULL the dump string is handed to the caller
+ * through it (the caller then owns it); otherwise it is freed here.
+ * When the level is not active nothing is dumped and `*cache_ptr` is left
+ * untouched. */
+void
+log_syntax_tree(enum ws_log_level level, stnode_t *root, const char *msg, char **cache_ptr)
+{
+	char *dump;
+
+	if (!ws_log_msg_is_active(LOG_DOMAIN_DFILTER, level))
+		return;
+
+	dump = dump_syntax_tree_str(root);
+	ws_log_write_always_full(LOG_DOMAIN_DFILTER, level, NULL, -1, NULL,
+			"%s:\n%s", msg, dump);
+
+	if (cache_ptr == NULL) {
+		g_free(dump);
+		return;
+	}
+	*cache_ptr = dump;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */
diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h
new file mode 100644
index 0000000..332f6f2
--- /dev/null
+++ b/epan/dfilter/syntax-tree.h
@@ -0,0 +1,234 @@
+/*
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 2001 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SYNTAX_TREE_H
+#define SYNTAX_TREE_H
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <glib.h>
+
+#include <wsutil/ws_assert.h>
+#include <wsutil/wslog.h>
+#include <epan/ftypes/ftypes.h>
+#include "dfilter-loc.h"
+
+/** @file
+ */
+
+/**
+ * Identifies the kind of a syntax tree node.
+ *
+ * No enumerator has an explicit value, so numbering starts at 0 with
+ * STTYPE_UNINITIALIZED and increases by one per entry.
+ */
+typedef enum {
+ STTYPE_UNINITIALIZED,
+ STTYPE_TEST,
+ STTYPE_LITERAL,
+ STTYPE_REFERENCE,
+ STTYPE_STRING,
+ STTYPE_CHARCONST,
+ STTYPE_FIELD,
+ STTYPE_FVALUE,
+ STTYPE_SLICE,
+ STTYPE_FUNCTION,
+ STTYPE_SET,
+ STTYPE_PCRE,
+ STTYPE_ARITHMETIC,
+ /* Not a node kind: as the last entry it equals the number of ids above (13). Keep it last. */
+ STTYPE_NUM_TYPES
+} sttype_id_t;
+
+/*
+ * Callback signatures used by the func_new, func_dup, func_free and
+ * func_tostr members of sttype_t.
+ *
+ * NOTE(review): the exact contract of each callback (who owns the argument
+ * and the returned pointer) is set by the per-type implementations and is
+ * not visible from this header - confirm before relying on it.
+ */
+typedef void * (*STTypeNewFunc)(void *);
+typedef void * (*STTypeDupFunc)(gconstpointer);
+typedef void (*STTypeFreeFunc)(void *);
+/* NOTE(review): 'pretty' presumably mirrors the flag taken by stnode_tostr() - confirm. */
+typedef char* (*STTypeToStrFunc)(gconstpointer, bool pretty);
+
+
+/*
+ * Type information: describes one node type by pairing its id and name
+ * with the callbacks for that type. A pointer to such a descriptor is
+ * what sttype_register() takes.
+ */
+typedef struct {
+ /* Which sttype_id_t this descriptor is for. */
+ sttype_id_t id;
+ /* Name string for the type (see stnode_type_name()). */
+ const char *name;
+ /* Per-type callbacks; see the STType*Func typedefs for their signatures. */
+ STTypeNewFunc func_new;
+ STTypeFreeFunc func_free;
+ STTypeDupFunc func_dup;
+ STTypeToStrFunc func_tostr;
+} sttype_t;
+
+
+/*
+ * Node flag bit (bit 0).
+ * NOTE(review): presumably combined into the uint16_t 'flags' values that
+ * stnode_get_flags()/stnode_set_flags() take - confirm against callers.
+ */
+/* Lexical value is ambiguous (can be a protocol field or a literal). */
+#define STFLAG_UNPARSED (1 << 0)
+
+/** Node (type instance) information */
+typedef struct {
+ /* Sanity marker. NOTE(review): presumably verified with ws_assert_magic() - confirm. */
+ uint32_t magic;
+ /* Descriptor (id, name, callbacks) of this node's type. */
+ sttype_t *type;
+ /* Opaque payload pointer (see stnode_data() / stnode_steal_data()). */
+ void * data;
+ /*
+  * NOTE(review): presumably the source token text and cached display/debug
+  * renderings of the node - confirm in the implementation.
+  */
+ char *repr_token;
+ char *repr_display;
+ char *repr_debug;
+ /* Location value (see stnode_location() / stnode_set_location()). */
+ df_loc_t location;
+ /* NOTE(review): presumably a mask of STFLAG_* bits - confirm. */
+ uint16_t flags;
+} stnode_t;
+
+/*
+ * Operator codes. Going by the names, the list covers logical operators
+ * (NOT/AND/OR), equality and inequality in ALL_ and ANY_ variants,
+ * ordering comparisons, CONTAINS/MATCHES, set membership (IN/NOT_IN),
+ * bitwise AND, unary minus and the binary arithmetic operators.
+ *
+ * No explicit values are given, so STNODE_OP_UNINITIALIZED is 0.
+ * NOTE(review): which node types carry these codes is defined outside
+ * this header - confirm against the users of stnode_op_t.
+ */
+typedef enum {
+ STNODE_OP_UNINITIALIZED,
+ STNODE_OP_NOT,
+ STNODE_OP_AND,
+ STNODE_OP_OR,
+ STNODE_OP_ALL_EQ,
+ STNODE_OP_ANY_EQ,
+ STNODE_OP_ALL_NE,
+ STNODE_OP_ANY_NE,
+ STNODE_OP_GT,
+ STNODE_OP_GE,
+ STNODE_OP_LT,
+ STNODE_OP_LE,
+ STNODE_OP_CONTAINS,
+ STNODE_OP_MATCHES,
+ STNODE_OP_IN,
+ STNODE_OP_NOT_IN,
+ STNODE_OP_BITWISE_AND,
+ STNODE_OP_UNARY_MINUS,
+ STNODE_OP_ADD,
+ STNODE_OP_SUBTRACT,
+ STNODE_OP_MULTIPLY,
+ STNODE_OP_DIVIDE,
+ STNODE_OP_MODULO,
+} stnode_op_t;
+
+/*
+ * Match mode selector.
+ * NOTE(review): the names suggest "default", "any" and "all" matching, but
+ * the precise semantics are defined by the code that uses stmatch_t -
+ * confirm there. STNODE_MATCH_DEF is 0 (first enumerator, no explicit value).
+ */
+typedef enum {
+ STNODE_MATCH_DEF,
+ STNODE_MATCH_ANY,
+ STNODE_MATCH_ALL,
+} stmatch_t;
+
+/*
+ * These are the sttype_t registration function prototypes.
+ * There is one per family of node types; each takes no arguments.
+ */
+void sttype_register_field(void);
+void sttype_register_function(void);
+void sttype_register_pointer(void);
+void sttype_register_set(void);
+void sttype_register_slice(void);
+void sttype_register_string(void);
+void sttype_register_opers(void);
+
+/*
+ * Set up the node type registry.
+ * NOTE(review): presumably invokes the sttype_register_*() functions
+ * above - confirm in the implementation.
+ */
+void
+sttype_init(void);
+
+/* Counterpart of sttype_init(). */
+void
+sttype_cleanup(void);
+
+/* Register one node type descriptor. */
+void
+sttype_register(sttype_t *type);
+
+/*
+ * Node creation, duplication, re-initialisation and destruction.
+ *
+ * NOTE(review): ownership of 'data' and 'token' after these calls is
+ * decided by the implementations, which are not part of this header -
+ * confirm before freeing either argument at a call site.
+ */
+
+/* Create a node of the given type from payload, token text and location. */
+stnode_t*
+stnode_new(sttype_id_t type_id, void *data, char *token, df_loc_t loc);
+
+/* Create a node of the given type without payload, token or location arguments. */
+stnode_t*
+stnode_new_empty(sttype_id_t type_id);
+
+/* Duplicate 'org'; the original is taken as const. */
+stnode_t*
+stnode_dup(const stnode_t *org);
+
+void
+stnode_clear(stnode_t *node);
+
+/* Same arguments as stnode_new(), applied to an existing node. */
+void
+stnode_init(stnode_t *node, sttype_id_t type_id, void *data, char *token, df_loc_t loc);
+
+/* Give an existing node a new type and payload. */
+void
+stnode_replace(stnode_t *node, sttype_id_t type_id, void *data);
+
+void
+stnode_free(stnode_t *node);
+
+/*
+ * Node accessors. Each takes the node to inspect or modify as its
+ * first argument.
+ */
+
+const char*
+stnode_type_name(stnode_t *node);
+
+sttype_id_t
+stnode_type_id(stnode_t *node);
+
+void *
+stnode_data(stnode_t *node);
+
+GString *
+stnode_string(stnode_t *node);
+
+/*
+ * NOTE(review): "steal" presumably means the payload is detached from the
+ * node and ownership passes to the caller - confirm in the implementation.
+ */
+void *
+stnode_steal_data(stnode_t *node);
+
+const char *
+stnode_token(stnode_t *node);
+
+df_loc_t
+stnode_location(stnode_t *node);
+
+void
+stnode_set_location(stnode_t *node, df_loc_t loc);
+
+/*
+ * 'flags' is a uint16_t mask; note that the getter returns bool rather
+ * than the mask itself.
+ */
+bool
+stnode_get_flags(stnode_t *node, uint16_t flags);
+
+void
+stnode_set_flags(stnode_t *node, uint16_t flags);
+
+/* Set the location of 'dst' from the two nodes 'n1' and 'n2'. */
+void
+stnode_merge_location(stnode_t *dst, stnode_t *n1, stnode_t *n2);
+
+/*
+ * String representation of a node. 'pretty' selects between the two
+ * renderings exposed by the convenience macros below: true for
+ * stnode_todisplay(), false for stnode_todebug().
+ */
+const char *
+stnode_tostr(stnode_t *node, bool pretty);
+
+#define stnode_todisplay(node) stnode_tostr(node, true)
+
+#define stnode_todebug(node) stnode_tostr(node, false)
+
+/*
+ * Node logging helpers. Both take a log level, the call site
+ * (file, line, function), the node and a message string.
+ */
+void
+log_node_full(enum ws_log_level level,
+ const char *file, int line, const char *func,
+ stnode_t *node, const char *msg);
+
+void
+log_test_full(enum ws_log_level level,
+ const char *file, int line, const char *func,
+ stnode_t *node, const char *msg);
+
+/*
+ * Convenience wrappers.
+ *
+ * With WS_DEBUG defined, log_node() and log_test() forward to the *_full
+ * functions at LOG_LEVEL_NOISY, supplying the call site and using the
+ * stringified argument expression as the message. LOG_NODE() picks
+ * log_test() for STTYPE_TEST nodes and log_node() for everything else;
+ * it expands its argument more than once, so avoid arguments with side
+ * effects.
+ *
+ * Without WS_DEBUG all three expand to (void)0 and the argument is not
+ * evaluated at all.
+ */
+#ifdef WS_DEBUG
+#define log_node(node) \
+ log_node_full(LOG_LEVEL_NOISY, __FILE__, __LINE__, __func__, node, #node)
+#define log_test(node) \
+ log_test_full(LOG_LEVEL_NOISY, __FILE__, __LINE__, __func__, node, #node)
+#define LOG_NODE(node) \
+ do { \
+ if (stnode_type_id(node) == STTYPE_TEST) \
+ log_test(node); \
+ else \
+ log_node(node); \
+ } while (0)
+#else
+#define log_node(node) (void)0
+#define log_test(node) (void)0
+#define LOG_NODE(node) (void)0
+#endif
+
+/*
+ * Render the syntax tree rooted at 'root' as text and return the
+ * resulting string. log_syntax_tree() releases such a string with
+ * g_free() when it does not hand it to the caller.
+ */
+char *
+dump_syntax_tree_str(stnode_t *root);
+
+/*
+ * Log a dump of the syntax tree rooted at 'root', preceded by 'msg', at
+ * 'level' in the LOG_DOMAIN_DFILTER domain.
+ *
+ * If that domain/level is not active nothing happens and *cache_ptr is
+ * left untouched. Otherwise, when 'cache_ptr' is non-NULL the dump string
+ * is stored in *cache_ptr and the caller becomes responsible for it;
+ * when it is NULL the string is freed after logging.
+ */
+void
+log_syntax_tree(enum ws_log_level level, stnode_t *root, const char *msg, char **cache_ptr);
+
+/*
+ * ws_assert_magic(obj, mnum): check an object's 'magic' member.
+ *
+ * With WS_DEBUG defined, first asserts (ws_assert) that 'obj' is non-NULL,
+ * then compares obj->magic with 'mnum' and, on mismatch, logs both values
+ * in hexadecimal at LOG_LEVEL_ERROR in the LOG_DOMAIN_DFILTER domain,
+ * tagged with the call site. 'obj' and 'mnum' are each expanded more than
+ * once.
+ * NOTE(review): LOG_LEVEL_ERROR is presumably fatal in wslog - confirm.
+ *
+ * Without WS_DEBUG the macro expands to (void)0 and evaluates nothing.
+ */
+#ifdef WS_DEBUG
+#define ws_assert_magic(obj, mnum) \
+ do { \
+ ws_assert(obj); \
+ if ((obj)->magic != (mnum)) { \
+ ws_log_full(LOG_DOMAIN_DFILTER, LOG_LEVEL_ERROR, \
+ __FILE__, __LINE__, __func__, \
+ "Magic num is 0x%08"PRIx32", " \
+ "but should be 0x%08"PRIx32, \
+ (obj)->magic, (mnum)); \
+ } \
+ } while(0)
+#else
+#define ws_assert_magic(obj, mnum) (void)0
+#endif
+
+#endif /* SYNTAX_TREE_H */