summaryrefslogtreecommitdiffstats
path: root/epan/tvbparse.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
commite4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch)
tree68cb5ef9081156392f1dd62a00c6ccc1451b93df /epan/tvbparse.h
parentInitial commit. (diff)
downloadwireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz
wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip
Adding upstream version 4.2.2.upstream/4.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'epan/tvbparse.h')
-rw-r--r--epan/tvbparse.h451
1 files changed, 451 insertions, 0 deletions
diff --git a/epan/tvbparse.h b/epan/tvbparse.h
new file mode 100644
index 00000000..2bc23fd8
--- /dev/null
+++ b/epan/tvbparse.h
@@ -0,0 +1,451 @@
+/** @file
+ *
+ * an API for text tvb parsers
+ *
+ * Copyright 2005, Luis E. Garcia Ontanon <luis@ontanon.org>
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 1998 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ The intention behind this is to ease the writing of dissectors that have to
+ parse text without the need of writing into buffers.
+
+ It was originally written to avoid using lex and yacc for the xml dissector.
+
+ the parser is able to look for wanted elements these can be:
+
+ simple tokens:
+ - a char out of a string of needles
+ - a char not belonging to a string of needles
+ - a sequence of chars that belong to a set of chars
+ - a sequence of chars that do not belong to a set of chars
+ - a string
+ - a caseless string
+ - all the characters up to a certain wanted element (included or excluded)
+
+ composed elements:
+ - one of a given group of wanted elements
+ - a sequence of wanted elements
+ - some (at least one) instances of a wanted element
+
+ Once a wanted element is successfully extracted, by either tvbparse_get or
+ tvbparse_find, the parser will invoke a given callback
+ before and another one after every of its component's subelement's callbacks
+ are being called.
+
+ If tvbparse_get or tvbparse_find fail to extract the wanted element the
+ subelements callbacks are not going to be invoked.
+
+ The wanted elements are instantiated once by the proto_register_xxx function.
+
+ The parser is instantiated for every packet and it mantains its state.
+
+ The element's data is destroyed before the next packet is dissected.
+ */
+
+#ifndef _TVB_PARSE_H_
+#define _TVB_PARSE_H_
+
+#include <epan/tvbuff.h>
+#include <glib.h>
+#include "ws_symbol_export.h"
+
+typedef struct _tvbparse_elem_t tvbparse_elem_t;
+typedef struct _tvbparse_wanted_t tvbparse_wanted_t;
+typedef struct _tvbparse_t tvbparse_t;
+
+
+/*
+ * a callback function to be called before or after an element has been
+ * successfuly extracted.
+ *
+ * Note that if the token belongs to a composed token the callbacks of the
+ * components won't be called unless the composed token is successfully
+ * extracted.
+ *
+ * tvbparse_data: the private data of the parser
+ * wanted_data: the private data of the wanted element
+ * elem: the extracted element
+ */
+typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
+
+typedef int (*tvbparse_condition_t)
+(tvbparse_t*, const int,
+ const tvbparse_wanted_t*,
+ tvbparse_elem_t**);
+
+
+typedef enum {
+ TP_UNTIL_INCLUDE, /* last elem is included, its span is spent by the parser */
+ TP_UNTIL_SPEND, /* last elem is not included, but its span is spent by the parser */
+ TP_UNTIL_LEAVE /* last elem is not included, neither its span is spent by the parser */
+} until_mode_t;
+
+
+struct _tvbparse_wanted_t {
+ int id;
+ tvbparse_condition_t condition;
+
+ union {
+ const gchar* str;
+ struct _tvbparse_wanted_t** handle;
+ struct {
+ union {
+ gint64 i;
+ guint64 u;
+ gdouble f;
+ } value;
+ gboolean (*comp)(void*,const void*);
+ void* (*extract)(tvbuff_t*,guint);
+ } number;
+ enum ftenum ftenum;
+ struct {
+ until_mode_t mode;
+ const tvbparse_wanted_t* subelem;
+ } until;
+ struct {
+ wmem_map_t* table;
+ struct _tvbparse_wanted_t* key;
+ struct _tvbparse_wanted_t* other;
+ } hash;
+ GPtrArray* elems;
+ const tvbparse_wanted_t* subelem;
+ void* p;
+ } control;
+
+ int len;
+
+ guint min;
+ guint max;
+
+ const void* data;
+
+ tvbparse_action_t before;
+ tvbparse_action_t after;
+};
+
+/* an instance of a per packet parser */
+struct _tvbparse_t {
+ wmem_allocator_t* scope;
+ tvbuff_t* tvb;
+ int offset;
+ int end_offset;
+ void* data;
+ const tvbparse_wanted_t* ignore;
+ int recursion_depth;
+};
+
+
+/* a matching token returned by either tvbparser_get or tvb_parser_find */
+struct _tvbparse_elem_t {
+ int id;
+
+ tvbparse_t* parser;
+ tvbuff_t* tvb;
+ int offset;
+ int len;
+
+ void* data;
+
+ struct _tvbparse_elem_t* sub;
+
+ struct _tvbparse_elem_t* next;
+ struct _tvbparse_elem_t* last;
+
+ const tvbparse_wanted_t* wanted;
+};
+
+
+/*
+ * definition of wanted token types
+ *
+ * the following functions define the tokens we will be able to look for in a tvb
+ * common parameters are:
+ *
+ * id: an arbitrary id that will be copied to the eventual token (don't use 0)
+ * private_data: persistent data to be passed to the callback action (wanted_data)
+ * before_cb: an callback function to be called before those of the subelements
+ * after_cb: an callback function to be called after those of the subelements
+ */
+
+
+/*
+ * a char element.
+ *
+ * When looked for it returns a simple element one character long if the char
+ * at the current offset matches one of the needles.
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_char(const int id,
+ const gchar* needles,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * a not_char element.
+ *
+ * When looked for it returns a simple element one character long if the char
+ * at the current offset does not match one of the needles.
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_not_char(const int id,
+ const gchar* needle,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * a chars element
+ *
+ * When looked for it returns a simple element one or more characters long if
+ * one or more char(s) starting from the current offset match one of the needles.
+ * An element will be returned if at least min_len chars are given (1 if it's 0)
+ * It will get at most max_len chars or as much as it can if max_len is 0.
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_chars(const int id,
+ const guint min_len,
+ const guint max_len,
+ const gchar* needles,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * a not_chars element
+ *
+ * When looked for it returns a simple element one or more characters long if
+ * one or more char(s) starting from the current offset do not match one of the
+ * needles.
+ * An element will be returned if at least min_len chars are given (1 if it's 0)
+ * It will get at most max_len chars or as much as it can if max_len is 0.
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_not_chars(const int id,
+ const guint min_len,
+ const guint max_len,
+ const gchar* needles,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * a string element
+ *
+ * When looked for it returns a simple element if we have the given string at
+ * the current offset
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_string(const int id,
+ const gchar* string,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * casestring
+ *
+ * When looked for it returns a simple element if we have a matching string at
+ * the current offset
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_casestring(const int id,
+ const gchar* str,
+ const void* data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb);
+
+/*
+ * until
+ *
+ * When looked for it returns a simple element containing all the characters
+ * found until the first match of the ending element if the ending element is
+ * found.
+ *
+ * When looking for until elements it calls tvbparse_find so it can be very slow.
+ *
+ * It won't have a subelement, the ending's callbacks won't get called.
+ */
+
+/*
+ * op_mode values determine how the terminating element and the current offset
+ * of the parser are handled
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_until(const int id,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ const tvbparse_wanted_t* ending,
+ until_mode_t until_mode);
+
+/*
+ * one_of
+ *
+ * When looked for it will try to match to the given candidates and return a
+ * composed element whose subelement is the first match.
+ *
+ * The list of candidates is terminated with a NULL
+ *
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_set_oneof(const int id,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ ...);
+
+/*
+ * hashed
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_hashed(const int id,
+ const void* data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ tvbparse_wanted_t* key,
+ tvbparse_wanted_t* other,
+ ...);
+
+WS_DLL_PUBLIC
+void tvbparse_hashed_add(tvbparse_wanted_t* w, ...);
+
+/*
+ * sequence
+ *
+ * When looked for it will try to match in order all the given candidates. If
+ * every candidate is found in the given order it will return a composed
+ * element whose subelements are the matcheed elemets.
+ *
+ * The list of candidates is terminated with a NULL.
+ *
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_set_seq(const int id,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ ...);
+
+/*
+ * some
+ *
+ * When looked for it will try to match the given candidate at least min times
+ * and at most max times. If the given candidate is matched at least min times
+ * a composed element is returned.
+ *
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_some(const int id,
+ const guint min,
+ const guint max,
+ const void* private_data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ const tvbparse_wanted_t* wanted);
+
+#define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
+ tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
+
+
+/*
+ * handle
+ *
+ * this is a pointer to a pointer to a wanted element (that might have not
+ * been initialized yet) so that recursive structures
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_handle(tvbparse_wanted_t** handle);
+
+/* quoted
+ * this is a composed candidate, that will try to match a quoted string
+ * (included the quotes) including into it every escaped quote.
+ *
+ * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,"\"","\\")
+ */
+WS_DLL_PUBLIC
+tvbparse_wanted_t* tvbparse_quoted(const int id,
+ const void* data,
+ tvbparse_action_t before_cb,
+ tvbparse_action_t after_cb,
+ const char quote,
+ const char escape);
+
+/*
+ * a helper callback for quoted strings that will shrink the token to contain
+ * only the string andnot the quotes
+ */
+WS_DLL_PUBLIC
+void tvbparse_shrink_token_cb(void* tvbparse_data,
+ const void* wanted_data,
+ tvbparse_elem_t* tok);
+
+
+
+
+/* initialize the parser (at every packet)
+ * scope: memory scope/pool
+ * tvb: what are we parsing?
+ * offset: from where
+ * len: for how many bytes
+ * private_data: will be passed to the action callbacks
+ * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
+ */
+WS_DLL_PUBLIC
+tvbparse_t* tvbparse_init(wmem_allocator_t *scope,
+ tvbuff_t* tvb,
+ const int offset,
+ int len,
+ void* private_data,
+ const tvbparse_wanted_t* ignore);
+
+/* reset the parser */
+WS_DLL_PUBLIC
+gboolean tvbparse_reset(tvbparse_t* tt, const int offset, int len);
+
+WS_DLL_PUBLIC
+guint tvbparse_curr_offset(tvbparse_t* tt);
+guint tvbparse_len_left(tvbparse_t* tt);
+
+
+
+/*
+ * This will look for the wanted token at the current offset or after any given
+ * number of ignored tokens returning FALSE if there's no match or TRUE if there
+ * is a match.
+ * The parser will be left in its original state and no callbacks will be called.
+ */
+WS_DLL_PUBLIC
+gboolean tvbparse_peek(tvbparse_t* tt,
+ const tvbparse_wanted_t* wanted);
+
+/*
+ * This will look for the wanted token at the current offset or after any given
+ * number of ignored tokens returning NULL if there's no match.
+ * if there is a match it will set the offset of the current parser after
+ * the end of the token
+ */
+WS_DLL_PUBLIC
+tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
+ const tvbparse_wanted_t* wanted);
+
+/*
+ * Like tvbparse_get but this will look for a wanted token even beyond the
+ * current offset.
+ * This function is slow.
+ */
+WS_DLL_PUBLIC
+tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
+ const tvbparse_wanted_t* wanted);
+
+
+WS_DLL_PUBLIC
+void tvbparse_tree_add_elem(proto_tree* tree, tvbparse_elem_t* curr);
+
+#endif