From e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 22:34:10 +0200 Subject: Adding upstream version 4.2.2. Signed-off-by: Daniel Baumann --- doc/CMakeLists-PROTOABBREV.txt | 71 + doc/CMakeLists.txt | 175 ++ doc/README.capture | 138 ++ doc/README.design | 57 + doc/README.developer | 995 +++++++++ doc/README.display_filter | 586 ++++++ doc/README.dissector | 3723 ++++++++++++++++++++++++++++++++++ doc/README.heuristic | 244 +++ doc/README.idl2wrs | 144 ++ doc/README.plugins | 424 ++++ doc/README.regression | 76 + doc/README.request_response_tracking | 171 ++ doc/README.stats_tree | 238 +++ doc/README.tapping | 244 +++ doc/README.vagrant | 86 + doc/README.wmem | 402 ++++ doc/README.wslua | 549 +++++ doc/README.xml-output | 253 +++ doc/androiddump.adoc | 256 +++ doc/asn2deb.adoc | 84 + doc/capinfos.adoc | 390 ++++ doc/captype.adoc | 73 + doc/ciscodump.adoc | 324 +++ doc/diagnostic-options.adoc | 24 + doc/dissection-options.adoc | 185 ++ doc/dpauxmon.adoc | 142 ++ doc/dumpcap.adoc | 473 +++++ doc/editcap.adoc | 628 ++++++ doc/etwdump.adoc | 124 ++ doc/extcap.adoc | 145 ++ doc/extcap_example.py | 544 +++++ doc/falcodump.adoc | 145 ++ doc/http3.md | 103 + doc/idl2deb.adoc | 82 + doc/idl2wrs.adoc | 100 + doc/mergecap.adoc | 207 ++ doc/mmdbresolve.adoc | 71 + doc/packet-PROTOABBREV.c | 380 ++++ doc/plugins.example/CMakeLists.txt | 74 + doc/plugins.example/README | 35 + doc/plugins.example/hello.c | 58 + doc/randpkt.adoc | 147 ++ doc/randpkt.txt | 95 + doc/randpktdump.adoc | 142 ++ doc/rawshark.adoc | 551 +++++ doc/release-notes.adoc | 147 ++ doc/reordercap.adoc | 84 + doc/sdjournal.adoc | 132 ++ doc/sshdump.adoc | 313 +++ doc/text2pcap.adoc | 421 ++++ doc/tshark.adoc | 2613 ++++++++++++++++++++++++ doc/udpdump.adoc | 121 ++ doc/wifidump.adoc | 229 +++ doc/wireshark-filter.adoc | 648 ++++++ doc/wireshark.adoc | 2735 +++++++++++++++++++++++++ 55 files changed, 21601 insertions(+) create mode 100644 
doc/CMakeLists-PROTOABBREV.txt create mode 100644 doc/CMakeLists.txt create mode 100644 doc/README.capture create mode 100644 doc/README.design create mode 100644 doc/README.developer create mode 100644 doc/README.display_filter create mode 100644 doc/README.dissector create mode 100644 doc/README.heuristic create mode 100644 doc/README.idl2wrs create mode 100644 doc/README.plugins create mode 100644 doc/README.regression create mode 100644 doc/README.request_response_tracking create mode 100644 doc/README.stats_tree create mode 100644 doc/README.tapping create mode 100644 doc/README.vagrant create mode 100644 doc/README.wmem create mode 100644 doc/README.wslua create mode 100644 doc/README.xml-output create mode 100644 doc/androiddump.adoc create mode 100644 doc/asn2deb.adoc create mode 100644 doc/capinfos.adoc create mode 100644 doc/captype.adoc create mode 100644 doc/ciscodump.adoc create mode 100644 doc/diagnostic-options.adoc create mode 100644 doc/dissection-options.adoc create mode 100644 doc/dpauxmon.adoc create mode 100644 doc/dumpcap.adoc create mode 100644 doc/editcap.adoc create mode 100644 doc/etwdump.adoc create mode 100644 doc/extcap.adoc create mode 100755 doc/extcap_example.py create mode 100644 doc/falcodump.adoc create mode 100644 doc/http3.md create mode 100644 doc/idl2deb.adoc create mode 100644 doc/idl2wrs.adoc create mode 100644 doc/mergecap.adoc create mode 100644 doc/mmdbresolve.adoc create mode 100644 doc/packet-PROTOABBREV.c create mode 100644 doc/plugins.example/CMakeLists.txt create mode 100644 doc/plugins.example/README create mode 100644 doc/plugins.example/hello.c create mode 100644 doc/randpkt.adoc create mode 100644 doc/randpkt.txt create mode 100644 doc/randpktdump.adoc create mode 100644 doc/rawshark.adoc create mode 100644 doc/release-notes.adoc create mode 100644 doc/reordercap.adoc create mode 100644 doc/sdjournal.adoc create mode 100644 doc/sshdump.adoc create mode 100644 doc/text2pcap.adoc create mode 100644 doc/tshark.adoc 
create mode 100644 doc/udpdump.adoc create mode 100644 doc/wifidump.adoc create mode 100644 doc/wireshark-filter.adoc create mode 100644 doc/wireshark.adoc (limited to 'doc') diff --git a/doc/CMakeLists-PROTOABBREV.txt b/doc/CMakeLists-PROTOABBREV.txt new file mode 100644 index 00000000..f8a62b9e --- /dev/null +++ b/doc/CMakeLists-PROTOABBREV.txt @@ -0,0 +1,71 @@ +# CMakeLists.txt +# +# Wireshark - Network traffic analyzer +# Copyright YEARS, YOUR_NAME +# +# SPDX-License-Identifier: LICENSE +# + +include(WiresharkPlugin) + +# Plugin name and version info (major minor micro extra) +set_module_info(PROTOABBREV 0 0 1 0) + +set(DISSECTOR_SRC + # Source files that directly dissect data + packet-PROTOABBREV.c +) + +set(DISSECTOR_SUPPORT_SRC + # Source files that provide additional routines +) + +set(PLUGIN_FILES + plugin.c + ${DISSECTOR_SRC} + ${DISSECTOR_SUPPORT_SRC} +) + +set_source_files_properties( + ${PLUGIN_FILES} + PROPERTIES + COMPILE_FLAGS "${WERROR_COMMON_FLAGS}" +) + +register_plugin_files(plugin.c + plugin + ${DISSECTOR_SRC} + ${DISSECTOR_SUPPORT_SRC} +) + +add_wireshark_plugin_library(PROTOABBREV epan) + +target_link_libraries(PROTOABBREV epan) + +install_plugin(PROTOABBREV epan) + +file(GLOB DISSECTOR_HEADERS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h") +CHECKAPI( + NAME + PROTOABBREV + SWITCHES + --group dissectors-prohibited + --group dissectors-restricted + SOURCES + ${DISSECTOR_SRC} + ${DISSECTOR_SUPPORT_SRC} + ${DISSECTOR_HEADERS} +) + +# +# Editor modelines - https://www.wireshark.org/tools/modelines.html +# +# Local variables: +# c-basic-offset: 8 +# tab-width: 8 +# indent-tabs-mode: t +# End: +# +# vi: set shiftwidth=8 tabstop=8 noexpandtab: +# :indentSize=8:tabSize=8:noTabs=false: +# diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt new file mode 100644 index 00000000..674554fe --- /dev/null +++ b/doc/CMakeLists.txt @@ -0,0 +1,175 @@ +# CMakeLists.txt +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs 
+# +# SPDX-License-Identifier: GPL-2.0-or-later +# + +find_package( Asciidoctor 1.5 ) + +set(MAN1_SOURCE_FILES) +set(MAN4_SOURCE_FILES) +set(MAN1_INSTALL_FILES) +set(MAN4_INSTALL_FILES) +set(HTML_INSTALL_FILES) + +macro (ADD_MAN_PAGE _page_name _man_section) + if(ASCIIDOCTOR_FOUND) + list(APPEND HTML_INSTALL_FILES ${CMAKE_CURRENT_BINARY_DIR}/${_page_name}.html) + + if (${_man_section} EQUAL 1) + list(APPEND MAN1_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${_page_name}.adoc) + list(APPEND MAN1_INSTALL_FILES ${CMAKE_CURRENT_BINARY_DIR}/${_page_name}.${_man_section}) + elseif (${_man_section} EQUAL 4) + list(APPEND MAN4_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${_page_name}.adoc) + list(APPEND MAN4_INSTALL_FILES ${CMAKE_CURRENT_BINARY_DIR}/${_page_name}.${_man_section}) + else() + message(FATAL_ERROR "Unsupported manual page section ${_man_section} for ${_page_name}") + endif() + endif() +endmacro() + +ADD_MAN_PAGE(wireshark 1) +ADD_MAN_PAGE(androiddump 1) +ADD_MAN_PAGE(capinfos 1) +ADD_MAN_PAGE(captype 1) +ADD_MAN_PAGE(ciscodump 1) +ADD_MAN_PAGE(dumpcap 1) +ADD_MAN_PAGE(editcap 1) +ADD_MAN_PAGE(mergecap 1) +ADD_MAN_PAGE(randpkt 1) +ADD_MAN_PAGE(randpktdump 1) +ADD_MAN_PAGE(etwdump 1) +ADD_MAN_PAGE(rawshark 1) +ADD_MAN_PAGE(reordercap 1) +ADD_MAN_PAGE(sshdump 1) +ADD_MAN_PAGE(text2pcap 1) +ADD_MAN_PAGE(tshark 1) +ADD_MAN_PAGE(falcodump 1) +ADD_MAN_PAGE(udpdump 1) +ADD_MAN_PAGE(wifidump 1) + +ADD_MAN_PAGE(extcap 4) +ADD_MAN_PAGE(wireshark-filter 4) + +if(BUILD_dpauxmon AND HAVE_LIBNL3) + ADD_MAN_PAGE(dpauxmon 1) +endif() + +if(BUILD_sdjournal AND SYSTEMD_FOUND) + ADD_MAN_PAGE(sdjournal 1) +endif() + +if(MAXMINDDB_FOUND) + ADD_MAN_PAGE(mmdbresolve 1) +endif() + +if (BUILD_corbaidl2wrs) + ADD_MAN_PAGE(idl2wrs 1) +endif() + +if (BUILD_xxx2deb) + ADD_MAN_PAGE(asn2deb 1) + ADD_MAN_PAGE(idl2deb 1) +endif() + +set(WIRESHARK_BUNDLE_RESOURCE_SHARE_MAN1_FILES ${MAN1_INSTALL_FILES} PARENT_SCOPE) +set(WIRESHARK_BUNDLE_RESOURCE_SHARE_MAN4_FILES ${MAN4_INSTALL_FILES} PARENT_SCOPE) +# 
XXX We need a Logray-specific man page list, which might overlap with Wireshark's. +# Just install everything for now. +set(LOGRAY_BUNDLE_RESOURCE_SHARE_MAN1_FILES ${WIRESHARK_BUNDLE_RESOURCE_SHARE_MAN1_FILES} PARENT_SCOPE) +set(LOGRAY_BUNDLE_RESOURCE_SHARE_MAN4_FILES ${WIRESHARK_BUNDLE_RESOURCE_SHARE_MAN4_FILES} PARENT_SCOPE) + +set(MAN_INCLUDES diagnostic-options.adoc dissection-options.adoc) + +if(ASCIIDOCTOR_FOUND) + ASCIIDOCTOR2ROFFMAN(1 ${MAN1_SOURCE_FILES}) + ASCIIDOCTOR2ROFFMAN(4 ${MAN4_SOURCE_FILES}) + ASCIIDOCTOR2HTMLMAN(${MAN1_SOURCE_FILES} ${MAN4_SOURCE_FILES}) + + add_custom_target(manpages DEPENDS + ${MAN1_INSTALL_FILES} + ${MAN4_INSTALL_FILES} + ) + set_target_properties(manpages PROPERTIES FOLDER "Documentation") +endif() + +if(ASCIIDOCTOR_FOUND) + ASCIIDOCTOR2HTML(release-notes.adoc) + ASCIIDOCTOR2TXT(release-notes.adoc) + + list(APPEND HTML_INSTALL_FILES "${CMAKE_CURRENT_BINARY_DIR}/release-notes.html") + + add_custom_target(release_notes_html + DEPENDS + ${CMAKE_CURRENT_BINARY_DIR}/release-notes.html + ) + set_target_properties(release_notes_html PROPERTIES FOLDER "Documentation") + add_custom_target(release_notes_txt + DEPENDS + ${CMAKE_CURRENT_BINARY_DIR}/release-notes.txt + ) + set_target_properties(release_notes_txt PROPERTIES FOLDER "Documentation") + add_custom_target(release_notes + DEPENDS + release_notes_html + release_notes_txt + ) + set_target_properties(release_notes PROPERTIES FOLDER "Documentation") + + add_custom_target(news + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_BINARY_DIR}/release-notes.txt + ${CMAKE_SOURCE_DIR}/NEWS + DEPENDS + release_notes_txt + ) + set_target_properties(news PROPERTIES FOLDER "Documentation") +endif() + +add_custom_target( + docs ALL + DEPENDS + ${MAN1_INSTALL_FILES} + ${MAN4_INSTALL_FILES} + ${HTML_INSTALL_FILES} +) +set_target_properties(docs PROPERTIES FOLDER "Documentation") + +if(ASCIIDOCTOR_FOUND) + install( + FILES + ${MAN1_INSTALL_FILES} + DESTINATION + 
${CMAKE_INSTALL_MANDIR}/man1 + ) + + install( + FILES + ${MAN4_INSTALL_FILES} + DESTINATION + ${CMAKE_INSTALL_MANDIR}/man4 + ) +endif() + +install( + FILES + ${HTML_INSTALL_FILES} + DESTINATION + ${CMAKE_INSTALL_DOCDIR} +) + +# +# Editor modelines - https://www.wireshark.org/tools/modelines.html +# +# Local variables: +# c-basic-offset: 8 +# tab-width: 8 +# indent-tabs-mode: t +# End: +# +# vi: set shiftwidth=8 tabstop=8 noexpandtab: +# :indentSize=8:tabSize=8:noTabs=false: +# diff --git a/doc/README.capture b/doc/README.capture new file mode 100644 index 00000000..9191316c --- /dev/null +++ b/doc/README.capture @@ -0,0 +1,138 @@ +This document is an attempt, to bring some light to the things done, when +packet capturing is performed. There might be things missing, and others +maybe wrong :-( The following will concentrate a bit on the Windows +port of Wireshark. + + +XXX: when ongoing file reorganization will be completed, the following +two lists maybe won't be needed any longer! + +libpcap related source files: +----------------------------- +capture-pcap-util.c +capture-pcap-util.h +capture-pcap-util-int.h +capture-pcap-util-unix.c +capture-wpcap.c +capture-wpcap.h + +Capture related source files: +----------------------------- +capture.c +capture.h +capture_loop.c +capture_loop.h +capture_opts.c +capture_sync.c +capture_ui_utils.c +capture_ui_utils.h + + +Capture driver +-------------- +Wireshark doesn't have direct access to the capture hardware. Instead of this, +it uses the Libpcap/Winpcap library to capture data from network cards. + +On Win32, in capture-wpcap.c the function load_wpcap_module() is called +to load the wpcap.dll. This dll includes all functions needed for +packet capturing. + + + +Capture File +------------ +There are some kinds of targets to put the capture data into: + +-temporary file +-user specified "single" capture file +-user specified "ringbuffer" capture file + +Which kind of file is used depends on the user settings. 
In principle there +is no difference in handling these files, so if not otherwise notified, +it will be called the capture file. + +The capture file is stored, using the wiretap library. + + +Overview +-------- +Capturing is done using a two task model: the currently running (parent) +process will spawn a child process to do the real capture work, namely +controlling libpcap. This two task model is used because it's necessary +to split the capturing process (which should avoid packet drop) from the parent +process which might need significant time to display the data. + +When a capture is started, the parent builds a "command line" and creates a +new child process with it. A pipe from the child to the parent is created +which is used to transfer control messages. + +The child will init libpcap and send the parent a "new capture file is used" +control message through the pipe. + +The child cyclically takes the packet data from libpcap and saves it to disk. +From time to time it will send the parent a "new packets" control message. + +If the parent process receives this "new packets" message and the option +"Update list of packets in real time" is used, it will read the packet data +from the file, dissect and display it. + + +If the user wants to stop the capture, this can be done in two ways: by +menu/toolbar of the parent process or the Stop button of the child process's +dialog box (which obviously cannot be used if this dialog is hidden). + +The Stop button will stop the capture itself, close the control pipe and then +close itself. The parent will detect this and stop its part of the capture. + +If the menu/toolbar is used, the parent will send a break signal to the child +which will lead to the same sequence as described above. + +Win32 only: as the windows implementation of signals simply doesn't work, +another pipe from the parent to the child is used to send a "close capture" +message instead of a signal.
+ + +Start capture +------------- +A capture is started, by specifying to start the capture at the command line, +trigger the OK button in the "Capture Options" dialog box and some more. The +capture start is actually done by calling the capture_start() function in +capture.c. + + +Capture child (Loop) +-------------------- +The capture child will open the target capture file, prepare pcap things, +init stop conditions, init the capture statistic dialog (if not hidden) and +start a loop which is running until the flag ld.go is false. + +Inside this loop, + +-Qt main things are updated +-pcap_dispatch(capture_pcap_cb) is called +-the capture stop conditions are checked (ld.go is set to false to finish) +-update the capture statistic dialog (if not hidden) + +While this loop is running, the pcap_dispatch() will call capture_pcap_cb() +for every packet captured. Inside this, the packet data is converted into +wtap (wiretap) format and saved to file. Beside saving, it is trying to +do some basic dissecting (for the statistic window), by calling the +appropriate capture_xxx function. + +When the user triggered a capture stop or one of the capture stop conditions +matched, the ld.go flag is set to false, and the loop will stop shortly after. + + +Capture parent +-------------- +In the capture parent the cap_pipe_input_cb() function is called "cyclically" +(unix:waiting for pipe, win32:timer,1000ms) to read data from the pipe and show +it on the main screen. While the capture is in progress, no other capture file +can be opened. + + +Updating +-------- +The actual packet capturing inside the libpcap is done using its own task. +Catching and processing the packet data from the libpcap is done using the +pcap_dispatch() function. 
diff --git a/doc/README.design b/doc/README.design new file mode 100644 index 00000000..e35cbd0f --- /dev/null +++ b/doc/README.design @@ -0,0 +1,57 @@ +Unfortunately, the closest thing to a design document is the +"README.developer" document in the "doc" directory of the Wireshark +source tree; however, although that's useful for people adding new +protocol dissectors to Wireshark, it doesn't describe the operations of +the "core" of Wireshark. + +We have no document describing that; however, a quick summary of the +part of the code you'd probably be working with is: + + for every capture file that Wireshark has open, there's a + "capture_file" structure - Wireshark currently supports only one + open capture file at a time, and that structure is named + "cfile" (see the "file.h" header file); + + that structure has a member "plist", which points to a + "frame_data" structure - every link-layer frame that Wireshark + has read in has a "frame_data" structure (see the + "epan/packet.h" header file), the "plist" member of "cfile" + points to the first frame, and each frame has a "next" member + that points to the next frame in the capture (or is null for the + last frame); + + each "frame_data" struct has: + + a pointer to the next frame (null for the last frame); + + a pointer to the previous frame (null for the first + frame); + + information such as the ordinal number of the frame in + the capture, the time stamps for the capture, the size + of the packet data in bytes, the size of the frame in + bytes (which might not equal the size of the packet data + if, for example, the program capturing the packets + captured no more than the first N bytes of the capture, + for some value of N); + + the byte offset in the capture file where the frame's + data is located. 
+ +See the "print_packets()" routine in "file.c" for an example of a +routine that goes through all the packets in the capture; the loop does + + for (fdata = cf->plist; fdata != NULL; fdata = fdata->next) { + + update a progress bar (because it could take a + significant period of time to process all packets); + + read the packet data if the packet is to be printed; + + print the packet; + + } + +The "wtap_seek_read()" call reads the packet data into memory; the +"epan_dissect_new()" call "dissects" that data, building a tree +structure for the fields in the packet. diff --git a/doc/README.developer b/doc/README.developer new file mode 100644 index 00000000..6d6ede97 --- /dev/null +++ b/doc/README.developer @@ -0,0 +1,995 @@ +This file is a HOWTO for Wireshark developers. It describes general development +and coding practices for contributing to Wireshark no matter which part of +Wireshark you want to work on. + +To learn how to write a dissector, read this first, then read the file +README.dissector. + +This file is compiled to give in depth information on Wireshark. +It is by no means all inclusive and complete. Please feel free to discuss on +the developer mailing list or upload merge requests to gitlab. + +0. Prerequisites. + +Before starting to develop a new dissector, a "running" Wireshark build +environment is required - there's no such thing as a standalone "dissector +build toolkit". + +How to setup such an environment is platform dependent; detailed +information about these steps can be found in the "Developer's Guide" +(available from: https://www.wireshark.org) and in the INSTALL and +README.md files of the sources root dir. + +0.1. General README files. 
+ +You'll find additional information in the following README files: + +- doc/README.capture - the capture engine internals +- doc/README.design - Wireshark software design - incomplete +- doc/README.developer - this file +- doc/README.dissector - How to dissect a packet +- doc/README.display_filter - Display Filter Engine +- doc/README.idl2wrs - CORBA IDL converter +- doc/README.regression - regression testing of WS and TS +- doc/README.stats_tree - a tree statistics counting specific packets +- doc/README.tapping - "tap" a dissector to get protocol specific events +- doc/README.vagrant - how to create a development VM using vagrant +- doc/README.wslua - working with LUA +- doc/README.xml-output - how to work with the PDML exported output +- wiretap/README.developer - how to add additional capture file types to + Wiretap + +0.2. Dissector related README files. + +You'll find additional dissector related information in the file +README.dissector as well as the following README files: + +- doc/README.heuristic - what are heuristic dissectors and how to write + them +- doc/README.plugins - how to "pluginize" a dissector +- doc/README.request_response_tracking - how to track req./resp. times and such +- doc/README.wmem - how to obtain "memory leak free" memory + +0.3 Contributors + +James Coe +Gilbert Ramirez +Jeff Foster +Olivier Abad +Laurent Deniel +Gerald Combs +Guy Harris +Ulf Lamping + +1. Portability. + +Wireshark runs on many platforms, and can be compiled with a number of +different compilers; here are some rules for writing code that will work +on multiple platforms. + +Building Wireshark requires a compiler that supports C11. This includes +reasonably recent versions of GCC and clang. Microsoft Visual Studio supports +C11 from Visual Studio 2019 version 16.8 and later. Support requires an updated +Universal C Runtime (UCRT) and Windows SDK version to work properly with the +conforming preprocessor. The minimum SDK version is 10.0.20348.0 (version 2104). 
+ +The C11 standard has some optional parts that are not a requirement to build Wireshark. +In particular the following optional C11 features must NOT be used: + - Variable length arrays + - Bounds-checking interfaces (Annex K) + +We don't allow them because their value is questionable and requiring them +would exclude a lot of compilers and runtimes that we wish to support. + +Don't initialize global or static variables (variables with static +storage duration) in their declaration with non-constant values. This is not +permitted in C. E.g., if "i" is a static or global +variable, don't declare "i" as + + uint32_t i = somearray[2]; + +outside a function, or as + + static uint32_t i = somearray[2]; + +inside or outside a function, declare it as just + + uint32_t i; + +or + + static uint32_t i; + +and later, in code, initialize it with + + i = somearray[2]; + +instead. Initialization of variables with automatic storage duration - +i.e., local variables - with non-constant values is permitted, so, +within a function + + uint32_t i = somearray[2]; + +is allowed. + +Don't use zero-length arrays as structure members, use flexible array members +instead. + +Don't use "uchar", "u_char", "ushort", "u_short", "uint", "u_int", +"ulong", "u_long" or "boolean"; they aren't defined on all platforms. + +GLib typedefs have historically been used extensively throughout the +codebase (gchar, guint8, gint16, etc). We are moving towards the fixed +width integers provided in C since C99. These are defined in <stdint.h>, +which is included in <wireshark.h>. You should choose stdint types when +possible, but realise that until we can fully migrate our APIs, in many +situations the GLib types still make sense.
+ +If you want an 8-bit unsigned quantity, use "uint8_t"; if you want an +8-bit character value with the 8th bit not interpreted as a sign bit, +use "unsigned char"; if you want a 16-bit unsigned quantity, use "uint16_t"; +if you want a 32-bit unsigned quantity, use "uint32_t"; and if you want +an "int-sized" unsigned quantity, use "unsigned"; if you want a boolean, +use "bool" (defined in <stdbool.h>). You don't need to explicitly include +these headers; they are included in <wireshark.h>. Use that instead. + +To print fixed width integers you must use the macros provided in <inttypes.h>. + + uint32_t var; + printf("var = %" PRIu32 "\n", var); + +Don't use "long" to mean "signed 32-bit integer", and don't use +"unsigned long" to mean "unsigned 32-bit integer"; "long"s are 64 bits +long on many platforms. Use "int32_t" for signed 32-bit integers and use +"uint32_t" for unsigned 32-bit integers. + +Don't use "long" to mean "signed 64-bit integer" and don't use "unsigned +long" to mean "unsigned 64-bit integer"; "long"s are 32 bits long on +many other platforms. Don't use "long long" or "unsigned long long", +either, as not all platforms support them; use "int64_t" or "uint64_t", +which will be defined as the appropriate types for 64-bit signed and +unsigned integers. + +On LLP64 data model systems (notably 64-bit Windows), "int" and "long" +are 32 bits while "size_t" and "ptrdiff_t" are 64 bits. This means that +the following will generate a compiler warning: + + int i; + i = strlen("hello, sailor"); /* Compiler warning */ + +Normally, you'd just make "i" a size_t. However, many GLib and Wireshark +functions won't accept a size_t on LLP64: + + size_t i; + char greeting[] = "hello, sailor"; + unsigned byte_after_greet; + + i = strlen(greeting); + byte_after_greet = tvb_get_guint8(tvb, i); /* Compiler warning */ + +Try to use the appropriate data type when you can. When you can't, you +will have to cast to a compatible data type, e.g.
+ + size_t i; + char greeting[] = "hello, sailor"; + uint8_t byte_after_greet; + + i = strlen(greeting); + byte_after_greet = tvb_get_guint8(tvb, (int) i); /* OK */ + +or + + int i; + char greeting[] = "hello, sailor"; + uint8_t byte_after_greet; + + i = (int) strlen(greeting); + byte_after_greet = tvb_get_guint8(tvb, i); /* OK */ + +See http://www.unix.org/version2/whatsnew/lp64_wp.html for more +information on the sizes of common types in different data models. + +A lot of legacy code still uses GLib types and I/O replacement API. These +should be gradually transitioned to use the standard interfaces provided in +C11. Sometimes it may be necessary to use an unsavory cast or two or abuse +a macro to bridge the two codebases during the transition. Such is life, +use your judgement and do the best possible under the circumstances. + +Avoid GLib synonyms like gchar and gint and especially don't +use gpointer and gconstpointer, unless you are writing GLib callbacks +and trying to match their signature exactly. These just obscure the +code and gconstpointer in particular is just semantically weird and poor style. + +When printing or displaying the values of 64-bit integral data types, +don't use "%lld", "%llu", "%llx", or "%llo" - not all platforms +support "%ll" for printing 64-bit integral data types. Instead use +the macros in <inttypes.h>, for example: + + proto_tree_add_uint64_format_value(tree, hf_uint64, tvb, offset, len, + val, "%" PRIu64, val); + +For GLib routines, and only those, you can choose whichever format style +you prefer: + + uint64_t val = UINT64_C(1); + char *str1 = g_strdup_printf("%" G_GUINT64_FORMAT, val); + char *str2 = g_strdup_printf("%" PRIu64, val); + +These format macros will be the same modulo any GLib bugs. + +When specifying an integral constant that doesn't fit in 32 bits, don't +use "LL" at the end of the constant - not all compilers use "LL" for +that. Instead, put the constant in a call to the "INT64_C()" or "UINT64_C()" +macro, e.g.
+ + INT64_C(-11644473600), UINT64_C(11644473600) + +rather than + + -11644473600LL, 11644473600ULL + +Don't assume that you can scan through a va_list initialized by va_start +more than once without closing it with va_end and re-initializing it with +va_start. This applies even if you're not scanning through it yourself, +but are calling a routine that scans through it, such as vfprintf() or +one of the routines in Wireshark that takes a format and a va_list as an +argument. You must do + + va_start(ap, format); + call_routine1(xxx, format, ap); + va_end(ap); + va_start(ap, format); + call_routine2(xxx, format, ap); + va_end(ap); + +rather than + + va_start(ap, format); + call_routine1(xxx, format, ap); + call_routine2(xxx, format, ap); + va_end(ap); + +Don't use a label without a statement following it. For example, +something such as + + if (...) { + + ... + + done: + } + +will not work with all compilers - you have to do + + if (...) { + + ... + + done: + ; + } + +with some statement, even if it's a null statement, after the label. +Preferably don't do it at all. + +Don't use "bzero()", "bcopy()", or "bcmp()"; instead, use the ANSI C +routines + + "memset()" (with zero as the second argument, so that it sets + all the bytes to zero); + + "memcpy()" or "memmove()" (note that the first and second + arguments to "memcpy()" are in the reverse order to the + arguments to "bcopy()"; note also that "bcopy()" is typically + guaranteed to work on overlapping memory regions, while + "memcpy()" isn't, so if you may be copying from one region to a + region that overlaps it, use "memmove()", not "memcpy()" - but + "memcpy()" might be faster as a result of not guaranteeing + correct operation on overlapping memory regions); + + and "memcmp()" (note that "memcmp()" returns 0, 1, or -1, doing + an ordered comparison, rather than just returning 0 for "equal" + and 1 for "not equal", as "bcmp()" does). 
+ +Not all platforms necessarily have "bzero()"/"bcopy()"/"bcmp()", and +those that do might not declare them in the header file on which they're +declared on your platform. + +Don't use "index()" or "rindex()"; instead, use the ANSI C equivalents, +"strchr()" and "strrchr()". Not all platforms necessarily have +"index()" or "rindex()", and those that do might not declare them in the +header file on which they're declared on your platform. + +Don't use "tvb_get_ptr()". If you must use it, keep in mind that the pointer +returned by a call to "tvb_get_ptr()" is not guaranteed to be aligned on any +particular byte boundary; this means that you cannot safely cast it to any +data type other than a pointer to "char", "unsigned char", "guint8", or other +one-byte data types. Casting a pointer returned by tvb_get_ptr() into any +multi-byte data type or structure may cause crashes on some platforms (even +if it does not crash on x86-based PCs). Even if such mis-aligned accesses +don't crash on your platform they will be slower than properly aligned +accesses would be. Furthermore, the data in a packet is not necessarily in +the byte order of the machine on which Wireshark is running. Use the tvbuff +routines to extract individual items from the packet, or, better yet, use +"proto_tree_add_item()" and let it extract the items for you. + +Don't use structures that overlay packet data, or into which you copy +packet data; the C programming language does not guarantee any +particular alignment of fields within a structure, and even the +extensions that try to guarantee that are compiler-specific and not +necessarily supported by all compilers used to build Wireshark. Using +bitfields in those structures is even worse; the order of bitfields +is not guaranteed. 
+ +Don't use "ntohs()", "ntohl()", "htons()", or "htonl()"; the header +files required to define or declare them differ between platforms, and +you might be able to get away with not including the appropriate header +file on your platform but that might not work on other platforms. +Instead, use "g_ntohs()", "g_ntohl()", "g_htons()", and "g_htonl()"; +those are declared by <glib.h>, and you'll need to include that anyway, +as Wireshark header files that all dissectors must include use stuff from +<glib.h>. + +Don't fetch a little-endian value using "tvb_get_ntohs()" or +"tvb_get_ntohl()" and then use "g_ntohs()", "g_htons()", "g_ntohl()", +or "g_htonl()" on the resulting value - the g_ routines in question +convert between network byte order (big-endian) and *host* byte order, +not *little-endian* byte order; not all machines on which Wireshark runs +are little-endian, even though PCs are. Fetch those values using +"tvb_get_letohs()" and "tvb_get_letohl()". + +Do not use "open()", "rename()", "mkdir()", "stat()", "unlink()", "remove()", +"fopen()", "freopen()" directly. Instead use "ws_open()", "ws_rename()", +"ws_mkdir()", "ws_stat()", "ws_unlink()", "ws_remove()", "ws_fopen()", +"ws_freopen()": these wrapper functions change the path and file name from +UTF-8 to UTF-16 on Windows allowing the functions to work correctly when the +path or file name contain non-ASCII characters. + +Also, use ws_read(), ws_write(), ws_lseek(), ws_dup(), ws_fstat(), and +ws_fdopen(), rather than read(), write(), lseek(), dup(), fstat(), and +fdopen() on descriptors returned by ws_open(). + +Those functions are declared in <wsutil/file_util.h>; include that +header in any code that uses any of those routines. + +When opening a file with "ws_fopen()", "ws_freopen()", or "ws_fdopen()", if +the file contains ASCII text, use "r", "w", "a", and so on as the open mode +- but if it contains binary data, use "rb", "wb", and so on.
On +Windows, if a file is opened in a text mode, writing a byte with the +value of octal 12 (newline) to the file causes two bytes, one with the +value octal 15 (carriage return) and one with the value octal 12, to be +written to the file, and causes bytes with the value octal 15 to be +discarded when reading the file (to translate between C's UNIX-style +lines that end with newline and Windows' DEC-style lines that end with +carriage return/line feed). + +In addition, that also means that when opening or creating a binary +file, you must use "ws_open()" (with O_CREAT and possibly O_TRUNC if the +file is to be created if it doesn't exist), and OR in the O_BINARY flag, +even on UN*X - O_BINARY is defined by <wsutil/file_util.h> as 0 on UN*X. + +Do not include <unistd.h>, <fcntl.h>, or <io.h> to declare any of the +routines listed as replaced by routines in <wsutil/file_util.h>; +instead, just include <wsutil/file_util.h>. + +If you need the declarations of other functions defined by <unistd.h>, +don't include it without protecting it with + + #ifdef HAVE_UNISTD_H + + ... + + #endif + +Don't use forward declarations of static arrays without a specified size +in a fashion such as this: + + static const value_string foo_vals[]; + + ... + + static const value_string foo_vals[] = { + { 0, "Red" }, + { 1, "Green" }, + { 2, "Blue" }, + { 0, NULL } + }; + +as some compilers will reject the first of those statements. Instead, +initialize the array at the point at which it's first declared, so that +the size is known. + +For #define names and enum member names, prefix the names with a tag so +as to avoid collisions with other names - this might be more of an issue +on Windows, as it appears to #define names such as DELETE and +OPTIONAL. + +Don't use the "positional parameters" extension that many UNIX printf's +implement, e.g.: + + snprintf(add_string, 30, " - (%1$d) (0x%1$04x)", value); + +as not all UNIX printf's implement it, and Windows printf doesn't appear +to implement it.
Use something like + + snprintf(add_string, 30, " - (%d) (0x%04x)", value, value); + +instead. + +Don't use + + case N ... M: + +as that's not supported by all compilers. + +Prefer the C99 output functions from <stdio.h> instead of their GLib +replacements (note that positional format parameters are not part of C99). +In the past we used to recommend using g_snprintf() and g_vsnprintf() +instead, but since Visual Studio 2015, native C99 implementations are +available on all platforms we support. These are optimized better than +the gnulib (GLib) implementation and on hot codepaths that can be a +noticeable difference in execution speed. + +tmpnam() -> mkstemp() +tmpnam is insecure and should not be used any more. Wireshark brings its +own mkstemp implementation for use on platforms that lack mkstemp. +Note: mkstemp does not accept NULL as a parameter. + +Wireshark requires minimum versions of each of the libraries it uses, in +particular GLib 2.54.0 and Qt 5.12.0 or newer. If you require a mechanism +that is available only in a newer version of a library then use its +version detection macros, e.g. "#if GLIB_CHECK_VERSION(...)" and "#if +QT_VERSION_CHECK(...)" to conditionally compile code using that +mechanism. + +When different code must be used on UN*X and Win32, use a #if or #ifdef +that tests _WIN32, not WIN32. Try to write code portably whenever +possible, however; note that there are some routines in Wireshark with +platform-dependent implementations and platform-independent APIs, such +as the routines in epan/filesystem.c, allowing the code that calls it to +be written portably without #ifdefs. + +We support building on Windows using MinGW-w64 (experimental) so be mindful +of the difference between an #ifdef on _WIN32 and _MSC_VER. The first tests +if the platform is some version of Windows and also applies to MinGW. The +latter tests if the toolchain is Microsoft Visual Studio.
Sometimes you need +one or the other, depending on whether the condition applies to all Windows +compilers or only Microsoft's compiler. Use #ifdef __MINGW32__ to test for +a MinGW toolchain, including MinGW-w64. The same concern applies to CMake +code. Depending on the particular situation you may need to use if(WIN32) or +if(MSVC) or if(MINGW). + +Wireshark uses Libgcrypt as general-purpose crypto library. Some Wireshark +specific extensions are defined in wsutil/wsgcrypt.h. You might want to +include that file instead. + +2. String handling + +Do not use functions such as strcat() or strcpy(). +A lot of work has been done to remove the existing calls to these functions and +we do not want any new callers of these functions. + +Instead use snprintf() since that function will if used correctly prevent +buffer overflows for large strings. + +Be sure that all pointers passed to %s specifiers in format strings are non- +NULL. Some implementations will automatically replace NULL pointers with the +string "(NULL)", but most will not. + +When using a buffer to create a string, do not use a buffer stored on the stack. +I.e. do not use a buffer declared as + + char buffer[1024]; + +instead allocate a buffer dynamically using the string-specific or plain wmem +routines (see README.wmem) such as + + wmem_strbuf_t *strbuf; + strbuf = wmem_strbuf_new(pinfo->pool, ""); + wmem_strbuf_append_printf(strbuf, ... + +or + + char *buffer=NULL; + ... + #define MAX_BUFFER 1024 + buffer=wmem_alloc(pinfo->pool, MAX_BUFFER); + buffer[0]='\0'; + ... + snprintf(buffer, MAX_BUFFER, ... + +This avoids the stack from being corrupted in case there is a bug in your code +that accidentally writes beyond the end of the buffer. 
+ + +If you write a routine that will create and return a pointer to a filled in +string and if that buffer will not be further processed or appended to after +the routine returns (except being added to the proto tree), +do not preallocate the buffer to fill in and pass it as a parameter; instead, +pass a pointer to a pointer to the function and return a pointer to a +wmem-allocated buffer that will be automatically freed (see README.wmem). + +I.e. do not write code such as + static void + foo_to_str(char *string, ... ){ + + } + ... + char buffer[1024]; + ... + foo_to_str(buffer, ... + proto_tree_add_string(... buffer ... + +instead write the code as + static void + foo_to_str(char **buffer, ... + #define MAX_BUFFER x + *buffer=wmem_alloc(pinfo->pool, MAX_BUFFER); + + } + ... + char *buffer; + ... + foo_to_str(&buffer, ... + proto_tree_add_string(... *buffer ... + +Use wmem_ allocated buffers. They are very fast and nice. These buffers are all +automatically free()d when the dissection of the current packet ends so you +don't have to worry about free()ing them explicitly in order to not leak memory. +Please read README.wmem. + +Source files can use UTF-8 encoding, but characters outside the ASCII +range should be used sparingly. It should be safe to use non-ASCII +characters in comments and strings, but some compilers (such as GCC +versions prior to 10) may not support extended identifiers very well. +There is also no guarantee that a developer's text editor will interpret +the characters the way you intend them to be interpreted. + +The majority of Wireshark encodes strings as UTF-8. The main exception +is the code that uses the Qt API, which uses UTF-16. Console output is +UTF-8, but as with the source code extended characters should be used +sparingly since some consoles (most notably Windows' cmd.exe) have +limited support for UTF-8. + +3. Robustness. + +Wireshark is not guaranteed to read only network traces that contain correctly- +formed packets.
Wireshark is commonly used to track down networking +problems, and the problems might be due to a buggy protocol implementation +sending out bad packets. + +Therefore, code not only has to be able to handle +correctly-formed packets without, for example, crashing or looping +infinitely, it also has to be able to handle *incorrectly*-formed +packets without crashing or looping infinitely. + +Here are some suggestions for making code more robust in the face +of incorrectly-formed packets: + +Do *NOT* use "ws_assert()" or "ws_assert_not_reached()" with input data in dissectors. +*NO* value in a packet's data should be considered "wrong" in the sense +that it's a problem with the dissector if found; if it cannot do +anything else with a particular value from a packet's data, the +dissector should put into the protocol tree an indication that the +value is invalid, and should return. The "expert" mechanism should be +used for that purpose. + +Use assertions to catch logic errors in your program. A failed assertion +indicates a bug in the code. Use ws_assert() instead of g_assert() to +test a logic condition. Note that ws_assert() can be removed at compile +time. Therefore assertions should not have any side-effects, +otherwise the program may behave inconsistently. + +Use ws_assert_not_reached() instead of g_assert_not_reached() for +unreachable error conditions. For example if (and only if) you know +'myvar' can only have the values 1 and 2 do: + switch(myvar) { + case 1: + (...) + break; + case 2: + (...) + break; + default: + ws_assert_not_reached(); + break; + } + +For dissectors use DISSECTOR_ASSERT() and DISSECTOR_ASSERT_NOT_REACHED() +instead, with the same caveats as above. + +You should continue to use g_assert_true(), g_assert_cmpstr(), etc for +"test code", such as unit testing. These assertions are always active. +See the GLib Testing API documentation for the details on each of those +functions.
+ +If there is a case where you are checking not for an invalid data item +in the packet, but for a bug in the dissector (for example, an +assumption being made at a particular point in the code about the +internal state of the dissector), use the DISSECTOR_ASSERT macro for +that purpose; this will put into the protocol tree an indication that +the dissector has a bug in it, and will not crash the application. + +If you are allocating a chunk of memory to contain data from a packet, +or to contain information derived from data in a packet, and the size of +the chunk of memory is derived from a size field in the packet, make +sure all the data is present in the packet before allocating the buffer. +Doing so means that: + + 1) Wireshark won't leak that chunk of memory if an attempt to + fetch data not present in the packet throws an exception. + +and + + 2) it won't crash trying to allocate an absurdly-large chunk of + memory if the size field has a bogus large value. + +If you're fetching into such a chunk of memory a sequence of bytes from +the buffer, and the sequence has a specified size, you can use +"tvb_memdup()", which will check whether the entire sequence is present +before allocating a buffer for it. + +Otherwise, you can check whether the data is present by using +"tvb_ensure_bytes_exist()" although this frequently is not needed: the +TVB-accessor routines can handle requests to read data beyond the end of +the TVB (by throwing an exception which will either mark the frame as +truncated--not all the data was captured--or as malformed). + +If you're fetching a string only to add it to the tree, you should +generally be using "proto_tree_add_item()" instead. If you also need +the string, you can use the variant "proto_tree_add_item_ret_string()" +or "proto_tree_add_item_ret_string_and_length()" forms. 
+ +If you must fetch it from the tvbuff, and the string has a specified +size and known encoding, you can use "tvb_get_string_enc()" for most +encodings, which will check whether the entire string is present before +allocating a buffer for the string, will put a trailing '\0' at the end +of the buffer, and will also check for invalid characters in the supplied +encoding and convert the string to UTF-8. The "tvb_get_*_string()" set of +functions is available as well, and must be used for some encodings, +primarily non byte aligned ones. If the string has a known encoding and +is null terminated, the "stringz" variants can be used. (Note that these +functions are called with memory allocators, and if called with a NULL +allocator you are required to free the string when finished with it.) + +If the string has a known encoding but requires token parsing or other +text manipulation to determine the offset and size, do so by calling +tvb_*() functions on the tvbuff that perform bounds checking if possible. +Only extract the bytes into a newly allocated buffer to extract a string +if absolutely necessary. If you do so, then you *must* ensure that the +string is valid UTF-8 when passing it to a libwireshark API function +such as proto_tree_add_string(). (Cf. 7.5: Unicode and string encoding +best practices.) + +Conversion to UTF-8 can produce a string with a length longer than +that of the string in the original packet data; this includes strings +encoded in ASCII or UTF-8 itself if they have invalid character sequences +that are replaced with the 3 byte UTF-8 REPLACEMENT CHARACTER. Truncating +a valid UTF-8 string to an arbitrary number of bytes does not guarantee +that the result is a valid UTF-8 string, because a multibyte character +might span the boundary. 
+ +Note also that you should only fetch string data into a fixed-length +buffer if the code ensures that no more bytes than will fit into the +buffer are fetched ("the protocol ensures" isn't good enough, as +protocol specifications can't ensure only packets that conform to the +specification will be transmitted or that only packets for the protocol +in question will be interpreted as packets for that protocol by +Wireshark). + +If you have gotten a pointer using "tvb_get_ptr()" (which you should not +have: you should seriously consider a better alternative to this function), +you must make sure that you do not refer to any data past the length passed +as the last argument to "tvb_get_ptr()"; while the various "tvb_get" +routines perform bounds checking and throw an exception if you refer to data +not available in the tvbuff, direct references through a pointer gotten from +"tvb_get_ptr()" do not do any bounds checking. + +If you have a loop that dissects a sequence of items, each of which has +a length field, with the offset in the tvbuff advanced by the length of +the item, then, if the length field is the total length of the item, and +thus can be zero, you *MUST* check for a zero-length item and abort the +loop if you see one. Otherwise, a zero-length item could cause the +dissector to loop infinitely. You should also check that the offset, +after having the length added to it, is greater than the offset before +the length was added to it, if the length field is greater than 24 bits +long, so that, if the length value is *very* large and adding it to the +offset causes an overflow, that overflow is detected. + +If you have a + + for (i = {start}; i < {end}; i++) + +loop, make sure that the type of the loop index variable is large enough +to hold the maximum {end} value plus 1; otherwise, the loop index +variable can overflow before it ever reaches its maximum value. 
In +particular, be very careful when using int8_t, uint8_t, int16_t, or uint16_t +(or the deprecated Glib synonyms gint8, guint8, gint16, or guint16) +variables as loop indices; you almost always want to use an "int"/"gint" +or "unsigned"/"guint" as the loop index rather than a shorter type. + +If you are fetching a length field from the buffer, corresponding to the +length of a portion of the packet, and subtracting from that length a +value corresponding to the length of, for example, a header in the +packet portion in question, *ALWAYS* check that the value of the length +field is greater than or equal to the length you're subtracting from it, +and report an error in the packet and stop dissecting the packet if it's +less than the length you're subtracting from it. Otherwise, the +resulting length value will be negative, which will either cause errors +in the dissector or routines called by the dissector, or, if the value +is interpreted as an unsigned integer, will cause the value to be +interpreted as a very large positive value. + +Any tvbuff offset that is added to as processing is done on a packet +should be stored in a 32-bit variable, such as an "int"; if you store it +in an 8-bit or 16-bit variable, you run the risk of the variable +overflowing. + +sprintf() -> snprintf() +Prevent yourself from using the sprintf() function, as it does not test the +length of the given output buffer and might be writing into unintended memory +areas. This function is one of the main causes of security problems like buffer +exploits and many other bugs that are very hard to find. It's much better to +use the snprintf() function declared by <stdio.h> instead. + +You should test your dissector against incorrectly-formed packets. This +can be done using the randpkt and editcap utilities that come with the +Wireshark distribution. Testing using randpkt can be done by generating +output at the same layer as your protocol, and forcing Wireshark/TShark +to decode it as your protocol, e.g.
if your protocol sits on top of UDP: + + randpkt -c 50000 -t dns randpkt.pcap + tshark -nVr randpkt.pcap -d udp.port==53,<myproto> + +Testing using editcap can be done using preexisting capture files and the +"-E" flag, which introduces errors in a capture file. E.g.: + + editcap -E 0.03 infile.pcap outfile.pcap + tshark -nVr outfile.pcap + +tools/fuzz-test.sh is available to help automate these tests. + +4. Name convention. + +Wireshark uses the underscore_convention rather than the InterCapConvention for +function names, so new code should probably use underscores rather than +intercaps for functions and variable names. This is especially important if you +are writing code that will be called from outside your code. We are just +trying to keep things consistent for other developers. + +C symbols exported from libraries shipped with Wireshark should start with a +prefix that helps avoid name collisions with public symbols from other shared +libraries. The current suggested prefixes for newly added symbols are +ws_, wslua_, wmem_ and wtap_. + +5. White space convention. + +Most of the C and C++ files in Wireshark use 4-space or 2-space indentation. +When creating new files you are strongly encouraged to use 4-space +indentation for source code in order to ensure consistency between files. + +Please avoid using tab expansions different from 8 column widths, as not all +text editors in use by the developers support this. For a detailed discussion +of tabs, spaces, and indentation, see + + http://www.jwz.org/doc/tabs-vs-spaces.html + +We use EditorConfig (http://editorconfig.org) files to provide formatting +hints. Most editors and IDEs support EditorConfig, either directly or via +a plugin. If yours requires a plugin we encourage you to install it. Our +default EditorConfig indentation style for C and C++ files is 4 spaces. + +Many files also have a short comment (modelines) on the indentation logic at +the end of the file.
This was required in the past but has been superseded by +EditorConfig. See + + https://www.wireshark.org/tools/modelines.html + +for more information. + +Please do not leave trailing whitespace (spaces/tabs) on lines. + +Quite a bit of our source code has varying indentation styles. When editing an +existing file, try following the existing indentation logic. If you wish to +convert a file to 4 space indentation, please do so in its own commit and be +sure to remove its .editorconfig entry so that the default setting takes +effect. + +6. Compiler warnings + +You should write code that is free of compiler warnings. Such warnings will +often indicate questionable code and sometimes even real bugs, so it's best +to avoid warnings altogether. + +The compiler flags in the Makefiles are set to "treat warnings as errors", +so your code won't even compile when warnings occur. + +7. General observations about architecture + +7.1 The global header "wireshark.h" + +You should include the global header <wireshark.h> in your code. However +there are some things to keep in mind when using it and especially +if you are considering modifying it. + +** wireshark.h needs to be minimal: for efficiency reasons, to reduce the +error surface and because every time this header changes everything must be +rebuilt. Consider carefully if another header/module should be included +globally with every project file and exported as public header. + +** No configuration: configuration is specific to the build environment +and target machine. wireshark.h must not depend on that. + +** Only wireshark system headers allowed: plugins use this header and +cannot depend on any header (even indirectly) that is not installed on the +target system. + +** Only global definitions allowed: for example it is acceptable to include +'wsutil' headers in wireshark.h because every component of Wireshark is allowed +to depend on wsutil.
wiretap is not acceptable because we cannot introduce +dependencies on wiretap globally (and wireshark.h must be usable everywhere). + +7.2 Best practices using headers + +C files can be categorized in three types: source files, private headers and +public headers. + +A module "foobar" can have only a private header, only a public header, or +both. If it's only one it is named "foobar.h" in both cases. If it is both they +are named "foobar-int.h" and "foobar.h" respectively. + +In general the order of #include's for a C module source files (foobar.c), +assuming foobar implements any kind of interface should be: + + #include "config.h" + #define WS_LOG_DOMAIN "mydomain" + #include "foobar-int.h" + + followed by + followed by + followed by + +For header files (private and public) config.h must NOT be included. A public +header file (foobar.h) looks like this: + + #ifndef __FOOBAR_H__ + #define __FOOBAR_H__ + #include + followed by + followed by + + #ifdef __cplusplus + extern "C" { + #endif + (declarations) + #ifdef __cplusplus + } + #endif + #endif /* FOOBAR_H */ + +A private header (foobar-int.h) is the public header plus the declarations +with private scope: + + #ifndef __FOOBAR_INT_H__ + #define __FOOBAR_INT_H__ + #include "foobar.h" + followed by + followed by + followed by + (etc.) + +Again if there are only public or private declarations the name foobar-int.h +is not used. The macro symbol WS_LOG_DOMAIN can be defined in source files or +private headers as long as it comes before wireshark.h. + +7.3 Wireshark internal and external API policy + +Wireshark has several APIs. We need to distinguish between internal +Wireshark library APIs and external Wireshark APIs. Wireshark the project is +composed of many different programs and these executable binaries use a number +of internal libraries to share code efficiently. These internal shared +libraries need to be installed on the system to run the programs (wireshark, +tshark, etc). 
+ +A library's public API includes the symbols exported by the DSO (wsutil, +libwireshark, etc). The internal API is made available in the shared libraries +and exists to support the goals of the project. It is public from the point +of view of Wireshark programs (client users of the internal API). The +external API exists to support plugins (client users of the external API) +and is a loosely defined subset of the internal API plus any infrastructure +required to support a plugin system. Note that these two uses of shared +libraries coexist with a lot of overlap, but are nevertheless distinct. + +The internal (public) API is not considered to be stable and will regularly +change as a normal part of development to support new features, remove cruft, +and whatever else is necessary to make the project sustainable and ease the +burden on developers. There is less freedom to change something that could +break a lot of plugins but this is also acceptable (with cause). + +The plugin ABI policy is to be compatible only between micro releases (also +called patch releases). That means we try to make it unnecessary to recompile +plugins with each micro release (on a best-effort basis). For major.minor +releases it is explicitly required to recompile plugins. There is no stable +ABI contract of any kind in that case. + +Keep in mind that APIs can exist in different scopes and levels of abstraction. +Don't get stuck thinking the words public/private have a very specific +meaning, like being decorated or not with WS_DLL_PUBLIC, although that is a +big part of it usually. + +Also the Wireshark developers have historically tried to keep the Lua API +very stable and provide strong backward-compatibility guarantees. Under this +policy moving from Lua 5.2 is unlikely to happen in the foreseeable future. + +7.4 libwireshark is not a single monolithic entity + +One day we might conceivably wish to load dissectors on demand and do other +more sophisticated kinds of unit test. 
Plus other scenarios not immediately +obvious. For this to be possible it is important that the code in epan/ does +not depend on code in epan/dissectors, i.e. it is possible to compile epan +without linking with dissector code. It helps to view dissectors as clients +of an API provided by epan (libwireshark being constituted by two distinct +components "epan" and "dissectors" bundled together, plus other bits and +pieces). The reverse is *not* true; epan should not be the client of an API +provided by dissectors. + +The main way this separation of concerns is achieved is by using runtime +registration interfaces in epan for dissectors, preferences, etc. that are +dynamic and do not have any dissector routines hard coded. Naturally this +is also an essential component of a plugin system (libwireshark has plugins +for taps, dissectors and an experimental interface to augment dissection with +new extension languages). + +7.5 Unicode and string encoding best practices + +Wireshark strings are always encoded in UTF-8 internally, regardless of the +platform where it is running. The C datatype used is "pointer to char" and this +is assumed to point to a valid UTF-8 string. Sometimes older code uses char to +point to opaque byte strings but this archaic usage should be avoided. A better +data type for that is uint8_t. + +Every untrusted string needs to be validated for correct and error-free UTF-8 +encoding, or converted from the source encoding to UTF-8. This should be done +at the periphery of the code. This means converting input during dissection or +when reading input generally. To reiterate: all the Wireshark APIs expect to +receive valid UTF-8 strings. These include proto_tree_add_string(), +proto_item_append_text() and col_append_fstr() just to name a few.
+ +If a dissector uses standard API functions to handle strings, such as +proto_tree_add_item() with an FT_STRING header field type, the API will +transparently handle the conversion from the source encoding to UTF-8 and +nothing else needs to be done to ensure valid string input. + +If your dissector does text manipulation, token parsing and such and generally +extracts text strings from the TVBuff or tries to do line oriented input from +TVBuffs it *must* make sure it passes only valid UTF-8 to libwireshark APIs. +This should be done using tvb_get_string_enc() to extract a string from a TVbuff +or get_utf_8_string() to validate a string after it has been constructed. + +The Qt API uses UTF-16 for its QString class; when converting between a +QString and a pointer to char, functions that convert to or from UTF-8 +encoded pointers to char (or QByteArrays) such as toUtf8() should be used, +not toLocal8Bit() or toLatin1(). + +8. Miscellaneous notes + +Each commit in your branch corresponds to a different VCSVERSION string +automatically defined in the header 'vcs_version.h' during the build. If you happen +to find it convenient to disable this feature it can be done using: + + touch .git/wireshark-disable-versioning + +i.e., the file 'wireshark-disable-versioning' must exist in the git repo dir. + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + * + * vi: set shiftwidth=4 tabstop=8 expandtab: + * :indentSize=4:tabSize=8:noTabs=true: + */ diff --git a/doc/README.display_filter b/doc/README.display_filter new file mode 100644 index 00000000..ab4d74d1 --- /dev/null +++ b/doc/README.display_filter @@ -0,0 +1,586 @@ +(This is a consolidation of documentation written by stig, sahlberg, and gram) + +What is the display filter system? 
+================================== +The display filter system allows the user to select packets by testing +for values in the proto_tree that Wireshark constructs for that packet. +Every proto_item in the proto_tree has an 'abbrev' field +and a 'type' field, which tells the display filter engine the name +of the field and its type (what values it can hold). + +For example, this is the definition of the ip.proto field from packet-ip.c: + +{ &hf_ip_proto, + { "Protocol", "ip.proto", FT_UINT8, BASE_DEC | BASE_EXT_STRING, + &ipproto_val_ext, 0x0, NULL, HFILL }}, + +This definition says that "ip.proto" is the display-filter name for +this field, and that its field-type is FT_UINT8. + +The display filter system has 3 major parts to it: + + 1. A type system (field types, or "ftypes") + 2. A parser, to convert a user's query to an internal representation + 3. An engine that uses the internal representation to select packets. + + +code: +epan/dfilter/* - the display filter engine, including + scanner, parser, syntax-tree semantics checker, DFVM bytecode + generator, and DFVM engine. +epan/ftypes/* - the definitions of the various FT_* field types. +epan/proto.c - proto_tree-related routines + + +The field type system +===================== +The field type system is stored in epan/ftypes. + +The proto_tree system #includes ftypes.h, which gives it the ftenum +definition, which is the enum of all possible ftypes: + +/* field types */ +enum ftenum { + FT_NONE, /* used for text labels with no value */ + FT_PROTOCOL, + FT_BOOLEAN, + FT_CHAR, /* 1-octet character as 0-255 */ + FT_UINT8, + FT_UINT16, + FT_UINT24, /* really a UINT32, but displayed as 6 hex-digits if FD_HEX*/ + FT_UINT32, + FT_UINT40, /* really a UINT64, but displayed as 10 hex-digits if FD_HEX*/ + FT_UINT48, /* really a UINT64, but displayed as 12 hex-digits if FD_HEX*/ + FT_UINT56, /* really a UINT64, but displayed as 14 hex-digits if FD_HEX*/ + FT_UINT64, + etc., etc. 
+} + +It also provides the definition of fvalue_t, the struct that holds the *value* +that corresponds to the type. Each proto_item (proto_node) holds an fvalue_t +due to having a field_info struct (defined in proto.h). + +The fvalue_t is mostly just a gigantic union of possible C-language types +(as opposed to FT_* types): + +typedef struct _fvalue_t { + ftype_t *ftype; + union { + /* Put a few basic types in here */ + uint32_t uinteger; + int32_t sinteger; + uint64_t uinteger64; + int64_t sinteger64; + double floating; + wmem_strbuf_t *strbuf; + GByteArray *bytes; + ipv4_addr_and_mask ipv4; + ipv6_addr_and_prefix ipv6; + e_guid_t guid; + nstime_t time; + protocol_value_t protocol; + uint16_t sfloat_ieee_11073; + uint32_t float_ieee_11073; + } value; +} fvalue_t; + + +Defining a field type +--------------------- +The ftype system itself is designed to be modular, so that new field types +can be added when necessary. + +Each field type must implement an ftype_t structure, defined in +ftypes-int.h. This is the way a field type is registered with the ftype engine. + +If you take a look at ftype-integer.c, you will see that it provides +an ftype_register_integers() function, that fills in many such ftype_t +structs. It creates one for each integer type: FT_UINT8, FT_UINT16, +FT_UINT32, etc. + +The ftype_t struct defines the things needed for the ftype: + + * its ftenum value + * a string representation of the FT name ("FT_UINT8") + * how much data it consumes in the packet + * how to store that value in an fvalue_t: new(), free(), + various value-related functions + * how to compare that value against another + * how to slice that value (strings and byte ranges can be sliced) + +Using an fvalue_t +----------------- +Once the value of a field is stored in an fvalue_t (stored in +each proto_item via field_info), it's easy to use those values, +thanks to the various fvalue_*() functions defined in ftypes.h. 
+ +Functions like fvalue_get(), fvalue_eq(), etc., are all generic +interfaces to get information about the field's value. They work +on any field type because of the ftype_t struct, which is the lookup +table that the field-type engine uses to work with any field type. + +The display filter parser +========================= +The display filter parser (along with the comparison engine) +is stored in epan/dfilter. + +The scanner/parser pair read the string representing the display filter +and convert it into a very simple syntax tree. The syntax tree is very +simple in that it is possible that many of the nodes contain unparsed +chunks of text from the display filter. + +There are four phases to parsing a user's request: + + 1. Scanning the string for dfilter syntax + 2. Parsing the keywords according to the dfilter grammar, into a + syntax tree + 3. Doing a semantic check of the nodes in that syntax tree + 4. Converting the syntax tree into a series of DFVM byte codes + +The dfilter_compile() function, in epan/dfilter/dfilter.c, +runs these 4 phases. The end result is a dfwork_t object (dfw), that +can be passed to dfilter_apply() to actually run the display filter +against a set of proto_trees. + + +Scanning the display filter string +---------------------------------- +epan/dfilter/scanner.l is the lex scanner for finding keywords +in the user's display filter string. + +Its operation is simple. It finds the special function and comparison +operators ("==", "!=", "eq", "ne", etc.), it finds slice operations +( "[0:1]" ), quoted strings, IP addresses, numbers, and any other "special" +keywords or string types. + +Anything it doesn't know how to handle is passed to the grammar parser +as an unparsed string (TOKEN_UNPARSED). This includes field names. The +scanner does not interpret any protocol field names at all. + +The scanner has to return a token type (TOKEN_*), and in many cases, +a value.
The value will be an stnode_t struct, which is a syntax +tree node object. Since the final storage of the parse will +be in a syntax tree, it is convenient for the scanner to fill in +syntax tree nodes with values when it can. + +The stnode_t definition is in epan/dfilter/syntax-tree.h + + +Parsing the keywords according to the dfilter grammar +----------------------------------------------------- +The grammar parser is implemented with the 'lemon' tool, +rather than the traditional yacc or bison grammar parser, +as lemon grammars were found to be easier to work with. The +lemon parser specification (epan/dfilter/grammar.lemon) is +much easier to read than its bison counterpart would be, +thanks to lemon's feature of being able to name fields, rather +than using numbers ($1, $2, etc.) + +The lemon tool is located in tools/lemon in the Wireshark +distribution. + +An on-line introduction to lemon is available at: + +http://www.sqlite.org/src/doc/trunk/doc/lemon.html + +The grammar specifies which type of constructs are possible +within the dfilter language ("dfilter-lang") + +An "expression" in dfilter-lang can be a relational test or a logical test. + +A relational test compares a value against another, which is usually +a field (or a slice of a field) against some static value, like: + + ip.proto == 1 + eth.dst != ff:ff:ff:ff:ff:ff + +A logical test combines other expressions with "and", "or", and "not". + +At the end of the grammatical parsing, the dfw object will +have a valid syntax tree, pointed at by dfw->st_root. + +If there is an error in the syntax, the parser will call dfilter_fail() +with an appropriate error message, which the UI will need to report +to the user. + +The syntax tree system +---------------------- +The syntax tree is created as a result of running the lemon-based +grammar parser on the scanned tokens. The syntax tree code +is in epan/dfilter/syntax-tree* and epan/dfilter/sttype-*.
It too +uses a set of code modules that implement different syntax node types, +similar to how the field-type system registers a set of ftypes +with a central engine. + +Each node (stnode_t) in the syntax tree has a type (sttype). +These sttypes are very much related to ftypes (field types), but there +is not a one-to-one correspondence. The syntax tree nodes are slightly +higher-level abstractions. The root node of the syntax tree is the main +test or comparison being done. + +Semantic Check +-------------- +After the parsing is done and a syntax tree is available, the +code in semcheck.c does a semantic check of what is in the syntax +tree. + +The semantics of the simple syntax tree are checked to make sure that +the fields that are being compared are being compared to appropriate +values. For example, if a field is an integer, it can't be compared to +a string, unless a value_string has been defined for that field. + +During the process of checking the semantics, the simple syntax tree is +fleshed out and no longer contains nodes with unparsed information. The +syntax tree is no longer in its simple form, but in its complete form. + +For example, if the dfilter is slicing a field and comparing +against a set of bytes, semcheck.c has to check that the field +in question can indeed be sliced. + +Or, can a field be compared against a certain type of value (string, +integer, float, IPv4 address, etc.) + +The semcheck code also makes adjustments to the syntax tree +when it needs to. The parser sometimes stores raw, unparsed strings +in the syntax tree, and semcheck has to convert them to +certain types. For example, the display filter may contain +a value_string string (the "enum" type that protocols can use +to define the possible textual descriptions of numeric fields), and +semcheck will convert that value_string string into the correct +integer value. + +Truth be told, the semcheck.c code is a bit disorganized, and could +be re-designed & re-written. 
+ +DFVM Byte Codes +--------------- +The syntax tree is analyzed to create a sequence of bytecodes in the +"DFVM" language. "DFVM" stands for Display Filter Virtual Machine. The +DFVM is similar in spirit, but not in definition, to the BPF VM that +libpcap uses to analyze packets. + +A virtual bytecode is created and used so that the actual process of +filtering packets will be fast. That is, it should be faster to process +a list of VM bytecodes than to attempt to filter packets directly from +the syntax tree. (heh... no measurement has been made to support this +supposition) + +The DFVM opcodes are defined in epan/dfilter/dfvm.h (dfvm_opcode_t). +Similar to how the BPF opcode system works in libpcap, there is a +limited set of opcodes. They operate by loading values from the +proto_tree into registers, loading pre-defined values into +registers, and comparing them. The opcodes are checked in sequence, and +there are only 2 branching opcodes: IF_TRUE_GOTO and IF_FALSE_GOTO. +Both of these can only branch forwards, and never backwards. In this way +sets of DFVM instructions will never get into an infinite loop. + +The epan/dfilter/gencode.c code converts the syntax tree +into a set of dfvm instructions. + +The constants that are in the DFVM instructions (the constant +values that the user is checking against) are pre-loaded +into registers via the dfvm_init_const() call, and stored +in the dfilter_t structure for when the display filter is +actually applied. + + +DFVM Engine +=========== +Once the DFVM bytecode has been produced, it's a simple matter of +running the DFVM engine against the proto_tree from the packet +dissection, using the DFVM bytecodes as instructions. If the DFVM +bytecode is known before packet dissection occurs, the +proto_tree-related code can be "primed" to store away pointers to +field_info structures that are interesting to the display filter. This +makes lookup of those field_info structures during the filtering process +faster. 
+ +The dfilter_apply() function runs a single pre-compiled +display filter against a single proto_tree function, and returns +true or false, meaning that the filter matched or not. + +That function calls dfvm_apply(), which runs across the DFVM +instructions, loading protocol field values into DFVM registers +and doing the comparisons. + +There is a top-level Makefile target called 'dftest' which +builds a 'dftest' executable that will print out the DFVM +bytecode for any display filter given on the command-line. +To build it, run: + +$ make dftest + +To use it, give it the display filter on the command-line: + +$ ./dftest 'ip.addr == 127.0.0.1' +Filter: ip.addr == 127.0.0.1 + +Constants: +00000 PUT_FVALUE 127.0.0.1 -> reg#1 + +Instructions: +00000 READ_TREE ip.addr -> reg#0 +00001 IF-FALSE-GOTO 3 +00002 ANY_EQ reg#0 == reg#1 +00003 RETURN + + +The output shows the original display filter, then the opcodes +that put constant values into registers. The registers are +numbered, and are shown in the output as "reg#n", where 'n' is the +identifying number. + +Then the instructions are shown. These are the instructions +which are run for each proto_tree. + +This is what happens in this example: + +00000 READ_TREE ip.addr -> reg#0 + +Any ip.addr fields in the proto_tree are loaded into register 0. Yes, +multiple values can be loaded into a single register. As a result +of this READ_TREE, the accumulator will hold true or false, indicating +if any field's value was loaded, or not. + +00001 IF-FALSE-GOTO 3 + +If the load failed because there were no ip.addr fields +in the proto_tree, then we jump to instruction 3. + +00002 ANY_EQ reg#0 == reg#1 + +This checks to see if any of the fields in register 1 +(which has the pre-loaded constant value of 127.0.0.1) are equal +to any of the fields in register 0 (which are all of the ip.addr +fields in the proto tree). The resulting value in the +accumulator will be true if any of the fields match, or false +if none match. 
+ +00003 RETURN + +This returns the accumulator's value, either true or false. + +In addition to dftest, there is also a unit-test script for the +display filter engine - test/suite_dfilter/dfiltertest.py. +It makes use of tshark to run specific display filters against +specific captures in test/captures. See the "Wireshark Tests" chapter +in the Wireshark Developer’s Guide. + + + +Display Filter Functions +======================== +You define a display filter function by adding an entry to +the df_functions table in epan/dfilter/dfunctions.c. The record struct +is defined in dfunctions.h, and shown here: + +typedef struct { + char *name; + DFFuncType function; + ftenum_t retval_ftype; + unsigned min_nargs; + unsigned max_nargs; + DFSemCheckType semcheck_param_function; +} df_func_def_t; + +name - the name of the function; this is how the user will call your + function in the display filter language + +function - this is the run-time processing of your function. + +retval_ftype - what type of FT_* type does your function return? + +min_nargs - minimum number of arguments your function accepts +max_nargs - maximum number of arguments your function accepts + +semcheck_param_function - called during the semantic check of the + display filter string. + +DFFuncType function +------------------- +typedef bool (*DFFuncType)(GList *arg1list, GList *arg2list, GList **retval); + +The return value of your function is a bool; true if processing went fine, +or false if there was some sort of exception. + +For now, display filter functions can accept a maximum of 2 arguments. +The "arg1list" parameter is the GList for the first argument. The +"arg2list" parameter is the GList for the second argument. All arguments +to display filter functions are lists. This is because in the display +filter language a protocol field may have multiple instances. For example, +a field like "ip.addr" will exist more than once in a single frame.
So +when the user invokes this display filter: + + somefunc(ip.addr) == true + +even though "ip.addr" is a single argument, the "somefunc" function will +receive a GList of *all* the values of "ip.addr" in the frame. + +Similarly, the return value of the function needs to be a GList, since all +values in the display filter language are lists. The GList** retval argument +is passed to your function so you can set the pointer to your return value. + +DFSemCheckType +-------------- +typedef void (*DFSemCheckType)(dfwork_t *dfw, int param_num, stnode_t *st_node); + +For each parameter in the syntax tree, this function will be called. +"param_num" will indicate the number of the parameter, starting with 0. +The "stnode_t" is the syntax-tree node representing that parameter. +If everything is okay with the value of that stnode_t, your function +does nothing --- it merely returns. If something is wrong, however, +it should call dfilter_fail(dfw,...) and THROW a TypeError exception. + + +Example: add an 'in' display filter operation +============================================= + +This example has been discussed on ethereal-dev in April 2004. +[Ethereal-dev] Need for an 'in' dfilter operator? +(https://www.wireshark.org/lists/ethereal-dev/200404/msg00372.html) +It illustrates how a more complex operation can be added to the display filter language. + +Question: + + If I want to add an 'in' display filter operation, I need to define + several things. This can happen in different ways. For instance, + every value from the "in" value collection will result in a test. + There are 2 options here, either a test for a single value: + + (x in {a b c}) + + or a test for a value in a given range: + + (x in {a ... z}) + + or even a combination of both. 
The former example can be reduced to: + + ((x == a) or (x == b) or (x == c)) + + while the latter can be reduced to + + ((x >= MIN(a, z)) and (x <= MAX(a, z))) + + I understand that I can replace "x in {" with the following steps: + first store x in the "in" test buffer, then add "(" to the display + filter expression internally. + + Similarly I can replace the closing brace "}" with the following + steps: release x from the "in" test buffer and then add ")" + to the display filter expression internally. + + How could I do this? + +Answer: + + This could be done in grammar.lemon. The grammar would produce + syntax tree nodes, combining them with "or", when it is given + tokens that represent the "in" syntax. + + It could also be done later in the process, maybe in + semcheck.c. But if you can do it earlier, in grammar.lemon, + then you shouldn't have to worry about modifying anything in + semcheck.c, as the syntax tree that is passed to semcheck.c + won't contain any new type of operators... just lots of nodes + combined with "or". + +How to add an operator FOO to the display filter language? +========================================================== + +Go to wireshark/epan/dfilter/ + +Edit grammar.lemon and add the operator. Add the operator FOO and the +test logic (defining TEST_OP_FOO). + +Edit scanner.l and add the operator name(s) hence defining +TOKEN_TEST_FOO. Also update the simple() or add the new operand's code. + +Edit sttype-test.h and add the TEST_OP_FOO to the list of test operations. + +Edit sttype-test.c and add TEST_OP_FOO to the num_operands() method. + +Edit gencode.c, add TEST_OP_FOO in the gen_test() method by defining +ANY_FOO. + +Edit dfvm.h and add ANY_FOO to the enum dfvm_opcode_t structure. + +Edit dfvm.c and add ANY_FOO to dfvm_dump() (for the dftest display filter +test binary), to dfvm_apply() hence defining the methods fvalue_foo(). 
+ +Edit semcheck.c and look at the check_relation_XXX() methods if they +still apply to the foo operator; if not, amend the code. Start from the +check_test() method to discover the logic. + +Go to wireshark/epan/ftypes/ + +Edit ftypes.h and declare the fvalue_foo(), ftype_can_foo() and +fvalue_foo() methods. Add the cmp_foo() method to the struct _ftype_t. + +This is the first time that a make in wireshark/epan/dfilter/ can +succeed. If it fails, then some code in the previously edited files must +be corrected. + +Edit ftypes.c and define the fvalue_foo() method with its associated +logic. Define also the ftype_can_foo() and fvalue_foo() methods. + +Edit all ftype-*.c files and add the required fvalue_foo() methods. + +This is the point where you should be able to compile without errors in +wireshark/epan/ftypes/. If not, first fix the errors. + +Go to wireshark/epan/ and run make. If this one succeeds, then we're +almost done as no errors should occur here. + +Go to wireshark/ and run make. One thing to do is make dftest and see +if you can construct valid display filters with your new operator. Or +you may want to move directly to the generation of Wireshark. + +Also look at ui/qt/display_filter_expression_dialog.cpp and the display +filter expression generator. + +How to add a new test to the test suite +======================================= + +All display filter tests are located in test/suite_dfilter. +You can add a test to an existing file or create a new file. + +Each new test class must define "trace_file", which names +a capture file in "test/captures". All the tests +run in that class will use that one capture file. + +There are 2 fixtures you can use for testing: + +checkDFilterCount(dfilter, expected_count) + + This will run the display filter through tshark, on the + file named by "trace_file", and assert that the + number of resulting packets equals "expected_count". 
This + also asserts that tshark does not fail; success with zero + matches is not the same as failure to compile the display + filter string. + +checkDFilterFail(dfilter, error) + + This will run dftest with the display filter, and check + that it fails with a given error message. This is useful + when expecting display filter syntax errors to be caught. + +To execute tests: + +# Run all dfilter tests +$ test/test.py suite_dfilter + +# Run all tests from group_tvb.py: +$ test/test.py suite_dfilter.group_tvb + +# For faster, parallel tests, install the "pytest-xdist" first +# (for example, using "pip install pytest-xdist"), then: +$ pytest -nauto test -k suite_dfilter + +# Run all tests from group_tvb.py, in parallel: +$ pytest -nauto test -k case_tvb + +# Run a single test from group_tvb.py, case_tvb.test_slice_4: +$ pytest test -k "case_tvb and test_slice_4" + +See also https://www.wireshark.org/docs/wsdg_html_chunked/ChapterTests.html diff --git a/doc/README.dissector b/doc/README.dissector new file mode 100644 index 00000000..464bba49 --- /dev/null +++ b/doc/README.dissector @@ -0,0 +1,3723 @@ +This file is a HOWTO for Wireshark developers interested in writing or working +on Wireshark protocol dissectors. It describes expected code patterns and the +use of some of the important functions and variables. + +This file is compiled to give in depth information on Wireshark. +It is by no means all inclusive and complete. Please feel free to discuss on +the developer mailing list or upload merge requests to gitlab. +If you haven't read README.developer, read that first! + +0. Prerequisites. + +Before starting to develop a new dissector, a "running" Wireshark build +environment is required - there's no such thing as a standalone "dissector +build toolkit". 
+ +How to setup such an environment is platform dependent; detailed +information about these steps can be found in the "Developer's Guide" +(available from: https://www.wireshark.org) and in the INSTALL and +README.md files of the sources root dir. + +0.1. Dissector related README files. + +You'll find additional dissector related information in the following README +files: + +- doc/README.heuristic - what are heuristic dissectors and how to write them +- doc/README.plugins - how to "pluginize" a dissector +- doc/README.request_response_tracking - how to track req./resp. times and such +- doc/README.wmem - how to obtain "memory leak free" memory + +0.2 Contributors + +James Coe +Gilbert Ramirez +Jeff Foster +Olivier Abad +Laurent Deniel +Gerald Combs +Guy Harris +Ulf Lamping +Barbu Paul - Gheorghe + +1. Setting up your protocol dissector code. + +This section provides skeleton code for a protocol dissector. It also explains +the basic functions needed to enter values in the traffic summary columns, +add to the protocol tree, and work with registered header fields. + +1.1 Skeleton code. + +Wireshark requires certain things when setting up a protocol dissector. +We provide basic skeleton code for a dissector that you can copy to a new file +and fill in. Your dissector should follow the naming convention of "packet-" +followed by the abbreviated name for the protocol. It is recommended that where +possible you keep to the IANA abbreviated name for the protocol, if there is +one, or a commonly-used abbreviation for the protocol, if any. + +The skeleton code lives in the file "packet-PROTOABBREV.c" in the same source +directory as this README. + +If instead of using the skeleton you base your dissector on an existing real +dissector, please put a little note in the copyright header indicating which +dissector you started with. 
+ +Usually, you will put your newly created dissector file into the directory +epan/dissectors/, just like all the other packet-*.c files already in there. + +Also, please add your dissector file to the corresponding makefiles, +described in section "1.8 Editing CMakeLists.txt to add your dissector" below. + +Dissectors that use the dissector registration API to register with a lower +level protocol (this is the vast majority) don't need to define a prototype in +their .h file. For other dissectors the main dissector routine should have a +prototype in a header file whose name is "packet-", followed by the abbreviated +name for the protocol, followed by ".h"; any dissector file that calls your +dissector should be changed to include that file. + +You may not need to include all the headers listed in the skeleton, and you may +need to include additional headers. + +1.2 Explanation of needed substitutions in code skeleton. + +In the skeleton sample code the following strings should be substituted with +your information. + +YOUR_NAME Your name, of course. You do want credit, don't you? + It's the only payment you will receive.... +YOUR_EMAIL_ADDRESS Keep those cards and letters coming. +PROTONAME The name of the protocol; this is displayed in the + top-level protocol tree item for that protocol. +PROTOSHORTNAME An abbreviated name for the protocol; this is displayed + in the "Preferences" dialog box if your dissector has + any preferences, in the dialog box of enabled protocols, + and in the dialog box for filter fields when constructing + a filter expression. +PROTOFILTERNAME A name for the protocol for use in filter expressions; + it may contain only letters, digits, hyphens, underscores and + periods. Names should use lower case only. (Support for + upper/mixed case may be removed in the future.) +PROTOABBREV An abbreviation for the protocol; this is used in code and + must be a valid C identifier. Additionally it should follow + any applicable C style guidelines. 
It is usually the same as + PROTOFILTERNAME with all lower-case letters and + non-alphanumerics replaced with underscores. +LICENSE The license this dissector is under. Please use a SPDX License + identifier. +YEARS The years the above license is valid for. +FIELDNAME The displayed name for the header field. +FIELDFILTERNAME A name for the header field for use in filter expressions; + it may contain only letters, digits, hyphens, underscores and + periods. It must start with PROTOFILTERNAME followed by a dot. + Names should use lower case only. (Support for upper/mixed case + may be removed in the future.) +FIELDABBREV An abbreviation for the header field; this is used in code and + must be a valid C identifier. Additionally it should follow + any applicable C style guidelines. It is usually the same as + FIELDFILTERNAME with all lower-case letters and + non-alphanumerics replaced with underscores. +FIELDTYPE FT_NONE, FT_BOOLEAN, FT_CHAR, FT_UINT8, FT_UINT16, FT_UINT24, + FT_UINT32, FT_UINT40, FT_UINT48, FT_UINT56, FT_UINT64, + FT_INT8, FT_INT16, FT_INT24, FT_INT32, FT_INT40, FT_INT48, + FT_INT56, FT_INT64, FT_IEEE_11073_SFLOAT, FT_IEEE_11073_FLOAT, + FT_FLOAT, FT_DOUBLE, FT_ABSOLUTE_TIME, FT_RELATIVE_TIME, + FT_STRING, FT_STRINGZ, FT_STRINGZPAD, FT_STRINGZTRUNC, + FT_UINT_STRING, FT_ETHER, FT_BYTES, FT_UINT_BYTES, FT_IPv4, + FT_IPv6, FT_IPXNET, FT_FRAMENUM, FT_PROTOCOL, FT_EUI64, FT_GUID, + FT_OID, FT_REL_OID, FT_AX25, FT_VINES, FT_SYSTEM_ID, FT_FCWWN +FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and + FT_INT{8,16,24,32,40,48,56,64}: + + BASE_DEC, BASE_HEX, BASE_OCT, BASE_DEC_HEX, BASE_HEX_DEC, + BASE_CUSTOM, or BASE_NONE, possibly ORed with + BASE_RANGE_STRING, BASE_EXT_STRING, BASE_VAL64_STRING, + BASE_ALLOW_ZERO, BASE_UNIT_STRING, BASE_SPECIAL_VALS, + BASE_NO_DISPLAY_VALUE, BASE_SHOW_ASCII_PRINTABLE, or + BASE_SHOW_UTF_8_PRINTABLE + + BASE_NONE may be used with a non-NULL FIELDCONVERT when the + numeric value of the field itself is not of significance to
+ the user (for example, the number is a generated field). + When this is the case the numeric value is not shown to the + user in the protocol decode nor is it used when preparing + filters for the field in question. + + BASE_NO_DISPLAY_VALUE will just display the field name with + no value. It is intended for byte arrays (FT_BYTES or + FT_UINT_BYTES) or header fields above a subtree. The + value will still be filterable, just not displayed. + + --For FT_UINT16: + + BASE_PT_UDP, BASE_PT_TCP, BASE_PT_DCCP or BASE_PT_SCTP + + --For FT_UINT24: + + BASE_OUI + + --For FT_CHAR: + BASE_HEX, BASE_OCT, BASE_CUSTOM, or BASE_NONE, possibly + ORed with BASE_RANGE_STRING, BASE_EXT_STRING or + BASE_VAL64_STRING. + + BASE_NONE can be used in the same way as with FT_UINT8. + + --For FT_FLOAT, FT_DOUBLE: + BASE_NONE, BASE_DEC, BASE_HEX, BASE_EXP or BASE_CUSTOM. + + BASE_NONE uses BASE_DEC or BASE_EXP, similarly to the + %g double format for the printf() function. + + --For FT_ABSOLUTE_TIME: + + ABSOLUTE_TIME_LOCAL, ABSOLUTE_TIME_UTC, or + ABSOLUTE_TIME_DOY_UTC + + --For FT_BOOLEAN: + + if BITMASK is non-zero: + Number of bits in the field containing the FT_BOOLEAN + bitfield. + otherwise: + (must be) BASE_NONE + + --For FT_STRING, FT_STRINGZ and FT_UINT_STRING: + + (must be) BASE_NONE + + --For FT_BYTES and FT_UINT_BYTES: + + SEP_DOT, SEP_DASH, SEP_COLON, or SEP_SPACE to provide + a separator between bytes; BASE_NONE has no separator + between bytes. These can be ORed with BASE_ALLOW_ZERO, + BASE_SHOW_ASCII_PRINTABLE, or BASE_SHOW_UTF_8_PRINTABLE. + + BASE_ALLOW_ZERO displays <none> instead of <MISSING> + for a zero-sized byte array. + BASE_SHOW_ASCII_PRINTABLE will check whether the + field's value consists entirely of printable ASCII + characters and, if so, will display the field's value + as a string, in quotes. The value will still be + filterable as a byte value.
+ BASE_SHOW_UTF_8_PRINTABLE will check whether the + field's value is valid UTF-8 consisting entirely of + printable characters and, if so, will display the field's + value as a string, in quotes. The value will still be + filterable as a byte value. + + --For FT_IPv4: + + BASE_NETMASK - Used for IPv4 addresses that should never + be resolved (like netmasks) + otherwise: + (must be) BASE_NONE + + --For all other types: + + BASE_NONE +FIELDCONVERT VALS(x), VALS64(x), RVALS(x), TFS(x), CF_FUNC(x), NULL +BITMASK Used to mask a field not 8-bit aligned or with a size other + than a multiple of 8 bits +FIELDDESCR A brief description of the field, or NULL. [Please do not use ""]. + +If, for example, PROTONAME is "Internet Bogosity Discovery Protocol", +PROTOSHORTNAME would be "IBDP", and PROTOFILTERNAME would be "ibdp". Try to +conform with IANA names. + +1.2.1 Automatic substitution in code skeleton + +Instead of manual substitutions in the code skeleton, a tool to automate it can +be found under the tools directory. The script is called tools/generate-dissector.py +and takes all the needed options to generate a compilable dissector. Look at the +above fields to know how to set them. Some assumptions have been made in the +generation to shorten the list of required options. The script patches the +CMakeLists.txt file adding the new dissector in the proper list, alphabetically +sorted. + +1.3 The dissector and the data it receives. + + +1.3.1 Header file. + +This is only needed if the dissector doesn't use self-registration to +register itself with the lower level dissector, or if the protocol dissector +wants/needs to expose code to other subdissectors. + +The dissector must be declared exactly as follows in the file +packet-PROTOABBREV.h: + +int +dissect_PROTOABBREV(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree); + + +1.3.2 Extracting data from packets. + +NOTE: See the file /epan/tvbuff.h for more details.
+ +The "tvb" argument to a dissector points to a buffer containing the raw +data to be analyzed by the dissector; for example, for a protocol +running atop UDP, it contains the UDP payload (but not the UDP header, +or any protocol headers above it). A tvbuffer is an opaque data +structure, the internal data structures are hidden and the data must be +accessed via the tvbuffer accessors. + +The accessors are: + +Bit accessors for a maximum of 8-bits, 16-bits 32-bits and 64-bits: + +uint8_t tvb_get_bits8(tvbuff_t *tvb, int bit_offset, const int no_of_bits); +uint16_t tvb_get_bits16(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding); +uint32_t tvb_get_bits32(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding); +uint64_t tvb_get_bits64(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding); + +Single-byte accessors for 8-bit unsigned integers (uint8_t) and 8-bit +signed integers (int8_t): + +uint8_t tvb_get_guint8(tvbuff_t *tvb, const int offset); +int8_t tvb_get_gint8(tvbuff_t *tvb, const int offset); + +Network-to-host-order accessors: + +16-bit unsigned (uint16_t) and signed (int16_t) integers: + +uint16_t tvb_get_ntohs(tvbuff_t *tvb, const int offset); +int16_t tvb_get_ntohis(tvbuff_t *tvb, const int offset); + +24-bit unsigned and signed integers: + +uint32_t tvb_get_ntoh24(tvbuff_t *tvb, const int offset); +int32_t tvb_get_ntohi24(tvbuff_t *tvb, const int offset); + +32-bit unsigned (uint32_t) and signed (int32_t) integers: + +uint32_t tvb_get_ntohl(tvbuff_t *tvb, const int offset); +int32_t tvb_get_ntohil(tvbuff_t *tvb, const int offset); + +40-bit unsigned and signed integers: + +uint64_t tvb_get_ntoh40(tvbuff_t *tvb, const int offset); +int64_t tvb_get_ntohi40(tvbuff_t *tvb, const int offset); + +48-bit unsigned and signed integers: + +uint64_t tvb_get_ntoh48(tvbuff_t *tvb, const int offset); +int64_t tvb_get_ntohi48(tvbuff_t *tvb, const int offset); + +56-bit unsigned 
and signed integers: + +uint64_t tvb_get_ntoh56(tvbuff_t *tvb, const int offset); +int64_t tvb_get_ntohi56(tvbuff_t *tvb, const int offset); + +64-bit unsigned (uint64_t) and signed (int64_t) integers: + +uint64_t tvb_get_ntoh64(tvbuff_t *tvb, const int offset); +int64_t tvb_get_ntohi64(tvbuff_t *tvb, const int offset); + +Single-precision and double-precision IEEE floating-point numbers: + +float tvb_get_ntohieee_float(tvbuff_t *tvb, const int offset); +double tvb_get_ntohieee_double(tvbuff_t *tvb, const int offset); + +Little-Endian-to-host-order accessors: + +16-bit unsigned (uint16_t) and signed (int16_t) integers: + +uint16_t tvb_get_letohs(tvbuff_t *tvb, const int offset); +int16_t tvb_get_letohis(tvbuff_t *tvb, const int offset); + +24-bit unsigned and signed integers: + +uint32_t tvb_get_letoh24(tvbuff_t *tvb, const int offset); +int32_t tvb_get_letohi24(tvbuff_t *tvb, const int offset); + +32-bit unsigned (uint32_t) and signed (int32_t) integers: + +uint32_t tvb_get_letohl(tvbuff_t *tvb, const int offset); +int32_t tvb_get_letohil(tvbuff_t *tvb, const int offset); + +40-bit unsigned and signed integers: + +uint64_t tvb_get_letoh40(tvbuff_t *tvb, const int offset); +int64_t tvb_get_letohi40(tvbuff_t *tvb, const int offset); + +48-bit unsigned and signed integers: + +uint64_t tvb_get_letoh48(tvbuff_t *tvb, const int offset); +int64_t tvb_get_letohi48(tvbuff_t *tvb, const int offset); + +56-bit unsigned and signed integers: + +uint64_t tvb_get_letoh56(tvbuff_t *tvb, const int offset); +int64_t tvb_get_letohi56(tvbuff_t *tvb, const int offset); + +64-bit unsigned (uint64_t) and signed (int64_t) integers: + +uint64_t tvb_get_letoh64(tvbuff_t *tvb, const int offset); +int64_t tvb_get_letohi64(tvbuff_t *tvb, const int offset); + +NOTE: Although each of the integer accessors above return types with +specific sizes, the returned values are subject to C's integer promotion +rules. 
It's often safer and more useful to use int or unsigned for 32-bit +and smaller types, and int64_t or uint64_t for 40-bit and larger types. +Just because a value occupied 16 bits on the wire or over the air +doesn't mean it will within Wireshark. + +Single-precision and double-precision IEEE floating-point numbers: + +float tvb_get_letohieee_float(tvbuff_t *tvb, const int offset); +double tvb_get_letohieee_double(tvbuff_t *tvb, const int offset); + +Encoding-to_host-order accessors: + +16-bit unsigned (uint16_t) and signed (int16_t) integers: + +uint16_t tvb_get_guint16(tvbuff_t *tvb, const int offset, const unsigned encoding); +int16_t tvb_get_gint16(tvbuff_t *tvb, const int offset, const unsigned encoding); + +24-bit unsigned and signed integers: + +uint32_t tvb_get_guint24(tvbuff_t *tvb, const int offset, const unsigned encoding); +int32_t tvb_get_gint24(tvbuff_t *tvb, const int offset, const unsigned encoding); + +32-bit unsigned (uint32_t) and signed (int32_t) integers: + +uint32_t tvb_get_guint32(tvbuff_t *tvb, const int offset, const unsigned encoding); +int32_t tvb_get_gint32(tvbuff_t *tvb, const int offset, const unsigned encoding); + +40-bit unsigned and signed integers: + +uint64_t tvb_get_guint40(tvbuff_t *tvb, const int offset, const unsigned encoding); +int64_t tvb_get_gint40(tvbuff_t *tvb, const int offset, const unsigned encoding); + +48-bit unsigned and signed integers: + +uint64_t tvb_get_guint48(tvbuff_t *tvb, const int offset, const unsigned encoding); +int64_t tvb_get_gint48(tvbuff_t *tvb, const int offset, const unsigned encoding); + +56-bit unsigned and signed integers: + +uint64_t tvb_get_guint56(tvbuff_t *tvb, const int offset, const unsigned encoding); +int64_t tvb_get_gint56(tvbuff_t *tvb, const int offset, const unsigned encoding); + +64-bit unsigned (uint64_t) and signed (int64_t) integers: + +uint64_t tvb_get_guint64(tvbuff_t *tvb, const int offset, const unsigned encoding); +int64_t tvb_get_gint64(tvbuff_t *tvb, const int offset, 
const unsigned encoding); + +Single-precision and double-precision IEEE floating-point numbers: + +float tvb_get_ieee_float(tvbuff_t *tvb, const int offset, const unsigned encoding); +double tvb_get_ieee_double(tvbuff_t *tvb, const int offset, const unsigned encoding); + +"encoding" should be ENC_BIG_ENDIAN for Network-to-host-order, +ENC_LITTLE_ENDIAN for Little-Endian-to-host-order, or ENC_HOST_ENDIAN +for host order. + +Accessors for IPv4 and IPv6 addresses: + +uint32_t tvb_get_ipv4(tvbuff_t *tvb, const int offset); +void tvb_get_ipv6(tvbuff_t *tvb, const int offset, ws_in6_addr *addr); + +NOTE: IPv4 addresses are not to be converted to host byte order before +being passed to "proto_tree_add_ipv4()". You should use "tvb_get_ipv4()" +to fetch them, not "tvb_get_ntohl()" *OR* "tvb_get_letohl()" - don't, +for example, try to use "tvb_get_ntohl()", find that it gives you the +wrong answer on the PC on which you're doing development, and try +"tvb_get_letohl()" instead, as "tvb_get_letohl()" will give the wrong +answer on big-endian machines. + +char *tvb_ip_to_str(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset) +char *tvb_ip6_to_str(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset) + +Returns a null-terminated buffer containing a string with IPv4 or IPv6 Address +from the specified tvbuff, starting at the specified offset. + +Accessors for GUID: + +void tvb_get_ntohguid(tvbuff_t *tvb, const int offset, e_guid_t *guid); +void tvb_get_letohguid(tvbuff_t *tvb, const int offset, e_guid_t *guid); +void tvb_get_guid(tvbuff_t *tvb, const int offset, e_guid_t *guid, const unsigned encoding); + +String accessors: + +uint8_t *tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int length, const unsigned encoding); + +Returns a null-terminated buffer allocated from the specified scope, containing +data from the specified tvbuff, starting at the specified offset, and containing +the specified length worth of characters. 
Reads data in the specified encoding +and produces UTF-8 in the buffer. See below for a list of input encoding values. + +The buffer is allocated in the given wmem scope (see README.wmem for more +information). + +uint8_t *tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp, const unsigned encoding); + +Returns a null-terminated buffer allocated from the specified scope, +containing data from the specified tvbuff, starting at the specified +offset, and containing all characters from the tvbuff up to and +including a terminating null character in the tvbuff. Reads data in the +specified encoding and produces UTF-8 in the buffer. See below for a +list of input encoding values. "*lengthp" will be set to the length of +the string, including the terminating null. + +The buffer is allocated in the given wmem scope (see README.wmem for more +information). + +int tvb_get_nstringz(tvbuff_t *tvb, const int offset, const unsigned bufsize, uint8_t* buffer); +int tvb_get_nstringz0(tvbuff_t *tvb, const int offset, const unsigned bufsize, uint8_t* buffer); + +Copies bufsize bytes, including the terminating NULL, to buffer. If a NULL +terminator is found before reaching bufsize, only the bytes up to and including +the NULL are copied. Returns the number of bytes copied (not including +terminating NULL), or -1 if the string was truncated in the buffer due to +not having reached the terminating NULL. In this case, the resulting +buffer is not NULL-terminated. +tvb_get_nstringz0() works like tvb_get_nstringz(), but never returns -1 since +the string is guaranteed to have a terminating NULL. If the string was truncated +when copied into buffer, a NULL is placed at the end of buffer to terminate it. 
+ +char *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb, + const int bit_offset, int no_of_chars); + +tvb_get_ts_23_038_7bits_string() returns a string of a given number of +characters and encoded according to 3GPP TS 23.038 7 bits alphabet. + +The buffer is allocated in the given wmem scope (see README.wmem for more +information). + +Byte Array Accessors: + +char *tvb_bytes_to_str(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int len); + +Formats a bunch of data from a tvbuff as bytes, returning a pointer +to the string with the data formatted as two hex digits for each byte. +The string pointed to is stored in an "wmem_alloc'd" buffer which will be freed +depending on its scope (typically wmem_packet_scope which is freed after the frame). +The formatted string will contain the hex digits for at most the first 16 bytes of +the data. If len is greater than 16 bytes, a trailing "..." will be added to the string. + +char *tvb_bytes_to_str_punct(wmem_allocator_t *scope, tvbuff_t *tvb, + const int offset, const int len, const char punct); + +This function is similar to tvb_bytes_to_str(...) except that 'punct' is inserted +between the hex representation of each byte. + +GByteArray *tvb_get_string_bytes(tvbuff_t *tvb, const int offset, const int length, + const unsigned encoding, GByteArray* bytes, int *endoff) + +Given a tvbuff, an offset into the tvbuff, and a length that starts +at that offset (which may be -1 for "all the way to the end of the +tvbuff"), fetch the hex-decoded byte values of the tvbuff into the +passed-in 'bytes' array, based on the passed-in encoding. In other +words, convert from a hex-ascii string in tvbuff, into the supplied +GByteArray. 
+ +char *tvb_bcd_dig_to_wmem_packet_str(tvbuff_t *tvb, const int offset, const int len, dgt_set_t *dgt, bool skip_first); + +Given a tvbuff, an offset into the tvbuff, and a length that starts +at that offset (which may be -1 for "all the way to the end of the +tvbuff"), fetch BCD encoded digits from a tvbuff starting from either +the low or high half byte, formatting the digits according to an input digit set; +if NULL, a default digit set of 0-9 returning "?" for overdecadic digits will be used. +A pointer to the packet scope allocated string will be returned. +Note: a tvbuff content of 0xf is considered a 'filler' and will end the conversion. + +Copying memory: +void* tvb_memcpy(tvbuff_t *tvb, void* target, const int offset, size_t length); + +Copies into the specified target the specified length's worth of data +from the specified tvbuff, starting at the specified offset. + +void *tvb_memdup(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, size_t length); + +Returns a buffer containing a copy of the given TVB bytes. The buffer is +allocated in the given wmem scope (see README.wmem for more information). + +Pointer-retrieval: +/* WARNING! Don't use this function. There is almost always a better way. + * It's dangerous because once this pointer is given to the user, there's + * no guarantee that the user will honor the 'length' and not overstep the + * boundaries of the buffer. Also see the warning in the Portability section. + */ +const uint8_t* tvb_get_ptr(tvbuff_t *tvb, const int offset, const int length); + +Length query: +Get amount of captured data in the buffer (which is *NOT* necessarily the +length of the packet). You probably want tvb_reported_length instead: + + unsigned tvb_captured_length(const tvbuff_t *tvb); + +Get reported length of buffer: + + unsigned tvb_reported_length(const tvbuff_t *tvb); + + +1.4 Functions to handle columns in the traffic summary window.
+ +The topmost pane of the main window is a list of the packets in the +capture, possibly filtered by a display filter. + +Each line corresponds to a packet, and has one or more columns, as +configured by the user. + +Many of the columns are handled by code outside individual dissectors; +most dissectors need only specify the value to put in the "Protocol" and +"Info" columns. + +Columns are specified by COL_ values; the COL_ value for the "Protocol" +field, typically giving an abbreviated name for the protocol (but not +the all-lower-case abbreviation used elsewhere) is COL_PROTOCOL, and the +COL_ value for the "Info" field, giving a summary of the contents of the +packet for that protocol, is COL_INFO. + +The value for a column can be specified with one of several functions, +all of which take 'pinfo->cinfo' (the column information reached through +the 'pinfo' argument to the dissector) as their first argument, and the +COL_ value for the column as their second argument. + +1.4.1 The col_set_str function. + +'col_set_str' takes a string as its third argument, and sets the value +for the column to that value. It assumes that the pointer passed to it +points to a string constant or a static "const" array, not to a +variable, as it doesn't copy the string, it merely saves the pointer +value; the argument can itself be a variable, as long as it always +points to a string constant or a static "const" array. + +It is more efficient than 'col_add_str' or 'col_add_fstr'; however, if +the dissector will be using 'col_append_str' or 'col_append_fstr' to +append more information to the column, the string will have to be copied +anyway, so it's best to use 'col_add_str' rather than 'col_set_str' in +that case. + +For example, to set the "Protocol" column +to "PROTOFILTERNAME": + + col_set_str(pinfo->cinfo, COL_PROTOCOL, "PROTOFILTERNAME"); + + +1.4.2 The col_add_str function. + +'col_add_str' takes a string as its third argument, and sets the value +for the column to that value.
It takes the same arguments as +'col_set_str', but copies the string, so that if the string is, for +example, an automatic variable that won't remain in scope when the +dissector returns, it's safe to use. + + +1.4.3 The col_add_fstr function. + +'col_add_fstr' takes a 'printf'-style format string as its third +argument, and 'printf'-style arguments corresponding to '%' format +items in that string as its subsequent arguments. For example, to set +the "Info" field to "<reqtype> request, <n> bytes", where "reqtype" is a +string containing the type of the request in the packet and "n" is an +unsigned integer containing the number of bytes in the request: + + col_add_fstr(pinfo->cinfo, COL_INFO, "%s request, %u bytes", + reqtype, n); + +Don't use 'col_add_fstr' with a format argument of just "%s" - +'col_add_str', or possibly even 'col_set_str' if the string that matches +the "%s" is a static constant string, will do the same job more +efficiently. + + +1.4.4 The col_clear function. + +If the Info column will be filled with information from the packet, that +means that some data will be fetched from the packet before the Info +column is filled in. If the packet is so small that the data in +question cannot be fetched, the routines to fetch the data will throw an +exception (see the comment at the beginning about tvbuffers improving +the handling of short packets - the tvbuffers keep track of how much +data is in the packet, and throw an exception on an attempt to fetch +data past the end of the packet, so that the dissector won't process +bogus data), causing the Info column not to be filled in. + +This means that the Info column will have data for the previous +protocol, which would be confusing if, for example, the Protocol column +had data for this protocol.
+ +Therefore, before a dissector fetches any data whatsoever from the +packet (unless it's a heuristic dissector fetching data to determine +whether the packet is one that it should dissect, in which case it +should check, before fetching the data, whether there's any data to +fetch; if there isn't, it should return false), it should set the +Protocol column and the Info column. + +If the Protocol column will ultimately be set to, for example, a value +containing a protocol version number, with the version number being a +field in the packet, the dissector should, before fetching the version +number field or any other field from the packet, set it to a value +without a version number, using 'col_set_str', and should later set it +to a value with the version number after it's fetched the version +number. + +If the Info column will ultimately be set to a value containing +information from the packet, the dissector should, before fetching any +fields from the packet, clear the column using 'col_clear' (which is +more efficient than clearing it by calling 'col_set_str' or +'col_add_str' with a null string), and should later set it to the real +string after it's fetched the data to use when doing that. + + +1.4.5 The col_append_str function. + +Sometimes the value of a column, especially the "Info" column, can't be +conveniently constructed at a single point in the dissection process; +for example, it might contain small bits of information from many of the +fields in the packet. 'col_append_str' takes, as arguments, the same +arguments as 'col_add_str', but the string is appended to the end of the +current value for the column, rather than replacing the value for that +column. (Note that no blank separates the appended string from the +string to which it is appended; if you want a blank there, you must add +it yourself as part of the string being appended.) + + +1.4.6 The col_append_fstr function. 
+ +'col_append_fstr' is to 'col_add_fstr' as 'col_append_str' is to +'col_add_str' - it takes, as arguments, the same arguments as +'col_add_fstr', but the formatted string is appended to the end of the +current value for the column, rather than replacing the value for that +column. + +1.4.7 The col_append_sep_str and col_append_sep_fstr functions. + +In specific situations the developer knows that a column's value will be +created in a stepwise manner, where the appended values are listed. Both +'col_append_sep_str' and 'col_append_sep_fstr' functions will add an item +separator between two consecutive items, and will not add the separator at the +beginning of the column. The remainder of the work both functions do is +identical to what 'col_append_str' and 'col_append_fstr' do. + +1.4.8 The col_set_fence and col_prepend_fence_fstr functions. + +Sometimes a dissector may be called multiple times for different PDUs in the +same frame (for example in the case of SCTP chunk bundling: several upper +layer data packets may be contained in one SCTP packet). If the upper layer +dissector calls 'col_set_str()' or 'col_clear()' on the Info column when it +begins dissecting each of those PDUs then when the frame is fully dissected +the Info column would contain only the string from the last PDU in the frame. +The 'col_set_fence' function erects a "fence" in the column that prevents +subsequent 'col_...' calls from clearing the data currently in that column. +For example, the SCTP dissector calls 'col_set_fence' on the Info column +after it has called any subdissectors for that chunk so that subdissectors +of any subsequent chunks may only append to the Info column. +'col_prepend_fence_fstr' prepends data before a fence (moving it if +necessary). It will create a fence at the end of the prepended data if the +fence does not already exist. + + +1.4.9 The col_set_time function. + +The 'col_set_time' function takes an nstime value as its third argument. 
+This nstime value is a relative value and will be added as such to the +column. The fourth argument is the filtername holding this value. This +way, rightclicking on the column makes it possible to build a filter +based on the time-value. + +For example: + + col_set_time(pinfo->cinfo, COL_REL_TIME, &ts, "s4607.ploc.time"); + + +1.5 Constructing the protocol tree. + +The middle pane of the main window, and the topmost pane of a packet +popup window, are constructed from the "protocol tree" for a packet. + +The protocol tree, or proto_tree, is a GNode, the N-way tree structure +available within GLIB. Of course the protocol dissectors don't care +what a proto_tree really is; they just pass the proto_tree pointer as an +argument to the routines which allow them to add items and new branches +to the tree. + +When a packet is selected in the packet-list pane, or a packet popup +window is created, a new logical protocol tree (proto_tree) is created. +The pointer to the proto_tree (in this case, 'protocol tree'), is passed +to the top-level protocol dissector, and then to all subsequent protocol +dissectors for that packet, and then the GUI tree is drawn via +proto_tree_draw(). + +The logical proto_tree needs to know detailed information about the protocols +and fields about which information will be collected from the dissection +routines. By strictly defining (or "typing") the data that can be attached to a +proto tree, searching and filtering becomes possible. This means that for +every protocol and field (which I also call "header fields", since they are +fields in the protocol headers) which might be attached to a tree, some +information is needed. + +Every dissector routine will need to register its protocols and fields +with the central protocol routines (in proto.c). At first I thought I +might keep all the protocol and field information about all the +dissectors in one file, but decentralization seemed like a better idea. 
+That one file would have gotten very large; one small change would have +required a re-compilation of the entire file. Also, by allowing +registration of protocols and fields at run-time, loadable modules of +protocol dissectors (perhaps even user-supplied) is feasible. + +To do this, each protocol should have a register routine, which will be +called when Wireshark starts. The code to call the register routines is +generated automatically; to arrange that a protocol's register routine +be called at startup: + + the file containing a dissector's "register" routine must be + added to "DISSECTOR_SRC" in "epan/dissectors/CMakeLists.txt"; + + the "register" routine must have a name of the form + "proto_register_XXX"; + + the "register" routine must take no argument, and return no + value; + + the "register" routine's name must appear in the source file + either at the beginning of the line, or preceded only by "void " + at the beginning of the line (that would typically be the + definition) - other white space shouldn't cause a problem, e.g.: + +void proto_register_XXX(void) { + + ... + +} + +and + +void +proto_register_XXX( void ) +{ + + ... + +} + + and so on should work. + +For every protocol or field that a dissector wants to register, a variable of +type int needs to be used to keep track of the protocol. The IDs are +needed for establishing parent/child relationships between protocols and +fields, as well as associating data with a particular field so that it +can be stored in the logical tree and displayed in the GUI protocol +tree. + +Some dissectors will need to create branches within their tree to help +organize header fields. These branches should be registered as header +fields. Only true protocols should be registered as protocols. This is +so that a display filter user interface knows how to distinguish +protocols from fields. + +A protocol is registered with the name of the protocol and its +abbreviation. 
+ +Here is how the frame "protocol" is registered. + + int proto_frame; + + proto_frame = proto_register_protocol ( + /* name */ "Frame", + /* short name */ "Frame", + /* abbrev */ "frame" ); + +A header field is also registered with its name and abbreviation, but +information about its data type is needed. It helps to look at +the header_field_info struct to see what information is expected: + +struct header_field_info { + const char *name; + const char *abbrev; + enum ftenum type; + int display; + const void *strings; + uint64_t bitmask; + const char *blurb; + ..... +}; + +name (FIELDNAME) +---------------- +A string representing the name of the field. This is the name +that will appear in the graphical protocol tree. It must be a non-empty +string. + +abbrev (FIELDFILTERNAME) +-------------------- +A string with a filter name for the field. The name should start +with the filter name of the parent protocol followed by a period as a +separator. For example, the "src" field in an IP packet would have "ip.src" +as a filter name. It is acceptable to have multiple levels of periods if, +for example, you have fields in your protocol that are then subdivided into +subfields. For example, TRMAC has multiple error fields, so the names +follow this pattern: "trmac.errors.iso", "trmac.errors.noniso", etc. +It must be a non-empty string. + +type (FIELDTYPE) +---------------- +The type of value this field holds. The current field types are: + + FT_NONE No field type. Used for fields that + aren't given a value, and that can only + be tested for presence or absence; a + field that represents a data structure, + with a subtree below it containing + fields for the members of the structure, + or that represents an array with a + subtree below it containing fields for + the members of the array, might be an + FT_NONE field. + FT_PROTOCOL Used for protocols which will be placing + themselves as top-level items in the + "Packet Details" pane of the UI. 
+ FT_BOOLEAN 0 means "false", any other value means + "true". + FT_FRAMENUM A frame number; if this is used, the "Go + To Corresponding Frame" menu item can + work on that field. + FT_CHAR An 8-bit ASCII character. It's treated similarly to an + FT_UINT8, but is displayed as a C-style character + constant. + FT_UINT8 An 8-bit unsigned integer. + FT_UINT16 A 16-bit unsigned integer. + FT_UINT24 A 24-bit unsigned integer. + FT_UINT32 A 32-bit unsigned integer. + FT_UINT40 A 40-bit unsigned integer. + FT_UINT48 A 48-bit unsigned integer. + FT_UINT56 A 56-bit unsigned integer. + FT_UINT64 A 64-bit unsigned integer. + FT_INT8 An 8-bit signed integer. + FT_INT16 A 16-bit signed integer. + FT_INT24 A 24-bit signed integer. + FT_INT32 A 32-bit signed integer. + FT_INT40 A 40-bit signed integer. + FT_INT48 A 48-bit signed integer. + FT_INT56 A 56-bit signed integer. + FT_INT64 A 64-bit signed integer. + FT_IEEE_11073_SFLOAT A 16-bit floating point number, consisting + of a 4-bit exponent and 12-bit mantissa. + FT_IEEE_11073_FLOAT A 32-bit floating point number, consisting + of an 8-bit exponent and 24-bit mantissa. + FT_FLOAT A single-precision floating point number. + FT_DOUBLE A double-precision floating point number. + FT_ABSOLUTE_TIME An absolute time from some fixed point in time, + displayed as the date, followed by the time, as + hours, minutes, and seconds with 9 digits after + the decimal point. + FT_RELATIVE_TIME Seconds (4 bytes) and nanoseconds (4 bytes) + of time relative to an arbitrary time, + displayed as seconds and 9 digits + after the decimal point. + FT_STRING A string of characters, not necessarily + NULL-terminated, but possibly NULL-padded. + This, and the other string-of-characters + types, are to be used for text strings, + not raw binary data. + FT_STRINGZ A NULL-terminated string of characters. + The string length is normally the length + given in the proto_tree_add_item() call.
+ However if the length given in the call + is -1, then the length used is that + returned by calling tvb_strsize(). + This should only be used if the string, + in the packet, is always terminated with + a NULL character, either because the length + isn't otherwise specified or because a + character count *and* a NULL terminator are + both used. + FT_STRINGZPAD A NULL-padded string of characters. + The length is given in the proto_tree_add_item() + call, but may be larger than the length of + the string, with extra bytes being NULL padding. + This is typically used for fixed-length fields + that contain a string value that might be shorter + than the fixed length. + FT_STRINGZTRUNC A NULL-truncated string of characters. + The length is given in the proto_tree_add_item() + call, but may be larger than the length of + the string, with a NULL character after the last + character of the string, and the remaining bytes + being padding with unspecified contents. This is + typically used for fixed-length fields that contain + a string value that might be shorter than the fixed + length. + FT_UINT_STRING A counted string of characters, consisting + of a count (represented as an integral value, + of width given in the proto_tree_add_item() + call) followed immediately by that number of + characters. + FT_ETHER A six octet string displayed in + Ethernet-address format. + FT_BYTES A string of bytes with arbitrary values; + used for raw binary data. + FT_UINT_BYTES A counted string of bytes, consisting + of a count (represented as an integral value, + of width given in the proto_tree_add_item() + call) followed immediately by that number of + arbitrary values; used for raw binary data. + FT_IPv4 A version 4 IP address (4 bytes) displayed + in dotted-quad IP address format (4 + decimal numbers separated by dots). + FT_IPv6 A version 6 IP address (16 bytes) displayed + in standard IPv6 address format. 
+ FT_IPXNET An IPX address displayed in hex as a 6-byte + network number followed by a 6-byte station + address. + FT_GUID A Globally Unique Identifier + FT_OID An ASN.1 Object Identifier + FT_REL_OID An ASN.1 Relative Object Identifier + FT_EUI64 An EUI-64 Address + FT_AX25 An AX.25 Address + FT_VINES A Vines Address + FT_SYSTEM_ID An OSI System-ID + FT_FCWWN A Fibre Channel WWN Address + +Some of these field types are still not handled in the display filter +routines, but the most common ones are. The FT_UINT* variables all +represent unsigned integers, and the FT_INT* variables all represent +signed integers; the number on the end represents how many bits are used +to represent the number. + +Some constraints are imposed on the header fields depending on the type +(e.g. FT_BYTES) of the field. Fields of type FT_ABSOLUTE_TIME must use +'ABSOLUTE_TIME_{LOCAL,UTC,DOY_UTC}, NULL, 0x0' as values for the +'display', 'strings', and 'bitmask' fields, and all other non-integral +types (i.e., types that are _not_ FT_INT* and FT_UINT*) must use +'BASE_NONE, NULL, 0x0' as values for the 'display', 'strings', 'bitmask' +fields. The reason is simply that the type itself implicitly defines the +nature of 'display', 'strings', 'bitmask'. + +display (FIELDDISPLAY) +---------------------- +The display field has a couple of overloaded uses. This is unfortunate, +but since we're using C as an application programming language, this sometimes +makes for cleaner programs. Right now I still think that overloading +this variable was okay. + +For integer fields (FT_UINT* and FT_INT*), this variable represents the +base in which you would like the value displayed. The acceptable bases +are: + + BASE_DEC, + BASE_HEX, + BASE_OCT, + BASE_DEC_HEX, + BASE_HEX_DEC, + BASE_CUSTOM + +BASE_DEC, BASE_HEX, and BASE_OCT are decimal, hexadecimal, and octal, +respectively. BASE_DEC_HEX and BASE_HEX_DEC display the value in two bases +(the 1st representation followed by the 2nd in parentheses).
+ +BASE_CUSTOM allows one to specify a callback function pointer that will +format the value. + +For 32-bit and smaller values, custom_fmt_func_t can be used to declare +the callback function pointer. Specifically, this is defined as: + + void func(char *, uint32_t); + +For values larger than 32 bits, custom_fmt_func_64_t can be used to declare +the callback function pointer. Specifically, this is defined as: + + void func(char *, uint64_t); + +The first argument is a pointer to a buffer of the ITEM_LABEL_LENGTH size +and the second argument is the value to be formatted. + +Both custom_fmt_func_t and custom_fmt_func_64_t are defined in epan/proto.h. + +For FT_UINT16 'display' can be used to select a transport layer protocol using one +of BASE_PT_UDP, BASE_PT_TCP, BASE_PT_DCCP or BASE_PT_SCTP. If transport name +resolution is enabled the port field label is displayed in decimal and as a well-known +service name (if one is available). + +For FT_BOOLEAN fields that are also bitfields (i.e., 'bitmask' is non-zero), +'display' is used to specify a "field-width" (i.e., tell the proto_tree how +wide the parent bitfield is). (If the FT_BOOLEAN 'bitmask' is zero, then +'display' must be BASE_NONE). + +For integer fields a "field-width" is not needed since the type of +integer itself (FT_UINT8, FT_UINT16, FT_UINT24, FT_UINT32, FT_UINT40, +FT_UINT48, FT_UINT56, FT_UINT64, etc.) tells the proto_tree how wide the +parent bitfield is. The same is true of FT_CHAR, as it's an 8-bit +character. + +For FT_ABSOLUTE_TIME fields, 'display' is used to indicate whether the +time is to be displayed as a time in the time zone for the machine on +which Wireshark/TShark is running or as UTC and, for UTC, whether the +date should be displayed as "{monthname} {day_of_month}, {year}" or as +"{year/day_of_year}". + +Additionally, BASE_NONE is used for 'display' as a NULL-value.
That is, for +non-integers other than FT_ABSOLUTE_TIME fields, and non-bitfield +FT_BOOLEANs, you'll want to use BASE_NONE in the 'display' field. You may +not use BASE_NONE for integers. + +It is possible that in the future we will record the endianness of +integers. If so, it is likely that we'll use a bitmask on the display field +so that integers would be represented as BEND|BASE_DEC or LEND|BASE_HEX. +But that has not happened yet; note that there are protocols for which +no endianness is specified, such as the X11 protocol and the DCE RPC +protocol, so it would not be possible to record the endianness of all +integral fields. + +strings (FIELDCONVERT) +---------------------- +-- value_string +Some integer fields, of type FT_UINT*, need labels to represent the true +value of a field. You could think of those fields as having an +enumerated data type, rather than an integral data type. + +A 'value_string' structure is a way to map values to strings. + + typedef struct _value_string { + uint32_t value; + char *strptr; + } value_string; + +For fields of that type, you would declare an array of "value_string"s: + + static const value_string valstringname[] = { + { INTVAL1, "Descriptive String 1" }, + { INTVAL2, "Descriptive String 2" }, + { 0, NULL } + }; + +(the last entry in the array must have a NULL 'strptr' value, to +indicate the end of the array). The 'strings' field would be set to +'VALS(valstringname)'. + +If the field has a numeric rather than an enumerated type, the 'strings' +field would be set to NULL. + +If BASE_SPECIAL_VALS is also applied to the display bitmask, then if the +numeric value of a field doesn't match any values in the value_string +then just the numeric value is displayed (i.e. no "Unknown"). This is +intended for use when the value_string only gives special names for +certain field values and values not in the value_string are expected. 
+ +-- Extended value strings +You can also use an extended version of the value_string for faster lookups. +It requires a value_string array as input. +If all of a contiguous range of values from min to max are present in the array +in ascending order the value will be used as a direct index into a value_string array. + +If the values in the array are not contiguous (i.e., there are "gaps"), but are +in ascending order a binary search will be used. + +Note: "gaps" in a value_string array can be filled with "empty" entries, e.g.: +{value, "Unknown"} so that direct access to the array is possible. + +Note: the value_string array values are *unsigned*; IOW: -1 is greater than 0. + So: + { -2, -1, 1, 2 }; wrong: linear search will be used (note gap) + { 1, 2, -2, -1 }; correct: binary search will be used + + As a special case: + { -2, -1, 0, 1, 2 }; OK: direct(indexed) access will be used (note no gap) + +The init macro (see below) will perform a check on the value string the first +time it is used to determine which search algorithm fits and fall back to a +linear search if the value_string does not meet the criteria above. + +Use this macro to initialize the extended value_string at compile time: + +static value_string_ext valstringname_ext = VALUE_STRING_EXT_INIT(valstringname); + +Extended value strings can be created at run time by calling + value_string_ext_new(<ptr to value_string array>, + <total number of entries in the value_string array>, /* include {0, NULL} entry */ + <value_string_name>); + +For hf[] array FT_(U)INT* fields that need a 'valstringname_ext' struct, the +'strings' field would be set to '&valstringname_ext'. Furthermore, the 'display' +field must be ORed with 'BASE_EXT_STRING' (e.g. BASE_DEC|BASE_EXT_STRING). + +-- val64_string + +val64_strings are like value_strings, except that the integer type +used is a uint64_t (instead of uint32_t). Instead of using the VALS() +macro for the 'strings' field in the header_field_info struct array, +'VALS64()' is used. + +BASE_SPECIAL_VALS can also be used for val64_string.
+ +-- val64_string_ext + +val64_string_ext is like value_string_ext, except that the integer type +used is a uint64_t (instead of uint32_t). + +Use this macro to initialize the extended val64_string at compile time: + +static val64_string_ext val64stringname_ext = VAL64_STRING_EXT_INIT(val64stringname); + +Extended val64 strings can be created at run time by calling + val64_string_ext_new(VAL64_STRING_ARRAY_PTR, + TOTAL_NUMBER_OF_ENTRIES, /* include {0, NULL} entry */ + VAL64_STRING_NAME); + +For hf[] array FT_(U)INT* fields that need a 'val64stringname_ext' struct, the +'strings' field would be set to '&val64stringname_ext'. Furthermore, the 'display' +field must be ORed with both 'BASE_EXT_STRING' and 'BASE_VAL64_STRING' +(e.g. BASE_DEC|BASE_EXT_STRING|BASE_VAL64_STRING). + +-- Unit string +Some integer fields, of type FT_UINT* and float fields, of type FT_FLOAT +or FT_DOUBLE, need units of measurement to help convey the field value. + +A 'unit_name_string' structure is a way to add a unit suffix to a field. + + typedef struct unit_name_string { + char *singular; /* name to use for 1 unit */ + char *plural; /* name to use for < 1 or > 1 units */ + } unit_name_string; + +For fields with that unit name, you would declare a "unit_name_string": + + static const unit_name_string unitname[] = + { "single item name" , "multiple item name" }; + +(the second entry can be NULL if there is no plural form of the unit name. +This is typically the case when abbreviations are used instead of full words.) + +For hf[] array FT_(U)INT*, FT_FLOAT and FT_DOUBLE fields that need a +'unit_name_string' struct, the 'strings' field would be set to +'&unitname'. Furthermore, the 'display' field must be ORed +with 'BASE_UNIT_STRING' (e.g. BASE_DEC|BASE_UNIT_STRING). + +There are several "common" unit name structures already defined in +epan/unit_strings.h, e.g. 'units_second_seconds'. Dissector authors may choose +to add the unit name structure there rather than locally in a dissector.
+ +-- Ranges +If the field has a numeric type that might logically fit in ranges of values +one can use a range_string struct. + +Thus a 'range_string' structure is a way to map ranges to strings. + + typedef struct _range_string { + uint32_t value_min; + uint32_t value_max; + const char *strptr; + } range_string; + +For fields of that type, you would declare an array of "range_string"s: + + static const range_string rvalstringname[] = { + { INTVAL_MIN1, INTVAL_MAX1, "Descriptive String 1" }, + { INTVAL_MIN2, INTVAL_MAX2, "Descriptive String 2" }, + { 0, 0, NULL } + }; + +If INTVAL_MIN equals INTVAL_MAX for a given entry the range_string +behavior collapses to the one of value_string. Note that each range_string +within the array is tested in order, so any 'catch-all' entries need to come +after specific individual entries. + +For FT_(U)INT* fields that need a 'range_string' struct, the 'strings' field +would be set to 'RVALS(rvalstringname)'. Furthermore, the 'display' field must be +ORed with 'BASE_RANGE_STRING' (e.g. BASE_DEC|BASE_RANGE_STRING). + +-- Booleans +FT_BOOLEANs have a default map of 0 = "False", 1 (or anything else) = "True". +Sometimes it is useful to change the labels for boolean values (e.g., +to "Yes"/"No", "Fast"/"Slow", etc.). For these mappings, a struct called +true_false_string is used. + + typedef struct true_false_string { + char *true_string; + char *false_string; + } true_false_string; + +For Boolean fields for which "False" and "True" aren't the desired +labels, you would declare a "true_false_string": + + static const true_false_string boolstringname = { + "String for True", + "String for False" + }; + +Its two fields are pointers to the string representing truth, and the +string representing falsehood. For FT_BOOLEAN fields that need a +'true_false_string' struct, the 'strings' field would be set to +'TFS(&boolstringname)'. + +If the Boolean field is to be displayed as "False" or "True", the +'strings' field would be set to NULL.
+ +Wireshark predefines a whole range of ready made "true_false_string"s +in tfs.h, included via packet.h. + +-- Custom +Custom fields (BASE_CUSTOM) should use CF_FUNC(&custom_format_func) for the +'strings' field. + +-- Frame numbers +FT_FRAMENUMs can use the 'strings' field to indicate their purpose by +setting the field to 'FRAMENUM_TYPE(x)', where x is one of the values of +the ft_framenum_type enum: + + FT_FRAMENUM_NONE + FT_FRAMENUM_REQUEST + FT_FRAMENUM_RESPONSE + FT_FRAMENUM_ACK + FT_FRAMENUM_DUP_ACK + FT_FRAMENUM_RETRANS_PREV + FT_FRAMENUM_RETRANS_NEXT + +The packet list uses the value to determine the related packet symbol to draw. +Note that 'strings' field NULL is equal to FRAMENUM_TYPE(FT_FRAMENUM_NONE). + +-- Note to plugin authors +Data cannot get exported from DLLs. For this reason plugin authors cannot use +existing fieldconvert strings (e.g. from existing dissectors or those from +epan/unit_strings.h). Plugins must define value_strings, unit_name_strings, +range_strings and true_false_strings locally. + +bitmask (BITMASK) +----------------- +If the field is a bitfield, then the bitmask is the mask which will +leave only the bits needed to make the field when ANDed with a value. +The proto_tree routines will calculate 'bitshift' automatically +from 'bitmask', by finding the rightmost set bit in the bitmask. +This shift is applied before applying string mapping functions or +filtering. + +If the field is not a bitfield, then bitmask should be set to 0. + +blurb (FIELDDESCR) +------------------ +This is a string giving a proper description of the field. It should be +at least one grammatically complete sentence, or NULL in which case the +name field is used. (Please do not use ""). + +It is meant to provide a more detailed description of the field than the +name alone provides. This information will be used in the man page, and +in a future GUI display-filter creation tool. 
We might also add tooltips +to the labels in the GUI protocol tree, in which case the blurb would +be used as the tooltip text. + + +1.5.1 Field Registration. + +Protocol registration is handled by creating an instance of the +header_field_info struct (or an array of such structs), and +calling the registration function along with the registration ID of +the protocol that is the parent of the fields. Here is a complete example: + + static int proto_eg = -1; + static int hf_field_a = -1; + static int hf_field_b = -1; + + static hf_register_info hf[] = { + + { &hf_field_a, + { "Field A", "proto.field_a", FT_UINT8, BASE_HEX, NULL, + 0xf0, "Field A represents Apples", HFILL }}, + + { &hf_field_b, + { "Field B", "proto.field_b", FT_UINT16, BASE_DEC, VALS(vs), + 0x0, "Field B represents Bananas", HFILL }} + }; + + proto_eg = proto_register_protocol("Example Protocol", + "PROTO", "proto"); + proto_register_field_array(proto_eg, hf, array_length(hf)); + +Be sure that your array of hf_register_info structs is declared 'static', +since the proto_register_field_array() function does not create a copy +of the information in the array... it uses that static copy of the +information that the compiler created inside your array. Here's the +layout of the hf_register_info struct: + +typedef struct hf_register_info { + int *p_id; /* pointer to parent variable */ + header_field_info hfinfo; +} hf_register_info; + +Also be sure to use the handy array_length() macro found in packet.h +to have the compiler compute the array length for you at compile time. + +If you don't have any fields to register, do *NOT* create a zero-length +"hf" array; not all compilers used to compile Wireshark support them. +Just omit the "hf" array, and the "proto_register_field_array()" call, +entirely. + +It is OK to have header fields with a different format be registered with +the same abbreviation. 
For instance, the following is valid: + + static hf_register_info hf[] = { + + { &hf_field_8bit, /* 8-bit version of proto.field */ + { "Field (8 bit)", "proto.field", FT_UINT8, BASE_DEC, NULL, + 0x00, "Field represents FOO", HFILL }}, + + { &hf_field_32bit, /* 32-bit version of proto.field */ + { "Field (32 bit)", "proto.field", FT_UINT32, BASE_DEC, NULL, + 0x00, "Field represents FOO", HFILL }} + }; + +This way a filter expression can match a header field, irrespective of the +representation of it in the specific protocol context. This is interesting +for protocols with variable-width header fields. + +Note that the formats used must all belong to the same group as defined below: +- FT_INT8, FT_INT16, FT_INT24 and FT_INT32 +- FT_CHAR, FT_UINT8, FT_UINT16, FT_UINT24, FT_UINT32, FT_IPXNET and FT_FRAMENUM +- FT_INT40, FT_INT48, FT_INT56 and FT_INT64 +- FT_UINT40, FT_UINT48, FT_UINT56, FT_UINT64 and FT_EUI64 +- FT_ABSOLUTE_TIME and FT_RELATIVE_TIME +- FT_STRING, FT_STRINGZ, FT_UINT_STRING, FT_STRINGZPAD, and FT_STRINGZTRUNC +- FT_FLOAT, FT_DOUBLE, FT_IEEE_11073_SFLOAT and FT_IEEE_11073_FLOAT +- FT_BYTES, FT_UINT_BYTES, FT_ETHER, FT_AX25, FT_VINES and FT_FCWWN +- FT_OID, FT_REL_OID and FT_SYSTEM_ID + +Any field not in a grouping above should *NOT* be used in duplicate field +abbreviations. The current code does not prevent it, but someday in the future +it might. + +The HFILL macro at the end of the struct will set reasonable default values +for internally used fields. + +1.5.2 Adding Items and Values to the Protocol Tree. + +A protocol item is added to an existing protocol tree with one of a +handful of proto_XXX_DO_YYY() functions. 
+ +Subtrees can be made with the proto_item_add_subtree() function: + + item = proto_tree_add_item(....); + new_tree = proto_item_add_subtree(item, tree_type); + +This will add a subtree under the item in question; a subtree can be +created under an item made by any of the "proto_tree_add_XXX" functions, +so that the tree can be given an arbitrary depth. + +Subtree types are integers, assigned by +"proto_register_subtree_array()". To register subtree types, pass an +array of pointers to "int" variables to hold the subtree type values to +"proto_register_subtree_array()": + + static int ett_eg = -1; + static int ett_field_a = -1; + + static int *ett[] = { + &ett_eg, + &ett_field_a + }; + + proto_register_subtree_array(ett, array_length(ett)); + +in your "register" routine, just as you register the protocol and the +fields for that protocol. + +The ett_ variables identify a particular type of subtree so that if you expand +one of them, Wireshark keeps track of that and, when you click on +another packet, it automatically opens all subtrees of that type. +If you close one of them, all subtrees of that type will be closed when +you move to another packet.
+ +There are many functions that the programmer can use to add either +protocol or field labels to the proto_tree, for example: + + proto_item* + proto_tree_add_item(tree, id, tvb, start, length, encoding); + + proto_item* + proto_tree_add_item_ret_int(tree, id, tvb, start, length, encoding, + *retval); + + proto_item* + proto_tree_add_subtree(tree, tvb, start, length, idx, tree_item, + text); + + proto_item * + proto_tree_add_int_format_value(tree, id, tvb, start, length, + value, format, ...); + + proto_item * + proto_tree_add_checksum(proto_tree *tree, tvbuff_t *tvb, const unsigned offset, + const int hf_checksum, const int hf_checksum_status, + struct expert_field* bad_checksum_expert, packet_info *pinfo, + uint32_t computed_checksum, const unsigned encoding, const unsigned flags); + + proto_item * + proto_tree_add_bitmask(tree, tvb, start, header, ett, fields, + encoding); + + proto_item * + proto_tree_add_bits_item(tree, id, tvb, bit_offset, no_of_bits, + encoding); + +The 'tree' argument is the tree to which the item is to be added. The +'tvb' argument is the tvbuff from which the item's value is being +extracted; the 'start' argument is the offset from the beginning of that +tvbuff of the item being added, and the 'length' argument is the length, +in bytes, of the item, bit_offset is the offset in bits and no_of_bits +is the length in bits. + +The length of some items cannot be determined until the item has been +dissected; to add such an item, add it with a length of -1, and, when the +dissection is complete, set the length with 'proto_item_set_len()': + + void + proto_item_set_len(ti, length); + +The "ti" argument is the value returned by the call that added the item +to the tree, and the "length" argument is the length of the item. + +All available protocol tree functions are declared in epan/proto.h, with +their documentation. The details of these functions and their parameters +are described below. 
+ +proto_tree_add_item() +--------------------- +proto_tree_add_item is used when you wish to do no special formatting. +The item added to the GUI tree will contain the name (as passed in the +proto_register_*() function) and a value. The value will be fetched +from the tvbuff by proto_tree_add_item(), based on the type of the field +and the encoding of the value as specified by the "encoding" argument. + +For FT_NONE, FT_BYTES, FT_ETHER, FT_IPv6, FT_IPXNET, FT_OID, FT_REL_OID, +FT_AX25, FT_VINES, FT_SYSTEM_ID, FT_FCWWN fields, and 'protocol' fields +the encoding is not relevant; the 'encoding' argument should be +ENC_NA (Not Applicable). + +For FT_UINT_BYTES fields, the byte order of the count must be specified +as well as the 'encoding' for bytes which should be ENC_NA, +i.e. ENC_LITTLE_ENDIAN|ENC_NA + +For integral, floating-point, Boolean, FT_GUID, and FT_EUI64 fields, +the encoding specifies the byte order of the value; the 'encoding' +argument should be ENC_LITTLE_ENDIAN if the value is little-endian +and ENC_BIG_ENDIAN if it is big-endian. + +For FT_IPv4 fields, the encoding also specifies the byte order of the +value. In almost all cases, the encoding is in network byte order, +hence big-endian, but in at least one protocol dissected by Wireshark, +at least one IPv4 address is byte-swapped, so it's in little-endian +order. + +For string fields, the encoding specifies the character set used for the +string and the way individual code points in that character set are +encoded. For FT_UINT_STRING fields, the byte order of the count must be +specified. For UTF-16, UCS-2, and UCS-4, the byte order of the encoding +must be specified, and optionally ENC_BOM can also be indicated to detect +an initial BYTE ORDER MARK (the specified value is used if the field does +not begin with a BOM.) For counted UTF-16, UCS-2, and UCS-4 strings, the +byte order of the count and the characters in the string must be the same, +unless a BOM overrides the value for the characters. 
In other cases the +string encoding has no endianness or the endianness is implicitly specified +and nothing should be used. The character encodings that are currently +supported are: + + ENC_ASCII - ASCII (currently treated as UTF-8; in the future, + all bytes with the 8th bit set will be treated as + errors) + ENC_UTF_8 - UTF-8-encoded Unicode + ENC_UTF_16 - UTF-16-encoded Unicode, with surrogate pairs + ENC_UCS_2 - UCS-2-encoded subset of Unicode, with no surrogate pairs + and thus no code points above 0xFFFF + ENC_UCS_4 - UCS-4-encoded Unicode (aka UTF-32) + ENC_WINDOWS_1250 - Windows-1250 code page + ENC_WINDOWS_1251 - Windows-1251 code page + ENC_WINDOWS_1252 - Windows-1252 code page + ENC_ISO_646_BASIC - ISO 646 "basic code table" + ENC_ISO_8859_1 - ISO 8859-1 + ENC_ISO_8859_2 - ISO 8859-2 + ENC_ISO_8859_3 - ISO 8859-3 + ENC_ISO_8859_4 - ISO 8859-4 + ENC_ISO_8859_5 - ISO 8859-5 + ENC_ISO_8859_6 - ISO 8859-6 + ENC_ISO_8859_7 - ISO 8859-7 + ENC_ISO_8859_8 - ISO 8859-8 + ENC_ISO_8859_9 - ISO 8859-9 + ENC_ISO_8859_10 - ISO 8859-10 + ENC_ISO_8859_11 - ISO 8859-11 + ENC_ISO_8859_13 - ISO 8859-13 + ENC_ISO_8859_14 - ISO 8859-14 + ENC_ISO_8859_15 - ISO 8859-15 + ENC_ISO_8859_16 - ISO 8859-16 + ENC_3GPP_TS_23_038_7BITS - GSM 7 bits alphabet as described + in 3GPP TS 23.038 + ENC_3GPP_TS_23_038_7BITS_UNPACKED - GSM 7 bits alphabet where each + 7 bit character occupies a distinct octet + ENC_ETSI_TS_102_221_ANNEX_A - Coding scheme for SIM cards with GSM 7 bit + alphabet, UCS-2 characters, or a mixture of the two as described + in ETSI TS 102 221 Annex A + ENC_EBCDIC - EBCDIC + ENC_EBCDIC_CP037 - EBCDIC code page 037 + ENC_EBCDIC_CP500 - EBCDIC code page 500 + ENC_MAC_ROMAN - MAC ROMAN + ENC_CP437 - DOS code page 437 + ENC_CP855 - DOS code page 855 + ENC_CP866 - DOS code page 866 + ENC_ASCII_7BITS - 7 bits ASCII + ENC_T61 - ITU T.61 + ENC_BCD_DIGITS_0_9 - packed BCD (one digit per nibble), digits 0-9 + ENC_KEYPAD_ABC_TBCD - keypad-with-a/b/c "telephony packed BCD" = 
0-9, *, #, a, b, c + ENC_KEYPAD_BC_TBCD - keypad-with-B/C "telephony packed BCD" = 0-9, B, C, *, # + ENC_GB18030 - GB 18030 + ENC_EUC_KR - EUC-KR + ENC_DECT_STANDARD_8BITS - DECT standard 8 bit character set as defined in + ETSI EN 300 175-5 + ENC_DECT_STANDARD_4BITS_TBCD - DECT standard 4 bit character set "telephony + packed BCD" = 0-9, 0xb = SPACE + +Other encodings will be added in the future. + +For FT_ABSOLUTE_TIME fields, the encoding specifies the form in which +the time stamp is specified, as well as its byte order. The time stamp +encodings that are currently supported are: + + ENC_TIME_SECS_NSECS - 8, 12, or 16 bytes. For 8 bytes, the first 4 + bytes are seconds and the next 4 bytes are nanoseconds; for 12 + bytes, the first 8 bytes are seconds and the next 4 bytes are + nanoseconds; for 16 bytes, the first 8 bytes are seconds and + the next 8 bytes are nanoseconds. The seconds are seconds + since the UN*X epoch (1970-01-01 00:00:00 UTC). (I.e., a UN*X + struct timespec with a 4-byte or 8-byte time_t or a structure + with an 8-byte time_t and an 8-byte nanoseconds field.) + + ENC_TIME_NTP - 8 bytes; the first 4 bytes are seconds since the NTP + epoch (1900-01-01 00:00:00 GMT) and the next 4 bytes are 1/2^32's of + a second since that second. (I.e., a 64-bit count of 1/2^32's of a + second since the NTP epoch, with the upper 32 bits first and the + lower 32 bits second, even when little-endian.) + + ENC_TIME_TOD - 8 bytes, as a count of microseconds since the System/3x0 + and z/Architecture epoch (1900-01-01 00:00:00 GMT). + + ENC_TIME_RTPS - 8 bytes; the first 4 bytes are seconds since the UN*X + epoch and the next 4 bytes are 1/2^32's of a second since that + second. (I.e., it's the offspring of a mating between UN*X time and + NTP time). It's used by the Object Management Group's Real-Time + Publish-Subscribe Wire Protocol for the Data Distribution Service.
+ + ENC_TIME_SECS_USECS - 8 bytes; the first 4 bytes are seconds since the + UN*X epoch and the next 4 bytes are microseconds since that + second. (I.e., a UN*X struct timeval with a 4-byte time_t.) + + ENC_TIME_SECS - 4 to 8 bytes, representing a value in seconds since + the UN*X epoch. + + ENC_TIME_MSECS - 6 to 8 bytes, representing a value in milliseconds + since the UN*X epoch. + + ENC_TIME_USECS - 8 bytes, representing a value in microseconds since + the UN*X epoch. + + ENC_TIME_NSECS - 8 bytes, representing a value in nanoseconds since + the UN*X epoch. + + ENC_TIME_SECS_NTP - 4 bytes, representing a count of seconds since + the NTP epoch. + + ENC_TIME_RFC_3971 - 8 bytes, representing a count of 1/64ths of a + second since the UN*X epoch; see section 5.3.1 "Timestamp Option" + in RFC 3971. + + ENC_TIME_MSEC_NTP - 4-8 bytes, representing a count of milliseconds since + the NTP epoch. + + ENC_TIME_MIP6 - 8 bytes; the first 48 bits are seconds since the UN*X epoch + and the remaining 16 bits indicate the number of 1/65536's of a second + since that second. + + ENC_TIME_CLASSIC_MAC_OS_SECS - 4-8 bytes, representing a count of seconds + since January 1, 1904, 00:00:00 UTC. + +For FT_RELATIVE_TIME fields, the encoding specifies the form in which +the time stamp is specified, as well as its byte order. The time stamp +encodings that are currently supported are: + + ENC_TIME_SECS_NSECS - 8, 12, or 16 bytes. For 8 bytes, the first 4 + bytes are seconds and the next 4 bytes are nanoseconds; for 12 + bytes, the first 8 bytes are seconds and the next 4 bytes are + nanoseconds; for 16 bytes, the first 8 bytes are seconds and + the next 8 bytes are nanoseconds. + + ENC_TIME_SECS_USECS - 8 bytes; the first 4 bytes are seconds and the + next 4 bytes are microseconds. + + ENC_TIME_SECS - 4 to 8 bytes, representing a value in seconds. + + ENC_TIME_MSECS - 6 to 8 bytes, representing a value in milliseconds. + + ENC_TIME_USECS - 8 bytes, representing a value in microseconds. 
+ + ENC_TIME_NSECS - 8 bytes, representing a value in nanoseconds. + +For other types, there is no support for proto_tree_add_item(). + +Now that definitions of fields have detailed information about bitfield +fields, you can use proto_tree_add_item() with no extra processing to +add bitfield values to your tree. Here's an example. Take the Format +Identifier (FID) field in the Transmission Header (TH) portion of the SNA +protocol. The FID is the high nibble of the first byte of the TH. The +FID would be registered like this: + + name = "Format Identifier" + abbrev = "sna.th.fid" + type = FT_UINT8 + display = BASE_HEX + strings = sna_th_fid_vals + bitmask = 0xf0 + +The bitmask contains the value which would leave only the FID if bitwise-ANDed +against the parent field, the first byte of the TH. + +The code to add the FID to the tree would be; + + proto_tree_add_item(bf_tree, hf_sna_th_fid, tvb, offset, 1, + ENC_BIG_ENDIAN); + +The definition of the field already has the information about bitmasking +and bitshifting, so it does the work of masking and shifting for us! +This also means that you no longer have to create value_string structs +with the values bitshifted. The value_string for FID looks like this, +even though the FID value is actually contained in the high nibble. +(You'd expect the values to be 0x0, 0x10, 0x20, etc.) + +/* Format Identifier */ +static const value_string sna_th_fid_vals[] = { + { 0x0, "SNA device <--> Non-SNA Device" }, + { 0x1, "Subarea Node <--> Subarea Node" }, + { 0x2, "Subarea Node <--> PU2" }, + { 0x3, "Subarea Node or SNA host <--> Subarea Node" }, + { 0x4, "?" }, + { 0x5, "?" }, + { 0xf, "Adjacent Subarea Nodes" }, + { 0, NULL } +}; + +The final implication of this is that display filters work the way you'd +naturally expect them to. You'd type "sna.th.fid == 0xf" to find Adjacent +Subarea Nodes. 
The user does not have to shift the value of the FID to +the high nibble of the byte ("sna.th.fid == 0xf0") as was necessary +in the past. + +proto_tree_add_item_ret_XXX() +------------------------------ +proto_tree_add_item_ret_XXX is used when you want the displayed value returned +for further processing. Only integer and unsigned integer types up to 32 bits are +supported; usage of the proper FT_ type is checked. + +proto_tree_add_XXX_item() +--------------------- +proto_tree_add_XXX_item is used when you wish to do no special formatting, +but also either wish for the retrieved value from the tvbuff to be handed +back (to avoid doing tvb_get_...), and/or wish to have the value be decoded +from the tvbuff in a string-encoded format. + +The item added to the GUI tree will contain the name (as passed in the +proto_register_*() function) and a value. The value will be fetched +from the tvbuff, based on the type of the XXX name and the encoding of +the value as specified by the "encoding" argument. + +This function retrieves the value even if the passed-in tree param is NULL, +so that it can be used by dissectors at all times to both get the value +and set the tree item to it. + +Like other proto_tree_add functions, if there is a tree and the value cannot +be decoded from the tvbuff, then an expert info error is reported. For string +encoding, this means that a failure to decode the hex value from the string +results in an expert info error being added to the tree. + +For string-decoding, the passed-in encoding argument needs to specify the +string encoding (e.g., ENC_ASCII, ENC_UTF_8) as well as the format. For +some XXX types, the format is constrained - for example, the encoding format +for proto_tree_add_time_item() can only be one of the ENC_ISO_8601_* ones +or ENC_IMF_DATE_TIME. For proto_tree_add_bytes_item() it can only +be ENC_STR_HEX bit-or'ed with one or more of the ENC_SEP_* separator types.
+ +proto_tree_add_protocol_format() +-------------------------------- +proto_tree_add_protocol_format is used to add the top-level item for the +protocol when the dissector routine wants complete control over how the +field and value will be represented on the GUI tree. The ID value for +the protocol is passed in as the "id" argument; the rest of the +arguments are a "printf"-style format and any arguments for that format. +The caller must include the name of the protocol in the format; it is +not added automatically as in proto_tree_add_item(). + +proto_tree_add_none_format() +---------------------------- +proto_tree_add_none_format is used to add an item of type FT_NONE. +The caller must include the name of the field in the format; it is +not added automatically as in proto_tree_add_item(). + +proto_tree_add_bytes() +proto_tree_add_time() +proto_tree_add_ipxnet() +proto_tree_add_ipv4() +proto_tree_add_ipv6() +proto_tree_add_ether() +proto_tree_add_string() +proto_tree_add_boolean() +proto_tree_add_float() +proto_tree_add_double() +proto_tree_add_uint() +proto_tree_add_uint64() +proto_tree_add_int() +proto_tree_add_int64() +proto_tree_add_guid() +proto_tree_add_oid() +proto_tree_add_eui64() +------------------------ +These routines are used to add items to the protocol tree if either: + + the value of the item to be added isn't just extracted from the + packet data, but is computed from data in the packet; + + the value was fetched into a variable. + +The 'value' argument has the value to be added to the tree. + +NOTE: in all cases where the 'value' argument is a pointer, a copy is +made of the object pointed to; if you have dynamically allocated a +buffer for the object, that buffer will not be freed when the protocol +tree is freed - you must free the buffer yourself when you don't need it +any more. + +For proto_tree_add_bytes(), the 'value_ptr' argument is a pointer to a +sequence of bytes. 
+ + +proto_tree_add_bytes_with_length() is similar to proto_tree_add_bytes, +except that the length is not derived from the tvb length. Instead, +the displayed data size is controlled by 'ptr_length'. + +For proto_tree_add_bytes_format() and proto_tree_add_bytes_format_value(), the +'value_ptr' argument is a pointer to a sequence of bytes or NULL if the bytes +should be taken from the given TVB using the given offset and length. + +For proto_tree_add_time(), the 'value_ptr' argument is a pointer to an +"nstime_t", which is a structure containing the time to be added; it has +'secs' and 'nsecs' members, giving the integral part and the fractional +part of a time in units of seconds, with 'nsecs' being the number of +nanoseconds. For absolute times, "secs" is a UNIX-style seconds since +January 1, 1970, 00:00:00 GMT value. + +For proto_tree_add_ipxnet(), the 'value' argument is a 32-bit IPX +network address. + +For proto_tree_add_ipv4(), the 'value' argument is a 32-bit IPv4 +address, in network byte order. + +For proto_tree_add_ipv6(), the 'value_ptr' argument is a pointer to a +128-bit IPv6 address. + +For proto_tree_add_ether(), the 'value_ptr' argument is a pointer to a +48-bit MAC address. + +For proto_tree_add_string(), the 'value_ptr' argument is a pointer to a +text string; this string must be NULL terminated even if the string in the +TVB is not (as may be the case with FT_STRINGs). + +For proto_tree_add_boolean(), the 'value' argument is a 32-bit integer. +It is masked and shifted as defined by the field info after which zero +means "false", and non-zero means "true". + +For proto_tree_add_float(), the 'value' argument is a 'float' in the +host's floating-point format. + +For proto_tree_add_double(), the 'value' argument is a 'double' in the +host's floating-point format. + +For proto_tree_add_uint(), the 'value' argument is a 32-bit unsigned +integer value, in host byte order. (This routine cannot be used to add +64-bit integers.) 
+ +For proto_tree_add_uint64(), the 'value' argument is a 64-bit unsigned +integer value, in host byte order. + +For proto_tree_add_int(), the 'value' argument is a 32-bit signed +integer value, in host byte order. (This routine cannot be used to add +64-bit integers.) + +For proto_tree_add_int64(), the 'value' argument is a 64-bit signed +integer value, in host byte order. + +For proto_tree_add_guid(), the 'value_ptr' argument is a pointer to an +e_guid_t structure. + +For proto_tree_add_oid(), the 'value_ptr' argument is a pointer to an +ASN.1 Object Identifier. + +For proto_tree_add_eui64(), the 'value' argument is a 64-bit integer +value + +proto_tree_add_bytes_format() +proto_tree_add_time_format() +proto_tree_add_ipxnet_format() +proto_tree_add_ipv4_format() +proto_tree_add_ipv6_format() +proto_tree_add_ether_format() +proto_tree_add_string_format() +proto_tree_add_boolean_format() +proto_tree_add_float_format() +proto_tree_add_double_format() +proto_tree_add_uint_format() +proto_tree_add_uint64_format() +proto_tree_add_int_format() +proto_tree_add_int64_format() +proto_tree_add_guid_format() +proto_tree_add_oid_format() +proto_tree_add_eui64_format() +---------------------------- +These routines are used to add items to the protocol tree when the +dissector routine wants complete control over how the field and value +will be represented on the GUI tree. The argument giving the value is +the same as the corresponding proto_tree_add_XXX() function; the rest of +the arguments are a "printf"-style format and any arguments for that +format. The caller must include the name of the field in the format; it +is not added automatically as in the proto_tree_add_XXX() functions. 
+ +proto_tree_add_bytes_format_value() +proto_tree_add_time_format_value() +proto_tree_add_ipxnet_format_value() +proto_tree_add_ipv4_format_value() +proto_tree_add_ipv6_format_value() +proto_tree_add_ether_format_value() +proto_tree_add_string_format_value() +proto_tree_add_boolean_format_value() +proto_tree_add_float_format_value() +proto_tree_add_double_format_value() +proto_tree_add_uint_format_value() +proto_tree_add_uint64_format_value() +proto_tree_add_int_format_value() +proto_tree_add_int64_format_value() +proto_tree_add_guid_format_value() +proto_tree_add_oid_format_value() +proto_tree_add_eui64_format_value() +------------------------------------ + +These routines are used to add items to the protocol tree when the +dissector routine wants complete control over how the value will be +represented on the GUI tree. The argument giving the value is the same +as the corresponding proto_tree_add_XXX() function; the rest of the +arguments are a "printf"-style format and any arguments for that format. +With these routines, unlike the proto_tree_add_XXX_format() routines, +the name of the field is added automatically as in the +proto_tree_add_XXX() functions; only the value is added with the format. +One use case for this would be to add a unit of measurement string to +the value of the field, however using BASE_UNIT_STRING in the hf_ +definition is now preferred. + +proto_tree_add_checksum() +---------------------------- +proto_tree_add_checksum is used to add a checksum field. The hf field +provided must be the correct size of the checksum (FT_UINT8, FT_UINT16, +FT_UINT32, etc.). Additional parameters are there to provide "status" +and expert info depending on whether the checksum matches the provided +value. The "status" and expert info can be used in all cases except +where PROTO_CHECKSUM_NO_FLAGS is used. + +proto_tree_add_subtree() +--------------------- +proto_tree_add_subtree() is used to add a label to the GUI tree and create +a subtree for other fields.
It will contain no value, so it is not searchable +in the display filter process. + +This should only be used for items with subtrees, which may not +have values themselves - the items in the subtree are the ones with values. + +For a subtree, the label on the subtree might reflect some of the items +in the subtree. This means the label can't be set until at least some +of the items in the subtree have been dissected. To do this, use +'proto_item_set_text()' or 'proto_item_append_text()': + + void + proto_item_set_text(proto_item *ti, ...); + + void + proto_item_append_text(proto_item *ti, ...); + +'proto_item_set_text()' takes as an argument the proto_item value returned by +one of the parameters in 'proto_tree_add_subtree()', a 'printf'-style format +string, and a set of arguments corresponding to '%' format items in that string, +and replaces the text for the item created by 'proto_tree_add_subtree()' with the result +of applying the arguments to the format string. + +'proto_item_append_text()' is similar, but it appends to the text for +the item the result of applying the arguments to the format string. + +For example, early in the dissection, one might do: + + subtree = proto_tree_add_subtree(tree, tvb, offset, length, ett, &ti,