From e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 10 Apr 2024 22:34:10 +0200
Subject: Adding upstream version 4.2.2.

Signed-off-by: Daniel Baumann
---
 tools/check_tfs.py | 595 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 595 insertions(+)
 create mode 100755 tools/check_tfs.py

(limited to 'tools/check_tfs.py')

diff --git a/tools/check_tfs.py b/tools/check_tfs.py
new file mode 100755
index 0000000..cecf8d9
--- /dev/null
+++ b/tools/check_tfs.py
@@ -0,0 +1,595 @@
+#!/usr/bin/env python3
+# Wireshark - Network traffic analyzer
+# By Gerald Combs
+# Copyright 1998 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+import re
+import subprocess
+import argparse
+import signal
+
+# This utility scans for tfs items, and works out if standard ones
+# could have been used instead (from epan/tfs.c)
+# Can also check for value_string where common tfs could be used instead.
+
+# TODO:
+# - check how many of the definitions in epan/tfs.c are used in other dissectors
+# - although even if unused, might be in external dissectors?
+# - consider merging Item class with check_typed_item_calls.py ?
+
+
+# Try to exit soon after Ctrl-C is pressed.
+should_exit = False
+
+def signal_handler(sig, frame):
+    global should_exit
+    should_exit = True
+    print('You pressed Ctrl+C - exiting')
+
+signal.signal(signal.SIGINT, signal_handler)
+
+
+# Test for whether the given file was automatically generated.
+def isGeneratedFile(filename):
+    # Check file exists - e.g. may have been deleted in a recent commit.
+    if not os.path.exists(filename):
+        return False
+
+    # Open file
+    f_read = open(os.path.join(filename), 'r')
+    lines_tested = 0
+    for line in f_read:
+        # The comment to say that it's generated is near the top, so give up once
+        # we get a few lines down. 
+ if lines_tested > 10: + f_read.close() + return False + if (line.find('Generated automatically') != -1 or + line.find('Generated Automatically') != -1 or + line.find('Autogenerated from') != -1 or + line.find('is autogenerated') != -1 or + line.find('automatically generated by Pidl') != -1 or + line.find('Created by: The Qt Meta Object Compiler') != -1 or + line.find('This file was generated') != -1 or + line.find('This filter was automatically generated') != -1 or + line.find('This file is auto generated, do not edit!') != -1 or + line.find('This file is auto generated') != -1): + + f_read.close() + return True + lines_tested = lines_tested + 1 + + # OK, looks like a hand-written file! + f_read.close() + return False + + +# Keep track of custom entries that might appear in multiple dissectors, +# so we can consider adding them to tfs.c +custom_tfs_entries = {} +def AddCustomEntry(val1, val2, file): + global custom_tfs_entries + if (val1, val2) in custom_tfs_entries: + custom_tfs_entries[(val1, val2)].append(file) + else: + custom_tfs_entries[(val1, val2)] = [file] + + + +class TFS: + def __init__(self, file, name, val1, val2): + self.file = file + self.name = name + self.val1 = val1 + self.val2 = val2 + + global warnings_found + + # Should not be empty + if not len(val1) or not len(val2): + print('Warning:', file, name, 'has an empty field', self) + warnings_found += 1 + #else: + # Strange if one begins with capital but other doesn't? + #if val1[0].isalpha() and val2[0].isalpha(): + # if val1[0].isupper() != val2[0].isupper(): + # print(file, name, 'one starts lowercase and the other upper', self) + + # Leading or trailing space should not be needed. 
+        if val1.startswith(' ') or val1.endswith(' '):  # val1 is the *true* string of the pair
+            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space \"' + self.val1 + '\"')
+        if val2.startswith(' ') or val2.endswith(' '):  # val2 is the *false* string of the pair
+            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space \"' + self.val2 + '\"')
+
+        # Should really not be identical...
+        if val1.lower() == val2.lower():
+            print('Warning:', file, name, 'true and false strings are the same', self)
+            warnings_found += 1
+
+        # Shouldn't both be negation (with exception..)
+        if (file != os.path.join('epan', 'dissectors', 'packet-smb.c') and (val1.lower().find('not ') != -1) and (val2.lower().find('not ') != -1)):
+            print('Warning:', file, name, self, 'both strings contain not')
+            warnings_found += 1
+
+        # Not expecting full-stops inside strings..
+        if val1.find('.') != -1 or val2.find('.') != -1:
+            print('Warning:', file, name, 'Period found in string', self)
+            warnings_found += 1
+
+
+    def __str__(self):
+        return '{' + '"' + self.val1 + '", "' + self.val2 + '"}'
+
+
+class ValueString:
+    def __init__(self, file, name, vals):
+        self.file = file
+        self.name = name
+        self.raw_vals = vals
+        self.parsed_vals = {}
+        self.looks_like_tfs = True
+
+        no_lines = self.raw_vals.count('{')
+        if no_lines != 3:
+            self.looks_like_tfs = False
+            return
+
+        # Now parse out each entry in the value_string
+        matches = re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals)
+        for m in matches:
+            entry = m[1]
+            # Check each entry looks like part of a TFS entry. 
+            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', entry)
+            if match:
+                if match[1] == '1':
+                    self.parsed_vals[True] = match[2]
+                else:
+                    self.parsed_vals[False] = match[2]
+
+                # Now have both entries
+                if len(self.parsed_vals) == 2:
+                    break
+            else:
+                self.looks_like_tfs = False
+                break
+
+    def __str__(self):
+        return '{' + '"' + self.raw_vals + '"}'
+
+
+field_widths = {
+    'FT_BOOLEAN' : 64,  # TODO: Width depends upon 'display' field
+    'FT_CHAR' : 8,
+    'FT_UINT8' : 8,
+    'FT_INT8' : 8,
+    'FT_UINT16' : 16,
+    'FT_INT16' : 16,
+    'FT_UINT24' : 24,
+    'FT_INT24' : 24,
+    'FT_UINT32' : 32,
+    'FT_INT32' : 32,
+    'FT_UINT40' : 40,
+    'FT_INT40' : 40,
+    'FT_UINT48' : 48,
+    'FT_INT48' : 48,
+    'FT_UINT56' : 56,
+    'FT_INT56' : 56,
+    'FT_UINT64' : 64,
+    'FT_INT64' : 64
+}
+
+
+
+
+# Simplified version of class that is in check_typed_item_calls.py
+class Item:
+
+    previousItem = None
+
+    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
+                 check_mask=False):
+        self.filename = filename
+        self.hf = hf
+        self.filter = filter
+        self.label = label
+        self.strings = strings
+        self.mask = mask
+
+        # N.B. Not setting mask by looking up macros.
+
+        self.item_type = item_type
+        self.type_modifier = type_modifier
+
+        self.set_mask_value(macros)
+
+        self.bits_set = 0
+        for n in range(0, self.get_field_width_in_bits()):
+            if self.check_bit(self.mask_value, n):
+                self.bits_set += 1
+
+    def check_bit(self, value, n):
+        return (value & (0x1 << n)) != 0
+
+
+    def __str__(self):
+        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
+                                                                               self.item_type, self.type_modifier, self.strings, self.mask)
+
+
+
+    def set_mask_value(self, macros):
+        try:
+            self.mask_read = True
+
+            # Substitute mask if found as a macro.. 
+ if self.mask in macros: + self.mask = macros[self.mask] + elif any(not c in '0123456789abcdefABCDEFxX' for c in self.mask): + self.mask_read = False + self.mask_value = 0 + return + + + # Read according to the appropriate base. + if self.mask.startswith('0x'): + self.mask_value = int(self.mask, 16) + elif self.mask.startswith('0'): + self.mask_value = int(self.mask, 8) + else: + self.mask_value = int(self.mask, 10) + except: + self.mask_read = False + self.mask_value = 0 + + + # Return true if bit position n is set in value. + def check_bit(self, value, n): + return (value & (0x1 << n)) != 0 + + + def get_field_width_in_bits(self): + if self.item_type == 'FT_BOOLEAN': + if self.type_modifier == 'NULL': + return 8 # i.e. 1 byte + elif self.type_modifier == 'BASE_NONE': + return 8 + elif self.type_modifier == 'SEP_DOT': # from proto.h, only meant for FT_BYTES + return 64 + else: + try: + # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble. + return int((int(self.type_modifier) + 3)/4)*4 + except: + #print('oops', self) + return 0 + else: + if self.item_type in field_widths: + # Lookup fixed width for this type + return field_widths[self.item_type] + else: + #print('returning 0 for', self) + return 0 + + + + + +def removeComments(code_string): + code_string = re.sub(re.compile(r"/\*.*?\*/",re.DOTALL ) ,"" ,code_string) # C-style comment + code_string = re.sub(re.compile(r"//.*?\n" ) ,"" ,code_string) # C++-style comment + code_string = re.sub(re.compile(r"#if 0.*?#endif",re.DOTALL ) ,"" , code_string) # Ignored region + + return code_string + + +# Look for true_false_string items in a dissector file. +def findTFS(filename): + tfs_found = {} + + with open(filename, 'r', encoding="utf8") as f: + contents = f.read() + # Example: const true_false_string tfs_yes_no = { "Yes", "No" }; + + # Remove comments so as not to trip up RE. 
+ contents = removeComments(contents) + + matches = re.finditer(r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"', contents) + for m in matches: + name = m.group(1) + val1 = m.group(2) + val2 = m.group(3) + # Store this entry. + tfs_found[name] = TFS(filename, name, val1, val2) + + return tfs_found + +# Look for value_string entries in a dissector file. +def findValueStrings(filename): + vals_found = {} + + #static const value_string radio_type_vals[] = + #{ + # { 0, "FDD"}, + # { 1, "TDD"}, + # { 0, NULL } + #}; + + with open(filename, 'r', encoding="utf8") as f: + contents = f.read() + + # Remove comments so as not to trip up RE. + contents = removeComments(contents) + + matches = re.finditer(r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};', contents) + for m in matches: + name = m.group(1) + vals = m.group(2) + vals_found[name] = ValueString(filename, name, vals) + + return vals_found + +# Look for hf items (i.e. full item to be registered) in a dissector file. +def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False): + is_generated = isGeneratedFile(filename) + items = {} + with open(filename, 'r', encoding="utf8") as f: + contents = f.read() + # Remove comments so as not to trip up RE. + contents = removeComments(contents) + + # N.B. re extends all the way to HFILL to avoid greedy matching + matches = re.finditer( r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL', contents) + for m in matches: + # Store this item. 
+ hf = m.group(1) + items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4), + type_modifier=m.group(5), + strings=m.group(6), + macros=macros, + mask=m.group(7)) + return items + +def find_macros(filename): + macros = {} + with open(filename, 'r', encoding="utf8") as f: + contents = f.read() + # Remove comments so as not to trip up RE. + contents = removeComments(contents) + + matches = re.finditer( r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n', contents) + for m in matches: + # Store this mapping. + macros[m.group(1)] = m.group(2) + return macros + + + +def is_dissector_file(filename): + p = re.compile(r'.*packet-.*\.c') + return p.match(filename) + +def findDissectorFilesInFolder(folder): + # Look at files in sorted order, to give some idea of how far through is. + files = [] + + for f in sorted(os.listdir(folder)): + if should_exit: + return + if is_dissector_file(f): + filename = os.path.join(folder, f) + files.append(filename) + return files + + + +warnings_found = 0 +errors_found = 0 + + +tfs_found = 0 + +# Check the given dissector file. +def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False): + global warnings_found + global errors_found + + # Check file exists - e.g. may have been deleted in a recent commit. + if not os.path.exists(filename): + print(filename, 'does not exist!') + return + + # Find items. + file_tfs = findTFS(filename) + + # See if any of these items already existed in tfs.c + for f in file_tfs: + for c in common_tfs: + found = False + + # + # Do not do this check for plugins; plugins cannot import + # data values from libwireshark (functions, yes; data + # values, no). + # + # Test whether there's a common prefix for the file name + # and "plugin/epan/"; if so, this is a plugin, and there + # is no common path and os.path.commonprefix returns an + # empty string, otherwise it returns the common path, so + # we check whether the common path is an empty string. 
+ # + if os.path.commonprefix([filename, 'plugin/epan/']) == '': + exact_case = False + if file_tfs[f].val1 == common_tfs[c].val1 and file_tfs[f].val2 == common_tfs[c].val2: + found = True + exact_case = True + elif file_tfs[f].val1.upper() == common_tfs[c].val1.upper() and file_tfs[f].val2.upper() == common_tfs[c].val2.upper(): + found = True + + if found: + print("Error:" if exact_case else "Warn: ", filename, f, "- could have used", c, 'from tfs.c instead: ', common_tfs[c], + '' if exact_case else ' (capitalisation differs)') + if exact_case: + errors_found += 1 + else: + warnings_found += 1 + break + if not found: + if look_for_common: + AddCustomEntry(file_tfs[f].val1, file_tfs[f].val2, filename) + + if check_value_strings: + # Get macros + macros = find_macros(filename) + + # Get value_string entries. + vs = findValueStrings(filename) + + # Also get hf items + items = find_items(filename, macros, check_mask=True) + + + for v in vs: + if vs[v].looks_like_tfs: + found = False + exact_case = False + + #print('Candidate', v, vs[v]) + for c in common_tfs: + found = False + + # + # Do not do this check for plugins; plugins cannot import + # data values from libwireshark (functions, yes; data + # values, no). + # + # Test whether there's a common prefix for the file name + # and "plugin/epan/"; if so, this is a plugin, and there + # is no common path and os.path.commonprefix returns an + # empty string, otherwise it returns the common path, so + # we check whether the common path is an empty string. + # + if os.path.commonprefix([filename, 'plugin/epan/']) == '': + exact_case = False + if common_tfs[c].val1 == vs[v].parsed_vals[True] and common_tfs[c].val2 == vs[v].parsed_vals[False]: + found = True + exact_case = True + elif common_tfs[c].val1.upper() == vs[v].parsed_vals[True].upper() and common_tfs[c].val2.upper() == vs[v].parsed_vals[False].upper(): + found = True + + # Do values match? 
+ if found: + # OK, now look for items that: + # - have VALS(v) AND + # - have a mask width of 1 bit (no good if field can have values > 1...) + for i in items: + if re.match(r'VALS\(\s*'+v+r'\s*\)', items[i].strings): + if items[i].bits_set == 1: + print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'", + "- could have used", c, 'from tfs.c instead: ', common_tfs[c], 'for', i, + '' if exact_case else ' (capitalisation differs)') + if exact_case: + warnings_found += 1 + + + +################################################################# +# Main logic. + +# command-line args. Controls which dissector files should be checked. +# If no args given, will just scan epan/dissectors folder. +parser = argparse.ArgumentParser(description='Check calls in dissectors') +parser.add_argument('--file', action='append', + help='specify individual dissector file to test') +parser.add_argument('--commits', action='store', + help='last N commits to check') +parser.add_argument('--open', action='store_true', + help='check open files') +parser.add_argument('--check-value-strings', action='store_true', + help='check whether value_strings could have been tfs?') + +parser.add_argument('--common', action='store_true', + help='check for potential new entries for tfs.c') + + +args = parser.parse_args() + + +# Get files from wherever command-line args indicate. +files = [] +if args.file: + # Add specified file(s) + for f in args.file: + if not f.startswith('epan'): + f = os.path.join('epan', 'dissectors', f) + if not os.path.isfile(f): + print('Chosen file', f, 'does not exist.') + exit(1) + else: + files.append(f) +elif args.commits: + # Get files affected by specified number of commits. 
+ command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits] + files = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Will examine dissector files only + files = list(filter(lambda f : is_dissector_file(f), files)) +elif args.open: + # Unstaged changes. + command = ['git', 'diff', '--name-only'] + files = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Only interested in dissector files. + files = list(filter(lambda f : is_dissector_file(f), files)) + # Staged changes. + command = ['git', 'diff', '--staged', '--name-only'] + files_staged = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Only interested in dissector files. + files_staged = list(filter(lambda f : is_dissector_file(f), files_staged)) + for f in files_staged: + if not f in files: + files.append(f) +else: + # Find all dissector files from folder. + files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors')) + + +# If scanning a subset of files, list them here. +print('Examining:') +if args.file or args.commits or args.open: + if files: + print(' '.join(files), '\n') + else: + print('No files to check.\n') +else: + print('All dissector modules\n') + + +# Get standard/ shared ones. +tfs_entries = findTFS(os.path.join('epan', 'tfs.c')) + +# Now check the files to see if they could have used shared ones instead. +for f in files: + if should_exit: + exit(1) + if not isGeneratedFile(f): + checkFile(f, tfs_entries, look_for_common=args.common, check_value_strings=args.check_value_strings) + +# Report on commonly-defined values. +if args.common: + # Looking for items that could potentially be moved to tfs.c + for c in custom_tfs_entries: + # Only want to see items that have 3 or more occurrences. + # Even then, probably only want to consider ones that sound generic. 
+ if len(custom_tfs_entries[c]) > 2: + print(c, 'appears', len(custom_tfs_entries[c]), 'times, in: ', custom_tfs_entries[c]) + + +# Show summary. +print(warnings_found, 'warnings found') +if errors_found: + print(errors_found, 'errors found') + exit(1) -- cgit v1.2.3