diff options
Diffstat (limited to 'tools/check_tfs.py')
-rwxr-xr-x | tools/check_tfs.py | 595 |
1 files changed, 595 insertions, 0 deletions
#!/usr/bin/env python3
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later

import os
import re
import subprocess
import argparse
import signal
import sys

# This utility scans for tfs items, and works out if standard ones
# could have been used instead (from epan/tfs.c)
# Can also check for value_string where common tfs could be used instead.

# TODO:
# - check how many of the definitions in epan/tfs.c are used in other dissectors
# - although even if unused, might be in external dissectors?
# - consider merging Item class with check_typed_item_calls.py ?


# Try to exit soon after Ctrl-C is pressed.
should_exit = False

# Running totals, updated by the TFS constructor and by checkFile().
warnings_found = 0
errors_found = 0

# Keep track of custom entries that might appear in multiple dissectors,
# so we can consider adding them to tfs.c
# Maps (val1, val2) -> list of files defining that pair.
custom_tfs_entries = {}

# Phrases that identify an automatically generated source file.
# ('This file is auto generated' also covers the longer
# '..., do not edit!' variant.)
GENERATED_FILE_MARKERS = (
    'Generated automatically',
    'Generated Automatically',
    'Autogenerated from',
    'is autogenerated',
    'automatically generated by Pidl',
    'Created by: The Qt Meta Object Compiler',
    'This file was generated',
    'This filter was automatically generated',
    'This file is auto generated',
)

# Patterns used to strip text that would otherwise trip up the other REs.
C_COMMENT_RE = re.compile(r"/\*.*?\*/", re.DOTALL)
CPP_COMMENT_RE = re.compile(r"//.*?\n")
IF_0_RE = re.compile(r"#if 0.*?#endif", re.DOTALL)

# Example: const true_false_string tfs_yes_no = { "Yes", "No" };
TFS_RE = re.compile(r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"')

VALUE_STRING_RE = re.compile(r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};')

# N.B. re extends all the way to HFILL to avoid greedy matching
HF_ITEM_RE = re.compile(r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL')

MACRO_RE = re.compile(r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n')

DISSECTOR_FILE_RE = re.compile(r'.*packet-.*\.c')


def signal_handler(sig, frame):
    """SIGINT handler: ask the main loops to stop at the next opportunity."""
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')


# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if one of the first lines of filename says it is generated.

    A missing file (e.g. deleted in a recent commit) is reported as not
    generated.
    """
    if not os.path.exists(filename):
        return False

    # Undecodable bytes are replaced rather than raising, as only the
    # ASCII marker phrases matter here.
    with open(filename, 'r', encoding='utf8', errors='replace') as f_read:
        for lines_tested, line in enumerate(f_read):
            # The comment to say that its generated is near the top, so
            # give up once get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in GENERATED_FILE_MARKERS):
                return True

    # OK, looks like a hand-written file!
    return False


def AddCustomEntry(val1, val2, file):
    """Record that file defines a non-standard tfs with strings (val1, val2)."""
    custom_tfs_entries.setdefault((val1, val2), []).append(file)


class TFS:
    """A true_false_string definition found in a source file.

    val1 is the first (true) string and val2 the second (false) string.
    Constructing an instance prints any style warnings about the strings
    and adds them to the global warnings_found count.
    """

    def __init__(self, file, name, val1, val2):
        self.file = file
        self.name = name
        self.val1 = val1
        self.val2 = val2

        global warnings_found

        # Should not be empty
        if not val1 or not val2:
            print('Warning:', file, name, 'has an empty field', self)
            warnings_found += 1
        # TODO: is it strange if one begins with a capital but the other doesn't?

        # Leading or trailing space should not be needed.
        # val1 is the true string and val2 the false string.
        if val1.startswith(' ') or val1.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space "' + self.val1 + '"')
        if val2.startswith(' ') or val2.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space "' + self.val2 + '"')

        # Should really not be identical...
        if val1.lower() == val2.lower():
            print('Warning:', file, name, 'true and false strings are the same', self)
            warnings_found += 1

        # Shouldn't both be negation (with exception..)
        if (file != os.path.join('epan', 'dissectors', 'packet-smb.c') and
                'not ' in val1.lower() and 'not ' in val2.lower()):
            print('Warning:', file, name, self, 'both strings contain not')
            warnings_found += 1

        # Not expecting full-stops inside strings..
        if '.' in val1 or '.' in val2:
            print('Warning:', file, name, 'Period found in string', self)
            warnings_found += 1

    def __str__(self):
        return '{' + '"' + self.val1 + '", "' + self.val2 + '"}'


class ValueString:
    """A value_string array, parsed to see if it is really a true/false pair.

    looks_like_tfs is True only when the array has exactly three entries
    and entries for both value 0 and value 1 were parsed; in that case
    parsed_vals maps True/False to the corresponding strings.
    """

    def __init__(self, file, name, vals):
        self.file = file
        self.name = name
        self.raw_vals = vals
        self.parsed_vals = {}
        self.looks_like_tfs = True

        # Expect 2 real entries plus the { 0, NULL } terminator.
        no_lines = self.raw_vals.count('{')
        if no_lines != 3:
            self.looks_like_tfs = False
            return

        # Now parse out each entry in the value_string
        for m in re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals):
            entry = m[1]
            # Check each entry looks like part of a TFS entry.
            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', entry)
            if not match:
                self.looks_like_tfs = False
                break

            self.parsed_vals[match[1] == '1'] = match[2]

            # Now have both entries
            if len(self.parsed_vals) == 2:
                break

        # Without both a 0 and a 1 entry this cannot be replaced by a tfs
        # (and users index parsed_vals by both True and False).
        if len(self.parsed_vals) != 2:
            self.looks_like_tfs = False

    def __str__(self):
        return '{' + '"' + self.raw_vals + '"}'


field_widths = {
    'FT_BOOLEAN': 64,   # TODO: Width depends upon 'display' field
    'FT_CHAR':    8,
    'FT_UINT8':   8,
    'FT_INT8':    8,
    'FT_UINT16':  16,
    'FT_INT16':   16,
    'FT_UINT24':  24,
    'FT_INT24':   24,
    'FT_UINT32':  32,
    'FT_INT32':   32,
    'FT_UINT40':  40,
    'FT_INT40':   40,
    'FT_UINT48':  48,
    'FT_INT48':   48,
    'FT_UINT56':  56,
    'FT_INT56':   56,
    'FT_UINT64':  64,
    'FT_INT64':   64,
}


# Simplified version of class that is in check_typed_item_calls.py
class Item:
    """An hf item (header field registration entry) found in a source file.

    After construction, mask_value holds the numeric mask (0 if it could
    not be read, in which case mask_read is False) and bits_set the
    number of mask bits set within the width of the field.
    """

    previousItem = None

    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
                 check_mask=False):
        self.filename = filename
        self.hf = hf
        self.filter = filter
        self.label = label
        self.strings = strings
        self.mask = mask

        self.item_type = item_type
        self.type_modifier = type_modifier

        # Needs item_type/type_modifier/mask to be set first.
        self.set_mask_value(macros)

        self.bits_set = sum(1 for n in range(self.get_field_width_in_bits())
                            if self.check_bit(self.mask_value, n))

    def __str__(self):
        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
                                                                               self.item_type, self.type_modifier, self.strings, self.mask)

    def set_mask_value(self, macros):
        """Set mask_value and mask_read from self.mask.

        macros maps macro names to their literal values; a mask that names
        a known macro is replaced by that value. Anything that cannot be
        read as a number gives mask_value 0 and mask_read False.
        """
        self.mask_read = True
        self.mask_value = 0

        if self.mask is None:
            self.mask_read = False
            return

        # Substitute mask if found as a macro..
        if self.mask in macros:
            self.mask = macros[self.mask]
        elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
            self.mask_read = False
            return

        # Read according to the appropriate base.
        try:
            if self.mask.lower().startswith('0x'):
                self.mask_value = int(self.mask, 16)
            elif self.mask.startswith('0'):
                self.mask_value = int(self.mask, 8)
            else:
                self.mask_value = int(self.mask, 10)
        except ValueError:
            self.mask_read = False
            self.mask_value = 0

    # Return true if bit position n is set in value.
    def check_bit(self, value, n):
        return (value & (0x1 << n)) != 0

    def get_field_width_in_bits(self):
        """Return the width of this item's type in bits, or 0 if unknown."""
        if self.item_type != 'FT_BOOLEAN':
            # Lookup fixed width for this type
            return field_widths.get(self.item_type, 0)

        if self.type_modifier in ('NULL', 'BASE_NONE'):
            return 8  # i.e. 1 byte
        if self.type_modifier == 'SEP_DOT':  # from proto.h, only meant for FT_BYTES
            return 64
        try:
            # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble.
            return (int(self.type_modifier) + 3) // 4 * 4
        except (TypeError, ValueError):
            return 0


def removeComments(code_string):
    """Return code_string without C/C++ comments and '#if 0' regions."""
    code_string = C_COMMENT_RE.sub("", code_string)    # C-style comment
    code_string = CPP_COMMENT_RE.sub("", code_string)  # C++-style comment
    code_string = IF_0_RE.sub("", code_string)         # Ignored region
    return code_string


def _read_source(filename):
    """Return the contents of filename with comments removed."""
    with open(filename, 'r', encoding="utf8") as f:
        # Remove comments so as not to trip up RE.
        return removeComments(f.read())


# Look for true_false_string items in a dissector file.
def findTFS(filename):
    """Return a dict mapping name -> TFS for each definition in filename."""
    tfs_found = {}
    for m in TFS_RE.finditer(_read_source(filename)):
        name = m.group(1)
        tfs_found[name] = TFS(filename, name, m.group(2), m.group(3))
    return tfs_found


# Look for value_string entries in a dissector file.
def findValueStrings(filename):
    """Return a dict mapping name -> ValueString for each array in filename."""
    # static const value_string radio_type_vals[] =
    # {
    #     { 0,      "FDD"},
    #     { 1,      "TDD"},
    #     { 0, NULL }
    # };
    vals_found = {}
    for m in VALUE_STRING_RE.finditer(_read_source(filename)):
        name = m.group(1)
        vals_found[name] = ValueString(filename, name, m.group(2))
    return vals_found


# Look for hf items (i.e. full item to be registered) in a dissector file.
def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
    """Return a dict mapping hf variable name -> Item for filename.

    macros is passed to each Item to resolve masks given as macro names.
    The remaining keyword arguments are accepted but not used here.
    """
    items = {}
    for m in HF_ITEM_RE.finditer(_read_source(filename)):
        # Store this item.
        hf = m.group(1)
        items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4),
                         type_modifier=m.group(5),
                         strings=m.group(6),
                         macros=macros,
                         mask=m.group(7))
    return items


def find_macros(filename):
    """Return a dict mapping '#define' names to their numeric literal text."""
    return {m.group(1): m.group(2)
            for m in MACRO_RE.finditer(_read_source(filename))}


def is_dissector_file(filename):
    """Return a match object (truthy) if filename looks like packet-*.c."""
    return DISSECTOR_FILE_RE.match(filename)


def is_plugin_file(filename):
    """Return True if filename lies under the plugin(s)/epan/ source folder.

    Whole path components are compared, so unrelated paths that merely
    start with the same letters are not treated as plugins.
    """
    path = os.path.normpath(filename).replace(os.sep, '/')
    return path.startswith(('plugin/epan/', 'plugins/epan/'))


def findDissectorFilesInFolder(folder):
    """Return the dissector files in folder, in sorted order.

    If Ctrl-C was pressed, the files gathered so far are returned.
    """
    # Look at files in sorted order, to give some idea of how far through is.
    files = []
    for f in sorted(os.listdir(folder)):
        if should_exit:
            break
        if is_dissector_file(f):
            files.append(os.path.join(folder, f))
    return files


def _common_tfs_matches(true_val, false_val, common_tfs):
    """Yield (name, exact_case) for each common tfs with these two strings.

    exact_case is False when the strings only match ignoring case.
    """
    for name, common in common_tfs.items():
        if true_val == common.val1 and false_val == common.val2:
            yield name, True
        elif true_val.upper() == common.val1.upper() and false_val.upper() == common.val2.upper():
            yield name, False


def _check_value_strings(filename, common_tfs):
    """Report single-bit items whose value_string duplicates a common tfs."""
    global warnings_found

    # Get macros
    macros = find_macros(filename)

    # Get value_string entries.
    vs = findValueStrings(filename)

    # Also get hf items
    items = find_items(filename, macros, check_mask=True)

    for v, value_string in vs.items():
        if not value_string.looks_like_tfs or len(value_string.parsed_vals) != 2:
            continue

        vals_re = re.compile(r'VALS\(\s*' + v + r'\s*\)')
        matches = _common_tfs_matches(value_string.parsed_vals[True],
                                      value_string.parsed_vals[False],
                                      common_tfs)
        for c, exact_case in matches:
            # OK, now look for items that:
            # - have VALS(v) AND
            # - have a mask width of 1 bit (no good if field can have values > 1...)
            for i, item in items.items():
                if vals_re.match(item.strings) and item.bits_set == 1:
                    print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'",
                          "- could have used", c, 'from tfs.c instead: ', common_tfs[c], 'for', i,
                          '' if exact_case else ' (capitalisation differs)')
                    if exact_case:
                        warnings_found += 1


# Check the given dissector file.
def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False):
    """Report where filename could have used an entry from tfs.c.

    common_tfs is the dict of shared entries, as returned by findTFS().
    With look_for_common, entries not found in common_tfs are recorded
    using AddCustomEntry(). With check_value_strings, value_string arrays
    that could have been a common tfs are reported too. Results are
    printed and counted in the global warnings_found / errors_found.
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Do not compare against tfs.c for plugins; plugins cannot import
    # data values from libwireshark (functions, yes; data values, no).
    is_plugin = is_plugin_file(filename)

    # Find items.
    file_tfs = findTFS(filename)

    # See if any of these items already existed in tfs.c
    for f, entry in file_tfs.items():
        match = None
        if not is_plugin:
            # Only the first matching common entry is reported.
            match = next(_common_tfs_matches(entry.val1, entry.val2, common_tfs), None)

        if match is not None:
            c, exact_case = match
            print("Error:" if exact_case else "Warn: ", filename, f, "- could have used", c, 'from tfs.c instead: ', common_tfs[c],
                  '' if exact_case else ' (capitalisation differs)')
            if exact_case:
                errors_found += 1
            else:
                warnings_found += 1
        elif look_for_common:
            AddCustomEntry(entry.val1, entry.val2, filename)

    if check_value_strings and not is_plugin:
        _check_value_strings(filename, common_tfs)


def _git_dissector_files(command):
    """Run a 'git diff --name-only' style command; return dissector files."""
    names = [f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()]
    # Will examine dissector files only
    return [f for f in names if is_dissector_file(f)]


#################################################################
# Main logic.

def main():
    """Parse the command line, check the chosen files and print a summary.

    Exits with status 1 if a chosen file is missing, if interrupted, or
    if any errors were found.
    """
    signal.signal(signal.SIGINT, signal_handler)

    # command-line args.  Controls which dissector files should be checked.
    # If no args given, will just scan epan/dissectors folder.
    parser = argparse.ArgumentParser(description='Check calls in dissectors')
    parser.add_argument('--file', action='append',
                        help='specify individual dissector file to test')
    parser.add_argument('--commits', action='store',
                        help='last N commits to check')
    parser.add_argument('--open', action='store_true',
                        help='check open files')
    parser.add_argument('--check-value-strings', action='store_true',
                        help='check whether value_strings could have been tfs?')
    parser.add_argument('--common', action='store_true',
                        help='check for potential new entries for tfs.c')

    args = parser.parse_args()

    # Get files from wherever command-line args indicate.
    files = []
    if args.file:
        # Add specified file(s)
        for f in args.file:
            if not f.startswith('epan'):
                f = os.path.join('epan', 'dissectors', f)
            if not os.path.isfile(f):
                print('Chosen file', f, 'does not exist.')
                sys.exit(1)
            files.append(f)
    elif args.commits:
        # Get files affected by specified number of commits.
        files = _git_dissector_files(['git', 'diff', '--name-only', 'HEAD~' + args.commits])
    elif args.open:
        # Unstaged changes.
        files = _git_dissector_files(['git', 'diff', '--name-only'])
        # Staged changes.
        for f in _git_dissector_files(['git', 'diff', '--staged', '--name-only']):
            if f not in files:
                files.append(f)
    else:
        # Find all dissector files from folder.
        files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'))

    # If scanning a subset of files, list them here.
    print('Examining:')
    if args.file or args.commits or args.open:
        if files:
            print(' '.join(files), '\n')
        else:
            print('No files to check.\n')
    else:
        print('All dissector modules\n')

    # Get standard/ shared ones.
    tfs_entries = findTFS(os.path.join('epan', 'tfs.c'))

    # Now check the files to see if they could have used shared ones instead.
    for f in files:
        if should_exit:
            sys.exit(1)
        if not isGeneratedFile(f):
            checkFile(f, tfs_entries, look_for_common=args.common, check_value_strings=args.check_value_strings)

    # Report on commonly-defined values.
    if args.common:
        # Looking for items that could potentially be moved to tfs.c
        for c, defining_files in custom_tfs_entries.items():
            # Only want to see items that have 3 or more occurrences.
            # Even then, probably only want to consider ones that sound generic.
            if len(defining_files) > 2:
                print(c, 'appears', len(defining_files), 'times, in: ', defining_files)

    # Show summary.
    print(warnings_found, 'warnings found')
    if errors_found:
        print(errors_found, 'errors found')
        sys.exit(1)


if __name__ == '__main__':
    main()