compilerplugins/clang/singlevalfields.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

#!/usr/bin/python2

import sys
import re
import io

# All three maps are keyed by the tuple (parentClass, fieldName).
# (parentClass, fieldName) -> source location of the field definition
definitionToSourceLocationMap = {}
# (parentClass, fieldName) -> declared type of the field
definitionToTypeMap = {}
# (parentClass, fieldName) -> set of every value seen being assigned to the field
fieldAssignDict = {}

# The numbers clang puts into its generated "type-parameter-N-M" names are not
# always the same for the same entity, so they are replaced with a fixed
# placeholder to make otherwise identical names compare equal.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
    """Return line with each "type-parameter-<digits>-<digits>" replaced by "type-parameter-?-?"."""
    return re.sub(normalizeTypeParamsRegex, "type-parameter-?-?", line)

# Parse the plugin log. Each line is a tab-separated record:
#   defn: <parentClass> <fieldName> <fieldType> <sourceLocation>
#   asgn: <parentClass> <fieldName> [<assignedValue>]
# Anything else, including a record with too few fields (e.g. a truncated
# line), is reported as an unknown line instead of aborting the whole run
# with an IndexError.
#
# reading as binary (since we know it is pure ascii) is much faster than reading as unicode
with io.open("workdir/loplugin.singlevalfields.log", "rb", buffering=1024*1024) as txt:
    for line in txt:
        tokens = line.strip().split("\t")
        recordKind = tokens[0]
        if recordKind == "defn:" and len(tokens) >= 5:
            parentClass = normalizeTypeParams(tokens[1])
            fieldName = normalizeTypeParams(tokens[2])
            fieldType = normalizeTypeParams(tokens[3])
            sourceLocation = tokens[4]
            fieldInfo = (parentClass, fieldName)
            definitionToSourceLocationMap[fieldInfo] = sourceLocation
            definitionToTypeMap[fieldInfo] = fieldType
        elif recordKind == "asgn:" and len(tokens) >= 3:
            parentClass = normalizeTypeParams(tokens[1])
            fieldName = normalizeTypeParams(tokens[2])
            # strip() removes a trailing tab, so a record without a value
            # column is recorded as an assignment of the empty string
            if len(tokens) > 3:
                assignValue = tokens[3]
            else:
                assignValue = ""
            fieldInfo = (parentClass, fieldName)
            fieldAssignDict.setdefault(fieldInfo, set()).add(assignValue)
        else:
            print( "unknown line: " + line)

# fields which are only ever assigned a single value;
# entries are (class + " " + field, comma-joined values, source location)
tmp1list = list()
# fields which are only ever assigned two values - zero and one - and whose
# type is not already boolean-like;
# entries are (class + " " + field, comma-joined values, source location, field type)
tmp2list = list()

# Containing classes whose fields are never reported. Built once here rather
# than on every loop iteration.
_ignoredClasses = frozenset([
    # ignore things which are locally declared but are actually redeclarations of things from 3rd party code
    "_mwmhints",
    # ignore things which are representations of on-disk structures
    "SEPr", "WW8Dop", "BmpInfoHeader", "BmpFileHeader", "Exif::ExifIFD",
    "sw::WW8FFData", "FFDataHeader", "INetURLHistory_Impl::head_entry", "ImplPPTParaPropSet", "SvxSwAutoFormatFlags",
    "T602ImportFilter::T602ImportFilter::format602struct", "DataNode",
    # Windows-only
    "SfxAppData_Impl", "sfx2::ImplDdeItem", "SvFileStream",
    "DdeService", "DdeTopic", "DdeItem", "DdeConnection", "connectivity::sdbcx::OUser", "connectivity::sdbcx::OGroup", "connectivity::sdbcx::OCatalog",
    "cairocanvas::SpriteHelper",
    # Some of our supported compilers don't do constexpr, which means o3tl::typed_flags can't be 'static const'
    "WaitWindow_Impl",
])
# Fields defined at a location starting with one of these prefixes are never reported.
_ignoredLocationPrefixes = (
    # ignore things which are representations of on-disk structures
    "hwpfilter/source",
    # ignore things which are representations of structures from external code
    "desktop/unx/source/splashx.c",
    # Windows-only
    "include/svl/svdde.hxx",
    "embeddedobj/source/inc/oleembobj.hxx",
)

for fieldInfo, assignValues in fieldAssignDict.iteritems():
    # an assignment to a field whose definition was never logged has no
    # source location to report against
    if fieldInfo not in definitionToSourceLocationMap:
        continue
    if len(assignValues) > 2:
        continue
    # NOTE(review): "?" is presumably what the plugin logs for a value it
    # could not determine - confirm against the plugin source
    if "?" in assignValues:
        continue
    #if len(assignValues - set(["0", "1", "-1", "nullptr"])) > 0:
    #    continue
    containingClass = fieldInfo[0]
    if containingClass in _ignoredClasses:
        continue
    location = definitionToSourceLocationMap[fieldInfo]
    if location.startswith(_ignoredLocationPrefixes):
        continue
    # the display strings are only built for fields that survived every
    # filter, so large value sets that get skipped are never joined
    qualifiedName = containingClass + " " + fieldInfo[1]
    if len(assignValues) == 2:
        if "0" in assignValues and "1" in assignValues:
            fieldType = definitionToTypeMap[fieldInfo]
            if "_Bool" not in fieldType and "enum " not in fieldType and "boolean" not in fieldType:
                tmp2list.append((qualifiedName, ",".join(assignValues), location, fieldType))
    else:
        tmp1list.append((qualifiedName, ",".join(assignValues), location))

# sort key for "filename:lineno" style strings
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a list key for s in which runs of digits compare as numbers.

    s is split on runs of ASCII digits; each digit run becomes an int and
    every other piece is lower-cased, so "a2" sorts before "a10" and the
    comparison ignores case.
    """
    key = []
    for piece in re.split(_nsre, s):
        if piece.isdigit():
            key.append(int(piece))
        else:
            key.append(piece.lower())
    return key
# order both result lists in place by source location (tuple element 2),
# using the natural sort key
for resultList in (tmp1list, tmp2list):
    resultList.sort(key=lambda entry: natural_sort_key(entry[2]))

# print out the results
def _writeResults(path, entries, detailIndex):
    """Write one three-line record per entry to path, replacing any existing file.

    Each record is the source location (element 2), then the indented
    class/field name (element 0), then the indented element at detailIndex.
    """
    with open(path, "wt") as out:
        for entry in entries:
            out.writelines((
                entry[2] + "\n",
                "    " + entry[0] + "\n",
                "    " + entry[detailIndex] + "\n",
            ))

# single-value fields: the third line is the assigned value(s)
_writeResults("compilerplugins/clang/singlevalfields.results", tmp1list, 1)
# zero/one fields: the third line is the field's type
_writeResults("compilerplugins/clang/singlevalfields.could-be-bool.results", tmp2list, 3)