summaryrefslogtreecommitdiffstats
path: root/lib/silfont/scripts/psfcheckglyphinventory.py
blob: 37a5ffc121dd88c198ffd7b156058d4d7daf19da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
__doc__ = '''Warn for differences in glyph inventory and encoding between UFO and input file (e.g., glyph_data.csv). 
Input file can be: 
    - simple text file with one glyph name per line
    - csv file with headers, using headers "glyph_name" and, if present, "USV"'''
__url__ = 'http://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2020 SIL International (http://www.sil.org)'
__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

from silfont.core import execute

argspec = [
    ('ifont', {'help': 'Input UFO'}, {'type': 'infont'}),
    ('-i', '--input', {'help': 'Input text file, default glyph_data.csv in current directory', 'default': 'glyph_data.csv'}, {'type': 'incsv'}),
    ('--indent', {'help': 'size of indent (default 10)', 'type': int, 'default': 10}, {}),
    ('-l', '--log', {'help': 'Log file'}, {'type': 'outfile', 'def': '_checkinventory.log'})]

def doit(args):
    font = args.ifont
    incsv = args.input
    logger = args.logger
    indent = ' '*args.indent

    if not (args.quiet or 'scrlevel' in args.paramsobj.sets['command line']):
        logger.raisescrlevel('W')  # Raise level to W if not already W or higher

    def csvWarning(msg, exception=None):
        m = f'glyph_data line {incsv.line_num}: {msg}'
        if exception is not None:
            m += '; ' + exception.message
        logger.log(m, 'W')

    # Get glyph names and encoding from input file
    glyphFromCSVuid = {}
    uidFromCSVglyph = {}

    # Identify file format (plain text or csv) from first line
    # If csv file, it must have headers for "glyph_name" and "USV"
    fl = incsv.firstline
    if fl is None: logger.log('Empty input file', 'S')
    numfields = len(fl)
    incsv.numfields = numfields
    usvCol = None  # Use this as a flag later to determine whether to check USV inventory
    if numfields > 1:  # More than 1 column, so must have headers
        # Required columns:
        try:
            nameCol = fl.index('glyph_name');
        except ValueError as e:
            logger.log('Missing csv input field: ' + e.message, 'S')
        except Exception as e:
            logger.log('Error reading csv input field: ' + e.message, 'S')
        # Optional columns:
        usvCol = fl.index('USV') if 'USV' in fl else None

        next(incsv.reader, None)  # Skip first line with headers in

        glyphList = set()
        for line in incsv:
            gname = line[nameCol]
            if len(gname) == 0 or line[0].strip().startswith('#'):
                continue    # No need to include cases where name is blank or comment
            glyphList.add(gname)

            # Process USV
            # could be empty string, a single USV or space-separated list of USVs
            try:
                uidList = [int(x, 16) for x in line[usvCol].split()]
            except Exception as e:
                csvWarning("invalid USV '%s' (%s); ignored: " % (line[usvCol], e.message))
                uidList = []
            if len(uidList) == 1:
                # Handle simple encoded glyphs
                uid = uidList[0]
                if uid in glyphFromCSVuid:
                    csvWarning('USV %04X previously seen; ignored' % uid)
                    uidList = []
                else:
                    # Remember this glyph
                    glyphFromCSVuid[uid] = gname
                    uidFromCSVglyph[gname] = uid
    elif numfields == 1:   # Simple text file.
        glyphList = set(line[0] for line in incsv)
    else:
        logger.log('Invalid csv file', 'S')

    # Get the list of glyphs in the UFO
    ufoList = set(font.deflayer.keys())

    notInUFO = glyphList - ufoList
    notInGlyphData = ufoList - glyphList

    if len(notInUFO):
        logger.log('Glyphs present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W')

    if len(notInGlyphData):
        logger.log('Glyphs present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W')

    if len(notInUFO) == 0 and len(notInGlyphData) == 0:
        logger.log('No glyph inventory differences found', 'P')

    if usvCol:
        # We cam check USV inventory of glyphs in common
        inBoth = glyphList & ufoList   # Glyphs we want to examine
        csvEncodings = set(f'{gname}|{uidFromCSVglyph[gname]:04X}' for gname in filter(lambda x: x in uidFromCSVglyph, inBoth))
        ufoEncodings = set(f'{gname}|{int(x.hex, 16):04X}' for gname in inBoth for x in font.deflayer[gname]['unicode'])

        notInUFO = csvEncodings - ufoEncodings
        notInGlyphData = ufoEncodings - csvEncodings

        if len(notInUFO):
            logger.log('Encodings present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W')

        if len(notInGlyphData):
            logger.log('Encodings present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W')

        if len(notInUFO) == 0 and len(notInGlyphData) == 0:
            logger.log('No glyph encoding differences found', 'P')

    else:
        logger.log('Glyph encodings not compared', 'P')


def cmd(): execute('UFO', doit, argspec)
if __name__ == '__main__': cmd()