diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-03-02 20:01:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-03-02 20:01:10 +0000 |
commit | da875fcb62c801b8d19b3d4d984ad963574fb356 (patch) | |
tree | 3d85503747c56c2a387b291524442946f4bebb73 /lib/silfont/scripts/psfcheckglyphinventory.py | |
parent | Initial commit. (diff) | |
download | pysilfont-upstream/1.6.0.tar.xz pysilfont-upstream/1.6.0.zip |
Adding upstream version 1.6.0.upstream/1.6.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/silfont/scripts/psfcheckglyphinventory.py')
-rw-r--r-- | lib/silfont/scripts/psfcheckglyphinventory.py | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/lib/silfont/scripts/psfcheckglyphinventory.py b/lib/silfont/scripts/psfcheckglyphinventory.py new file mode 100644 index 0000000..37a5ffc --- /dev/null +++ b/lib/silfont/scripts/psfcheckglyphinventory.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +__doc__ = '''Warn for differences in glyph inventory and encoding between UFO and input file (e.g., glyph_data.csv). +Input file can be: + - simple text file with one glyph name per line + - csv file with headers, using headers "glyph_name" and, if present, "USV"''' +__url__ = 'http://github.com/silnrsi/pysilfont' +__copyright__ = 'Copyright (c) 2020 SIL International (http://www.sil.org)' +__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)' +__author__ = 'Bob Hallissy' + +from silfont.core import execute + +argspec = [ + ('ifont', {'help': 'Input UFO'}, {'type': 'infont'}), + ('-i', '--input', {'help': 'Input text file, default glyph_data.csv in current directory', 'default': 'glyph_data.csv'}, {'type': 'incsv'}), + ('--indent', {'help': 'size of indent (default 10)', 'type': int, 'default': 10}, {}), + ('-l', '--log', {'help': 'Log file'}, {'type': 'outfile', 'def': '_checkinventory.log'})] + +def doit(args): + font = args.ifont + incsv = args.input + logger = args.logger + indent = ' '*args.indent + + if not (args.quiet or 'scrlevel' in args.paramsobj.sets['command line']): + logger.raisescrlevel('W') # Raise level to W if not already W or higher + + def csvWarning(msg, exception=None): + m = f'glyph_data line {incsv.line_num}: {msg}' + if exception is not None: + m += '; ' + exception.message + logger.log(m, 'W') + + # Get glyph names and encoding from input file + glyphFromCSVuid = {} + uidFromCSVglyph = {} + + # Identify file format (plain text or csv) from first line + # If csv file, it must have headers for "glyph_name" and "USV" + fl = incsv.firstline + if fl is None: logger.log('Empty input file', 'S') + numfields = len(fl) + incsv.numfields = numfields + usvCol = None # Use this as a flag later to determine whether to check USV inventory + if numfields > 1: # More than 1 column, so must have headers + # Required columns: + try: + nameCol = fl.index('glyph_name'); + except ValueError as e: + logger.log('Missing csv input field: ' + e.message, 'S') + except Exception as e: + logger.log('Error reading csv input field: ' + e.message, 'S') + # Optional columns: + usvCol = fl.index('USV') if 'USV' in fl else None + + next(incsv.reader, None) # Skip first line with headers in + + glyphList = set() + for line in incsv: + gname = line[nameCol] + if len(gname) == 0 or line[0].strip().startswith('#'): + continue # No need to include cases where name is blank or comment + glyphList.add(gname) + + # Process USV + # could be empty string, a single USV or space-separated list of USVs + try: + uidList = [int(x, 16) for x in line[usvCol].split()] + except Exception as e: + csvWarning("invalid USV '%s' (%s); ignored: " % (line[usvCol], e.message)) + uidList = [] + if len(uidList) == 1: + # Handle simple encoded glyphs + uid = uidList[0] + if uid in glyphFromCSVuid: + csvWarning('USV %04X previously seen; ignored' % uid) + uidList = [] + else: + # Remember this glyph + glyphFromCSVuid[uid] = gname + uidFromCSVglyph[gname] = uid + elif numfields == 1: # Simple text file. + glyphList = set(line[0] for line in incsv) + else: + logger.log('Invalid csv file', 'S') + + # Get the list of glyphs in the UFO + ufoList = set(font.deflayer.keys()) + + notInUFO = glyphList - ufoList + notInGlyphData = ufoList - glyphList + + if len(notInUFO): + logger.log('Glyphs present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W') + + if len(notInGlyphData): + logger.log('Glyphs present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W') + + if len(notInUFO) == 0 and len(notInGlyphData) == 0: + logger.log('No glyph inventory differences found', 'P') + + if usvCol: + # We cam check USV inventory of glyphs in common + inBoth = glyphList & ufoList # Glyphs we want to examine + csvEncodings = set(f'{gname}|{uidFromCSVglyph[gname]:04X}' for gname in filter(lambda x: x in uidFromCSVglyph, inBoth)) + ufoEncodings = set(f'{gname}|{int(x.hex, 16):04X}' for gname in inBoth for x in font.deflayer[gname]['unicode']) + + notInUFO = csvEncodings - ufoEncodings + notInGlyphData = ufoEncodings - csvEncodings + + if len(notInUFO): + logger.log('Encodings present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W') + + if len(notInGlyphData): + logger.log('Encodings present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W') + + if len(notInUFO) == 0 and len(notInGlyphData) == 0: + logger.log('No glyph encoding differences found', 'P') + + else: + logger.log('Glyph encodings not compared', 'P') + + +def cmd(): execute('UFO', doit, argspec) +if __name__ == '__main__': cmd() |