#!/usr/bin/env python
__doc__ = '''Subset an existing UFO based on a csv or text list of glyph names or USVs to keep.
'''
__url__ = 'http://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2018 SIL International (http://www.sil.org)'
__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

from silfont.core import execute
from xml.etree import ElementTree as ET
import re

# Command-line argument specification passed to execute() by cmd().
# Each entry is a tuple of: the argument name(s), a dict of keyword settings
# (help text, nargs, default), and a final dict of extra settings
# (presumably interpreted by silfont, e.g. file type and default name -- confirm).
argspec = [
    (
        'ifont',
        {'help': 'Input font file'},
        {'type': 'infont'},
    ),
    (
        'ofont',
        {'help': 'Output font file', 'nargs': '?'},
        {'type': 'outfont'},
    ),
    (
        '-i', '--input',
        {'help': 'Input csv file'},
        {'type': 'incsv'},
    ),
    (
        '--header',
        {'help': 'Column header for glyphlist', 'default': 'glyph_name'},
        {},
    ),
    (
        '-l', '--log',
        {'help': 'Log file'},
        {'type': 'outfile', 'def': '_subset.log'},
    ),
]

def _find_data_column(incsv, header, logger):
    """Return the index of the csv column that holds glyph names / USVs.

    A single-column file whose first line does not contain `header` is treated
    as a plain list (column 0, first line is data).  Otherwise the first line
    is taken to be a header row: the column named `header` is located and the
    header row is consumed from `incsv.reader`.

    NOTE(review): this relies on logger.log(..., "S") terminating the run;
    confirm against the silfont logger.
    """
    fl = incsv.firstline
    if fl is None:
        logger.log("Empty input file", "S")
    numfields = len(fl)
    if numfields == 1 and header not in fl:
        dataCol = 0       # Default for plain csv
    elif numfields >= 1:  # Must have headers
        try:
            dataCol = fl.index(header)
        except ValueError as e:
            # str(e) rather than e.message: exceptions have no .message on Python 3
            logger.log('Missing csv input field: ' + str(e), 'S')
        except Exception as e:
            logger.log('Error reading csv input field: ' + str(e), 'S')
        next(incsv.reader, None)  # Skip first line with headers in
    else:
        logger.log("Invalid csv file", "S")
    return dataCol


def _requested_glyphs(incsv, dataCol, deflayer, dusv2gname, logger):
    """Return the set of glyph names requested by the csv rows.

    Each value in column `dataCol` is either a 4-6 digit hex USV (mapped to a
    glyph name through `dusv2gname`) or a glyph name.  Values that resolve to
    nothing in `deflayer` are reported with a warning and skipped.
    """
    # Anchored with \Z so that only a value consisting *entirely* of 4-6 hex
    # digits is treated as a USV; a glyph name such as "face.alt" merely starts
    # with hex digits and must not be passed to int(..., 16).
    usvRE = re.compile(r'[0-9a-f]{4,6}\Z', re.IGNORECASE)
    requested = set()
    for r in incsv:
        gname = r[dataCol].strip()
        if usvRE.match(gname):
            # data is USV, not glyph name
            dusv = int(gname, 16)
            if dusv in dusv2gname:
                requested.add(dusv2gname[dusv])
                continue
            # The USV wasn't in the font... try it as a glyph name
        if gname not in deflayer:
            logger.log("Glyph '%s' not in font; line %d ignored" % (gname, incsv.line_num), 'W')
            continue
        requested.add(gname)
    return requested


def _with_components(requested, deflayer, logger):
    """Return `requested` plus every glyph reachable through component references."""
    toProcess = set(requested)
    toKeep = set()
    while toProcess:
        gname = toProcess.pop()   # order is irrelevant; any pending glyph will do
        if gname in toKeep:
            continue    # Already processed this one
        toKeep.add(gname)

        # Queue any component bases we haven't already processed
        for component in deflayer[gname].etree.findall('./outline/component[@base]'):
            cname = component.get('base')
            if cname in toKeep:
                continue
            if cname not in deflayer:
                # Dangling reference: there is no such glyph to keep, and
                # queueing it would make the deflayer[...] lookup above fail.
                logger.log("Component base '%s' of glyph '%s' not in font; ignored" % (cname, gname), 'W')
                continue
            toProcess.add(cname)
    return toKeep


def _prune_lib(font, toKeep):
    """Rebuild the glyph-order and postscript-name lib entries with only kept glyphs."""
    if "lib" not in font.__dict__:
        return

    # Clean up and rebuild sort orders
    for orderName in ('public.glyphOrder', 'com.schriftgestaltung.glyphOrder'):
        if orderName in font.lib:
            glyphOrder = font.lib.getval(orderName)  # This is an array
            array = ET.Element("array")
            for gname in glyphOrder:
                if gname in toKeep:
                    ET.SubElement(array, "string").text = gname
            font.lib.setelem(orderName, array)

    # Clean up and rebuild psnames
    if 'public.postscriptNames' in font.lib:
        psnames = font.lib.getval('public.postscriptNames')  # This is a dict keyed by glyphnames
        psdict = ET.Element("dict")
        for gname in psnames:
            if gname in toKeep:
                ET.SubElement(psdict, "key").text = gname
                ET.SubElement(psdict, "string").text = psnames[gname]
        font.lib.setelem("public.postscriptNames", psdict)


def doit(args):
    """Subset the input font to the glyphs listed in the input csv.

    Reads `args.ifont` (the font), `args.input` (the csv of glyph names or
    hex USVs), `args.header` (name of the csv column to use when the file has
    a header row) and `args.logger`.  Every listed glyph, plus any glyph it
    references as a component (transitively), is kept; all other glyphs are
    deleted from the font's default layer.  The 'public.glyphOrder',
    'com.schriftgestaltung.glyphOrder' and 'public.postscriptNames' lib
    entries, when present, are rebuilt to mention only kept glyphs.

    Returns the modified font object.
    """
    font = args.ifont
    incsv = args.input
    logger = args.logger
    deflayer = font.deflayer

    # Create mappings to find glyph name from decimal usv:
    dusv2gname = {int(ucode.hex, 16): gname for gname in deflayer for ucode in deflayer[gname]['unicode']}

    # Work out which column to read, then gather the requested glyphs
    dataCol = _find_data_column(incsv, args.header, logger)
    requested = _requested_glyphs(incsv, dataCol, deflayer, dusv2gname, logger)

    # Generate a complete list of glyphs to keep, then the ones to delete:
    toKeep = _with_components(requested, deflayer, logger)
    toDelete = set(deflayer).difference(toKeep)

    # Remove any glyphs not in the toKeep set
    for gname in toDelete:
        logger.log("Deleting " + gname, "V")
        deflayer.delGlyph(gname)
    # Internal sanity check, not input validation
    assert len(deflayer) == len(toKeep), "len(deflayer) != len(toKeep)"
    logger.log("Retained %d glyphs, deleted %d glyphs." % (len(toKeep), len(toDelete)), "P")

    _prune_lib(font, toKeep)

    return font

def cmd():
    """Call silfont's execute() with "UFO", the doit function and this module's argspec."""
    execute("UFO", doit, argspec)

# Run the command only when this file is executed directly as a script.
if __name__ == "__main__":
    cmd()