#!/usr/bin/env python3
'''verify classes defined in xml have correct ordering where needed

Looks for comment lines in the classes.xml file that match the string:
  *NEXT n CLASSES MUST MATCH*
where n is the number of upcoming class definitions that must result in the
same glyph alignment when glyph names are sorted by TTF order (as described
in the glyph_data.csv file).
'''
__url__ = 'http://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2019 SIL International  (http://www.sil.org)'
__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

import re
import types
from xml.etree import ElementTree as ET
from silfont.core import execute

argspec = [
    ('classes', {'help': 'class definition in XML format', 'nargs': '?', 'default': 'classes.xml'}, {'type': 'infile'}),
    ('glyphdata', {'help': 'Glyph info csv file', 'nargs': '?', 'default': 'glyph_data.csv'}, {'type': 'incsv'}),
    ('--gname', {'help': 'Column header for glyph name', 'default': 'glyph_name'}, {}),
    ('--sort', {'help': 'Column header(s) for sort order', 'default': 'sort_final'}, {}),
]

# Dictionary of glyphName : sortValue
sorts = dict()

# Keep track of glyphs mentioned in classes but not in glyph_data.csv
missingGlyphs = set()

def doit(args):
    logger = args.logger

    # Read input csv to get glyph sort order
    incsv = args.glyphdata
    fl = incsv.firstline
    if fl is None: logger.log("Empty input file", "S")
    if args.gname in fl:
        glyphnpos = fl.index(args.gname)
    else:
        logger.log("No" + args.gname + "field in csv headers", "S")
    if args.sort in fl:
        sortpos = fl.index(args.sort)
    else:
        logger.log('No "' + args.sort + '" heading in csv headers"', "S")
    next(incsv.reader, None)  # Skip first line with containing headers
    for line in incsv:
        glyphn = line[glyphnpos]
        if len(glyphn) == 0:
            continue	# No need to include cases where name is blank
        sorts[glyphn] = float(line[sortpos])

    # RegEx we are looking for in comments
    matchCountRE = re.compile("\*NEXT ([1-9]\d*) CLASSES MUST MATCH\*")

    # parse classes.xml but include comments
    class MyTreeBuilder(ET.TreeBuilder):
        def comment(self, data):
            res = matchCountRE.search(data)
            if res:
                # record the count of classes that must match
                self.start(ET.Comment, {})
                self.data(res.group(1))
                self.end(ET.Comment)
    doc = ET.parse(args.classes, parser=ET.XMLParser(target=MyTreeBuilder())).getroot()

    # process results looking for both class elements and specially formatted comments
    matchCount = 0
    refClassList = None
    refClassName = None

    for child in doc:
        if isinstance(child.tag, types.FunctionType):
            # Special type used for comments
            if matchCount > 0:
                logger.log("Unexpected match request '{}': matching {} is not yet complete".format(child.text, refClassName), "E")
                ref = None
            matchCount = int(child.text)
            # print "Match count = {}".format(matchCount)

        elif child.tag == 'class':
            l = orderClass(child, logger)  # Do this so we record classes whether we match them or not.
            if matchCount > 0:
                matchCount -= 1
                className = child.attrib['name']
                if refClassName is None:
                    refClassList = l
                    refLen = len(refClassList)
                    refClassName = className
                else:
                    # compare ref list and l
                    if len(l) != refLen:
                        logger.log("Class {} (length {}) and {} (length {}) have unequal length".format(refClassName, refLen, className, len(l)), "E")
                    else:
                        errCount = 0
                        for i in range(refLen):
                            if l[i][0] != refClassList[i][0]:
                                logger.log ("Class {} and {} inconsistent order glyphs {} and {}".format(refClassName, className, refClassList[i][2], l[i][2]), "E")
                                errCount += 1
                                if errCount > 5:
                                    logger.log ("Abandoning compare between Classes {} and {}".format(refClassName, className), "E")
                                    break
                if matchCount == 0:
                    refClassName = None

    # List glyphs mentioned in classes.xml but not present in glyph_data:
    if len(missingGlyphs):
        logger.log('Glyphs mentioned in classes.xml but not present in glyph_data: ' + ', '.join(sorted(missingGlyphs)), 'W')


classes = {}  # Keep record of all classes we've seen so we can flatten references

def orderClass(classElement, logger):
    # returns a list of tuples, each containing (indexWithinClass, sortOrder, glyphName)
    # list is sorted by sortOrder
    glyphList = classElement.text.split()
    res = []
    for i in range(len(glyphList)):
        token = glyphList[i]
        if token.startswith('@'):
            # Nested class
            cname = token[1:]
            if cname in classes:
                res.extend(classes[cname])
            else:
                logger.log("Invalid fea: class {} referenced before being defined".format(cname),"S")
        else:
            # simple glyph name -- make sure it is in glyph_data:
            if token in sorts:
                res.append((i, sorts[token], token))
            else:
                missingGlyphs.add(token)

    classes[classElement.attrib['name']] = res
    return sorted(res, key=lambda x: x[1])


def cmd() : execute(None,doit,argspec)
if __name__ == "__main__": cmd()