#!/usr/bin/env python
'Composite glyph definition'
__url__ = 'http://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2015 SIL International (http://www.sil.org)'
__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)'
__author__ = 'David Rowe'

import re
from xml.etree import ElementTree as ET

# REs to parse (from right to left) comment, SIL extension parameters, markinfo, UID, metrics,
# and (from left) glyph name.
# Each RE exposes the text still to be parsed in a group named 'remainder' (and, for the
# markinfo/UID/metrics REs, any text found after the token in 'remainder2').

# Extract comment from end of line (NB: Doesn't use re.VERBOSE because it contains #.)
# beginning of line, optional whitespace, remainder, optional whitespace, comment to end of line
inputline = re.compile(r"""^\s*(?P<remainder>.*?)(\s*#\s*(?P<commenttext>.*))?$""")

# Parse SIL extension parameters in [...], but only after |
paraminfo = re.compile(r"""^\s*
    (?P<remainder>[^|]*
        ($|
        \|[^[]*$|
        \|[^[]*\[(?P<paraminfo>[^]]*)\]))
    \s*$""", re.VERBOSE)

# Parse markinfo
markinfo = re.compile(r"""^\s*
    (?P<remainder>[^!]*?)
    \s*
    (?:!\s*(?P<markinfo>[.0-9]+(?:,[ .0-9]+){3}))?          # ! markinfo
    (?P<remainder2>[^!]*?)
    \s*$""", re.VERBOSE)

# Parse uid
uidinfo = re.compile(r"""^\s*
    (?P<remainder>[^|]*?)
    \s*
    (?:\|\s*(?P<UID>[^^!]*)?)?                              # | followed by nothing, or 4- to 6-digit UID
    (?P<remainder2>[^|]*?)
    \s*$""", re.VERBOSE)

# Parse metrics
metricsinfo = re.compile(r"""^\s*
    (?P<remainder>[^^]*?)
    \s*
    (?:\^\s*(?P<metrics>[-0-9]+\s*(?:,\s*[-0-9]+)?))?       # metrics (either ^x,y or ^a)
    (?P<remainder2>[^^]*?)
    \s*$""", re.VERBOSE)

# Parse glyph information (up to =)
glyphdef = re.compile(r"""^\s*
    (?P<PSName>[._A-Za-z][._A-Za-z0-9-]*)                   # glyphname
    \s*=\s*
    (?P<remainder>.*?)
    \s*$""", re.VERBOSE)

# break tokens off the right hand side from right to left and finally off left hand side (up to =)
# Each entry is (compiled RE, name of the group holding the token, error message on mismatch).
initialtokens = [
    (inputline,   'commenttext', ""),
    (paraminfo,   'paraminfo',   "Error parsing parameters in [...]"),
    (markinfo,    'markinfo',    "Error parsing information after !"),
    (uidinfo,     'UID',         "Error parsing information after |"),
    (metricsinfo, 'metrics',     "Error parsing information after ^"),
    (glyphdef,    'PSName',      "Error parsing glyph name before =")
]

# Parse base and diacritic information
compdef = re.compile(r"""^\s*
    (?P<compname>[._A-Za-z][._A-Za-z0-9-]*)     # name of base or diacritic in composite definition
    (?:@                                        # @ precedes position information
    (?:(?:\s*(?P<base>[^: ]+)):)?               # optional base glyph followed by :
    \s*
    (?P<position>(?:[^ +&[])+)                  # position information (delimited by space + & [ or end of line)
    \s*)?                                       # end of @ clause
    \s*
    (?:\[(?P<params>[^]]*)\])?                  # parameters inside [..]
    \s*
    (?P<remainder>.*)$
    """, re.VERBOSE)

# Parse metrics
lsb_rsb = re.compile(r"""^\s*
    (?P<lsb>[-0-9]+)\s*(?:,\s*(?P<rsb>[-0-9]+))?    # optional metrics (either ^lsb,rsb or ^adv)
    \s*$""", re.VERBOSE)

# RE to break off one key=value parameter from text inside [key=value;key=value;key=value]
paramdef = re.compile(r"""^\s*
    (?P<paramname>[a-z0-9]+)            # paramname
    \s*=\s*                             # = (with optional white space before/after)
    (?P<paramval>[^;]+?)                # any text up to ; or end of string
    \s*                                 # optional whitespace
    (?:;\s*(?P<rest>.+)$|\s*$)          # either ; and (non-empty) rest of parameters, or end of line
    """, re.VERBOSE)


class CompGlyph(object):
    """Composite glyph definition held either as a text line or as an XML element.

    CDline holds the one-line text form and CDelement the ElementTree <glyph>
    element; parsefromCDline() and parsefromCDelement() convert one into the other.
    """

    def __init__(self, CDelement=None, CDline=None):
        self.CDelement = CDelement
        self.CDline = CDline

    def _parseparams(self, rest):
        """Parse a parameter line such as:
        key1=value1;key2=value2
        and return a dictionary with key:value pairs.

        Raises ValueError if any part of the text is not a key=value pair.
        """
        params = {}
        while rest:
            matchparam = paramdef.match(rest)
            if matchparam is None:
                raise ValueError("Parameter error: " + rest)
            params[matchparam.group('paramname')] = matchparam.group('paramval')
            # 'rest' is None once the last parameter has been consumed, which ends the loop
            rest = matchparam.group('rest')
        return params

    def parsefromCDline(self):
        """Parse the composite glyph information (in self.CDline) such as:
        LtnCapADiear = LtnCapA + CombDiaer@U |00C4 ! 1, 0, 0, 1 # comment
        and store the equivalent <glyph> element in self.CDelement:
        <glyph PSName="LtnCapADiear" UID="00C4">
          <note>comment</note>
          <property name="mark" value="1, 0, 0, 1"/>
          <base PSName="LtnCapA">
            <attach PSName="CombDiaer" at="U" with="_U"/>
          </base>
        </glyph>
        Position info after @ can include optional base glyph name followed by colon.

        Returns None. For a blank or comment-only line self.CDelement is left unchanged.
        Raises ValueError if the line cannot be parsed.
        """
        line = self.CDline
        results = {}
        for regex, groupname, errormsg in initialtokens:
            if len(line) > 0:
                matchresults = regex.match(line)
                if matchresults is None:
                    raise ValueError(errormsg)
                line = matchresults.group('remainder')
                resultsval = matchresults.group(groupname)
                if resultsval is not None:
                    results[groupname] = resultsval.strip()
                    if groupname == 'paraminfo':
                        # paraminfo match needs to be removed from remainder.
                        # Remove exactly the trailing "[...]": str.rstrip() treats its
                        # argument as a set of characters and could also eat the end of
                        # the UID (e.g. "|E001[a=1]" would lose the final "1").
                        suffix = '[' + resultsval + ']'
                        if line.endswith(suffix):
                            line = line[:-len(suffix)]
                if 'remainder2' in matchresults.groupdict():
                    line += ' ' + matchresults.group('remainder2')
        # At this point results optionally may contain entries for any of
        # 'commenttext', 'paraminfo', 'markinfo', 'UID', or 'metrics',
        # but it must have 'PSName' if any of 'paraminfo', 'markinfo', 'UID', or 'metrics' present
        note = results.pop('commenttext', None)
        if 'PSName' not in results:
            if len(results) > 0:
                raise ValueError("Missing glyph name")
            else:
                # comment only, or blank line
                return None
        dic = {}
        UIDpresent = 'UID' in results
        # a bare | (empty UID) only introduces parameters, so no UID attribute is written
        if UIDpresent and results['UID'] == '':
            results.pop('UID')
        if 'paraminfo' in results:
            paramdata = results.pop('paraminfo')
            if UIDpresent:
                dic = self._parseparams(paramdata)
            else:
                line += " [" + paramdata + "]"
        mark = results.pop('markinfo', None)
        if 'metrics' in results:
            m = results.pop('metrics')
            matchmetrics = lsb_rsb.match(m)
            if matchmetrics is None:
                raise ValueError("Error in parameters: " + m)
            elif matchmetrics.group('rsb'):
                metricdic = {'lsb': matchmetrics.group('lsb'), 'rsb': matchmetrics.group('rsb')}
            else:
                # a single number is the advance
                metricdic = {'advance': matchmetrics.group('lsb')}
        else:
            metricdic = None

        # Create <glyph> element and assign attributes (what is left in results: PSName and maybe UID)
        g = ET.Element('glyph', attrib=results)
        if note:
            # note from commenttext becomes <note> subelement
            n = ET.SubElement(g, 'note')
            n.text = note.rstrip()
        # markinfo becomes <property> subelement
        if mark:
            ET.SubElement(g, 'property', name='mark', value=mark)
        # paraminfo parameters (now in dic) become <property> subelements
        if dic:
            for key in dic:
                ET.SubElement(g, 'property', name=key, value=dic[key])
        # metrics parameters (now in metricdic) become <lsb>, <rsb> or <advance> subelements
        if metricdic:
            for key in metricdic:
                ET.SubElement(g, key, width=metricdic[key])

        # Prepare to parse remainder of line
        prevbase = None
        prevdiac = None
        remainder = line
        expectingdiac = False

        # top of loop to process remainder of line, breaking off base or diacritics from left to right
        while remainder != "":
            matchresults = compdef.match(remainder)
            if matchresults is None or matchresults.group('compname') == "":
                raise ValueError("Error parsing glyph name: " + remainder)
            propdic = {}
            if matchresults.group('params'):
                propdic = self._parseparams(matchresults.group('params'))
            base = matchresults.group('base')
            position = matchresults.group('position')
            if expectingdiac:
                # Determine parent element, based on previous base and diacritic glyphs and optional
                # matchresults.group('base'), indicating diacritic attaches to a different glyph
                if base is None:
                    if prevdiac is not None:
                        parent = prevdiac
                    else:
                        parent = prevbase
                elif base != prevbase.attrib['PSName']:
                    raise ValueError("Error in diacritic alternate base glyph: " + base)
                else:
                    parent = prevbase
                    if prevdiac is None:
                        raise ValueError("Unnecessary diacritic alternate base glyph: " + base)
                # Because 'with' is Python reserved word, passing it directly as a parameter
                # causes Python syntax error, so build dictionary to pass to SubElement
                att = {'PSName': matchresults.group('compname')}
                if position:
                    if 'with' in propdic:
                        withval = propdic.pop('with')
                    else:
                        withval = "_" + position
                    att['at'] = position
                    att['with'] = withval
                # Create <attach> subelement
                e = ET.SubElement(parent, 'attach', attrib=att)
                prevdiac = e
            elif (base or position):
                raise ValueError("Position information on base glyph not supported")
            else:
                # Create <base> subelement
                e = ET.SubElement(g, 'base', PSName=matchresults.group('compname'))
                prevbase = e
                prevdiac = None
            if 'shift' in propdic:
                xval, yval = propdic.pop('shift').split(',')
                ET.SubElement(e, 'shift', x=xval, y=yval)
            # whatever parameters are left in propdic become <property> subelements
            for key, val in propdic.items():
                ET.SubElement(e, 'property', name=key, value=val)

            # the next character decides what follows: + a diacritic, & another base
            remainder = matchresults.group('remainder').lstrip()
            nextchar = remainder[:1]
            remainder = remainder[1:].lstrip()
            expectingdiac = nextchar == '+'
            if nextchar == '&' or nextchar == '+':
                if len(remainder) == 0:
                    raise ValueError("Expecting glyph name after & or +")
            elif len(nextchar) > 0:
                raise ValueError("Expecting & or + and found " + nextchar)
        self.CDelement = g

    def _diacinfo(self, node, parent, lastglyph):
        """receives attach element, PSName of its parent, PSName of most recent glyph
        returns a string equivalent of this node (and all its descendants)
        and a string with the name of the most recent glyph
        """
        diacname = node.get('PSName')
        atstring = node.get('at')
        withstring = node.get('with')
        propdic = {}
        # 'with' is only written out when it differs from the default "_" + at.
        # An attach element may have no 'at' attribute (parsefromCDline creates one for
        # a diacritic given without @position), so guard the string concatenation.
        if atstring is not None and withstring != "_" + atstring:
            propdic['with'] = withstring
        subattachlist = []
        attachglyph = ""
        # the parent must be named explicitly unless it is the glyph just written
        if parent != lastglyph:
            attachglyph = parent + ":"
        for subelement in node:
            if subelement.tag == 'property':
                propdic[subelement.get('name')] = subelement.get('value')
            elif subelement.tag == 'attach':
                subattachlist.append(subelement)
            elif subelement.tag == 'shift':
                propdic['shift'] = subelement.get('x') + "," + subelement.get('y')
            # else flag error/warning?
        propstring = ""
        if propdic:
            propstring += " [" + ";".join([k + "=" + v for k, v in propdic.items()]) + "]"
        returnstring = " + " + diacname
        if atstring is not None:
            returnstring += "@" + attachglyph + atstring
        returnstring += propstring
        prevglyph = diacname
        for s in subattachlist:
            string, prevglyph = self._diacinfo(s, diacname, prevglyph)
            returnstring += string
        return returnstring, prevglyph

    def _basediacinfo(self, baseelement):
        """receives base element and returns a string equivalent of this node (and all its descendants)"""
        basename = baseelement.get('PSName')
        returnstring = basename
        prevglyph = basename
        bpropdic = {}
        for child in baseelement:
            if child.tag == 'attach':
                string, prevglyph = self._diacinfo(child, basename, prevglyph)
                returnstring += string
            elif child.tag == 'shift':
                bpropdic['shift'] = child.get('x') + "," + child.get('y')
        if bpropdic:
            returnstring += " [" + ";".join([k + "=" + v for k, v in bpropdic.items()]) + "]"
        return returnstring

    def parsefromCDelement(self):
        """Parse a glyph element (in self.CDelement) such as:
        <glyph PSName="LtnSmITildeGraveDotBlw" UID="E000">
          <note>i tilde grave dot-below</note>
          <base PSName="LtnSmDotlessI">
            <attach PSName="CombDotBlw" at="L" with="_L"/>
            <attach PSName="CombTilde" at="U" with="_U">
              <attach PSName="CombGrave" at="U" with="_U"/>
            </attach>
          </base>
        </glyph>
        and produce the equivalent CDline (in self.CDline) in format:
        LtnSmITildeGraveDotBlw = LtnSmDotlessI + CombDotBlw@L + CombTilde@LtnSmDotlessI:U + CombGrave@U |E000 # i tilde grave dot-below
        """
        g = self.CDelement
        lsb = None
        rsb = None
        adv = None
        markinfo = None
        note = None
        paramdic = {}
        outputline = [g.get('PSName')]
        resultUID = g.get('UID')
        # first base follows " = ", any further bases are joined with " & "
        basesep = " = "

        for child in g:
            if child.tag == 'note':
                note = child.text
            elif child.tag == 'property':
                if child.get('name') == 'mark':
                    markinfo = child.get('value')
                else:
                    paramdic[child.get('name')] = child.get('value')
            elif child.tag == 'lsb':
                lsb = child.get('width')
            elif child.tag == 'rsb':
                rsb = child.get('width')
            elif child.tag == 'advance':
                adv = child.get('width')
            elif child.tag == 'base':
                outputline.extend([basesep, self._basediacinfo(child)])
                basesep = " & "

        if paramdic and resultUID is None:
            resultUID = " "  # to force output of |
        if adv:
            outputline.extend([' ^', adv])
        if lsb and rsb:
            outputline.extend([' ^', lsb, ',', rsb])
        if resultUID:
            outputline.extend([' |', resultUID])
        if markinfo:
            outputline.extend([' !', markinfo])
        if paramdic:
            paramsep = " ["
            for k in paramdic:
                outputline.extend([paramsep, k, "=", paramdic[k]])
                paramsep = ";"
            outputline.append("]")
        if note:
            outputline.extend([" # ", note])
        self.CDline = "".join(outputline)