#!/usr/bin/env python
__doc__ = 'read FTML file and generate LO writer .odt file'
__url__ = 'http://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2015, SIL International (http://www.sil.org)'
__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)'
__author__ = 'David Rowe'

from silfont.core import execute
from fontTools import ttLib
from xml.etree import ElementTree as ET ### used to parse input FTML (may not be needed if FTML parser used)
import re
import os
import io
from odf.opendocument import OpenDocumentText, OpaqueObject
from odf.config import ConfigItem, ConfigItemSet
from odf.office import FontFaceDecls
from odf.style import FontFace, ParagraphProperties, Style, TableCellProperties, TableColumnProperties, TableProperties, TextProperties
from odf.svg import FontFaceSrc, FontFaceUri, FontFaceFormat
from odf.table import Table, TableCell, TableColumn, TableRow
from odf.text import H, P, SequenceDecl, SequenceDecls, Span

# specify two parameters: input file (FTML/XML format), output file (ODT format)
# preceded by optional log file plus zero or more font strings
argspec = [
    ('input',{'help': 'Input file in FTML format'}, {'type': 'infile'}),
    ('output',{'help': 'Output file (LO writer .odt)', 'nargs': '?'}, {'type': 'filename', 'def': '_out.odt'}),
    ('-l','--log',{'help': 'Log file', 'required': False},{'type': 'outfile', 'def': '_ftml2odt_log.txt'}),
    ('-r','--report',{'help': 'Set reporting level for log', 'type':str, 'choices':['X','S','E','P','W','I','V']},{}),
    ('-f','--font',{'help': 'font specification','action': 'append', 'required': False}, {}),
]

# RegExs for extracting font name from fontsrc element
findfontnamelocal = re.compile(r"""local\(  # begin with local(
    (["']?)                     # optional open quote
    (?P<fontstring>[^)]+)       # font name
    \1                          # optional matching close quote
    \)""", re.VERBOSE)          # and end with )

findfontnameurl = re.compile(r"""url\(      # begin with url(
    (["']?)                     # optional open quote
    (?P<fontstring>[^)]+)       # font name
    \1                          # optional matching close quote
    \)""", re.VERBOSE)          # and end with )

# RegEx for splitting an installed-font specification into family name
# plus optional Bold / Italic / Regular keywords
fontspec = re.compile(r"""^     # beginning of string
    (?P<rest>[A-Za-z ]+?)       # Font Family Name
    \s*(?P<bold>Bold)?          # Bold
    \s*(?P<italic>Italic)?      # Italic
    \s*(?P<regular>Regular)?    # Regular
    $""", re.VERBOSE)           # end of string

# RegEx for extracting feature(s) from feats attribute of style element
onefeat = re.compile(r"""^\s*
    '(?P<featname>[^']+)'\s*    # feature tag
    (?P<featval>[^', ]+)\s*     # feature value
    ,?\s*                       # optional comma
    (?P<remainder>.*)           # rest of line (with zero or more tag-value pairs)
    $""", re.VERBOSE)

# RegEx for extracting language (and country) from lang attribute of style element
langcode = re.compile(r"""^
    (?P<langname>[A-Za-z]+)             # language name
    (-                                  # (optional) hyphen and
      (?P<countryname>[A-Za-z]+)        # country name
      (-[A-Za-z0-9][-A-Za-z0-9]*)?      # (optional) hyphen and other codes
    )?$""", re.VERBOSE)

# RegEx to extract hex value from \uxxxxxx and function to generate Unicode character
# use to change string to newstring:
#    newstring = re.sub(backu, hextounichr, string)
# or newstring = re.sub(backu, lambda m: chr(int(m.group(1),16)), string)
backu = re.compile(r"\\u([0-9a-fA-F]{4,6})")

def hextounichr(match):
    """Return the character whose code point is the hex number in group 1 of *match*.

    Intended as the replacement callable for ``re.sub(backu, hextounichr, s)``.
    """
    codepoint = int(match.group(1), 16)
    return chr(codepoint)

def BoldItalic(bold, italic):
    """Return " Bold", " Italic", " Bold Italic" or "" according to the two flags."""
    suffixes = []
    if bold:
        suffixes.append(" Bold")
    if italic:
        suffixes.append(" Italic")
    return "".join(suffixes)

def parsefeats(inputline):
    """Convert an FTML feats string into a ":tag=value&tag=value" suffix.

    *inputline* holds zero or more ``'tag' value`` pairs, optionally separated
    by commas, e.g. ``'cv01' 1, 'ss02' 2`` gives ``:cv01=1&ss02=2``.
    Pairs are emitted sorted by tag; a repeated tag keeps its last value.
    Parsing stops silently at the first text that is not a valid pair, so an
    input with no valid pair yields just ":".
    """
    featdic = {}
    remaining = inputline
    while remaining != "":
        results = onefeat.match(remaining)
        if results is None:
            break ### warning about unrecognized feature string: remaining
        featdic[results.group('featname')] = results.group('featval')
        remaining = results.group('remainder')
    pairs = [tag + '=' + featdic[tag] for tag in sorted(featdic)]
    return ":" + "&".join(pairs)

def _fontfileinfo(fontfile):
    """Open the font file *fontfile* and return (family name, bold, italic).

    The family name is nameID 1 from the name table, preferring the Windows
    Unicode record (platformID 3, encodingID 1) and falling back to the
    Macintosh Roman record (platformID 1, encodingID 0).  Bold and italic come
    from OS/2 fsSelection bits 5 and 0.  The font is always closed again.

    Raises whatever ttLib raises for an unreadable or invalid file, and
    ValueError when neither family-name record is present.
    """
    font = ttLib.TTFont(fontfile)
    try:
        nametable = font['name']
        namerecord = nametable.getName(1, 3, 1)
        if namerecord is None:
            namerecord = nametable.getName(1, 1, 0)
        if namerecord is None:
            raise ValueError("no usable family name record in " + str(fontfile))
        fontname = namerecord.toUnicode()
        fsselection = font['OS/2'].fsSelection
        italic = (fsselection & 1) > 0
        bold = (fsselection & 32) > 0
    finally:
        font.close()    # TTFont keeps the file open until closed
    return fontname, bold, italic

def getfonts(fontsourcestrings, logfile, fromcommandline=True):
    """Turn font specification strings into a list of font descriptions.

    fontsourcestrings -- iterable of specification strings
    logfile           -- object with a log(message, level) method
    fromcommandline   -- True: each string is either a font file name (it
                         contains ".") or an installed font such as
                         "Family Bold Italic".
                         False: each string is the text of an FTML fontsrc
                         element, i.e. starts with local(...) or url(...).

    Returns a list of (fontname, bold, italic, fontfile) tuples, where
    fontfile is None for an installed font and the file name for a font file.
    Problems are reported through logfile and the offending entry is left out;
    duplicates are reported at level "W" but still included.
    """
    fontlist = []
    seenfamilies = set()        # (fontname, bold, italic) already listed
    seenfiles = set()           # base names of font files already listed

    for fs in fontsourcestrings:
        if fromcommandline:
            fontstring = fs
            installed = "." not in fs   # a "." means it must be a filename
        else:
            # from FTML fontsrc: try local() first, then url()
            installed = True
            results = findfontnamelocal.match(fs)
            if results is None:
                installed = False
                results = findfontnameurl.match(fs)
            if results is None:
                logfile.log("Invalid font specification: " + fs, "S")
                continue    # nothing usable to process if the logger returns
            fontstring = results.group('fontstring')

        if installed:
            # get name, bold and italic info from string
            results = fontspec.match(fontstring.strip())
            if results is None:
                logfile.log("Invalid font specification: " + fontstring.strip(), "E")
                continue
            fontname = results.group('rest')
            bold = results.group('bold') is not None
            italic = results.group('italic') is not None
            fontlist.append( (fontname, bold, italic, None) )
            if (fontname, bold, italic) in seenfamilies:
                logfile.log("Duplicate font specification: " + fs, "W") ### or more severe?
            else:
                seenfamilies.add( (fontname, bold, italic) )
            continue

        # font file: peek inside the font for the name, weight, style
        try:
            fontname, bold, italic = _fontfileinfo(fontstring)
        except IOError:
            logfile.log("Unable to find font file to embed: " + fontstring, "E")
            continue
        except ttLib.TTLibError:
            logfile.log("File is not a valid font: " + fontstring, "E")
            continue
        except Exception:
            logfile.log("Error occurred while checking font: " + fontstring, "E") # some other error
            continue

        fontlist.append( (fontname, bold, italic, fontstring) )
        if (fontname, bold, italic) in seenfamilies:
            logfile.log("Duplicate font specification: " + fs + BoldItalic(bold, italic), "W") ### or more severe?
        else:
            seenfamilies.add( (fontname, bold, italic) )
        basename = os.path.basename(fontstring)
        if basename in seenfiles:
            logfile.log("Duplicate embedded font: " + fontstring, "W") ### or more severe?
        else:
            seenfiles.add(basename)

    return fontlist

def _addstyle(container, name, family, propertiesclass, attributes):
    """Create Style(name, family) holding one properties element and add it to container."""
    style = Style(name=name, family=family)
    style.addElement(propertiesclass(attributes=attributes))
    container.addElement(style)

def init(LOdoc, numfonts=1):
    """Add the table, column, cell, text, Title and Subtitle styles to LOdoc.

    LOdoc    -- document object providing automaticstyles and styles
    numfonts -- number of font columns sharing the middle ("B") width;
                values below 1 are treated as 1 for the width division
    """
    totalwid = 6800 #6.8inches

    #compute column widths (in thousandths of an inch)
    f = min(numfonts,4)
    ashare = 4*(6-f)
    dshare = 2*(6-f)
    bshare = 100 - 2*ashare - dshare
    awid = totalwid * ashare // 100
    dwid = totalwid * dshare // 100
    bwid = totalwid * bshare // (max(numfonts, 1) * 100)    # never divide by zero

    def inches(thousandths):
        return str(thousandths/1000.)+"in"

    # create styles for table, for columns (one style for each column width)
    # and for one cell (used for everywhere except where background changed)
    auto = LOdoc.automaticstyles
    _addstyle(auto, "Table1", "table", TableProperties,
              {'width':inches(totalwid), 'align':"left"})
    _addstyle(auto, "Table1.A", "table-column", TableColumnProperties,
              {'columnwidth':inches(awid)})
    _addstyle(auto, "Table1.B", "table-column", TableColumnProperties,
              {'columnwidth':inches(bwid)})
    _addstyle(auto, "Table1.D", "table-column", TableColumnProperties,
              {'columnwidth':inches(dwid)})
    _addstyle(auto, "Table1.A1", "table-cell", TableCellProperties,
              {'padding':"0.035in", 'border':"0.05pt solid #000000"})

    # text style T1: grey (#999999) text
    _addstyle(auto, "T1", "text", TextProperties, {'color':"#999999" })

    # create styles for Title, Subtitle
    _addstyle(LOdoc.styles, "Title", "paragraph", TextProperties,
              {'fontfamily':"Arial",'fontsize':"24pt",'fontweight':"bold" })
    _addstyle(LOdoc.styles, "Subtitle", "paragraph", TextProperties,
              {'fontfamily':"Arial",'fontsize':"18pt",'fontweight':"bold" })

def doit(args) :
    logfile = args.logger
    if args.report: logfile.loglevel = args.report

    # NOTE(review): the code below assumes logging at level "S" stops the run;
    # otherwise root would be unbound after a parse failure -- confirm
    try:
        root = ET.parse(args.input).getroot()
    except:
        logfile.log("Error parsing FTML input", "S")

    if args.font:                   # font(s) specified on command line
        fontlist = getfonts( args.font, logfile )
    else:                           # get font spec from FTML fontsrc element
        fontlist = getfonts( [root.find("./head/fontsrc").text], logfile, False )
        #fontlist = getfonts( [fs.text for fs in root.findall("./head/fontsrc")], False ) ### would allow multiple fontsrc elements
    numfonts = len(fontlist)
    if numfonts == 0:
        logfile.log("No font(s) specified", "S")
    if numfonts > 1:
        formattedfontnum = ["{0:02d}".format(n) for n in range(numfonts)]
    else:
        formattedfontnum = [""]
    logfile.log("Font(s) specified:", "V")
    for n, (fontname, bold, italic, embeddedfont) in enumerate(fontlist):
        logfile.log(" " + formattedfontnum[n] + " " + fontname + BoldItalic(bold, italic) + " " + str(embeddedfont), "V")

    # get optional fontscale; compute pointsize as int(12*fontscale/100). If result xx is not 12, then add "fo:font-size=xxpt" in Px styles
    pointsize = 12
    fontscaleel = root.find("./head/fontscale")
    if fontscaleel != None:
        fontscale = fontscaleel.text
        try:
            pointsize = int(int(fontscale)*12/100)
        except ValueError:
            # any problem leaves pointsize 12
            logfile.log("Problem with fontscale value; defaulting to 12 point", "W")

    # Get FTML styles and generate LO writer styles
    # P2 is paragraph style for string element when no features specified
    # each Px (for P3...) corresponds to an FTML style, which specifies lang or feats or both
    # if numfonts > 1, two-digit font number is appended to make an LO writer style for each FTML style + font combo
    # When LO writer style is used with attribute rtl="True", "R" appended to style name
    LOstyles = {}
    ftmlstyles = {}
    Pstylenum = 2
    LOstyles["P2"] = ("", None, None)
    ftmlstyles[0] = "P2"

    for s in root.findall("./head/styles/style"):
        Pstylenum += 1
        Pnum = "P" + str(Pstylenum)
        featstring = ""
        if s.get('feats'):
            featstring = parsefeats(s.get('feats'))
        langname = None
        countryname = None
        lang = s.get('lang')
        if lang != None:
            # NOTE(review): re.match returns None when lang does not fit
            # langcode; x.group would then raise AttributeError
            x = re.match(langcode, lang)
            langname = x.group('langname')
            countryname = x.group('countryname')
        # FTML element @stylename attribute references this