From da875fcb62c801b8d19b3d4d984ad963574fb356 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 2 Mar 2023 21:01:10 +0100 Subject: Adding upstream version 1.6.0. Signed-off-by: Daniel Baumann --- lib/silfont/scripts/psfftml2odt.py | 453 +++++++++++++++++++++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100755 lib/silfont/scripts/psfftml2odt.py (limited to 'lib/silfont/scripts/psfftml2odt.py') diff --git a/lib/silfont/scripts/psfftml2odt.py b/lib/silfont/scripts/psfftml2odt.py new file mode 100755 index 0000000..49777e4 --- /dev/null +++ b/lib/silfont/scripts/psfftml2odt.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python +__doc__ = 'read FTML file and generate LO writer .odt file' +__url__ = 'http://github.com/silnrsi/pysilfont' +__copyright__ = 'Copyright (c) 2015, SIL International (http://www.sil.org)' +__license__ = 'Released under the MIT License (http://opensource.org/licenses/MIT)' +__author__ = 'David Rowe' + +from silfont.core import execute +from fontTools import ttLib +from xml.etree import ElementTree as ET ### used to parse input FTML (may not be needed if FTML parser used) +import re +import os +import io +from odf.opendocument import OpenDocumentText, OpaqueObject +from odf.config import ConfigItem, ConfigItemSet +from odf.office import FontFaceDecls +from odf.style import FontFace, ParagraphProperties, Style, TableCellProperties, TableColumnProperties, TableProperties, TextProperties +from odf.svg import FontFaceSrc, FontFaceUri, FontFaceFormat +from odf.table import Table, TableCell, TableColumn, TableRow +from odf.text import H, P, SequenceDecl, SequenceDecls, Span + +# specify two parameters: input file (FTML/XML format), output file (ODT format) +# preceded by optional log file plus zero or more font strings +argspec = [ + ('input',{'help': 'Input file in FTML format'}, {'type': 'infile'}), + ('output',{'help': 'Output file (LO writer .odt)', 'nargs': '?'}, {'type': 'filename', 'def': '_out.odt'}), + ('-l','--log',{'help': 'Log 
# RegExs for extracting font name from fontsrc element
# NOTE(review): the (?P<...>) group names were missing from the extracted text; they are
# restored here from the .group('...') calls made elsewhere in this file.
findfontnamelocal = re.compile(r"""local\(      # begin with local(
    (["']?)                                     # optional open quote
    (?P<fontstring>[^)]+)                       # font name
    \1                                          # optional matching close quote
    \)""", re.VERBOSE)                          # and end with )
findfontnameurl = re.compile(r"""url\(          # begin with url(
    (["']?)                                     # optional open quote
    (?P<fontstring>[^)]+)                       # font name
    \1                                          # optional matching close quote
    \)""", re.VERBOSE)                          # and end with )
# NOTE(review): 'regular' is not read anywhere in the visible code; the group name is
# an assumption made by analogy with 'bold' and 'italic' -- confirm against upstream.
fontspec = re.compile(r"""^                     # beginning of string
    (?P<rest>[A-Za-z ]+?)                       # Font Family Name
    \s*(?P<bold>Bold)?                          # Bold
    \s*(?P<italic>Italic)?                      # Italic
    \s*(?P<regular>Regular)?                    # Regular
    $""", re.VERBOSE)                           # end of string
# RegEx for extracting feature(s) from feats attribute of style element
onefeat = re.compile(r"""^\s*
    '(?P<featname>[^']+)'\s*                    # feature tag
    (?P<featval>[^', ]+)\s*                     # feature value
    ,?\s*                                       # optional comma
    (?P<remainder>.*)                           # rest of line (with zero or more tag-value pairs)
    $""", re.VERBOSE)
# RegEx for extracting language (and country) from lang attribute of style element
langcode = re.compile(r"""^
    (?P<langname>[A-Za-z]+)                     # language name
    (-                                          # (optional) hyphen and
    (?P<countryname>[A-Za-z]+)                  # country name
    (-[A-Za-z0-9][-A-Za-z0-9]*)?                # (optional) hyphen and other codes
    )?$""", re.VERBOSE)
# RegEx to extract hex value from \uxxxxxx and function to generate Unicode character
# use to change string to newstring:
#    newstring = re.sub(backu, hextounichr, string)
backu = re.compile(r"\\u([0-9a-fA-F]{4,6})")

# Largest value chr() accepts; backu allows six hex digits, which can exceed it.
_MAX_CODEPOINT = 0x10FFFF


def hextounichr(match):
    """Return the character for a ``backu`` match (replacement callback for re.sub).

    ``match.group(1)`` holds the 4-6 hex digits that follow ``\\u``.  Values
    above U+10FFFF cannot be turned into a character, so in that case the
    matched text is returned unchanged instead of raising ValueError.
    """
    codepoint = int(match.group(1), 16)
    if codepoint > _MAX_CODEPOINT:
        return match.group(0)
    return chr(codepoint)


def BoldItalic(bold, italic):
    """Return the style suffix: " Bold" and/or " Italic" (in that order), or ""."""
    return (" Bold" if bold else "") + (" Italic" if italic else "")


def parsefeats(inputline):
    """Convert an FTML feats string into a ":tag=value&tag=value" suffix.

    ``inputline`` is a sequence of ``'tag' value`` pairs, optionally separated
    by commas.  Pairs are consumed from the left with ``onefeat``; parsing
    stops at the first text that does not look like a pair.  The result starts
    with ":" and lists the pairs sorted by tag (just ":" if nothing parsed).
    """
    featdic = {}
    while inputline != "":
        results = onefeat.match(inputline)
        if results is None:
            break   ### warning about unrecognized feature string: inputline
        featdic[results.group('featname')] = results.group('featval')
        inputline = results.group('remainder')
    return ":" + "&".join(f + '=' + featdic[f] for f in sorted(featdic))


def _fontinfofromfile(fontfile):
    """Open a font file and return (family name, bold, italic) read from its tables."""
    f = ttLib.TTFont(fontfile)
    try:
        names = f['name']   # name table from font
        # nameID 1 = Font Family name; platform ID 3, Encoding ID 1,
        # falling back to platform ID 1, Encoding ID 0
        record = names.getName(1, 3, 1)
        if record is None:
            record = names.getName(1, 1, 0)
        if record is None:
            raise ValueError("no Font Family name (nameID 1) found in name table")
        fontname = record.toUnicode()
        # bold and italic come from OS/2 table, fsSelection bits 5 and 0
        fsselection = f['OS/2'].fsSelection
    finally:
        f.close()   # do not leave the font file open
    return fontname, (fsselection & 32) > 0, (fsselection & 1) > 0


def getfonts(fontsourcestrings, logfile, fromcommandline=True):
    """Turn font specification strings into a list of font descriptions.

    Parameters:
        fontsourcestrings: iterable of strings.  When ``fromcommandline`` is
            true, a string containing "." is taken to be a font file name and
            anything else the name of an installed font.  Otherwise each string
            is FTML fontsrc text of the form local(...) or url(...).
        logfile: object with a ``log(message, level)`` method.
        fromcommandline: selects which of the two syntaxes above is expected.

    Returns:
        A list of ``(fontname, bold, italic, fontfile)`` tuples, where
        ``fontfile`` is None for installed fonts.  Specifications that cannot
        be parsed or opened are logged and left out.  Duplicates are logged at
        level "W" but still included.
    """
    fontlist = []
    seenfamilies = set()    # (fontname, bold, italic) combinations already listed
    seenfiles = set()       # base names of font files already listed
    for fs in fontsourcestrings:
        if not fromcommandline:     # from FTML either local() or url()
            installed = True        # Assume locally installed font
            results = findfontnamelocal.match(fs)
            if results is None:
                installed = False
                results = findfontnameurl.match(fs)
            if results is None:
                logfile.log("Invalid font specification: " + fs, "S")
                continue    # nothing usable even if the logger does not abort
            fontstring = results.group('fontstring')
        else:                       # from command line
            fontstring = fs
            installed = "." not in fs   # a "." means it must be a filename

        if installed:
            # get name, bold and italic info from string
            results = fontspec.match(fontstring.strip())
            if results is None:
                logfile.log("Invalid font specification: " + fontstring.strip(), "E")
                continue
            fontname = results.group('rest')
            bold = results.group('bold') is not None
            italic = results.group('italic') is not None
            fontlist.append((fontname, bold, italic, None))
            if (fontname, bold, italic) in seenfamilies:
                logfile.log("Duplicate font specification: " + fs, "W")  ### or more severe?
            else:
                seenfamilies.add((fontname, bold, italic))
            continue

        # embedded font: peek inside the font for the name, weight, style
        try:
            fontname, bold, italic = _fontinfofromfile(fontstring)
        except IOError:
            logfile.log("Unable to find font file to embed: " + fontstring, "E")
            continue
        except ttLib.TTLibError:    # only ttLib (not fontTools) is imported by this file
            logfile.log("File is not a valid font: " + fontstring, "E")
            continue
        except Exception:
            logfile.log("Error occurred while checking font: " + fontstring, "E")  # some other error
            continue
        fontlist.append((fontname, bold, italic, fontstring))
        if (fontname, bold, italic) in seenfamilies:
            logfile.log("Duplicate font specification: " + fs + BoldItalic(bold, italic), "W")  ### or more severe?
        else:
            seenfamilies.add((fontname, bold, italic))
        basename = os.path.basename(fontstring)
        if basename in seenfiles:
            logfile.log("Duplicate embedded font: " + fontstring, "W")  ### or more severe?
        else:
            seenfiles.add(basename)
    return fontlist


def init(LOdoc, numfonts=1):
    """Add the table, column, cell, text and heading styles to ``LOdoc``.

    Parameters:
        LOdoc: document object; styles are added to its ``automaticstyles``
            and ``styles`` members.
        numfonts: number of fonts; used to compute the column widths (the
            "Table1.B" width is the B share divided by numfonts).
    """
    totalwid = 6800     # 6.8inches, in thousandths of an inch

    # compute column widths (shares are percentages of totalwid)
    f = min(numfonts, 4)
    ashare = 4*(6-f)
    dshare = 2*(6-f)
    bshare = 100 - 2*ashare - dshare
    awid = totalwid * ashare // 100
    dwid = totalwid * dshare // 100
    # max() guards against division by zero if called with numfonts == 0
    bwid = totalwid * bshare // (max(numfonts, 1) * 100)

    # create styles for table, for columns (one style for each column width)
    # and for one cell (used for everywhere except where background changed)
    tablestyle = Style(name="Table1", family="table")
    tablestyle.addElement(TableProperties(attributes={'width':str(totalwid/1000.)+"in", 'align':"left"}))
    LOdoc.automaticstyles.addElement(tablestyle)
    for stylename, width in (("Table1.A", awid), ("Table1.B", bwid), ("Table1.D", dwid)):
        colstyle = Style(name=stylename, family="table-column")
        colstyle.addElement(TableColumnProperties(attributes={'columnwidth':str(width/1000.)+"in"}))
        LOdoc.automaticstyles.addElement(colstyle)
    cellstyle = Style(name="Table1.A1", family="table-cell")
    cellstyle.addElement(TableCellProperties(attributes={'padding':"0.035in", 'border':"0.05pt solid #000000"}))
    LOdoc.automaticstyles.addElement(cellstyle)
    # grey text style (the original comment read "used with non- text"; the tag name
    # was lost in extraction -- presumably FTML <em>, TODO confirm)
    greystyle = Style(name="T1", family="text")
    greystyle.addElement(TextProperties(attributes={'color':"#999999" }))
    LOdoc.automaticstyles.addElement(greystyle)
    # create styles for Title, Subtitle
    for stylename, size in (("Title", "24pt"), ("Subtitle", "18pt")):
        headstyle = Style(name=stylename, family="paragraph")
        headstyle.addElement(TextProperties(attributes={'fontfamily':"Arial",'fontsize':size,'fontweight':"bold" }))
        LOdoc.styles.addElement(headstyle)
If result xx is not 12, then add "fo:font-size=xxpt" in Px styles + pointsize = 12 + fontscaleel = root.find("./head/fontscale") + if fontscaleel != None: + fontscale = fontscaleel.text + try: + pointsize = int(int(fontscale)*12/100) + except ValueError: + # any problem leaves pointsize 12 + logfile.log("Problem with fontscale value; defaulting to 12 point", "W") + + # Get FTML styles and generate LO writer styles + # P2 is paragraph style for string element when no features specified + # each Px (for P3...) corresponds to an FTML style, which specifies lang or feats or both + # if numfonts > 1, two-digit font number is appended to make an LO writer style for each FTML style + font combo + # When LO writer style is used with attribute rtl="True", "R" appended to style name + LOstyles = {} + ftmlstyles = {} + Pstylenum = 2 + LOstyles["P2"] = ("", None, None) + ftmlstyles[0] = "P2" + for s in root.findall("./head/styles/style"): + Pstylenum += 1 + Pnum = "P" + str(Pstylenum) + featstring = "" + if s.get('feats'): + featstring = parsefeats(s.get('feats')) + langname = None + countryname = None + lang = s.get('lang') + if lang != None: + x = re.match(langcode, lang) + langname = x.group('langname') + countryname = x.group('countryname') + # FTML element @stylename attribute references this