diff options
Diffstat (limited to '')
-rwxr-xr-x | msgfmt.py | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/msgfmt.py b/msgfmt.py new file mode 100755 index 0000000..c0e093a --- /dev/null +++ b/msgfmt.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Written by Martin v. Lwis <loewis@informatik.hu-berlin.de> +# Plural forms support added by alexander smishlajev <alex@tycobka.lv> +""" +Generate binary message catalog from textual translation description. + +This program converts a textual Uniforum-style message catalog (.po file) into +a binary GNU catalog (.mo file). This is essentially the same function as the +GNU msgfmt program, however, it is a simpler implementation. + +Usage: msgfmt.py [OPTIONS] filename.po + +Options: + -o file + --output-file=file + Specify the output file to write to. If omitted, output will go to a + file named filename.mo (based off the input file name). + + -h + --help + Print this message and exit. + + -V + --version + Display version information and exit. +""" +from __future__ import print_function, unicode_literals + +import array +import ast +import getopt +import os +import struct +import sys + +__version__ = '1.2' + +MESSAGES = {} + + +def usage(ecode, msg=''): + """ + Print usage and msg and exit with given code. + """ + print(__doc__, file=sys.stderr) + if msg: + print(msg, file=sys.stderr) + sys.exit(ecode) + + +def add(msgid, transtr, fuzzy): + """ + Add a non-fuzzy translation to the dictionary. + """ + if not fuzzy and transtr and not transtr.startswith('\x00'): + MESSAGES[msgid] = transtr + + +def generate(): + """ + Return the generated output. + """ + # the keys are sorted in the .mo file + keys = sorted(MESSAGES) + offsets = [] + ids = strs = '' + for _id in keys: + # For each string, we need size and file offset when encoded. Each string is NUL + # terminated; the NUL does not count into the size. + offsets.append( + ( + len(ids.encode('utf8')), + len(_id.encode('utf8')), + len(strs.encode('utf8')), + len(MESSAGES[_id].encode('utf8')), + ) + ) + ids += _id + '\x00' + strs += MESSAGES[_id] + '\x00' + + # The header is 7 32-bit unsigned integers. We don't use hash tables, so + # the keys start right after the index tables. + # translated string. + keystart = 7 * 4 + 16 * len(keys) + # and the values start after the keys + valuestart = keystart + len(ids) + koffsets = [] + voffsets = [] + # The string table first has the list of keys, then the list of values. + # Each entry has first the size of the string, then the file offset. + for o1, l1, o2, l2 in offsets: + koffsets += [l1, o1 + keystart] + voffsets += [l2, o2 + valuestart] + offsets = koffsets + voffsets + output = struct.pack( + 'Iiiiiii', + 0x950412DE, # Magic + 0, # Version + len(keys), # # of entries + 7 * 4, # start of key index + 7 * 4 + len(keys) * 8, # start of value index + 0, + 0, + ) # size and offset of hash table + if sys.version_info.major == 2: + output += array.array(b'i', offsets).tostring() + else: + output += array.array('i', offsets).tobytes() + output += ids.encode('utf8') + output += strs.encode('utf8') + return output + + +def make(filename, outfile): + section_id = 1 + section_str = 2 + global MESSAGES + MESSAGES = {} + + # Compute .mo name from .po name and arguments + if filename.endswith('.po'): + infile = filename + else: + infile = filename + '.po' + if outfile is None: + outfile = os.path.splitext(infile)[0] + '.mo' + + try: + import io + + with io.open(infile, encoding='utf8') as _file: + lines = _file.readlines() + except IOError as msg: + print(msg, file=sys.stderr) + sys.exit(1) + + section = None + fuzzy = 0 + + # Parse the catalog + msgid = msgstr = '' + lno = 0 + for line in lines: + lno += 1 + # If we get a comment line after a msgstr, this is a new entry + if line[0] == '#' and section == section_str: + add(msgid, msgstr, fuzzy) + section = None + fuzzy = 0 + # Record a fuzzy mark + if line[:2] == '#,' and (line.find('fuzzy') >= 0): + fuzzy = 1 + # Skip comments + if line[0] == '#': + continue + # Start of msgid_plural section, separate from singular form with \0 + if line.startswith('msgid_plural'): + msgid += '\x00' + line = line[12:] + # Now we are in a msgid section, output previous section + elif line.startswith('msgid'): + if section == section_str: + add(msgid, msgstr, fuzzy) + section = section_id + line = line[5:] + msgid = msgstr = '' + # Now we are in a msgstr section + elif line.startswith('msgstr'): + section = section_str + line = line[6:] + # Check for plural forms + if line.startswith('['): + # Separate plural forms with \0 + if not line.startswith('[0]'): + msgstr += '\x00' + # Ignore the index - must come in sequence + line = line[line.index(']') + 1 :] + # Skip empty lines + line = line.strip() + if not line: + continue + line = ast.literal_eval(line) + # Python 2 ast.literal_eval returns bytes. + if isinstance(line, bytes): + line = line.decode('utf8') + if section == section_id: + msgid += line + elif section == section_str: + msgstr += line + else: + print('Syntax error on %s:%d' % (infile, lno), 'before:', file=sys.stderr) + print(line, file=sys.stderr) + sys.exit(1) + # Add last entry + if section == section_str: + add(msgid, msgstr, fuzzy) + + # Compute output + output = generate() + + try: + with open(outfile, 'wb') as _file: + _file.write(output) + except IOError as msg: + print(msg, file=sys.stderr) + + +def main(): + try: + opts, args = getopt.getopt( + sys.argv[1:], 'hVo:', ['help', 'version', 'output-file='] + ) + except getopt.error as msg: + usage(1, msg) + + outfile = None + # parse options + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-V', '--version'): + print('msgfmt.py', __version__, file=sys.stderr) + sys.exit(0) + elif opt in ('-o', '--output-file'): + outfile = arg + # do it + if not args: + print('No input file given', file=sys.stderr) + print('Try `msgfmt --help` for more information.', file=sys.stderr) + return + + for filename in args: + make(filename, outfile) + + +if __name__ == '__main__': + main() |