#!/usr/bin/env python
"""Generate match tests from an AT&T POSIX regex test (``.dat``) file.

Each usable line of the input becomes one ``[[tests]]`` entry followed by
``key = value`` lines on standard output.
"""

from __future__ import absolute_import, division, print_function
import argparse
import os.path as path


def read_tests(f):
    """Parse the test file at path ``f`` into a list of test dictionaries.

    Lines are split on tabs; empty fields (e.g. from runs of tabs) are
    dropped. A line is used only when it has 4 or 5 non-empty fields, its
    first field contains ``E`` and does not start with ``#``.

    The first four fields are read as: option flags, pattern, input text and
    expected match groups.

    * Groups ``NOMATCH`` yields an empty match list. Groups of the form
      ``(s,e)(s,e)...`` yield only the first ``[s, e]`` pair. Any other
      value (tests expecting an error) causes the line to be skipped.
    * Input text ``NULL`` stands for the empty string.
    * Pattern ``SAME`` reuses the pattern of the previously emitted test.
    * Flag ``$`` means pattern and text contain escape sequences that are
      decoded first; flag ``i`` adds the ``case-insensitive`` option.

    Returns a list of dicts with the keys ``name``, ``options``, ``pattern``,
    ``input`` and ``matches``. All values are already formatted as strings
    ready to be printed as ``key = value``.

    Raises ``ValueError`` if a group span is not a pair of integers, and
    ``AttributeError`` if ``SAME`` is used before any test was emitted.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    prev_pattern = None

    # The context manager guarantees the file is closed, even if a line
    # fails to parse part-way through.
    with open(f) as datfile:
        for lineno, line in enumerate(datfile, 1):
            fields = [
                field for field in map(str.strip, line.split('\t')) if field
            ]
            if not 4 <= len(fields) <= 5:
                continue
            if 'E' not in fields[0] or fields[0][0] == '#':
                continue

            terse_opts, pat, text, sgroups = fields[0:4]

            # Expected groups as integer ranges.
            if sgroups == 'NOMATCH':
                groups = []
            elif ',' in sgroups:
                # Only the first "(start,end)" span is recorded; any
                # further spans on the line are intentionally ignored.
                first_span = sgroups.split(')(')[0].strip('()')
                start, end = map(str.strip, first_span.split(','))
                groups = [[int(start), int(end)]]
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            # Every emitted test carries the 'escaped' option because the
            # input text is always written out in escaped form below.
            opts = ['escaped']
            if text == "NULL":
                text = ""
            if pat == 'SAME':
                pat = prev_pattern
            if '$' in terse_opts:
                # Interpret backslash escapes contained in the test data.
                pat = pat.encode('utf-8').decode('unicode_escape')
                text = text.encode('utf-8').decode('unicode_escape')
            text = text.encode('unicode_escape').decode('utf-8')
            if 'i' in terse_opts:
                opts.append('case-insensitive')

            # Escape the pattern for output, then collapse the doubled
            # backslashes that escaping introduced back to single ones.
            pat = pat.encode('unicode_escape').decode('utf-8')
            pat = pat.replace('\\\\', '\\')

            tests.append({
                'name': '"%s%d"' % (basename, lineno),
                'options': repr(opts),
                'pattern': "'''%s'''" % pat,
                'input': "'''%s'''" % text,
                'matches': str(groups),
            })
            prev_pattern = pat
    return tests


def main():
    """Parse the command line and print every test read from the file."""
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    parser.add_argument('datfile', help='A dat AT&T POSIX test file.')
    args = parser.parse_args()

    for t in read_tests(args.datfile):
        print('[[tests]]')
        for k, v in t.items():
            print('%s = %s' % (k, v))
        print('')


if __name__ == '__main__':
    main()