diff options
Diffstat (limited to 'third_party/python/cram/cram/_diff.py')
-rw-r--r-- | third_party/python/cram/cram/_diff.py | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/third_party/python/cram/cram/_diff.py b/third_party/python/cram/cram/_diff.py new file mode 100644 index 0000000000..4877305082 --- /dev/null +++ b/third_party/python/cram/cram/_diff.py @@ -0,0 +1,158 @@ +"""Utilities for diffing test files and their output""" + +import codecs +import difflib +import re + +from cram._encoding import b + +__all__ = ['esc', 'glob', 'regex', 'unified_diff'] + +def _regex(pattern, s): + """Match a regular expression or return False if invalid. + + >>> from cram._encoding import b + >>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))] + [True, False] + """ + try: + return re.match(pattern + b(r'\Z'), s) + except re.error: + return False + +def _glob(el, l): + r"""Match a glob-like pattern. + + The only supported special characters are * and ?. Escaping is + supported. + + >>> from cram._encoding import b + >>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar'))) + True + """ + i, n = 0, len(el) + res = b('') + while i < n: + c = el[i:i + 1] + i += 1 + if c == b('\\') and el[i] in b('*?\\'): + res += el[i - 1:i + 1] + i += 1 + elif c == b('*'): + res += b('.*') + elif c == b('?'): + res += b('.') + else: + res += re.escape(c) + return _regex(res, l) + +def _matchannotation(keyword, matchfunc, el, l): + """Apply match function based on annotation keyword""" + ann = b(' (%s)\n' % keyword) + return el.endswith(ann) and matchfunc(el[:-len(ann)], l[:-1]) + +def regex(el, l): + """Apply a regular expression match to a line annotated with '(re)'""" + return _matchannotation('re', _regex, el, l) + +def glob(el, l): + """Apply a glob match to a line annotated with '(glob)'""" + return _matchannotation('glob', _glob, el, l) + +def esc(el, l): + """Apply an escape match to a line annotated with '(esc)'""" + ann = b(' (esc)\n') + + if el.endswith(ann): + el = codecs.escape_decode(el[:-len(ann)])[0] + b('\n') + if el == l: + return True + + if l.endswith(ann): + l = codecs.escape_decode(l[:-len(ann)])[0] + b('\n') + return el == l + +class _SequenceMatcher(difflib.SequenceMatcher, object): + """Like difflib.SequenceMatcher, but supports custom match functions""" + def __init__(self, *args, **kwargs): + self._matchers = kwargs.pop('matchers', []) + super(_SequenceMatcher, self).__init__(*args, **kwargs) + + def _match(self, el, l): + """Tests for matching lines using custom matchers""" + for matcher in self._matchers: + if matcher(el, l): + return True + return False + + def find_longest_match(self, alo, ahi, blo, bhi): + """Find longest matching block in a[alo:ahi] and b[blo:bhi]""" + # SequenceMatcher uses find_longest_match() to slowly whittle down + # the differences between a and b until it has each matching block. + # Because of this, we can end up doing the same matches many times. + matches = [] + for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])): + if el != line and self._match(el, line): + # This fools the superclass's method into thinking that the + # regex/glob in a is identical to b by replacing a's line (the + # expected output) with b's line (the actual output). + self.a[alo + n] = line + matches.append((n, el)) + ret = super(_SequenceMatcher, self).find_longest_match(alo, ahi, + blo, bhi) + # Restore the lines replaced above. Otherwise, the diff output + # would seem to imply that the tests never had any regexes/globs. + for n, el in matches: + self.a[alo + n] = el + return ret + +def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''), + tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None): + r"""Compare two sequences of lines; generate the delta as a unified diff. + + This is like difflib.unified_diff(), but allows custom matchers. + + >>> from cram._encoding import b + >>> l1 = [b('a\n'), b('? (glob)\n')] + >>> l2 = [b('a\n'), b('b\n')] + >>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'), + ... b('1970-01-02'))) == + ... [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'), + ... b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')]) + True + + >>> from cram._diff import glob + >>> list(unified_diff(l1, l2, matchers=[glob])) + [] + """ + if matchers is None: + matchers = [] + started = False + matcher = _SequenceMatcher(None, l1, l2, matchers=matchers) + for group in matcher.get_grouped_opcodes(n): + if not started: + if fromfiledate: + fromdate = b('\t') + fromfiledate + else: + fromdate = b('') + if tofiledate: + todate = b('\t') + tofiledate + else: + todate = b('') + yield b('--- ') + fromfile + fromdate + lineterm + yield b('+++ ') + tofile + todate + lineterm + started = True + i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] + yield (b("@@ -%d,%d +%d,%d @@" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)) + + lineterm) + for tag, i1, i2, j1, j2 in group: + if tag == 'equal': + for line in l1[i1:i2]: + yield b(' ') + line + continue + if tag == 'replace' or tag == 'delete': + for line in l1[i1:i2]: + yield b('-') + line + if tag == 'replace' or tag == 'insert': + for line in l2[j1:j2]: + yield b('+') + line |