summaryrefslogtreecommitdiffstats
path: root/third_party/python/cram/cram/_diff.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/cram/cram/_diff.py')
-rw-r--r--third_party/python/cram/cram/_diff.py158
1 files changed, 158 insertions, 0 deletions
diff --git a/third_party/python/cram/cram/_diff.py b/third_party/python/cram/cram/_diff.py
new file mode 100644
index 0000000000..4877305082
--- /dev/null
+++ b/third_party/python/cram/cram/_diff.py
@@ -0,0 +1,158 @@
+"""Utilities for diffing test files and their output"""
+
+import codecs
+import difflib
+import re
+
+from cram._encoding import b
+
+__all__ = ['esc', 'glob', 'regex', 'unified_diff']
+
+def _regex(pattern, s):
+ """Match a regular expression or return False if invalid.
+
+ >>> from cram._encoding import b
+ >>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))]
+ [True, False]
+ """
+ try:
+ return re.match(pattern + b(r'\Z'), s)
+ except re.error:
+ return False
+
+def _glob(el, l):
+ r"""Match a glob-like pattern.
+
+ The only supported special characters are * and ?. Escaping is
+ supported.
+
+ >>> from cram._encoding import b
+ >>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar')))
+ True
+ """
+ i, n = 0, len(el)
+ res = b('')
+ while i < n:
+ c = el[i:i + 1]
+ i += 1
+ if c == b('\\') and el[i] in b('*?\\'):
+ res += el[i - 1:i + 1]
+ i += 1
+ elif c == b('*'):
+ res += b('.*')
+ elif c == b('?'):
+ res += b('.')
+ else:
+ res += re.escape(c)
+ return _regex(res, l)
+
+def _matchannotation(keyword, matchfunc, el, l):
+ """Apply match function based on annotation keyword"""
+ ann = b(' (%s)\n' % keyword)
+ return el.endswith(ann) and matchfunc(el[:-len(ann)], l[:-1])
+
+def regex(el, l):
+ """Apply a regular expression match to a line annotated with '(re)'"""
+ return _matchannotation('re', _regex, el, l)
+
+def glob(el, l):
+ """Apply a glob match to a line annotated with '(glob)'"""
+ return _matchannotation('glob', _glob, el, l)
+
+def esc(el, l):
+ """Apply an escape match to a line annotated with '(esc)'"""
+ ann = b(' (esc)\n')
+
+ if el.endswith(ann):
+ el = codecs.escape_decode(el[:-len(ann)])[0] + b('\n')
+ if el == l:
+ return True
+
+ if l.endswith(ann):
+ l = codecs.escape_decode(l[:-len(ann)])[0] + b('\n')
+ return el == l
+
+class _SequenceMatcher(difflib.SequenceMatcher, object):
+ """Like difflib.SequenceMatcher, but supports custom match functions"""
+ def __init__(self, *args, **kwargs):
+ self._matchers = kwargs.pop('matchers', [])
+ super(_SequenceMatcher, self).__init__(*args, **kwargs)
+
+ def _match(self, el, l):
+ """Tests for matching lines using custom matchers"""
+ for matcher in self._matchers:
+ if matcher(el, l):
+ return True
+ return False
+
+ def find_longest_match(self, alo, ahi, blo, bhi):
+ """Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
+ # SequenceMatcher uses find_longest_match() to slowly whittle down
+ # the differences between a and b until it has each matching block.
+ # Because of this, we can end up doing the same matches many times.
+ matches = []
+ for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
+ if el != line and self._match(el, line):
+ # This fools the superclass's method into thinking that the
+ # regex/glob in a is identical to b by replacing a's line (the
+ # expected output) with b's line (the actual output).
+ self.a[alo + n] = line
+ matches.append((n, el))
+ ret = super(_SequenceMatcher, self).find_longest_match(alo, ahi,
+ blo, bhi)
+ # Restore the lines replaced above. Otherwise, the diff output
+ # would seem to imply that the tests never had any regexes/globs.
+ for n, el in matches:
+ self.a[alo + n] = el
+ return ret
+
+def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''),
+ tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None):
+ r"""Compare two sequences of lines; generate the delta as a unified diff.
+
+ This is like difflib.unified_diff(), but allows custom matchers.
+
+ >>> from cram._encoding import b
+ >>> l1 = [b('a\n'), b('? (glob)\n')]
+ >>> l2 = [b('a\n'), b('b\n')]
+ >>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'),
+ ... b('1970-01-02'))) ==
+ ... [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'),
+ ... b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')])
+ True
+
+ >>> from cram._diff import glob
+ >>> list(unified_diff(l1, l2, matchers=[glob]))
+ []
+ """
+ if matchers is None:
+ matchers = []
+ started = False
+ matcher = _SequenceMatcher(None, l1, l2, matchers=matchers)
+ for group in matcher.get_grouped_opcodes(n):
+ if not started:
+ if fromfiledate:
+ fromdate = b('\t') + fromfiledate
+ else:
+ fromdate = b('')
+ if tofiledate:
+ todate = b('\t') + tofiledate
+ else:
+ todate = b('')
+ yield b('--- ') + fromfile + fromdate + lineterm
+ yield b('+++ ') + tofile + todate + lineterm
+ started = True
+ i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+ yield (b("@@ -%d,%d +%d,%d @@" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)) +
+ lineterm)
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in l1[i1:i2]:
+ yield b(' ') + line
+ continue
+ if tag == 'replace' or tag == 'delete':
+ for line in l1[i1:i2]:
+ yield b('-') + line
+ if tag == 'replace' or tag == 'insert':
+ for line in l2[j1:j2]:
+ yield b('+') + line