diff options
Diffstat (limited to 'third_party/python/PyYAML/lib/yaml/reader.py')
-rw-r--r-- | third_party/python/PyYAML/lib/yaml/reader.py | 60 |
1 files changed, 26 insertions, 34 deletions
diff --git a/third_party/python/PyYAML/lib/yaml/reader.py b/third_party/python/PyYAML/lib/yaml/reader.py index 4c42150989..774b0219b5 100644 --- a/third_party/python/PyYAML/lib/yaml/reader.py +++ b/third_party/python/PyYAML/lib/yaml/reader.py @@ -17,11 +17,9 @@ __all__ = ['Reader', 'ReaderError'] -from error import YAMLError, Mark +from .error import YAMLError, Mark -import codecs, re, sys - -has_ucs4 = sys.maxunicode > 0xffff +import codecs, re class ReaderError(YAMLError): @@ -33,7 +31,7 @@ class ReaderError(YAMLError): self.reason = reason def __str__(self): - if isinstance(self.character, str): + if isinstance(self.character, bytes): return "'%s' codec can't decode byte #x%02x: %s\n" \ " in \"%s\", position %d" \ % (self.encoding, ord(self.character), self.reason, @@ -46,13 +44,13 @@ class ReaderError(YAMLError): class Reader(object): # Reader: - # - determines the data encoding and converts it to unicode, + # - determines the data encoding and converts it to a unicode string, # - checks if characters are in allowed range, # - adds '\0' to the end. # Reader accepts + # - a `bytes` object, # - a `str` object, - # - a `unicode` object, # - a file-like object with its `read` method returning `str`, # - a file-like object with its `read` method returning `unicode`. @@ -63,7 +61,7 @@ class Reader(object): self.stream = None self.stream_pointer = 0 self.eof = True - self.buffer = u'' + self.buffer = '' self.pointer = 0 self.raw_buffer = None self.raw_decode = None @@ -71,19 +69,19 @@ class Reader(object): self.index = 0 self.line = 0 self.column = 0 - if isinstance(stream, unicode): + if isinstance(stream, str): self.name = "<unicode string>" self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, str): - self.name = "<string>" + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "<byte string>" self.raw_buffer = stream self.determine_encoding() else: self.stream = stream self.name = getattr(stream, 'name', "<file>") self.eof = False - self.raw_buffer = '' + self.raw_buffer = None self.determine_encoding() def peek(self, index=0): @@ -105,11 +103,11 @@ class Reader(object): ch = self.buffer[self.pointer] self.pointer += 1 self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): self.line += 1 self.column = 0 - elif ch != u'\uFEFF': + elif ch != '\uFEFF': self.column += 1 length -= 1 @@ -122,9 +120,9 @@ class Reader(object): None, None) def determine_encoding(self): - while not self.eof and len(self.raw_buffer) < 2: + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): self.update_raw() - if not isinstance(self.raw_buffer, unicode): + if isinstance(self.raw_buffer, bytes): if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): self.raw_decode = codecs.utf_16_le_decode self.encoding = 'utf-16-le' @@ -136,15 +134,7 @@ class Reader(object): self.encoding = 'utf-8' self.update(1) - if has_ucs4: - NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]' - elif sys.platform.startswith('java'): - # Jython doesn't support lone surrogates https://bugs.jython.org/issue2048 - NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]' - else: - # Need to use eval here due to the above Jython issue - NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'") - NON_PRINTABLE = re.compile(NON_PRINTABLE) + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') def check_printable(self, data): match = self.NON_PRINTABLE.search(data) if match: @@ -165,8 +155,8 @@ class Reader(object): try: data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof) - except UnicodeDecodeError, exc: - character = exc.object[exc.start] + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] if self.stream is not None: position = self.stream_pointer-len(self.raw_buffer)+exc.start else: @@ -180,14 +170,16 @@ class Reader(object): self.buffer += data self.raw_buffer = self.raw_buffer[converted:] if self.eof: - self.buffer += u'\0' + self.buffer += '\0' self.raw_buffer = None break - def update_raw(self, size=1024): + def update_raw(self, size=4096): data = self.stream.read(size) - if data: - self.raw_buffer += data - self.stream_pointer += len(data) + if self.raw_buffer is None: + self.raw_buffer = data else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: self.eof = True |