summary | refs | log | tree | commit | diff | stats
path: root/third_party/python/PyYAML/lib/yaml/reader.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/PyYAML/lib/yaml/reader.py')
-rw-r--r-- third_party/python/PyYAML/lib/yaml/reader.py 60
1 files changed, 26 insertions, 34 deletions
diff --git a/third_party/python/PyYAML/lib/yaml/reader.py b/third_party/python/PyYAML/lib/yaml/reader.py
index 4c42150989..774b0219b5 100644
--- a/third_party/python/PyYAML/lib/yaml/reader.py
+++ b/third_party/python/PyYAML/lib/yaml/reader.py
@@ -17,11 +17,9 @@
__all__ = ['Reader', 'ReaderError']
-from error import YAMLError, Mark
+from .error import YAMLError, Mark
-import codecs, re, sys
-
-has_ucs4 = sys.maxunicode > 0xffff
+import codecs, re
class ReaderError(YAMLError):
@@ -33,7 +31,7 @@ class ReaderError(YAMLError):
self.reason = reason
def __str__(self):
- if isinstance(self.character, str):
+ if isinstance(self.character, bytes):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
@@ -46,13 +44,13 @@ class ReaderError(YAMLError):
class Reader(object):
# Reader:
- # - determines the data encoding and converts it to unicode,
+ # - determines the data encoding and converts it to a unicode string,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
+ # - a `bytes` object,
# - a `str` object,
- # - a `unicode` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
@@ -63,7 +61,7 @@ class Reader(object):
self.stream = None
self.stream_pointer = 0
self.eof = True
- self.buffer = u''
+ self.buffer = ''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
@@ -71,19 +69,19 @@ class Reader(object):
self.index = 0
self.line = 0
self.column = 0
- if isinstance(stream, unicode):
+ if isinstance(stream, str):
self.name = "<unicode string>"
self.check_printable(stream)
- self.buffer = stream+u'\0'
- elif isinstance(stream, str):
- self.name = "<string>"
+ self.buffer = stream+'\0'
+ elif isinstance(stream, bytes):
+ self.name = "<byte string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
- self.raw_buffer = ''
+ self.raw_buffer = None
self.determine_encoding()
def peek(self, index=0):
@@ -105,11 +103,11 @@ class Reader(object):
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
- if ch in u'\n\x85\u2028\u2029' \
- or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
+ if ch in '\n\x85\u2028\u2029' \
+ or (ch == '\r' and self.buffer[self.pointer] != '\n'):
self.line += 1
self.column = 0
- elif ch != u'\uFEFF':
+ elif ch != '\uFEFF':
self.column += 1
length -= 1
@@ -122,9 +120,9 @@ class Reader(object):
None, None)
def determine_encoding(self):
- while not self.eof and len(self.raw_buffer) < 2:
+ while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
- if not isinstance(self.raw_buffer, unicode):
+ if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
@@ -136,15 +134,7 @@ class Reader(object):
self.encoding = 'utf-8'
self.update(1)
- if has_ucs4:
- NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]'
- elif sys.platform.startswith('java'):
- # Jython doesn't support lone surrogates https://bugs.jython.org/issue2048
- NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]'
- else:
- # Need to use eval here due to the above Jython issue
- NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'")
- NON_PRINTABLE = re.compile(NON_PRINTABLE)
+ NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
@@ -165,8 +155,8 @@ class Reader(object):
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
- except UnicodeDecodeError, exc:
- character = exc.object[exc.start]
+ except UnicodeDecodeError as exc:
+ character = self.raw_buffer[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
@@ -180,14 +170,16 @@ class Reader(object):
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
- self.buffer += u'\0'
+ self.buffer += '\0'
self.raw_buffer = None
break
- def update_raw(self, size=1024):
+ def update_raw(self, size=4096):
data = self.stream.read(size)
- if data:
- self.raw_buffer += data
- self.stream_pointer += len(data)
+ if self.raw_buffer is None:
+ self.raw_buffer = data
else:
+ self.raw_buffer += data
+ self.stream_pointer += len(data)
+ if not data:
self.eof = True