Diffstat (limited to 'powerline/lint/markedjson')
-rw-r--r--  powerline/lint/markedjson/__init__.py    |  19
-rw-r--r--  powerline/lint/markedjson/composer.py    | 119
-rw-r--r--  powerline/lint/markedjson/constructor.py | 285
-rw-r--r--  powerline/lint/markedjson/error.py       | 241
-rw-r--r--  powerline/lint/markedjson/events.py      |  97
-rw-r--r--  powerline/lint/markedjson/loader.py      |  25
-rw-r--r--  powerline/lint/markedjson/markedvalue.py | 151
-rw-r--r--  powerline/lint/markedjson/nodes.py       |  55
-rw-r--r--  powerline/lint/markedjson/parser.py      | 255
-rw-r--r--  powerline/lint/markedjson/reader.py      | 141
-rw-r--r--  powerline/lint/markedjson/resolver.py    | 131
-rw-r--r--  powerline/lint/markedjson/scanner.py     | 499
-rw-r--r--  powerline/lint/markedjson/tokens.py      |  72
13 files changed, 2090 insertions, 0 deletions
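The package added below is a small YAML-style parsing stack (reader -> scanner -> parser -> composer -> constructor) specialised to JSON, in which every loaded value carries a mark recording where in the stream it came from. As a quick orientation before the diffs, a minimal usage sketch of the load() entry point added in __init__.py (the sample input is made up; note the tuple order follows the return statement — object first, error flag second — rather than the docstring):

    from io import StringIO
    from powerline.lint.markedjson import load

    obj, hadproblem = load(StringIO('{"segments": {"left": []}}'))
    assert not hadproblem
    # Every container and scalar is a Marked* proxy remembering its origin:
    print(obj['segments'].mark.to_string())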
diff --git a/powerline/lint/markedjson/__init__.py b/powerline/lint/markedjson/__init__.py new file mode 100644 index 0000000..dea5faf --- /dev/null +++ b/powerline/lint/markedjson/__init__.py @@ -0,0 +1,19 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from powerline.lint.markedjson.loader import Loader + + +def load(stream, Loader=Loader): + '''Parse JSON value and produce the corresponding Python object + + :return: + (hadproblem, object) where first argument is true if there were errors + during loading JSON stream and second is the corresponding JSON object. + ''' + loader = Loader(stream) + try: + r = loader.get_single_data() + return r, loader.haserrors + finally: + loader.dispose() diff --git a/powerline/lint/markedjson/composer.py b/powerline/lint/markedjson/composer.py new file mode 100644 index 0000000..bd5620d --- /dev/null +++ b/powerline/lint/markedjson/composer.py @@ -0,0 +1,119 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from powerline.lint.markedjson import nodes +from powerline.lint.markedjson import events +from powerline.lint.markedjson.error import MarkedError + + +__all__ = ['Composer', 'ComposerError'] + + +class ComposerError(MarkedError): + pass + + +class Composer: + def __init__(self): + pass + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(events.StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(events.StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(events.StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(events.StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(events.StreamEndEvent): + event = self.get_event() + raise ComposerError( + 'expected a single document in the stream', + document.start_mark, + 'but found another document', + event.start_mark + ) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. 
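+ # For reference, a document such as {"a": [1]} reaches the composer as
+ # the event stream (a sketch): StreamStart, DocumentStart, MappingStart,
+ # Scalar('a'), SequenceStart, Scalar('1'), SequenceEnd, MappingEnd,
+ # DocumentEnd, StreamEnd.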
+ self.get_event() + + return node + + def compose_node(self, parent, index): + self.descend_resolver(parent, index) + if self.check_event(events.ScalarEvent): + node = self.compose_scalar_node() + elif self.check_event(events.SequenceStartEvent): + node = self.compose_sequence_node() + elif self.check_event(events.MappingStartEvent): + node = self.compose_mapping_node() + self.ascend_resolver() + return node + + def compose_scalar_node(self): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(nodes.ScalarNode, event.value, event.implicit, event.start_mark) + node = nodes.ScalarNode(tag, event.value, event.start_mark, event.end_mark, style=event.style) + return node + + def compose_sequence_node(self): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(nodes.SequenceNode, None, start_event.implicit) + node = nodes.SequenceNode(tag, [], start_event.start_mark, None, flow_style=start_event.flow_style) + index = 0 + while not self.check_event(events.SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(nodes.MappingNode, None, start_event.implicit) + node = nodes.MappingNode(tag, [], start_event.start_mark, None, flow_style=start_event.flow_style) + while not self.check_event(events.MappingEndEvent): + # key_event = self.peek_event() + item_key = self.compose_node(node, None) + # if item_key in node.value: + # raise ComposerError('while composing a mapping', start_event.start_mark, + # 'found duplicate key', key_event.start_mark) + item_value = self.compose_node(node, item_key) + # node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node diff --git a/powerline/lint/markedjson/constructor.py b/powerline/lint/markedjson/constructor.py new file mode 100644 index 0000000..372d84b --- /dev/null +++ b/powerline/lint/markedjson/constructor.py @@ -0,0 +1,285 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +import collections +import types + +from functools import wraps + +from powerline.lint.markedjson.error import MarkedError + +from powerline.lint.markedjson import nodes +from powerline.lint.markedjson.markedvalue import gen_marked_value +from powerline.lib.unicode import unicode + + +def marked(func): + @wraps(func) + def f(self, node, *args, **kwargs): + return gen_marked_value(func(self, node, *args, **kwargs), node.start_mark) + return f + + +class ConstructorError(MarkedError): + pass + + +class BaseConstructor: + yaml_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. 
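+ # Note: construct_mapping() below reports duplicate and non-string keys
+ # via self.echoerr() and skips the offending pair instead of raising, so
+ # e.g. '{"a": 1, "a": 2}' constructs {'a': 1} with the loader's
+ # haserrors flag set.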
+ node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + raise ConstructorError(None, None, 'no constructor for tag %s' % node.tag) + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + if deep: + self.deep_construct = old_deep + return data + + @marked + def construct_scalar(self, node): + if not isinstance(node, nodes.ScalarNode): + raise ConstructorError( + None, None, + 'expected a scalar node, but found %s' % node.id, + node.start_mark + ) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, nodes.SequenceNode): + raise ConstructorError( + None, None, + 'expected a sequence node, but found %s' % node.id, + node.start_mark + ) + return [ + self.construct_object(child, deep=deep) + for child in node.value + ] + + @marked + def construct_mapping(self, node, deep=False): + if not isinstance(node, nodes.MappingNode): + raise ConstructorError( + None, None, + 'expected a mapping node, but found %s' % node.id, + node.start_mark + ) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.abc.Hashable): + self.echoerr( + 'While constructing a mapping', node.start_mark, + 'found unhashable key', key_node.start_mark + ) + continue + elif type(key.value) != unicode: + self.echoerr( + 'Error while constructing a mapping', node.start_mark, + 'found key that is not a string', key_node.start_mark + ) + continue + elif key in mapping: + self.echoerr( + 'Error while constructing a mapping', node.start_mark, + 'found duplicate key', key_node.start_mark + ) + continue + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + @classmethod + def add_constructor(cls, tag, constructor): + if 'yaml_constructors' not in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + +class Constructor(BaseConstructor): + def construct_scalar(self, node): + if isinstance(node, nodes.MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, nodes.MappingNode): + self.flatten_mapping(value_node) + 
merge.extend(value_node.value) + elif isinstance(value_node, nodes.SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, nodes.MappingNode): + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + 'expected a mapping for merging, but found %s' % subnode.id, + subnode.start_mark + ) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + ('expected a mapping or list of mappings for merging, but found %s' % value_node.id), + value_node.start_mark + ) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, nodes.MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + @marked + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + @marked + def construct_yaml_bool(self, node): + value = self.construct_scalar(node).value + return bool(value) + + @marked + def construct_yaml_int(self, node): + value = self.construct_scalar(node).value + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + else: + return sign * int(value) + + @marked + def construct_yaml_float(self, node): + value = self.construct_scalar(node).value + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + else: + return sign * float(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = gen_marked_value([], node.start_mark) + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = gen_marked_value({}, node.start_mark) + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_undefined(self, node): + raise ConstructorError( + None, None, + 'could not determine a constructor for the tag %r' % node.tag, + node.start_mark + ) + + +Constructor.add_constructor( + 'tag:yaml.org,2002:null', Constructor.construct_yaml_null) + +Constructor.add_constructor( + 'tag:yaml.org,2002:bool', Constructor.construct_yaml_bool) + +Constructor.add_constructor( + 'tag:yaml.org,2002:int', Constructor.construct_yaml_int) + +Constructor.add_constructor( + 'tag:yaml.org,2002:float', Constructor.construct_yaml_float) + +Constructor.add_constructor( + 'tag:yaml.org,2002:str', Constructor.construct_yaml_str) + +Constructor.add_constructor( + 'tag:yaml.org,2002:seq', Constructor.construct_yaml_seq) + +Constructor.add_constructor( + 'tag:yaml.org,2002:map', Constructor.construct_yaml_map) + +Constructor.add_constructor( + None, Constructor.construct_undefined) diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py new file mode 100644 index 0000000..732120b --- /dev/null +++ b/powerline/lint/markedjson/error.py @@ -0,0 +1,241 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +import sys +import re + +from powerline.lib.encoding import get_preferred_output_encoding + + +NON_PRINTABLE_STR = ( + '[^' + # ASCII control characters: 0x00-0x19 + + '\t\n' # Tab, newline: allowed ASCII control characters + + 
'\x20-\x7E' # ASCII printable characters + # Unicode control characters: 0x7F-0x9F + + '\u0085' # Allowed unicode control character: next line character + + '\u00A0-\uD7FF' + # Surrogate escapes: 0xD800-0xDFFF + + '\uE000-\uFFFD' + + (( + '\uD800-\uDFFF' + ) if sys.maxunicode < 0x10FFFF else ( + '\U00010000-\U0010FFFF' + )) + + ']' + + (( + # Paired surrogate escapes: allowed in UCS-2 builds as the only way to + # represent characters above 0xFFFF. Only paired variant is allowed. + '|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]' + + '|[\uD800-\uDBFF](?![\uDC00-\uDFFF])' + ) if sys.maxunicode < 0x10FFFF else ( + '' + )) +) +NON_PRINTABLE_RE = re.compile(NON_PRINTABLE_STR) + + +def repl(s): + return '<x%04x>' % ord(s.group()) + + +def strtrans(s): + return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---')) + + +class Mark: + def __init__(self, name, line, column, buffer, pointer, old_mark=None, merged_marks=None): + self.name = name + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + self.old_mark = old_mark + self.merged_marks = merged_marks or [] + + def copy(self): + return Mark(self.name, self.line, self.column, self.buffer, self.pointer, self.old_mark, self.merged_marks[:]) + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start - 1] not in '\0\n': + start -= 1 + if self.pointer - start > max_length / 2 - 1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\n': + end += 1 + if end - self.pointer > max_length / 2 - 1: + tail = ' ... ' + end -= 5 + break + snippet = [self.buffer[start:self.pointer], self.buffer[self.pointer], self.buffer[self.pointer + 1:end]] + snippet = [strtrans(s) for s in snippet] + return ( + ' ' * indent + head + ''.join(snippet) + tail + '\n' + + ' ' * (indent + len(head) + len(snippet[0])) + '^' + ) + + def advance_string(self, diff): + ret = self.copy() + # FIXME Currently does not work properly with escaped strings. 
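+ # advance_string(diff) returns a copy of this mark moved diff characters
+ # to the right on the same line; MarkedUnicode uses it in
+ # partition()/rpartition() to give each substring its own mark.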
+ ret.column += diff + ret.pointer += diff + return ret + + def set_old_mark(self, old_mark): + if self is old_mark: + return + checked_marks = set([id(self)]) + older_mark = old_mark + while True: + if id(older_mark) in checked_marks: + raise ValueError('Trying to set recursive marks') + checked_marks.add(id(older_mark)) + older_mark = older_mark.old_mark + if not older_mark: + break + self.old_mark = old_mark + + def set_merged_mark(self, merged_mark): + self.merged_marks.append(merged_mark) + + def to_string(self, indent=0, head_text='in ', add_snippet=True): + mark = self + where = '' + processed_marks = set() + while mark: + indentstr = ' ' * indent + where += ('%s %s"%s", line %d, column %d' % ( + indentstr, head_text, mark.name, mark.line + 1, mark.column + 1)) + if add_snippet: + snippet = mark.get_snippet(indent=(indent + 4)) + if snippet: + where += ':\n' + snippet + if mark.merged_marks: + where += '\n' + indentstr + ' with additionally merged\n' + where += mark.merged_marks[0].to_string(indent + 4, head_text='', add_snippet=False) + for mmark in mark.merged_marks[1:]: + where += '\n' + indentstr + ' and\n' + where += mmark.to_string(indent + 4, head_text='', add_snippet=False) + if add_snippet: + processed_marks.add(id(mark)) + if mark.old_mark: + where += '\n' + indentstr + ' which replaced value\n' + indent += 4 + mark = mark.old_mark + if id(mark) in processed_marks: + raise ValueError('Trying to dump recursive mark') + return where + + if sys.version_info < (3,): + def __str__(self): + return self.to_string().encode('utf-8') + + def __unicode__(self): + return self.to_string() + else: + def __str__(self): + return self.to_string() + + def __eq__(self, other): + return self is other or ( + self.name == other.name + and self.line == other.line + and self.column == other.column + ) + + +if sys.version_info < (3,): + def echoerr(**kwargs): + stream = kwargs.pop('stream', sys.stderr) + stream.write('\n') + stream.write((format_error(**kwargs) + '\n').encode(get_preferred_output_encoding())) +else: + def echoerr(**kwargs): + stream = kwargs.pop('stream', sys.stderr) + stream.write('\n') + stream.write(format_error(**kwargs) + '\n') + + +def format_error(context=None, context_mark=None, problem=None, problem_mark=None, note=None, indent=0): + lines = [] + indentstr = ' ' * indent + if context is not None: + lines.append(indentstr + context) + if ( + context_mark is not None + and ( + problem is None or problem_mark is None + or context_mark != problem_mark + ) + ): + lines.append(context_mark.to_string(indent=indent)) + if problem is not None: + lines.append(indentstr + problem) + if problem_mark is not None: + lines.append(problem_mark.to_string(indent=indent)) + if note is not None: + lines.append(indentstr + note) + return '\n'.join(lines) + + +class MarkedError(Exception): + def __init__(self, context=None, context_mark=None, problem=None, problem_mark=None, note=None): + Exception.__init__(self, format_error(context, context_mark, problem, problem_mark, note)) + + +class EchoErr(object): + __slots__ = ('echoerr', 'logger', 'indent') + + def __init__(self, echoerr, logger, indent=0): + self.echoerr = echoerr + self.logger = logger + self.indent = indent + + def __call__(self, **kwargs): + kwargs = kwargs.copy() + kwargs.setdefault('indent', self.indent) + self.echoerr(**kwargs) + + +class DelayedEchoErr(EchoErr): + __slots__ = ('echoerr', 'logger', 'errs', 'message', 'separator_message', 'indent', 'indent_shift') + + def __init__(self, echoerr, message='', 
separator_message=''): + super(DelayedEchoErr, self).__init__(echoerr, echoerr.logger) + self.errs = [[]] + self.message = message + self.separator_message = separator_message + self.indent_shift = (4 if message or separator_message else 0) + self.indent = echoerr.indent + self.indent_shift + + def __call__(self, **kwargs): + kwargs = kwargs.copy() + kwargs['indent'] = kwargs.get('indent', 0) + self.indent + self.errs[-1].append(kwargs) + + def next_variant(self): + self.errs.append([]) + + def echo_all(self): + if self.message: + self.echoerr(problem=self.message, indent=(self.indent - self.indent_shift)) + for variant in self.errs: + if not variant: + continue + if self.separator_message and variant is not self.errs[0]: + self.echoerr(problem=self.separator_message, indent=(self.indent - self.indent_shift)) + for kwargs in variant: + self.echoerr(**kwargs) + + def __nonzero__(self): + return not not self.errs + + __bool__ = __nonzero__ diff --git a/powerline/lint/markedjson/events.py b/powerline/lint/markedjson/events.py new file mode 100644 index 0000000..ef8a70e --- /dev/null +++ b/powerline/lint/markedjson/events.py @@ -0,0 +1,97 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + + +# Abstract classes. +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + + def __repr__(self): + attributes = [ + key for key in ['implicit', 'value'] + if hasattr(self, key) + ] + arguments = ', '.join([ + '%s=%r' % (key, getattr(self, key)) + for key in attributes + ]) + return '%s(%s)' % (self.__class__.__name__, arguments) + + +class NodeEvent(Event): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + + +class CollectionStartEvent(NodeEvent): + def __init__(self, implicit, start_mark=None, end_mark=None, flow_style=None): + self.tag = None + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + + +class CollectionEndEvent(Event): + pass + + +# Implementations. 
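+# Aside: every event's start_mark/end_mark is a Mark from error.py above.
+# Rendering one by hand (a sketch): Mark('<test>', 0, 9, '{"name": tru}\0', 9)
+# .to_string() prints the location plus a caret-underlined snippet, roughly:
+#      in "<test>", line 1, column 10:
+#          {"name": tru}
+#                   ^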
+class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + + +class StreamEndEvent(Event): + pass + + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + + +class AliasEvent(NodeEvent): + pass + + +class ScalarEvent(NodeEvent): + def __init__(self, implicit, value, start_mark=None, end_mark=None, style=None): + self.tag = None + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + + +class SequenceStartEvent(CollectionStartEvent): + pass + + +class SequenceEndEvent(CollectionEndEvent): + pass + + +class MappingStartEvent(CollectionStartEvent): + pass + + +class MappingEndEvent(CollectionEndEvent): + pass diff --git a/powerline/lint/markedjson/loader.py b/powerline/lint/markedjson/loader.py new file mode 100644 index 0000000..3ee5686 --- /dev/null +++ b/powerline/lint/markedjson/loader.py @@ -0,0 +1,25 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from powerline.lint.markedjson.reader import Reader +from powerline.lint.markedjson.scanner import Scanner +from powerline.lint.markedjson.parser import Parser +from powerline.lint.markedjson.composer import Composer +from powerline.lint.markedjson.constructor import Constructor +from powerline.lint.markedjson.resolver import Resolver +from powerline.lint.markedjson.error import echoerr + + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + self.haserrors = False + + def echoerr(self, *args, **kwargs): + echoerr(*args, **kwargs) + self.haserrors = True diff --git a/powerline/lint/markedjson/markedvalue.py b/powerline/lint/markedjson/markedvalue.py new file mode 100644 index 0000000..3b8db3e --- /dev/null +++ b/powerline/lint/markedjson/markedvalue.py @@ -0,0 +1,151 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from powerline.lib.unicode import unicode + + +def gen_new(cls): + def __new__(arg_cls, value, mark): + r = super(arg_cls, arg_cls).__new__(arg_cls, value) + r.mark = mark + r.value = value + return r + return __new__ + + +def gen_init(cls): + def __init__(self, value, mark): + return cls.__init__(self, value) + return __init__ + + +def gen_getnewargs(cls): + def __getnewargs__(self): + return (self.value, self.mark) + return __getnewargs__ + + +class MarkedUnicode(unicode): + __new__ = gen_new(unicode) + __getnewargs__ = gen_getnewargs(unicode) + + def _proc_partition(self, part_result): + pointdiff = 1 + r = [] + for s in part_result: + r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff))) + pointdiff += len(s) + return tuple(r) + + def rpartition(self, sep): + return self._proc_partition(super(MarkedUnicode, self).rpartition(sep)) + + def partition(self, sep): + return 
self._proc_partition(super(MarkedUnicode, self).partition(sep)) + + +class MarkedInt(int): + __new__ = gen_new(int) + __getnewargs__ = gen_getnewargs(int) + + +class MarkedFloat(float): + __new__ = gen_new(float) + __getnewargs__ = gen_getnewargs(float) + + +class MarkedDict(dict): + __init__ = gen_init(dict) + __getnewargs__ = gen_getnewargs(dict) + + def __new__(arg_cls, value, mark): + r = super(arg_cls, arg_cls).__new__(arg_cls, value) + r.mark = mark + r.value = value + r.keydict = dict(((key, key) for key in r)) + return r + + def setmerged(self, d): + try: + self.mark.set_merged_mark(d.mark) + except AttributeError: + pass + + def __setitem__(self, key, value): + try: + old_value = self[key] + except KeyError: + pass + else: + try: + key.mark.set_old_mark(self.keydict[key].mark) + except AttributeError: + pass + except KeyError: + pass + try: + value.mark.set_old_mark(old_value.mark) + except AttributeError: + pass + dict.__setitem__(self, key, value) + self.keydict[key] = key + + def update(self, *args, **kwargs): + dict.update(self, *args, **kwargs) + self.keydict = dict(((key, key) for key in self)) + + def copy(self): + return MarkedDict(super(MarkedDict, self).copy(), self.mark) + + +class MarkedList(list): + __new__ = gen_new(list) + __init__ = gen_init(list) + __getnewargs__ = gen_getnewargs(list) + + +class MarkedValue: + def __init__(self, value, mark): + self.mark = mark + self.value = value + + __getinitargs__ = gen_getnewargs(None) + + +specialclasses = { + unicode: MarkedUnicode, + int: MarkedInt, + float: MarkedFloat, + dict: MarkedDict, + list: MarkedList, +} + +classcache = {} + + +def gen_marked_value(value, mark, use_special_classes=True): + if use_special_classes and value.__class__ in specialclasses: + Marked = specialclasses[value.__class__] + elif value.__class__ in classcache: + Marked = classcache[value.__class__] + else: + class Marked(MarkedValue): + for func in value.__class__.__dict__: + if func == 'copy': + def copy(self): + return self.__class__(self.value.copy(), self.mark) + elif func not in set(('__init__', '__new__', '__getattribute__')): + if func in set(('__eq__',)): + # HACK to make marked dictionaries always work + exec (( + 'def {0}(self, *args):\n' + ' return self.value.{0}(*[arg.value if isinstance(arg, MarkedValue) else arg for arg in args])' + ).format(func)) + else: + exec (( + 'def {0}(self, *args, **kwargs):\n' + ' return self.value.{0}(*args, **kwargs)\n' + ).format(func)) + classcache[value.__class__] = Marked + + return Marked(value, mark) diff --git a/powerline/lint/markedjson/nodes.py b/powerline/lint/markedjson/nodes.py new file mode 100644 index 0000000..66ad843 --- /dev/null +++ b/powerline/lint/markedjson/nodes.py @@ -0,0 +1,55 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + + def __repr__(self): + value = self.value + # if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + # else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... 
') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + + +class ScalarNode(Node): + id = 'scalar' + + def __init__(self, tag, value, start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + + +class CollectionNode(Node): + def __init__(self, tag, value, start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + + +class SequenceNode(CollectionNode): + id = 'sequence' + + +class MappingNode(CollectionNode): + id = 'mapping' diff --git a/powerline/lint/markedjson/parser.py b/powerline/lint/markedjson/parser.py new file mode 100644 index 0000000..336a2a2 --- /dev/null +++ b/powerline/lint/markedjson/parser.py @@ -0,0 +1,255 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from powerline.lint.markedjson.error import MarkedError +from powerline.lint.markedjson import tokens +from powerline.lint.markedjson import events + + +class ParserError(MarkedError): + pass + + +class Parser: + def __init__(self): + self.current_event = None + self.yaml_version = None + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + # Parse the stream start. + token = self.get_token() + event = events.StreamStartEvent(token.start_mark, token.end_mark, encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + # Parse an implicit document. + if not self.check_token(tokens.StreamEndToken): + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = events.DocumentStartEvent(start_mark, end_mark, explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + # Parse an explicit document. 
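+ # JSON has no explicit documents: by the time this state runs, the
+ # single implicit document has already been consumed, so anything other
+ # than <stream end> is reported as an error below.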
+ if not self.check_token(tokens.StreamEndToken): + token = self.peek_token() + self.echoerr( + None, None, + ('expected \'<stream end>\', but found %r' % token.id), token.start_mark + ) + return events.StreamEndEvent(token.start_mark, token.end_mark) + else: + # Parse the end of the stream. + token = self.get_token() + event = events.StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + event = events.DocumentEndEvent(start_mark, end_mark, explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + return self.parse_node() + + def parse_node(self, indentless_sequence=False): + start_mark = end_mark = None + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = True + if self.check_token(tokens.ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if token.plain: + implicit = (True, False) + else: + implicit = (False, True) + event = events.ScalarEvent(implicit, token.value, start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(tokens.FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = events.SequenceStartEvent(implicit, start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(tokens.FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = events.MappingStartEvent(implicit, start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + else: + token = self.peek_token() + raise ParserError( + 'while parsing a flow node', start_mark, + 'expected the node content, but found %r' % token.id, + token.start_mark + ) + return event + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(tokens.FlowSequenceEndToken): + if not first: + if self.check_token(tokens.FlowEntryToken): + self.get_token() + if self.check_token(tokens.FlowSequenceEndToken): + token = self.peek_token() + self.echoerr( + 'While parsing a flow sequence', self.marks[-1], + ('expected sequence value, but got %r' % token.id), token.start_mark + ) + else: + token = self.peek_token() + raise ParserError( + 'while parsing a flow sequence', self.marks[-1], + ('expected \',\' or \']\', but got %r' % token.id), token.start_mark + ) + + if not self.check_token(tokens.FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_node() + token = self.get_token() + event = events.SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return events.MappingEndEvent(token.start_mark, token.start_mark) + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(tokens.FlowMappingEndToken): + if not first: + if 
self.check_token(tokens.FlowEntryToken): + self.get_token() + if self.check_token(tokens.FlowMappingEndToken): + token = self.peek_token() + self.echoerr( + 'While parsing a flow mapping', self.marks[-1], + ('expected mapping key, but got %r' % token.id), token.start_mark + ) + else: + token = self.peek_token() + raise ParserError( + 'while parsing a flow mapping', self.marks[-1], + ('expected \',\' or \'}\', but got %r' % token.id), token.start_mark + ) + if self.check_token(tokens.KeyToken): + token = self.get_token() + if not self.check_token(tokens.ValueToken, tokens.FlowEntryToken, tokens.FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_node() + else: + token = self.peek_token() + raise ParserError( + 'while parsing a flow mapping', self.marks[-1], + ('expected value, but got %r' % token.id), token.start_mark + ) + elif not self.check_token(tokens.FlowMappingEndToken): + token = self.peek_token() + expect_key = self.check_token(tokens.ValueToken, tokens.FlowEntryToken) + if not expect_key: + self.get_token() + expect_key = self.check_token(tokens.ValueToken) + + if expect_key: + raise ParserError( + 'while parsing a flow mapping', self.marks[-1], + ('expected string key, but got %r' % token.id), token.start_mark + ) + else: + token = self.peek_token() + raise ParserError( + 'while parsing a flow mapping', self.marks[-1], + ('expected \':\', but got %r' % token.id), token.start_mark + ) + token = self.get_token() + event = events.MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(tokens.ValueToken): + token = self.get_token() + if not self.check_token(tokens.FlowEntryToken, tokens.FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_node() + + token = self.peek_token() + raise ParserError( + 'while parsing a flow mapping', self.marks[-1], + ('expected mapping value, but got %r' % token.id), token.start_mark + ) diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py new file mode 100644 index 0000000..0ca4516 --- /dev/null +++ b/powerline/lint/markedjson/reader.py @@ -0,0 +1,141 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +import codecs + +from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE +from powerline.lib.unicode import unicode + + +# This module contains abstractions for the input stream. You don’t have to +# looks further, there are no pretty code. + + +class ReaderError(MarkedError): + pass + + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a file-like object with its `read` method returning `str`, + + # Yeah, it’s ugly and slow. 
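+ # API sketch: given Reader(StringIO('{"x": 1}')), peek() -> '{',
+ # prefix(4) -> '{"x"', forward(1) consumes one character, and
+ # get_mark() snapshots name/line/column/buffer for error reports.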
+ def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = '' + self.pointer = 0 + self.full_buffer = unicode('') + self.full_pointer = 0 + self.raw_buffer = None + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.index = 0 + self.line = 0 + self.column = 0 + + self.stream = stream + self.name = getattr(stream, 'name', '<file>') + self.eof = False + self.raw_buffer = None + + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): + self.update_raw() + self.update(1) + + def peek(self, index=0): + try: + return self.buffer[self.pointer + index] + except IndexError: + self.update(index + 1) + return self.buffer[self.pointer + index] + + def prefix(self, length=1): + if self.pointer + length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer + length] + + def update_pointer(self, length): + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.full_pointer += 1 + self.index += 1 + if ch == '\n': + self.line += 1 + self.column = 0 + else: + self.column += 1 + length -= 1 + + def forward(self, length=1): + if self.pointer + length + 1 >= len(self.buffer): + self.update(length + 1) + self.update_pointer(length) + + def get_mark(self): + return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer) + + def check_printable(self, data): + match = NON_PRINTABLE_RE.search(data) + if match: + self.update_pointer(match.start()) + raise ReaderError( + 'while reading from stream', None, + 'found special characters which are not allowed', + Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer) + ) + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + try: + data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + position = self.stream_pointer - len(self.raw_buffer) + exc.start + data, converted = self.raw_decode(self.raw_buffer[:exc.start], 'strict', self.eof) + self.buffer += data + self.full_buffer += data + '<' + str(ord(character)) + '>' + self.raw_buffer = self.raw_buffer[converted:] + self.update_pointer(exc.start - 1) + raise ReaderError( + 'while reading from stream', None, + 'found character #x%04x that cannot be decoded by UTF-8 codec' % ord(character), + Mark(self.name, self.line, self.column, self.full_buffer, position) + ) + self.buffer += data + self.full_buffer += data + self.raw_buffer = self.raw_buffer[converted:] + self.check_printable(data) + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=-1): + # Was size=4096 + assert(size < 0) + # WARNING: reading the whole stream at once. To change this behaviour to + # former reading N characters at once one must make sure that reading + # never ends at partial unicode character. 
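+ # (On a decode error, update() above splices a '<NNN>' placeholder for
+ # the offending byte into full_buffer before raising ReaderError, so
+ # error snippets still show the surrounding context.)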
+ data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True diff --git a/powerline/lint/markedjson/resolver.py b/powerline/lint/markedjson/resolver.py new file mode 100644 index 0000000..fa8ceaa --- /dev/null +++ b/powerline/lint/markedjson/resolver.py @@ -0,0 +1,131 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +import re + +from powerline.lint.markedjson.error import MarkedError +from powerline.lint.markedjson import nodes + + +class ResolverError(MarkedError): + pass + + +class BaseResolver: + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if 'yaml_implicit_resolvers' not in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, current_node, current_index): + node_check, index_check = path[depth - 1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if ((index_check is False or index_check is None) + and current_index is None): + return + if isinstance(index_check, str): + if not (isinstance(current_index, nodes.ScalarNode) and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit, mark=None): + if kind is nodes.ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + else: + self.echoerr( + 'While resolving plain scalar', None, + 'expected floating-point value, integer, null or boolean, but got %r' % value, + mark + ) + return self.DEFAULT_SCALAR_TAG + if kind is nodes.ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif 
kind is nodes.SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is nodes.MappingNode: + return self.DEFAULT_MAPPING_TAG + + +class Resolver(BaseResolver): + pass + + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:true|false)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'^-?(?:0|[1-9]\d*)(?=[.eE])(?:\.\d+)?(?:[eE][-+]?\d+)?$', re.X), + list('-0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'^(?:0|-?[1-9]\d*)$', re.X), + list('-0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'^null$', re.X), + ['n']) diff --git a/powerline/lint/markedjson/scanner.py b/powerline/lint/markedjson/scanner.py new file mode 100644 index 0000000..b0bddf3 --- /dev/null +++ b/powerline/lint/markedjson/scanner.py @@ -0,0 +1,499 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + +from string import hexdigits + +from powerline.lint.markedjson.error import MarkedError +from powerline.lint.markedjson import tokens +from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character + + +hexdigits_set = set(hexdigits) + + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DOCUMENT-START +# DOCUMENT-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# FLOW-ENTRY +# KEY +# VALUE +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. + + +class ScannerError(MarkedError): + pass + + +class SimpleKey: + # See below simple keys treatment. + def __init__(self, token_number, index, line, column, mark): + self.token_number = token_number + self.index = index + self.line = line + self.column = column + self.mark = mark + + +class Scanner: + def __init__(self): + '''Initialize the scanner.''' + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line. + + # Can a simple key start at the current position? A simple key may + # start: + # - after '{', '[', ',' (in the flow context), + self.allow_simple_key = False + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. 
The value is a SimpleKey record: + # (token_number, index, line, column, mark) + # A simple key may start with SCALAR(flow), '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the value indicator? + if ch == ':' and self.flow_level: + return self.fetch_value() + + # Is it a double quoted scalar? + if ch == '"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It’s an error. Let’s produce a nice error message. + raise ScannerError( + 'while scanning for the next token', None, + 'found character %r that cannot start any token' % ch, + self.get_mark() + ) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don’t need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). 
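+ # In JSON terms a 'simple key' is the scalar before ':' in a flow
+ # mapping; fetch_value() below retroactively inserts a KeyToken at the
+ # position recorded in the SimpleKey when the ':' is actually seen.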
+ for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line: + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it’s possible + # and save its position. This function is called for + # SCALAR(flow), '[', and '{'. + + # The next token might be a simple key. Let’s save it’s number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken + len(self.tokens) + key = SimpleKey(token_number, self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + del self.possible_simple_keys[self.flow_level] + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(tokens.StreamStartToken(mark, mark, encoding=self.encoding)) + + def fetch_stream_end(self): + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(tokens.StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(tokens.FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(tokens.FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(tokens.FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(tokens.FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_value(self): + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number - self.tokens_taken, tokens.KeyToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(tokens.ValueToken(start_mark, end_mark)) + + def fetch_flow_entry(self): + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. 
+ self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(tokens.FlowEntryToken(start_mark, end_mark)) + + def fetch_double(self): + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar()) + + def fetch_plain(self): + + self.save_possible_simple_key() + + # No simple keys after plain scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_plain(self): + return self.peek() in '0123456789-ntf' + + # Scanners. + + def scan_to_next_token(self): + while self.peek() in ' \t\n': + self.forward() + + def scan_flow_scalar(self): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don’t need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(start_mark)) + self.forward() + end_mark = self.get_mark() + return tokens.ScalarToken(unicode().join(chunks), False, start_mark, end_mark, '"') + + ESCAPE_REPLACEMENTS = { + 'b': '\x08', + 't': '\x09', + 'n': '\x0A', + 'f': '\x0C', + 'r': '\x0D', + '"': '\"', + '\\': '\\', + } + + ESCAPE_CODES = { + 'u': 4, + } + + def scan_flow_scalar_non_spaces(self, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\"\\\0 \t\n': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in hexdigits: + raise ScannerError( + 'while scanning a double-quoted scalar', start_mark, + 'expected escape sequence of %d hexdecimal numbers, but found %r' % ( + length, self.peek(k)), + self.get_mark() + ) + code = int(self.prefix(length), 16) + self.forward(length) + if 0xD800 <= code <= 0xDC00: + # Start of the surrogate pair + next_char = self.prefix(6) + if ( + next_char[0] != '\\' + or next_char[1] != 'u' + or not (set(next_char[2:]) < hexdigits_set) + or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF) + ): + raise ScannerError( + 'while scanning a double-quoted scalar', start_mark, + 'expected escape sequence with the next character in surrogate pair, but found %r' % ( + next_char + ), + self.get_mark() + ) + code = surrogate_pair_to_character(code, int(next_char[2:], 16)) + self.forward(6) + chunks.append(unichr(code)) + else: + raise ScannerError( + 'while scanning a double-quoted scalar', start_mark, + ('found unknown escape character %r' % ch), self.get_mark() + ) + else: + return chunks + + def scan_flow_scalar_spaces(self, start_mark): + # See the specification for details. 
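+ # Inside a double-quoted scalar only spaces and tabs may separate
+ # chunks; hitting a raw newline or the end of the stream here is
+ # reported as an error.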
+ chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError( + 'while scanning a quoted scalar', start_mark, + 'found unexpected end of stream', self.get_mark() + ) + elif ch == '\n': + raise ScannerError( + 'while scanning a quoted scalar', start_mark, + 'found unexpected line end', self.get_mark() + ) + else: + chunks.append(whitespaces) + return chunks + + def scan_plain(self): + chunks = [] + start_mark = self.get_mark() + spaces = [] + while True: + length = 0 + while True: + if self.peek(length) not in 'eE.0123456789nul-tr+fas': + break + length += 1 + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + return tokens.ScalarToken(''.join(chunks), True, start_mark, end_mark) diff --git a/powerline/lint/markedjson/tokens.py b/powerline/lint/markedjson/tokens.py new file mode 100644 index 0000000..6fa8bf1 --- /dev/null +++ b/powerline/lint/markedjson/tokens.py @@ -0,0 +1,72 @@ +# vim:fileencoding=utf-8:noet +from __future__ import (unicode_literals, division, absolute_import, print_function) + + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + + def __repr__(self): + attributes = [ + key for key in self.__dict__ + if not key.endswith('_mark') + ] + attributes.sort() + arguments = ', '.join([ + '%s=%r' % (key, getattr(self, key)) + for key in attributes + ]) + return '%s(%s)' % (self.__class__.__name__, arguments) + + +class StreamStartToken(Token): + id = '<stream start>' + + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + + +class StreamEndToken(Token): + id = '<stream end>' + + +class FlowSequenceStartToken(Token): + id = '[' + + +class FlowMappingStartToken(Token): + id = '{' + + +class FlowSequenceEndToken(Token): + id = ']' + + +class FlowMappingEndToken(Token): + id = '}' + + +class KeyToken(Token): + id = '?' + + +class ValueToken(Token): + id = ':' + + +class FlowEntryToken(Token): + id = ',' + + +class ScalarToken(Token): + id = '<scalar>' + + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style |
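As a closing sketch of how the pieces fit together, tokenising a small input with the scanner via the Loader mix-in (the input string is made up; repr output abbreviated):

    from io import StringIO
    from powerline.lint.markedjson.loader import Loader

    loader = Loader(StringIO('{"a": [1, true]}'))
    while loader.check_token():
        print(loader.get_token())
    # Roughly: StreamStartToken(encoding='utf-8'), FlowMappingStartToken(),
    # KeyToken(), ScalarToken(plain=False, style='"', value='a'), ValueToken(),
    # FlowSequenceStartToken(), ScalarToken(plain=True, style=None, value='1'),
    # FlowEntryToken(), ScalarToken(plain=True, style=None, value='true'),
    # FlowSequenceEndToken(), FlowMappingEndToken(), StreamEndToken()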