From 7a7ad2038d43ddf1c52d7a3155c8c4e3ad506277 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Thu, 29 Jun 2017 22:47:31 +0200
Subject: [PATCH] Trying to change the prefix parsing a bit.

---
 parso/python/normalizer.py | 98 +++++++++++++++++++++-----------------
 parso/python/prefix.py     | 45 +++++++++++++----
 test/test_prefix.py        | 44 +++++++++--------
 3 files changed, 114 insertions(+), 73 deletions(-)

diff --git a/parso/python/normalizer.py b/parso/python/normalizer.py
index 84754aa..89eb26e 100644
--- a/parso/python/normalizer.py
+++ b/parso/python/normalizer.py
@@ -61,7 +61,13 @@ class WhitespaceInfo(object):
             start_pos = parts[0].start_pos
         else:
             start_pos = leaf.start_pos
-        indentation_part = PrefixPart(leaf, 'indentation', '', start_pos)
+        indentation_part = PrefixPart(
+            leaf,
+            type='spacing',
+            value='',
+            spacing='',
+            start_pos=start_pos
+        )
 
         self.newline_count = 0
         for part in parts:
@@ -72,7 +78,7 @@ class WhitespaceInfo(object):
 
             if part.type == 'comment':
                 self.comments.append(Comment(part, indentation_part))
-            if part.type == 'indentation':
+            if part.type == 'spacing':
                 indentation_part = part
             else:
                 indentation_part = None
@@ -291,22 +297,29 @@ class PEP8Normalizer(Normalizer):
         elif in_introducer:
             self._in_suite_introducer = False
 
-    def _check_tabs_spaces(self, leaf, indentation):
-        if self._wrong_indentation_char in indentation:
-            self.add_issue(101, 'Indentation contains ' + self._indentation_type, leaf)
+    def _check_tabs_spaces(self, spacing):
+        if self._wrong_indentation_char in spacing.value:
+            self.add_issue(101, 'Indentation contains ' + self._indentation_type, spacing)
             return True
         return False
 
     def normalize(self, leaf):
+        for part in leaf._split_prefix():
+            if part.type == 'spacing':
+                # This part is used for the part call after for.
+                break
+            self._old_normalize(part, part.create_spacing_part())
+        return self._old_normalize(leaf, part)
+
+    def _old_normalize(self, leaf, spacing):
         value = leaf.value
-        info = WhitespaceInfo(leaf)
 
         if value == ',' and leaf.parent.type == 'dictorsetmaker':
             self._indentation_stack.pop()
 
         node = self._indentation_stack[-1]
 
-        if info.has_backslash and node.type != IndentationTypes.BACKSLASH:
+        if False and info.has_backslash and node.type != IndentationTypes.BACKSLASH:
             if node.type != IndentationTypes.SUITE:
                 self.add_issue(502, 'The backslash is redundant between brackets', leaf)
             else:
@@ -323,31 +336,33 @@ class PEP8Normalizer(Normalizer):
 
         if self._on_newline:
-            if node.type == IndentationTypes.BACKSLASH:
+            indentation = spacing.value
+            if node.type == IndentationTypes.BACKSLASH \
+                    and self._previous_leaf.type == 'newline':
                 self._indentation_stack.pop()
-            if info.indentation != node.indentation:
-                if not self._check_tabs_spaces(info.indentation_part, info.indentation):
+
+            if indentation != node.indentation:
+                if not self._check_tabs_spaces(spacing):
                     s = '%s %s' % (len(self._config.indentation), self._indentation_type)
                     self.add_issue(111, 'Indentation is not a multiple of ' + s, leaf)
-        elif info.newline_count:
-            if True:
+        else:
                 if value in '])}':
                     should_be_indentation = node.bracket_indentation
                 else:
                     should_be_indentation = node.indentation
-                if self._in_suite_introducer and info.indentation == \
+                if self._in_suite_introducer and indentation == \
                         self._indentation_stack.get_latest_suite_node().indentation \
                         + self._config.indentation:
                     self.add_issue(129, "Line with same indent as next logical block", leaf)
-                elif info.indentation != should_be_indentation:
-                    if not self._check_tabs_spaces(info.indentation_part, info.indentation):
+                elif indentation != should_be_indentation:
+                    if not self._check_tabs_spaces(spacing):
                         if value in '])}':
                             if node.type == IndentationTypes.VERTICAL_BRACKET:
                                 self.add_issue(124, "Closing bracket does not match visual indentation", leaf)
                             else:
                                 self.add_issue(123, "Losing bracket does not match indentation of opening bracket's line", leaf)
                         else:
-                            if len(info.indentation) < len(should_be_indentation):
+                            if len(indentation) < len(should_be_indentation):
                                 if node.type == IndentationTypes.VERTICAL_BRACKET:
                                     self.add_issue(128, 'Continuation line under-indented for visual indent', leaf)
                                 elif node.type == IndentationTypes.BACKSLASH:
@@ -364,10 +379,10 @@ class PEP8Normalizer(Normalizer):
                                 else:
                                     self.add_issue(126, 'Continuation line over-indented for hanging indent', leaf)
         else:
-            self._check_spacing(leaf, info)
+            self._check_spacing(leaf, spacing)
 
         first = True
-        for comment in info.comments:
+        for comment in []:#info.comments:
            if first and not self._on_newline:
                 continue
             first = False
@@ -384,7 +399,7 @@ class PEP8Normalizer(Normalizer):
                 if comment.indentation == should_be_indentation:
                     self._last_indentation_level = i
                 else:
-                    if not self._check_tabs_spaces(comment.indentation_part, comment.indentation):
+                    if not self._check_tabs_spaces(spacing):
                         if actual_len < should_len:
                             self.add_issue(115, 'Expected an indented block (comment)', comment)
                         elif actual_len > should_len:
@@ -443,10 +458,10 @@ class PEP8Normalizer(Normalizer):
             self._in_suite_introducer = False
 
         self._previous_leaf = leaf
-        self._previous_whitespace_info = info
+        self._previous_spacing = spacing
         return value
 
-    def _check_spacing(self, leaf, info):
+    def _check_spacing(self, leaf, spacing):
         def add_if_spaces(*args):
             if spaces:
                 return self.add_issue(*args)
@@ -455,37 +470,35 @@ class PEP8Normalizer(Normalizer):
             if not spaces:
                 return self.add_issue(*args)
 
-        spaces = info.indentation
+        spaces = spacing.value
         prev = self._previous_leaf
         if prev is not None and prev.type == 'error_leaf' or leaf.type == 'error_leaf':
             return
 
         if '\t' in spaces:
-            self.add_issue(223, 'Used tab to separate tokens', info.indentation_part)
+            self.add_issue(223, 'Used tab to separate tokens', spacing)
         elif leaf.type == 'newline':
-            add_if_spaces(291, 'Trailing whitespace', info.indentation_part)
+            add_if_spaces(291, 'Trailing whitespace', spacing)
         elif len(spaces) > 1:
-            self.add_issue(221, 'Multiple spaces used', info.indentation_part)
-        elif info.comments:
-            pass
+            self.add_issue(221, 'Multiple spaces used', spacing)
         else:
             if prev in _OPENING_BRACKETS:
                 message = "Whitespace after '%s'" % leaf.value
-                add_if_spaces(201, message, info.indentation_part)
+                add_if_spaces(201, message, spacing)
             elif leaf in _CLOSING_BRACKETS:
                 message = "Whitespace before '%s'" % leaf.value
-                add_if_spaces(202, message, info.indentation_part)
+                add_if_spaces(202, message, spacing)
             #elif leaf in _OPENING_BRACKETS:
                 # TODO
                 # if False:
                 # message = "Whitespace before '%s'" % leaf.value
-                # add_if_spaces(211, message, info.indentation_part)
+                # add_if_spaces(211, message, spacing)
             elif leaf in (',', ';') or leaf == ':' \
                     and leaf.parent.type not in ('subscript', 'subscriptlist'):
                 message = "Whitespace before '%s'" % leaf.value
-                add_if_spaces(203, message, info.indentation_part)
+                add_if_spaces(203, message, spacing)
             elif prev in (',', ';', ':'):
-                add_not_spaces('231', "missing whitespace after '%s'", info.indentation_part)
+                add_not_spaces('231', "missing whitespace after '%s'", spacing)
             elif leaf == ':':  # Is a subscript
                 # TODO
                 pass
@@ -504,32 +517,29 @@ class PEP8Normalizer(Normalizer):
                     else:
                        param = prev.parent
                    if param.type == 'param' and param.annotation:
-                        add_not_spaces(252, 'Expected spaces around annotation equals', info.indentation_part)
+                        add_not_spaces(252, 'Expected spaces around annotation equals', spacing)
                    else:
-                        add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', info.indentation_part)
+                        add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', spacing)
                elif leaf in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
-                    add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', info.indentation_part)
+                    add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', spacing)
                elif leaf == '%' or prev == '%':
-                    add_not_spaces(228, 'Missing whitespace around modulo operator', info.indentation_part)
+                    add_not_spaces(228, 'Missing whitespace around modulo operator', spacing)
                else:
                    message_225 = 'Missing whitespace between tokens'
-                    add_not_spaces(225, message_225, info.indentation_part)
+                    add_not_spaces(225, message_225, spacing)
                    #print('x', leaf.start_pos, leaf, prev)
            elif leaf.type == 'keyword' or prev.type == 'keyword':
-                add_not_spaces(275, 'Missing whitespace around keyword', info.indentation_part)
+                add_not_spaces(275, 'Missing whitespace around keyword', spacing)
            else:
-                prev_info = self._previous_whitespace_info
+                prev_info = self._previous_spacing
                message_225 = 'Missing whitespace between tokens'
                if prev in _ALLOW_SPACE and spaces != prev_info.indentation:
                    message = "Whitespace before operator doesn't match with whitespace after"
-                    self.add_issue(229, message, info.indentation_part)
+                    self.add_issue(229, message, spacing)
                if spaces and leaf not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
                    #print(leaf, prev)
-                    self.add_issue(225, message_225, info.indentation_part)
-
-            #if not prev_info.indentation and leaf not in _ALLOW_SPACE:
-                #self.add_issue(225, message_225, prev_info.indentation_part)
+                    self.add_issue(225, message_225, spacing)
 
     def _analyse_non_prefix(self, leaf):
         typ = leaf.type
diff --git a/parso/python/prefix.py b/parso/python/prefix.py
index 5260760..7fa1785 100644
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -4,10 +4,12 @@ from parso.tokenize import group
 
 
 class PrefixPart(object):
-    def __init__(self, leaf, typ, value, start_pos):
+    def __init__(self, leaf, typ, value, spacing='', start_pos=None):
+        assert start_pos is not None
         self.parent = leaf
         self.type = typ
         self.value = value
+        self.spacing = spacing
         self.start_pos = start_pos
 
     @property
@@ -16,6 +18,13 @@ class PrefixPart(object):
             return self.start_pos[0] + 1, 0
         return self.start_pos[0], self.start_pos[1] + len(self.value)
 
+    def create_spacing_part(self):
+        column = self.start_pos[1] - len(self.spacing)
+        return PrefixPart(
+            self.parent, 'spacing', self.spacing,
+            start_pos=(self.start_pos[0], column)
+        )
+
     def __repr__(self):
         return '%s(%s, %s, %s)' % (
             self.__class__.__name__,
@@ -27,35 +36,53 @@ class PrefixPart(object):
 
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n'
-_indentation = r'[ \t]+'
 _newline = r'\r?\n'
 _form_feed = r'\f'
+_only_spacing = '$'
+_spacing = r'[ \t]*'
 
-_regex = group(_comment, _backslash, _indentation, _newline, _form_feed)
-_regex = re.compile(_regex)
+_regex = group(
+    _comment, _backslash, _newline, _form_feed, _only_spacing,
+    capture=True
+)
+_regex = re.compile(group(_spacing, capture=True) + _regex)
 
 _types = {
-    ' ': 'indentation',
     '#': 'comment',
     '\\': 'backslash',
     '\f': 'formfeed',
     '\n': 'newline',
     '\r': 'newline',
-    '\t': 'indentation',
 }
 
 
 def split_prefix(leaf, start_pos):
     line, column = start_pos
     start = 0
+    value = spacing = ''
     while start != len(leaf.prefix):
         match =_regex.match(leaf.prefix, start)
-        value = match.group(0)
-        typ = _types[value[0]]
-        yield PrefixPart(leaf, typ, value, (line, column + start))
+        spacing = match.group(1)
+        value = match.group(2)
+        if not value:
+            break
+        type_ = _types[value[0]]
+        print(repr(spacing), repr(value), column)
+        yield PrefixPart(
+            leaf, type_, value, spacing,
+            start_pos=(line, column + start + len(spacing))
+        )
 
         start = match.end(0)
         if value.endswith('\n'):
             line += 1
             column = -start
+
+    print('x', repr(value), repr(spacing))
+    if value:
+        spacing = ''
+    yield PrefixPart(
+        leaf, 'spacing', spacing,
+        start_pos=(line, column + start)
+    )
diff --git a/test/test_prefix.py b/test/test_prefix.py
index ec9f338..88f40e7 100644
--- a/test/test_prefix.py
+++ b/test/test_prefix.py
@@ -1,20 +1,24 @@
+from itertools import zip_longest
+
 import pytest
+
 import parso
 
 
 @pytest.mark.parametrize(('string', 'tokens'), [
-    ('#', ['#']),
-    (' # ', [' ', '# ']),
-    (' # \n', [' ', '# ', '\n']),
-    (' # \f\n', [' ', '# ', '\f', '\n']),
-    (' \n', [' ', '\n']),
-    (' \n ', [' ', '\n', ' ']),
-    (' \f ', [' ', '\f', ' ']),
-    (' \f ', [' ', '\f', ' ']),
-    (' \r\n', [' ', '\r\n']),
-    ('\\\n', ['\\\n']),
-    ('\\\r\n', ['\\\r\n']),
-    ('\t\t\n\t', ['\t\t', '\n', '\t']),
+    ('', ['']),
+    ('#', ['#', '']),
+    (' # ', ['# ', '']),
+    (' # \n', ['# ', '\n', '']),
+    (' # \f\n', ['# ', '\f', '\n', '']),
+    (' \n', ['\n', '']),
+    (' \n ', ['\n', ' ']),
+    (' \f ', ['\f', ' ']),
+    (' \f ', ['\f', ' ']),
+    (' \r\n', ['\r\n', '']),
+    ('\\\n', ['\\\n', '']),
+    ('\\\r\n', ['\\\r\n', '']),
+    ('\t\t\n\t', ['\n', '\t']),
 ])
 def test_simple_prefix_splitting(string, tokens):
     tree = parso.parse(string)
@@ -23,14 +27,14 @@ def test_simple_prefix_splitting(string, tokens):
 
     parsed_tokens = list(leaf._split_prefix())
     start_pos = (1, 0)
-    for pt, expected in zip(parsed_tokens, tokens):
+    for pt, expected in zip_longest(parsed_tokens, tokens):
         assert pt.value == expected
 
         # Calculate the estimated end_pos
         if expected.endswith('\n'):
             end_pos = start_pos[0] + 1, 0
         else:
-            end_pos = start_pos[0], start_pos[1] + len(expected)
+            end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
 
         #assert start_pos == pt.start_pos
         assert end_pos == pt.end_pos
@@ -38,12 +42,12 @@ def test_simple_prefix_splitting(string, tokens):
 
 
 @pytest.mark.parametrize(('string', 'types'), [
-    ('# ', ['comment']),
-    ('\r\n', ['newline']),
-    ('\f', ['formfeed']),
-    ('\\\n', ['backslash']),
-    (' \t', ['indentation']),
-    (' \t ', ['indentation']),
+    ('# ', ['comment', 'spacing']),
+    ('\r\n', ['newline', 'spacing']),
+    ('\f', ['formfeed', 'spacing']),
+    ('\\\n', ['backslash', 'spacing']),
+    (' \t', ['spacing']),
+    (' \t ', ['spacing']),
 ])
 def test_prefix_splitting_types(string, types):
     tree = parso.parse(string)
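
Not part of the patch: the short sketch below illustrates the prefix splitting that the new tests describe. It assumes a parso checkout with this patch applied (without the patch, _split_prefix() yields 'indentation' parts and PrefixPart has no spacing attribute); the sample string and the listed output are illustrative only, and the leftover debug print() calls in split_prefix() will write extra lines.

    import parso

    # The whole string ends up in the endmarker's prefix, exactly as in
    # test_simple_prefix_splitting above.
    tree = parso.parse('    # a comment\n')
    leaf = tree.children[0]

    for part in leaf._split_prefix():
        # With this patch every part carries the spaces/tabs that preceded it
        # in part.spacing, and the sequence always ends with a 'spacing' part.
        print(part.type, repr(part.value), repr(part.spacing), part.start_pos)

    # Roughly expected, following the parametrized cases in test_prefix.py:
    #   comment  '# a comment'  spacing '    '  start_pos (1, 4)
    #   newline  '\n'           spacing ''      start_pos (1, 15)
    #   spacing  ''             spacing ''      start_pos (2, 0)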