Trying to change the prefix parsing a bit.

This commit is contained in:
Dave Halter
2017-06-29 22:47:31 +02:00
parent 063d4b052e
commit 7a7ad2038d
3 changed files with 114 additions and 73 deletions

View File

@@ -61,7 +61,13 @@ class WhitespaceInfo(object):
start_pos = parts[0].start_pos start_pos = parts[0].start_pos
else: else:
start_pos = leaf.start_pos start_pos = leaf.start_pos
indentation_part = PrefixPart(leaf, 'indentation', '', start_pos) indentation_part = PrefixPart(
leaf,
type='spacing',
value='',
spacing='',
start_pos=start_pos
)
self.newline_count = 0 self.newline_count = 0
for part in parts: for part in parts:
@@ -72,7 +78,7 @@ class WhitespaceInfo(object):
if part.type == 'comment': if part.type == 'comment':
self.comments.append(Comment(part, indentation_part)) self.comments.append(Comment(part, indentation_part))
if part.type == 'indentation': if part.type == 'spacing':
indentation_part = part indentation_part = part
else: else:
indentation_part = None indentation_part = None
@@ -291,22 +297,29 @@ class PEP8Normalizer(Normalizer):
elif in_introducer: elif in_introducer:
self._in_suite_introducer = False self._in_suite_introducer = False
def _check_tabs_spaces(self, spacing):
    """Report issue 101 if ``spacing`` contains the wrong indentation character.

    :param spacing: Prefix part whose ``value`` is the whitespace to inspect;
        the issue, if any, is attached to this part.
    :return: ``True`` when an issue was added, ``False`` otherwise.
    """
    if self._wrong_indentation_char in spacing.value:
        self.add_issue(101, 'Indentation contains ' + self._indentation_type, spacing)
        return True
    return False
def normalize(self, leaf):
    """Run ``_old_normalize`` over the prefix parts of ``leaf``, then the leaf.

    Every prefix part that comes before the first ``'spacing'`` part is
    passed to ``_old_normalize`` together with the spacing part created
    from it. The first ``'spacing'`` part stops the loop and is used as the
    spacing for ``leaf`` itself.

    :param leaf: Object providing ``_split_prefix()``.
    :return: Whatever ``_old_normalize`` returns for ``leaf``.
    """
    for part in leaf._split_prefix():
        if part.type == 'spacing':
            # ``part`` stays bound after the loop: it is the spacing handed
            # to the final ``_old_normalize`` call below.
            break
        self._old_normalize(part, part.create_spacing_part())
    return self._old_normalize(leaf, part)
def _old_normalize(self, leaf, spacing):
value = leaf.value value = leaf.value
info = WhitespaceInfo(leaf)
if value == ',' and leaf.parent.type == 'dictorsetmaker': if value == ',' and leaf.parent.type == 'dictorsetmaker':
self._indentation_stack.pop() self._indentation_stack.pop()
node = self._indentation_stack[-1] node = self._indentation_stack[-1]
if info.has_backslash and node.type != IndentationTypes.BACKSLASH: if False and info.has_backslash and node.type != IndentationTypes.BACKSLASH:
if node.type != IndentationTypes.SUITE: if node.type != IndentationTypes.SUITE:
self.add_issue(502, 'The backslash is redundant between brackets', leaf) self.add_issue(502, 'The backslash is redundant between brackets', leaf)
else: else:
@@ -323,31 +336,33 @@ class PEP8Normalizer(Normalizer):
if self._on_newline: if self._on_newline:
if node.type == IndentationTypes.BACKSLASH: indentation = spacing.value
if node.type == IndentationTypes.BACKSLASH \
and self._previous_leaf.type == 'newline':
self._indentation_stack.pop() self._indentation_stack.pop()
if info.indentation != node.indentation:
if not self._check_tabs_spaces(info.indentation_part, info.indentation): if indentation != node.indentation:
if not self._check_tabs_spaces(spacing):
s = '%s %s' % (len(self._config.indentation), self._indentation_type) s = '%s %s' % (len(self._config.indentation), self._indentation_type)
self.add_issue(111, 'Indentation is not a multiple of ' + s, leaf) self.add_issue(111, 'Indentation is not a multiple of ' + s, leaf)
elif info.newline_count: else:
if True:
if value in '])}': if value in '])}':
should_be_indentation = node.bracket_indentation should_be_indentation = node.bracket_indentation
else: else:
should_be_indentation = node.indentation should_be_indentation = node.indentation
if self._in_suite_introducer and info.indentation == \ if self._in_suite_introducer and indentation == \
self._indentation_stack.get_latest_suite_node().indentation \ self._indentation_stack.get_latest_suite_node().indentation \
+ self._config.indentation: + self._config.indentation:
self.add_issue(129, "Line with same indent as next logical block", leaf) self.add_issue(129, "Line with same indent as next logical block", leaf)
elif info.indentation != should_be_indentation: elif indentation != should_be_indentation:
if not self._check_tabs_spaces(info.indentation_part, info.indentation): if not self._check_tabs_spaces(spacing):
if value in '])}': if value in '])}':
if node.type == IndentationTypes.VERTICAL_BRACKET: if node.type == IndentationTypes.VERTICAL_BRACKET:
self.add_issue(124, "Closing bracket does not match visual indentation", leaf) self.add_issue(124, "Closing bracket does not match visual indentation", leaf)
else: else:
self.add_issue(123, "Losing bracket does not match indentation of opening bracket's line", leaf) self.add_issue(123, "Losing bracket does not match indentation of opening bracket's line", leaf)
else: else:
if len(info.indentation) < len(should_be_indentation): if len(indentation) < len(should_be_indentation):
if node.type == IndentationTypes.VERTICAL_BRACKET: if node.type == IndentationTypes.VERTICAL_BRACKET:
self.add_issue(128, 'Continuation line under-indented for visual indent', leaf) self.add_issue(128, 'Continuation line under-indented for visual indent', leaf)
elif node.type == IndentationTypes.BACKSLASH: elif node.type == IndentationTypes.BACKSLASH:
@@ -364,10 +379,10 @@ class PEP8Normalizer(Normalizer):
else: else:
self.add_issue(126, 'Continuation line over-indented for hanging indent', leaf) self.add_issue(126, 'Continuation line over-indented for hanging indent', leaf)
else: else:
self._check_spacing(leaf, info) self._check_spacing(leaf, spacing)
first = True first = True
for comment in info.comments: for comment in []:#info.comments:
if first and not self._on_newline: if first and not self._on_newline:
continue continue
first = False first = False
@@ -384,7 +399,7 @@ class PEP8Normalizer(Normalizer):
if comment.indentation == should_be_indentation: if comment.indentation == should_be_indentation:
self._last_indentation_level = i self._last_indentation_level = i
else: else:
if not self._check_tabs_spaces(comment.indentation_part, comment.indentation): if not self._check_tabs_spaces(spacing):
if actual_len < should_len: if actual_len < should_len:
self.add_issue(115, 'Expected an indented block (comment)', comment) self.add_issue(115, 'Expected an indented block (comment)', comment)
elif actual_len > should_len: elif actual_len > should_len:
@@ -443,10 +458,10 @@ class PEP8Normalizer(Normalizer):
self._in_suite_introducer = False self._in_suite_introducer = False
self._previous_leaf = leaf self._previous_leaf = leaf
self._previous_whitespace_info = info self._previous_spacing = spacing
return value return value
def _check_spacing(self, leaf, info): def _check_spacing(self, leaf, spacing):
def add_if_spaces(*args): def add_if_spaces(*args):
if spaces: if spaces:
return self.add_issue(*args) return self.add_issue(*args)
@@ -455,37 +470,35 @@ class PEP8Normalizer(Normalizer):
if not spaces: if not spaces:
return self.add_issue(*args) return self.add_issue(*args)
spaces = info.indentation spaces = spacing.value
prev = self._previous_leaf prev = self._previous_leaf
if prev is not None and prev.type == 'error_leaf' or leaf.type == 'error_leaf': if prev is not None and prev.type == 'error_leaf' or leaf.type == 'error_leaf':
return return
if '\t' in spaces: if '\t' in spaces:
self.add_issue(223, 'Used tab to separate tokens', info.indentation_part) self.add_issue(223, 'Used tab to separate tokens', spacing)
elif leaf.type == 'newline': elif leaf.type == 'newline':
add_if_spaces(291, 'Trailing whitespace', info.indentation_part) add_if_spaces(291, 'Trailing whitespace', spacing)
elif len(spaces) > 1: elif len(spaces) > 1:
self.add_issue(221, 'Multiple spaces used', info.indentation_part) self.add_issue(221, 'Multiple spaces used', spacing)
elif info.comments:
pass
else: else:
if prev in _OPENING_BRACKETS: if prev in _OPENING_BRACKETS:
message = "Whitespace after '%s'" % leaf.value message = "Whitespace after '%s'" % leaf.value
add_if_spaces(201, message, info.indentation_part) add_if_spaces(201, message, spacing)
elif leaf in _CLOSING_BRACKETS: elif leaf in _CLOSING_BRACKETS:
message = "Whitespace before '%s'" % leaf.value message = "Whitespace before '%s'" % leaf.value
add_if_spaces(202, message, info.indentation_part) add_if_spaces(202, message, spacing)
#elif leaf in _OPENING_BRACKETS: #elif leaf in _OPENING_BRACKETS:
# TODO # TODO
# if False: # if False:
# message = "Whitespace before '%s'" % leaf.value # message = "Whitespace before '%s'" % leaf.value
# add_if_spaces(211, message, info.indentation_part) # add_if_spaces(211, message, spacing)
elif leaf in (',', ';') or leaf == ':' \ elif leaf in (',', ';') or leaf == ':' \
and leaf.parent.type not in ('subscript', 'subscriptlist'): and leaf.parent.type not in ('subscript', 'subscriptlist'):
message = "Whitespace before '%s'" % leaf.value message = "Whitespace before '%s'" % leaf.value
add_if_spaces(203, message, info.indentation_part) add_if_spaces(203, message, spacing)
elif prev in (',', ';', ':'): elif prev in (',', ';', ':'):
add_not_spaces('231', "missing whitespace after '%s'", info.indentation_part) add_not_spaces('231', "missing whitespace after '%s'", spacing)
elif leaf == ':': # Is a subscript elif leaf == ':': # Is a subscript
# TODO # TODO
pass pass
@@ -504,32 +517,29 @@ class PEP8Normalizer(Normalizer):
else: else:
param = prev.parent param = prev.parent
if param.type == 'param' and param.annotation: if param.type == 'param' and param.annotation:
add_not_spaces(252, 'Expected spaces around annotation equals', info.indentation_part) add_not_spaces(252, 'Expected spaces around annotation equals', spacing)
else: else:
add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', info.indentation_part) add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', spacing)
elif leaf in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: elif leaf in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', info.indentation_part) add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', spacing)
elif leaf == '%' or prev == '%': elif leaf == '%' or prev == '%':
add_not_spaces(228, 'Missing whitespace around modulo operator', info.indentation_part) add_not_spaces(228, 'Missing whitespace around modulo operator', spacing)
else: else:
message_225 = 'Missing whitespace between tokens' message_225 = 'Missing whitespace between tokens'
add_not_spaces(225, message_225, info.indentation_part) add_not_spaces(225, message_225, spacing)
#print('x', leaf.start_pos, leaf, prev) #print('x', leaf.start_pos, leaf, prev)
elif leaf.type == 'keyword' or prev.type == 'keyword': elif leaf.type == 'keyword' or prev.type == 'keyword':
add_not_spaces(275, 'Missing whitespace around keyword', info.indentation_part) add_not_spaces(275, 'Missing whitespace around keyword', spacing)
else: else:
prev_info = self._previous_whitespace_info prev_info = self._previous_spacing
message_225 = 'Missing whitespace between tokens' message_225 = 'Missing whitespace between tokens'
if prev in _ALLOW_SPACE and spaces != prev_info.indentation: if prev in _ALLOW_SPACE and spaces != prev_info.indentation:
message = "Whitespace before operator doesn't match with whitespace after" message = "Whitespace before operator doesn't match with whitespace after"
self.add_issue(229, message, info.indentation_part) self.add_issue(229, message, spacing)
if spaces and leaf not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: if spaces and leaf not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
#print(leaf, prev) #print(leaf, prev)
self.add_issue(225, message_225, info.indentation_part) self.add_issue(225, message_225, spacing)
#if not prev_info.indentation and leaf not in _ALLOW_SPACE:
#self.add_issue(225, message_225, prev_info.indentation_part)
def _analyse_non_prefix(self, leaf): def _analyse_non_prefix(self, leaf):
typ = leaf.type typ = leaf.type

View File

@@ -4,10 +4,12 @@ from parso.tokenize import group
class PrefixPart(object): class PrefixPart(object):
def __init__(self, leaf, typ, value, spacing='', start_pos=None):
    """Store the data describing one part of a leaf's prefix.

    :param leaf: Leaf owning the prefix; stored as ``parent``.
    :param typ: Type name of the part; stored as ``type``.
    :param value: Text of the part.
    :param spacing: Whitespace that precedes ``value`` (defaults to ``''``).
    :param start_pos: ``(line, column)`` tuple of the part. It has a default
        only syntactically; passing ``None`` trips the assertion below.
    """
    assert start_pos is not None, 'start_pos is required'
    self.parent = leaf
    self.type = typ
    self.value = value
    self.spacing = spacing
    self.start_pos = start_pos
@property @property
@@ -16,6 +18,13 @@ class PrefixPart(object):
return self.start_pos[0] + 1, 0 return self.start_pos[0] + 1, 0
return self.start_pos[0], self.start_pos[1] + len(self.value) return self.start_pos[0], self.start_pos[1] + len(self.value)
def create_spacing_part(self):
    """Return a new ``'spacing'`` part for the whitespace before this part.

    The new part has the same parent, takes ``self.spacing`` as its value
    and starts ``len(self.spacing)`` columns to the left of
    ``self.start_pos`` on the same line.
    """
    column = self.start_pos[1] - len(self.spacing)
    return PrefixPart(
        self.parent, 'spacing', self.spacing,
        start_pos=(self.start_pos[0], column)
    )
def __repr__(self): def __repr__(self):
return '%s(%s, %s, %s)' % ( return '%s(%s, %s, %s)' % (
self.__class__.__name__, self.__class__.__name__,
@@ -27,35 +36,53 @@ class PrefixPart(object):
# Patterns for the individual kinds of prefix parts.
_comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n'
_newline = r'\r?\n'
_form_feed = r'\f'
# Matches the empty string at the end of the prefix, so that a prefix that
# ends in plain whitespace still produces a match (with an empty part).
_only_spacing = '$'
_spacing = r'[ \t]*'

# One match is "optional spacing" followed by exactly one part. split_prefix
# reads the spacing from group 1 and the part itself from group 2.
_regex = group(
    _comment, _backslash, _newline, _form_feed, _only_spacing,
    capture=True
)
_regex = re.compile(group(_spacing, capture=True) + _regex)


# Maps the first character of a matched part to the name of its type.
_types = {
    '#': 'comment',
    '\\': 'backslash',
    '\f': 'formfeed',
    '\n': 'newline',
    '\r': 'newline',
}
def split_prefix(leaf, start_pos):
    """Yield the parts of ``leaf.prefix`` as ``PrefixPart`` objects.

    Each comment, backslash continuation, newline and form feed becomes one
    part that carries the whitespace in front of it as ``spacing``; the
    part's ``start_pos`` points at its value, i.e. after that whitespace.
    A final part of type ``'spacing'`` is always yielded last: its value is
    the whitespace left over at the end of the prefix, or ``''`` when there
    is none.

    :param leaf: Object with a ``prefix`` string; becomes the parts' parent.
    :param start_pos: ``(line, column)`` position where the prefix starts.
    """
    line, column = start_pos
    start = 0
    value = spacing = ''
    while start != len(leaf.prefix):
        match = _regex.match(leaf.prefix, start)
        spacing = match.group(1)
        value = match.group(2)
        if not value:
            # Only whitespace remains; it is emitted as the final spacing
            # part below.
            break
        type_ = _types[value[0]]
        yield PrefixPart(
            leaf, type_, value, spacing,
            start_pos=(line, column + start + len(spacing))
        )

        start = match.end(0)
        if value.endswith('\n'):
            line += 1
            # ``start`` keeps counting from the beginning of the prefix, so
            # offset the column to make ``column + start`` zero on the new
            # line.
            column = -start

    if value:
        # The loop consumed the whole prefix, so nothing trails the last
        # part and the final spacing is empty.
        spacing = ''
    yield PrefixPart(
        leaf, 'spacing', spacing,
        start_pos=(line, column + start)
    )

View File

@@ -1,20 +1,24 @@
from itertools import zip_longest
import pytest import pytest
import parso import parso
@pytest.mark.parametrize(('string', 'tokens'), [ @pytest.mark.parametrize(('string', 'tokens'), [
('#', ['#']), ('', ['']),
(' # ', [' ', '# ']), ('#', ['#', '']),
(' # \n', [' ', '# ', '\n']), (' # ', ['# ', '']),
(' # \f\n', [' ', '# ', '\f', '\n']), (' # \n', ['# ', '\n', '']),
(' \n', [' ', '\n']), (' # \f\n', ['# ', '\f', '\n', '']),
(' \n ', [' ', '\n', ' ']), (' \n', ['\n', '']),
(' \f ', [' ', '\f', ' ']), (' \n ', ['\n', ' ']),
(' \f ', [' ', '\f', ' ']), (' \f ', ['\f', ' ']),
(' \r\n', [' ', '\r\n']), (' \f ', ['\f', ' ']),
('\\\n', ['\\\n']), (' \r\n', ['\r\n', '']),
('\\\r\n', ['\\\r\n']), ('\\\n', ['\\\n', '']),
('\t\t\n\t', ['\t\t', '\n', '\t']), ('\\\r\n', ['\\\r\n', '']),
('\t\t\n\t', ['\n', '\t']),
]) ])
def test_simple_prefix_splitting(string, tokens): def test_simple_prefix_splitting(string, tokens):
tree = parso.parse(string) tree = parso.parse(string)
@@ -23,14 +27,14 @@ def test_simple_prefix_splitting(string, tokens):
parsed_tokens = list(leaf._split_prefix()) parsed_tokens = list(leaf._split_prefix())
start_pos = (1, 0) start_pos = (1, 0)
for pt, expected in zip(parsed_tokens, tokens): for pt, expected in zip_longest(parsed_tokens, tokens):
assert pt.value == expected assert pt.value == expected
# Calculate the estimated end_pos # Calculate the estimated end_pos
if expected.endswith('\n'): if expected.endswith('\n'):
end_pos = start_pos[0] + 1, 0 end_pos = start_pos[0] + 1, 0
else: else:
end_pos = start_pos[0], start_pos[1] + len(expected) end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
#assert start_pos == pt.start_pos #assert start_pos == pt.start_pos
assert end_pos == pt.end_pos assert end_pos == pt.end_pos
@@ -38,12 +42,12 @@ def test_simple_prefix_splitting(string, tokens):
@pytest.mark.parametrize(('string', 'types'), [ @pytest.mark.parametrize(('string', 'types'), [
('# ', ['comment']), ('# ', ['comment', 'spacing']),
('\r\n', ['newline']), ('\r\n', ['newline', 'spacing']),
('\f', ['formfeed']), ('\f', ['formfeed', 'spacing']),
('\\\n', ['backslash']), ('\\\n', ['backslash', 'spacing']),
(' \t', ['indentation']), (' \t', ['spacing']),
(' \t ', ['indentation']), (' \t ', ['spacing']),
]) ])
def test_prefix_splitting_types(string, types): def test_prefix_splitting_types(string, types):
tree = parso.parse(string) tree = parso.parse(string)