From 7a7ad2038d43ddf1c52d7a3155c8c4e3ad506277 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Thu, 29 Jun 2017 22:47:31 +0200
Subject: [PATCH] Trying to change the prefix parsing a bit.

---
 parso/python/normalizer.py | 98 +++++++++++++++++++++-----------------
 parso/python/prefix.py     | 45 +++++++++++++----
 test/test_prefix.py        | 44 +++++++++--------
 3 files changed, 114 insertions(+), 73 deletions(-)

diff --git a/parso/python/normalizer.py b/parso/python/normalizer.py
index 84754aa..89eb26e 100644
--- a/parso/python/normalizer.py
+++ b/parso/python/normalizer.py
@@ -61,7 +61,13 @@ class WhitespaceInfo(object):
             start_pos = parts[0].start_pos
         else:
             start_pos = leaf.start_pos
-        indentation_part = PrefixPart(leaf, 'indentation', '', start_pos)
+        indentation_part = PrefixPart(
+            leaf,
+            type='spacing',
+            value='',
+            spacing='',
+            start_pos=start_pos
+        )
 
         self.newline_count = 0
         for part in parts:
@@ -72,7 +78,7 @@ class WhitespaceInfo(object):
 
             if part.type == 'comment':
                 self.comments.append(Comment(part, indentation_part))
-            if part.type == 'indentation':
+            if part.type == 'spacing':
                 indentation_part = part
             else:
                 indentation_part = None
@@ -291,22 +297,29 @@ class PEP8Normalizer(Normalizer):
         elif in_introducer:
             self._in_suite_introducer = False
 
-    def _check_tabs_spaces(self, leaf, indentation):
-        if self._wrong_indentation_char in indentation:
-            self.add_issue(101, 'Indentation contains ' + self._indentation_type, leaf)
+    def _check_tabs_spaces(self, spacing):
+        if self._wrong_indentation_char in spacing.value:
+            self.add_issue(101, 'Indentation contains ' + self._indentation_type, spacing)
             return True
         return False
 
     def normalize(self, leaf):
+        for part in leaf._split_prefix():
+            if part.type == 'spacing':
+                # This part is used for the part call after for.
+                break
+            self._old_normalize(part, part.create_spacing_part())
+        return self._old_normalize(leaf, part)
+
+    def _old_normalize(self, leaf, spacing):
         value = leaf.value
-        info = WhitespaceInfo(leaf)
 
         if value == ',' and leaf.parent.type == 'dictorsetmaker':
             self._indentation_stack.pop()
 
         node = self._indentation_stack[-1]
 
-        if info.has_backslash and node.type != IndentationTypes.BACKSLASH:
+        if False and info.has_backslash and node.type != IndentationTypes.BACKSLASH:
             if node.type != IndentationTypes.SUITE:
                 self.add_issue(502, 'The backslash is redundant between brackets', leaf)
             else:
@@ -323,31 +336,33 @@ class PEP8Normalizer(Normalizer):
 
         if self._on_newline:
-            if node.type == IndentationTypes.BACKSLASH:
+            indentation = spacing.value
+            if node.type == IndentationTypes.BACKSLASH \
+                    and self._previous_leaf.type == 'newline':
                 self._indentation_stack.pop()
-            if info.indentation != node.indentation:
-                if not self._check_tabs_spaces(info.indentation_part, info.indentation):
+
+            if indentation != node.indentation:
+                if not self._check_tabs_spaces(spacing):
                     s = '%s %s' % (len(self._config.indentation), self._indentation_type)
                     self.add_issue(111, 'Indentation is not a multiple of ' + s, leaf)
-        elif info.newline_count:
-            if True:
+        else:
                 if value in '])}':
                     should_be_indentation = node.bracket_indentation
                 else:
                     should_be_indentation = node.indentation
-                if self._in_suite_introducer and info.indentation == \
+                if self._in_suite_introducer and indentation == \
                         self._indentation_stack.get_latest_suite_node().indentation \
                         + self._config.indentation:
                     self.add_issue(129, "Line with same indent as next logical block", leaf)
-                elif info.indentation != should_be_indentation:
-                    if not self._check_tabs_spaces(info.indentation_part, info.indentation):
+                elif indentation != should_be_indentation:
+                    if not self._check_tabs_spaces(spacing):
                         if value in '])}':
                             if node.type == IndentationTypes.VERTICAL_BRACKET:
                                 self.add_issue(124, "Closing bracket does not match visual indentation", leaf)
                             else:
                                 self.add_issue(123, "Losing bracket does not match indentation of opening bracket's line", leaf)
                         else:
-                            if len(info.indentation) < len(should_be_indentation):
+                            if len(indentation) < len(should_be_indentation):
                                 if node.type == IndentationTypes.VERTICAL_BRACKET:
                                     self.add_issue(128, 'Continuation line under-indented for visual indent', leaf)
                                 elif node.type == IndentationTypes.BACKSLASH:
@@ -364,10 +379,10 @@ class PEP8Normalizer(Normalizer):
                                 else:
                                     self.add_issue(126, 'Continuation line over-indented for hanging indent', leaf)
         else:
-            self._check_spacing(leaf, info)
+            self._check_spacing(leaf, spacing)
 
         first = True
-        for comment in info.comments:
+        for comment in []:#info.comments:
            if first and not self._on_newline:
                 continue
             first = False
@@ -384,7 +399,7 @@ class PEP8Normalizer(Normalizer):
                 if comment.indentation == should_be_indentation:
                     self._last_indentation_level = i
                 else:
-                    if not self._check_tabs_spaces(comment.indentation_part, comment.indentation):
+                    if not self._check_tabs_spaces(spacing):
                         if actual_len < should_len:
                             self.add_issue(115, 'Expected an indented block (comment)', comment)
                         elif actual_len > should_len:
@@ -443,10 +458,10 @@ class PEP8Normalizer(Normalizer):
             self._in_suite_introducer = False
 
         self._previous_leaf = leaf
-        self._previous_whitespace_info = info
+        self._previous_spacing = spacing
         return value
 
-    def _check_spacing(self, leaf, info):
+    def _check_spacing(self, leaf, spacing):
         def add_if_spaces(*args):
             if spaces:
                 return self.add_issue(*args)
@@ -455,37 +470,35 @@ class PEP8Normalizer(Normalizer):
             if not spaces:
                 return self.add_issue(*args)
 
-        spaces = info.indentation
+        spaces = spacing.value
         prev = self._previous_leaf
         if prev is not None and prev.type == 'error_leaf' or leaf.type == 'error_leaf':
             return
 
         if '\t' in spaces:
-            self.add_issue(223, 'Used tab to separate tokens', info.indentation_part)
+            self.add_issue(223, 'Used tab to separate tokens', spacing)
         elif leaf.type == 'newline':
-            add_if_spaces(291, 'Trailing whitespace', info.indentation_part)
+            add_if_spaces(291, 'Trailing whitespace', spacing)
         elif len(spaces) > 1:
-            self.add_issue(221, 'Multiple spaces used', info.indentation_part)
-        elif info.comments:
-            pass
+            self.add_issue(221, 'Multiple spaces used', spacing)
         else:
             if prev in _OPENING_BRACKETS:
                 message = "Whitespace after '%s'" % leaf.value
-                add_if_spaces(201, message, info.indentation_part)
+                add_if_spaces(201, message, spacing)
             elif leaf in _CLOSING_BRACKETS:
                 message = "Whitespace before '%s'" % leaf.value
-                add_if_spaces(202, message, info.indentation_part)
+                add_if_spaces(202, message, spacing)
             #elif leaf in _OPENING_BRACKETS:
                 # TODO
                 # if False:
                 # message = "Whitespace before '%s'" % leaf.value
-                # add_if_spaces(211, message, info.indentation_part)
+                # add_if_spaces(211, message, spacing)
             elif leaf in (',', ';') or leaf == ':' \
                     and leaf.parent.type not in ('subscript', 'subscriptlist'):
                 message = "Whitespace before '%s'" % leaf.value
-                add_if_spaces(203, message, info.indentation_part)
+                add_if_spaces(203, message, spacing)
             elif prev in (',', ';', ':'):
-                add_not_spaces('231', "missing whitespace after '%s'", info.indentation_part)
+                add_not_spaces('231', "missing whitespace after '%s'", spacing)
             elif leaf == ':':  # Is a subscript
                 # TODO
                 pass
@@ -504,32 +517,29 @@ class PEP8Normalizer(Normalizer):
                     else:
                        param = prev.parent
                    if param.type == 'param' and param.annotation:
-                        add_not_spaces(252, 'Expected spaces around annotation equals', info.indentation_part)
+                        add_not_spaces(252, 'Expected spaces around annotation equals', spacing)
                    else:
-                        add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', info.indentation_part)
+                        add_if_spaces(251, 'Unexpected spaces around keyword / parameter equals', spacing)
                elif leaf in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
-                    add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', info.indentation_part)
+                    add_not_spaces(227, 'Missing whitespace around bitwise or shift operator', spacing)
                elif leaf == '%' or prev == '%':
-                    add_not_spaces(228, 'Missing whitespace around modulo operator', info.indentation_part)
+                    add_not_spaces(228, 'Missing whitespace around modulo operator', spacing)
                else:
                    message_225 = 'Missing whitespace between tokens'
-                    add_not_spaces(225, message_225, info.indentation_part)
+                    add_not_spaces(225, message_225, spacing)
                    #print('x', leaf.start_pos, leaf, prev)
            elif leaf.type == 'keyword' or prev.type == 'keyword':
-                add_not_spaces(275, 'Missing whitespace around keyword', info.indentation_part)
+                add_not_spaces(275, 'Missing whitespace around keyword', spacing)
            else:
-                prev_info = self._previous_whitespace_info
+                prev_info = self._previous_spacing
                message_225 = 'Missing whitespace between tokens'
                if prev in _ALLOW_SPACE and spaces != prev_info.indentation:
                    message = "Whitespace before operator doesn't match with whitespace after"
-                    self.add_issue(229, message, info.indentation_part)
+                    self.add_issue(229, message, spacing)
                if spaces and leaf not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
                    #print(leaf, prev)
-                    self.add_issue(225, message_225, info.indentation_part)
-
-            #if not prev_info.indentation and leaf not in _ALLOW_SPACE:
-                #self.add_issue(225, message_225, prev_info.indentation_part)
+                    self.add_issue(225, message_225, spacing)
 
     def _analyse_non_prefix(self, leaf):
         typ = leaf.type
diff --git a/parso/python/prefix.py b/parso/python/prefix.py
index 5260760..7fa1785 100644
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -4,10 +4,12 @@ from parso.tokenize import group
 
 
 class PrefixPart(object):
-    def __init__(self, leaf, typ, value, start_pos):
+    def __init__(self, leaf, typ, value, spacing='', start_pos=None):
+        assert start_pos is not None
         self.parent = leaf
         self.type = typ
         self.value = value
+        self.spacing = spacing
         self.start_pos = start_pos
 
     @property
@@ -16,6 +18,13 @@ class PrefixPart(object):
             return self.start_pos[0] + 1, 0
         return self.start_pos[0], self.start_pos[1] + len(self.value)
 
+    def create_spacing_part(self):
+        column = self.start_pos[1] - len(self.spacing)
+        return PrefixPart(
+            self.parent, 'spacing', self.spacing,
+            start_pos=(self.start_pos[0], column)
+        )
+
     def __repr__(self):
         return '%s(%s, %s, %s)' % (
             self.__class__.__name__,
@@ -27,35 +36,53 @@ class PrefixPart(object):
 
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n'
-_indentation = r'[ \t]+'
 _newline = r'\r?\n'
 _form_feed = r'\f'
+_only_spacing = '$'
+_spacing = r'[ \t]*'
 
-_regex = group(_comment, _backslash, _indentation, _newline, _form_feed)
-_regex = re.compile(_regex)
+_regex = group(
+    _comment, _backslash, _newline, _form_feed, _only_spacing,
+    capture=True
+)
+_regex = re.compile(group(_spacing, capture=True) + _regex)
 
 _types = {
-    ' ': 'indentation',
     '#': 'comment',
     '\\': 'backslash',
     '\f': 'formfeed',
     '\n': 'newline',
     '\r': 'newline',
-    '\t': 'indentation',
 }
 
 
 def split_prefix(leaf, start_pos):
     line, column = start_pos
     start = 0
+    value = spacing = ''
     while start != len(leaf.prefix):
         match =_regex.match(leaf.prefix, start)
-        value = match.group(0)
-        typ = _types[value[0]]
-        yield PrefixPart(leaf, typ, value, (line, column + start))
+        spacing = match.group(1)
+        value = match.group(2)
+        if not value:
+            break
+        type_ = _types[value[0]]
+        print(repr(spacing), repr(value), column)
+        yield PrefixPart(
+            leaf, type_, value, spacing,
+            start_pos=(line, column + start + len(spacing))
+        )
 
         start = match.end(0)
         if value.endswith('\n'):
             line += 1
             column = -start
+
+    print('x', repr(value), repr(spacing))
+    if value:
+        spacing = ''
+    yield PrefixPart(
+        leaf, 'spacing', spacing,
+        start_pos=(line, column + start)
+    )
diff --git a/test/test_prefix.py b/test/test_prefix.py
index ec9f338..88f40e7 100644
--- a/test/test_prefix.py
+++ b/test/test_prefix.py
@@ -1,20 +1,24 @@
+from itertools import zip_longest
+
 import pytest
+
 import parso
 
 
 @pytest.mark.parametrize(('string', 'tokens'), [
-    ('#', ['#']),
-    (' # ', [' ', '# ']),
-    (' # \n', [' ', '# ', '\n']),
-    (' # \f\n', [' ', '# ', '\f', '\n']),
-    (' \n', [' ', '\n']),
-    (' \n ', [' ', '\n', ' ']),
-    (' \f ', [' ', '\f', ' ']),
-    (' \f ', [' ', '\f', ' ']),
-    (' \r\n', [' ', '\r\n']),
-    ('\\\n', ['\\\n']),
-    ('\\\r\n', ['\\\r\n']),
-    ('\t\t\n\t', ['\t\t', '\n', '\t']),
+    ('', ['']),
+    ('#', ['#', '']),
+    (' # ', ['# ', '']),
+    (' # \n', ['# ', '\n', '']),
+    (' # \f\n', ['# ', '\f', '\n', '']),
+    (' \n', ['\n', '']),
+    (' \n ', ['\n', ' ']),
+    (' \f ', ['\f', ' ']),
+    (' \f ', ['\f', ' ']),
+    (' \r\n', ['\r\n', '']),
+    ('\\\n', ['\\\n', '']),
+    ('\\\r\n', ['\\\r\n', '']),
+    ('\t\t\n\t', ['\n', '\t']),
 ])
 def test_simple_prefix_splitting(string, tokens):
     tree = parso.parse(string)
@@ -23,14 +27,14 @@ def test_simple_prefix_splitting(string, tokens):
 
     parsed_tokens = list(leaf._split_prefix())
     start_pos = (1, 0)
-    for pt, expected in zip(parsed_tokens, tokens):
+    for pt, expected in zip_longest(parsed_tokens, tokens):
         assert pt.value == expected
 
         # Calculate the estimated end_pos
         if expected.endswith('\n'):
             end_pos = start_pos[0] + 1, 0
         else:
-            end_pos = start_pos[0], start_pos[1] + len(expected)
+            end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
 
         #assert start_pos == pt.start_pos
         assert end_pos == pt.end_pos
@@ -38,12 +42,12 @@ def test_simple_prefix_splitting(string, tokens):
 
 
 @pytest.mark.parametrize(('string', 'types'), [
-    ('# ', ['comment']),
-    ('\r\n', ['newline']),
-    ('\f', ['formfeed']),
-    ('\\\n', ['backslash']),
-    (' \t', ['indentation']),
-    (' \t ', ['indentation']),
+    ('# ', ['comment', 'spacing']),
+    ('\r\n', ['newline', 'spacing']),
+    ('\f', ['formfeed', 'spacing']),
+    ('\\\n', ['backslash', 'spacing']),
+    (' \t', ['spacing']),
+    (' \t ', ['spacing']),
 ])
 def test_prefix_splitting_types(string, types):
     tree = parso.parse(string)
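
Not part of the patch: the short sketch below illustrates the prefix splitting that the new tests describe. It assumes a parso checkout with this patch applied (without the patch, _split_prefix() yields 'indentation' parts and PrefixPart has no spacing attribute); the sample string and the listed output are illustrative only, and the leftover debug print() calls in split_prefix() will write extra lines.

    import parso

    # The whole string ends up in the endmarker's prefix, exactly as in
    # test_simple_prefix_splitting above.
    tree = parso.parse('    # a comment\n')
    leaf = tree.children[0]

    for part in leaf._split_prefix():
        # With this patch every part carries the spaces/tabs that preceded it
        # in part.spacing, and the sequence always ends with a 'spacing' part.
        print(part.type, repr(part.value), repr(part.spacing), part.start_pos)

    # Roughly expected, following the parametrized cases in test_prefix.py:
    #   comment  '# a comment'  spacing '    '  start_pos (1, 4)
    #   newline  '\n'           spacing ''      start_pos (1, 15)
    #   spacing  ''             spacing ''      start_pos (2, 0)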