Issue with backslashes again in the fast parser.

2015-02-21 18:07:21 +01:00
parent 0b5a509e83
commit 3ec96b25cc
4 changed files with 27 additions and 9 deletions
@@ -240,9 +240,10 @@ class FastParser(use_metaclass(CachedFastParser)):
    def update(self, source):
        # For testing purposes: It is important that the number of parsers used
-        # can be minimized. With this variable we can test it.
+        # can be minimized. With these variables we can test against that.
        self.number_parsers_used = 0
        self.number_of_splits = 0
        self.number_of_misses = 0
        self.module.reset_caches()
        try:
            self._parse(source)
@@ -285,7 +286,10 @@ class FastParser(use_metaclass(CachedFastParser)):
        for i, l in enumerate(self._lines):
            # Handle backslash newline escaping.
            if l.endswith('\\\n') or l.endswith('\\\r\n'):
-                previous_line = l
+                if previous_line is not None:
                    previous_line += l
                else:
                    previous_line = l
                continue
            if previous_line is not None:
                l = previous_line + l
@@ -371,6 +375,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                # Means that some lines were not fully parsed. Parse them now.
                # This is a very rare case. It should only happen with very
                # strange bits of code.
                self.number_of_misses += 1
                while last_end_line < next_line_offset + 1:
                    line_offset = last_end_line - 1
                    # We could calculate the src in a more complicated way to
@@ -383,7 +388,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                    last_end_line = self.current_node.parser.module.end_pos[0]
                debug.dbg('While parsing %s, line %s slowed down the fast parser.',
-                          self.module_path, line_offset)
+                          self.module_path, line_offset + 1)
            line_offset = next_line_offset
            start += len(code_part)
@@ -501,7 +506,7 @@ class FastTokenizer(object):
        # Parentheses ignore the indentation rules. The other three stand for
        # new lines.
        if self.previous[0] in (NEWLINE, INDENT, DEDENT) \
-                and not self._parentheses_level and typ != INDENT:
+                and not self._parentheses_level and typ not in (INDENT, DEDENT):
            # Check for NEWLINE, which symbolizes the indent.
            if not self._in_flow:
                if value in FLOWS:
@@ -121,7 +121,7 @@ class PgenParser(object):
            ilabel = self.classify(type, value, start_pos)
        except ParseError:
            # Currently we ignore tokens like `?`.
-            print('invalid token', tokenize.tok_name[type], value)
+            print('invalid token', tokenize.tok_name[type], repr(value))
            return
        # Loop until the token is shifted; may raise exceptions
@@ -265,8 +265,8 @@ def generate_tokens(readline):
                            break
                yield NAME, token, spos, prefix
            elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
-                additional_prefix += line[start:]
+                additional_prefix += prefix + line[start:]
-                continue
+                break
            else:
                if token in '([{':
                    paren_level += 1
@@ -76,7 +76,7 @@ def test_split_parts():
    test('a\n', 'def b():\n pass\n', 'c\n')
-def check_fp(src, number_parsers_used, number_of_splits=None):
+def check_fp(src, number_parsers_used, number_of_splits=None, number_of_misses=0):
    if number_of_splits is None:
        number_of_splits = number_parsers_used
@@ -88,6 +88,7 @@ def check_fp(src, number_parsers_used, number_of_splits=None):
    assert src == p.module.get_code()
    assert p.number_of_splits == number_of_splits
    assert p.number_parsers_used == number_parsers_used
    assert p.number_of_misses == number_of_misses
    return p.module
@@ -367,6 +368,18 @@ def test_backslash():
    # split.
    check_fp(src, 2)
    src = dedent(r"""
    def first():
        if foo \
                and bar \
                or baz:
            pass
    def second():
        pass
    """)
    check_fp(src, 2)
 def test_fake_parentheses():
    """
@@ -383,7 +396,7 @@ def test_fake_parentheses():
        def z():
            pass
    """)
-    check_fp(src, 3, 2)
+    check_fp(src, 3, 2, 1)
 def test_incomplete_function():