Fix more issues in the fast parser.

2026-03-18 05:52:24 +08:00 · 2015-01-29 15:38:38 +01:00
parent 0a537c05c4
commit a221eee02c
3 changed files with 50 additions and 20 deletions
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -298,6 +298,7 @@ class FastParser(use_metaclass(CachedFastParser)):
        # For testing purposes: It is important that the number of parsers used
        # can be minimized. With this variable we can test it.
        self.number_parsers_used = 0
+        self.number_of_splits = 0
        self.module.reset_caches()
        try:
            self._parse(source)
@@ -315,6 +316,7 @@ class FastParser(use_metaclass(CachedFastParser)):
        def gen_part():
            text = '\n'.join(current_lines)
            del current_lines[:]
+            self.number_of_splits += 1
            return text

        # Split only new lines. Distinction between \r\n is the tokenizer's
@@ -594,17 +596,11 @@ class FastTokenizer(object):
                        #self._parser_indent += 1  # new scope: must be higher
                        #self._new_indent = True

-            if value != '@':
-                if self._first_stmt and not self._new_indent:
-                    self._parser_indent = indent
-                self._first_stmt = False
-
-        # Ignore closing parentheses, because they are all
-        # irrelevant for the indentation.
-
        if value in '([{' and value:
            self._parentheses_level += 1
        elif value in ')]}' and value:
+            # Ignore closing parentheses, because they are all
+            # irrelevant for the indentation.
            self._parentheses_level = max(self._parentheses_level - 1, 0)
        return current

@@ -612,7 +608,10 @@ class FastTokenizer(object):
        if self._first_stmt:
            # Continue like nothing has happened, because we want to enter
            # the first class/function.
-            self._first_stmt = False
+            if self.current[1] != '@':
+                #if self._first_stmt and not self._new_indent:
+                    #self._parser_indent = indent
+                self._first_stmt = False
            return self.current
        else:
            self._closed = True
@@ -627,7 +626,10 @@ class FastTokenizer(object):
        elif not self._returned_endmarker:
            self._returned_endmarker = True
            # We're using the current prefix for the endmarker to not lose any
-            # information.
-            return ENDMARKER, '', start_pos, self.current[3]
+            # information. However, we must not drop whole "lost" lines, so only
+            # the trailing partial line is stripped here: the indent of the
+            # current line will always be included in the current line itself.
+            prefix = re.sub('[^\n]+$', '', self.current[3])
+            return ENDMARKER, '', start_pos, prefix
        else:
            raise StopIteration
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -154,7 +154,11 @@ def generate_tokens(readline, line_offset=0):
    numchars = '0123456789'
    contstr = ''
    contline = None
-    new_line = False
+    # We start as if a newline had just been seen. This makes an INDENT at the
+    # first position possible. It's not valid Python, but still better than an
+    # INDENT in the second line (and not in the first). This makes quite a few
+    # things in Jedi's fast parser possible.
+    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    while True:            # loop over lines in stream
--- a/test/test_parser/test_fast_parser.py
+++ b/test/test_parser/test_fast_parser.py
@@ -59,13 +59,17 @@ def test_carriage_return_splitting():
    assert [n.value for lst in p.module.names_dict.values() for n in lst] == ['Foo']


-def check_fp(src, number_parsers_used):
+def check_fp(src, number_parsers_used, number_of_splits=None):
+    if number_of_splits is None:
+        number_of_splits = number_parsers_used
+
    p = FastParser(load_grammar(), u(src))
    cache.save_parser(None, None, p, pickling=False)

    # TODO Don't change get_code, the whole thing should be the same.
    # -> Need to refactor the parser first, though.
    assert src == p.module.get_code()
+    assert p.number_of_splits == number_of_splits
    assert p.number_parsers_used == number_parsers_used
    return p.module

@@ -77,18 +81,18 @@ def test_change_and_undo():
    # Parse the function and a.
    check_fp(func_before + 'a', 2)
    # Parse just b.
-    check_fp(func_before + 'b', 1)
+    check_fp(func_before + 'b', 1, 2)
    # b has changed to a again, so parse that.
-    check_fp(func_before + 'a', 1)
+    check_fp(func_before + 'a', 1, 2)
    # Same as before no parsers should be used.
-    check_fp(func_before + 'a', 0)
+    check_fp(func_before + 'a', 0, 2)

    # Getting rid of an old parser: Still no parsers used.
-    check_fp('a', 0)
+    check_fp('a', 0, 1)
    # Now the file has completely changed and we need to parse.
-    check_fp('b', 1)
+    check_fp('b', 1, 1)
    # And again.
-    check_fp('a', 1)
+    check_fp('a', 1, 1)


 def test_positions():
@@ -100,7 +104,7 @@ def test_positions():
    assert m.start_pos == (1, 0)
    assert m.end_pos == (3, 1)

-    m = check_fp('a', 0)
+    m = check_fp('a', 0, 1)
    assert m.start_pos == (1, 0)
    assert m.end_pos == (1, 1)

@@ -156,6 +160,26 @@ def test_func_with_if():
    check_fp(src, 1)


+def test_decorator():
+    src = dedent("""\
+    class Decorator():
+        @memoize
+        def dec(self, a):
+            return a
+    """)
+    check_fp(src, 2)
+
+
+def test_nested_funcs():
+    src = dedent("""\
+    def memoize(func):
+        def wrapper(*args, **kwargs):
+            return func(*args, **kwargs)
+        return wrapper
+    """)
+    check_fp(src, 3)
+
+
 def test_incomplete_function():
    source = '''return ImportErr'''