Rework the parser so we can use arbitrary start nodes of the syntax.

This also includes a rework of the error recovery in the parser. Error recovery is now only possible for file_input parsing, i.e. for full files. It also includes a refactoring of the tokenizer: we no longer have to add an additional newline, because it now works correctly (which removes some confusion).
2025-12-06 22:14:27 +08:00 · 2015-12-20 22:21:47 +01:00
parent 9a93d599da
commit c4906e0e3f
22 changed files with 246 additions and 198 deletions
--- a/jedi/api/init.py
+++ b/jedi/api/init.py
@@ -322,15 +322,12 @@ class Script(object):

    @memoize_default()
    def _get_under_cursor_stmt(self, cursor_txt, start_pos=None):
-        tokenizer = source_tokens(cursor_txt)
-        r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
-        try:
-            # Take the last statement available that is not an endmarker.
-            # And because it's a simple_stmt, we need to get the first child.
-            stmt = r.module.children[-2].children[0]
-        except (AttributeError, IndexError):
+        node = Parser(self._grammar, cursor_txt, 'eval_input').get_parsed_node()
+        if node is None:
            return None

+        stmt = node.children[0]
+
        user_stmt = self._parser.user_stmt()
        if user_stmt is None:
            # Set the start_pos to a pseudo position, that doesn't exist but
--- a/jedi/evaluate/compiled/fake.py
+++ b/jedi/evaluate/compiled/fake.py
@@ -8,7 +8,7 @@ import os
 import inspect

 from jedi._compatibility import is_py3, builtins, unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser import tree as pt
 from jedi.evaluate.helpers import FakeName

@@ -31,7 +31,7 @@ def _load_faked_module(module):
            modules[module_name] = None
            return
        grammar = load_grammar('grammar3.4')
-        module = Parser(grammar, unicode(source), module_name).module
+        module = ParserWithRecovery(grammar, unicode(source), module_name).module
        modules[module_name] = module

        if module_name == 'builtins' and not is_py3:
--- a/jedi/evaluate/docstrings.py
+++ b/jedi/evaluate/docstrings.py
@@ -20,7 +20,7 @@ from itertools import chain
 from textwrap import dedent

 from jedi.evaluate.cache import memoize_default
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.common import indent_block
 from jedi.evaluate.iterable import Array, FakeSequence, AlreadyEvaluated

@@ -130,7 +130,7 @@ def _evaluate_for_statement_string(evaluator, string, module):
    # Take the default grammar here, if we load the Python 2.7 grammar here, it
    # will be impossible to use `...` (Ellipsis) as a token. Docstring types
    # don't need to conform with the current grammar.
-    p = Parser(load_grammar(), code % indent_block(string))
+    p = ParserWithRecovery(load_grammar(), code % indent_block(string))
    try:
        pseudo_cls = p.module.subscopes[0]
        # First pick suite, then simple_stmt (-2 for DEDENT) and then the node,
--- a/jedi/evaluate/finder.py
+++ b/jedi/evaluate/finder.py
@@ -487,8 +487,8 @@ def global_names_dict_generator(evaluator, scope, position):
    the current scope is function:

    >>> from jedi._compatibility import u, no_unicode_pprint
-    >>> from jedi.parser import Parser, load_grammar
-    >>> parser = Parser(load_grammar(), u('''
+    >>> from jedi.parser import ParserWithRecovery, load_grammar
+    >>> parser = ParserWithRecovery(load_grammar(), u('''
    ... x = ['a', 'b', 'c']
    ... def func():
    ...     y = None
--- a/jedi/evaluate/pep0484.py
+++ b/jedi/evaluate/pep0484.py
@@ -19,9 +19,11 @@ x support for type hint comments `# type: (int, str) -> int`. See comment from
 """

 from itertools import chain
+
 from jedi.parser import Parser, load_grammar
 from jedi.evaluate.cache import memoize_default
 from jedi.evaluate.compiled import CompiledObject
+from jedi import debug


 def _evaluate_for_annotation(evaluator, annotation):
@@ -30,11 +32,11 @@ def _evaluate_for_annotation(evaluator, annotation):
        for definition in evaluator.eval_element(annotation):
            if (isinstance(definition, CompiledObject) and
                    isinstance(definition.obj, str)):
-                p = Parser(load_grammar(), definition.obj)
-                try:
-                    element = p.module.children[0].children[0]
-                except (AttributeError, IndexError):
-                    continue
+                p = Parser(load_grammar(), definition.obj, start='expr')
+                element = p.get_parsed_node()
+                if element is None:
+                    debug.warning('Annotation not parsed: %s' % definition.obj)
+                else:
                    element.parent = annotation.parent
                    definitions |= evaluator.eval_element(element)
            else:
--- a/jedi/evaluate/sys_path.py
+++ b/jedi/evaluate/sys_path.py
@@ -5,7 +5,7 @@ from jedi.evaluate.site import addsitedir

 from jedi._compatibility import exec_function, unicode
 from jedi.parser import tree
-from jedi.parser import Parser
+from jedi.parser import ParserWithRecovery
 from jedi.evaluate.cache import memoize_default
 from jedi import debug
 from jedi import common
@@ -209,7 +209,7 @@ def _get_paths_from_buildout_script(evaluator, buildout_script):
            debug.dbg('Error trying to read buildout_script: %s', buildout_script)
            return

-        p = Parser(evaluator.grammar, source, buildout_script)
+        p = ParserWithRecovery(evaluator.grammar, source, buildout_script)
        cache.save_parser(buildout_script, p)
        return p.module

--- a/jedi/parser/init.py
+++ b/jedi/parser/init.py
@@ -81,18 +81,7 @@ class ParserSyntaxError(object):


 class Parser(object):
-    """
-    This class is used to parse a Python file, it then divides them into a
-    class structure of different scopes.
-
-    :param grammar: The grammar object of pgen2. Loaded by load_grammar.
-    :param source: The codebase for the parser. Must be unicode.
-    :param module_path: The path of the module in the file system, may be None.
-    :type module_path: str
-    :param top_module: Use this module as a parent instead of `self.module`.
-    """
-    def __init__(self, grammar, source, module_path=None, tokenizer=None):
-        self._ast_mapping = {
+    AST_MAPPING = {
        'expr_stmt': pt.ExprStmt,
        'classdef': pt.Class,
        'funcdef': pt.Function,
@@ -122,51 +111,57 @@ class Parser(object):
        'lambdef_nocond': pt.Lambda,
    }

-        self.syntax_errors = []
+    class ParserError(Exception):
+        pass

-        self._global_names = []
-        self._omit_dedent_list = []
-        self._indent_counter = 0
-        self._last_failed_start_pos = (0, 0)
+    def __init__(self, grammar, source, start, tokenizer=None):
+        start_number = grammar.symbol2number[start]

-        # TODO do print absolute import detection here.
-        #try:
-        #    del python_grammar_no_print_statement.keywords["print"]
-        #except KeyError:
-        #    pass  # Doesn't exist in the Python 3 grammar.
-
-        #if self.options["print_function"]:
-        #    python_grammar = pygram.python_grammar_no_print_statement
-        #else:
        self._used_names = {}
        self._scope_names_stack = [{}]
        self._error_statement_stacks = []
-
-        added_newline = False
-        # The Python grammar needs a newline at the end of each statement.
-        if not source.endswith('\n'):
-            source += '\n'
-            added_newline = True
+        self._last_failed_start_pos = (0, 0)
+        self._global_names = []

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()
+
+        added_newline = False
+        # The Python grammar needs a newline at the end of each statement.
+        if not source.endswith('\n') and start == 'file_input':
+            source += '\n'
+            added_newline = True
+
        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
-                       self.error_recovery)
-        tokenizer = tokenizer or tokenize.source_tokens(source)
-        self.module = p.parse(self._tokenize(tokenizer))
-        if self.module.type != 'file_input':
+                       self.error_recovery, start_number)
+        if tokenizer is None:
+            tokenizer = tokenize.source_tokens(source)
+        try:
+            self._parsed = p.parse(self._tokenize(tokenizer))
+        except Parser.ParserError:
+            self._parsed = None
+        else:
+            if start == 'file_input' != self._parsed.type:
                # If there's only one statement, we get back a non-module. That's
                # not what we want, we want a module, so we add it here:
-            self.module = self.convert_node(grammar,
+                self._parsed = self.convert_node(grammar,
                                                 grammar.symbol2number['file_input'],
-                                            [self.module])
+                                                 [self._parsed])

            if added_newline:
                self.remove_last_newline()
-        self.module.used_names = self._used_names
-        self.module.path = module_path
-        self.module.global_names = self._global_names
-        self.module.error_statement_stacks = self._error_statement_stacks
+
+    def get_parsed_node(self):
+        return self._parsed
+
+    def _tokenize(self, tokenizer):
+        for typ, value, start_pos, prefix in tokenizer:
+            if typ == OP:
+                typ = token.opmap[value]
+            yield typ, value, prefix, start_pos
+
+    def error_recovery(self, *args, **kwargs):
+        raise Parser.ParserError

    def convert_node(self, grammar, type, children):
        """
@@ -178,7 +173,7 @@ class Parser(object):
        """
        symbol = grammar.number2symbol[type]
        try:
-            new_node = self._ast_mapping[symbol](children)
+            new_node = Parser.AST_MAPPING[symbol](children)
        except KeyError:
            new_node = pt.Node(symbol, children)

@@ -231,6 +226,83 @@ class Parser(object):
        else:
            return pt.Operator(self.position_modifier, value, start_pos, prefix)

+    def remove_last_newline(self):
+        """
+        In all of this we need to work with _start_pos, because if we worked
+        with start_pos, we would need to check the position_modifier as well
+        (which is accounted for in the start_pos property).
+        """
+        endmarker = self._parsed.children[-1]
+        # The newline is either in the endmarker as a prefix or the previous
+        # leaf as a newline token.
+        if endmarker.prefix.endswith('\n'):
+            endmarker.prefix = endmarker.prefix[:-1]
+            last_line = re.sub('.*\n', '', endmarker.prefix)
+            endmarker._start_pos = endmarker._start_pos[0] - 1, len(last_line)
+        else:
+            try:
+                newline = endmarker.get_previous()
+            except IndexError:
+                return  # This means that the parser is empty.
+            while True:
+                if newline.value == '':
+                    # Must be a DEDENT, just continue.
+                    try:
+                        newline = newline.get_previous()
+                    except IndexError:
+                        # If there's a statement that fails to be parsed, there
+                        # will be no previous leaf. So just ignore it.
+                        break
+                elif newline.value != '\n':
+                    # This may happen if error correction strikes and removes
+                    # a whole statement including '\n'.
+                    break
+                else:
+                    newline.value = ''
+                    if self._last_failed_start_pos > newline._start_pos:
+                        # It may be the case that there was a syntax error in a
+                        # function. In that case error correction removes the
+                        # right newline. So we use the previously assigned
+                        # _last_failed_start_pos variable to account for that.
+                        endmarker._start_pos = self._last_failed_start_pos
+                    else:
+                        endmarker._start_pos = newline._start_pos
+                    break
+
+
+class ParserWithRecovery(Parser):
+    """
+    This class is used to parse a Python file, it then divides them into a
+    class structure of different scopes.
+
+    :param grammar: The grammar object of pgen2. Loaded by load_grammar.
+    :param source: The codebase for the parser. Must be unicode.
+    :param module_path: The path of the module in the file system, may be None.
+    :type module_path: str
+    """
+    def __init__(self, grammar, source, module_path=None, tokenizer=None):
+        self.syntax_errors = []
+
+        self._omit_dedent_list = []
+        self._indent_counter = 0
+
+        # TODO do print absolute import detection here.
+        #try:
+        #    del python_grammar_no_print_statement.keywords["print"]
+        #except KeyError:
+        #    pass  # Doesn't exist in the Python 3 grammar.
+
+        #if self.options["print_function"]:
+        #    python_grammar = pygram.python_grammar_no_print_statement
+        #else:
+        super(ParserWithRecovery, self).__init__(grammar, source, 'file_input', tokenizer)
+
+        self.module = self._parsed
+        self.module.used_names = self._used_names
+        self.module.path = module_path
+        self.module.global_names = self._global_names
+        self.module.error_statement_stacks = self._error_statement_stacks
+
    def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
                       add_token_callback):
        """
@@ -349,46 +421,3 @@ class Parser(object):

    def __repr__(self):
        return "<%s: %s>" % (type(self).__name__, self.module)
-
-    def remove_last_newline(self):
-        """
-        In all of this we need to work with _start_pos, because if we worked
-        with start_pos, we would need to check the position_modifier as well
-        (which is accounted for in the start_pos property).
-        """
-        endmarker = self.module.children[-1]
-        # The newline is either in the endmarker as a prefix or the previous
-        # leaf as a newline token.
-        if endmarker.prefix.endswith('\n'):
-            endmarker.prefix = endmarker.prefix[:-1]
-            last_line = re.sub('.*\n', '', endmarker.prefix)
-            endmarker._start_pos = endmarker._start_pos[0] - 1, len(last_line)
-        else:
-            try:
-                newline = endmarker.get_previous()
-            except IndexError:
-                return  # This means that the parser is empty.
-            while True:
-                if newline.value == '':
-                    # Must be a DEDENT, just continue.
-                    try:
-                        newline = newline.get_previous()
-                    except IndexError:
-                        # If there's a statement that fails to be parsed, there
-                        # will be no previous leaf. So just ignore it.
-                        break
-                elif newline.value != '\n':
-                    # This may happen if error correction strikes and removes
-                    # a whole statement including '\n'.
-                    break
-                else:
-                    newline.value = ''
-                    if self._last_failed_start_pos > newline._start_pos:
-                        # It may be the case that there was a syntax error in a
-                        # function. In that case error correction removes the
-                        # right newline. So we use the previously assigned
-                        # _last_failed_start_pos variable to account for that.
-                        endmarker._start_pos = self._last_failed_start_pos
-                    else:
-                        endmarker._start_pos = newline._start_pos
-                    break
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -8,7 +8,7 @@ from itertools import chain

 from jedi._compatibility import use_metaclass
 from jedi import settings
-from jedi.parser import Parser
+from jedi.parser import ParserWithRecovery
 from jedi.parser import tree
 from jedi import cache
 from jedi import debug
@@ -52,8 +52,9 @@ class FastModule(tree.Module):
        return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
                                        self.start_pos[0], self.end_pos[0])

-    # To avoid issues with with the `parser.Parser`, we need setters that do
-    # nothing, because if pickle comes along and sets those values.
+    # To avoid issues with with the `parser.ParserWithRecovery`, we need
+    # setters that do nothing, because if pickle comes along and sets those
+    # values.
    @global_names.setter
    def global_names(self, value):
        pass
@@ -99,10 +100,10 @@ class CachedFastParser(type):
    """ This is a metaclass for caching `FastParser`. """
    def __call__(self, grammar, source, module_path=None):
        if not settings.fast_parser:
-            return Parser(grammar, source, module_path)
+            return ParserWithRecovery(grammar, source, module_path)

        pi = cache.parser_cache.get(module_path, None)
-        if pi is None or isinstance(pi.parser, Parser):
+        if pi is None or isinstance(pi.parser, ParserWithRecovery):
            p = super(CachedFastParser, self).__call__(grammar, source, module_path)
        else:
            p = pi.parser  # pi is a `cache.ParserCacheItem`
@@ -432,7 +433,7 @@ class FastParser(use_metaclass(CachedFastParser)):
        else:
            tokenizer = FastTokenizer(parser_code)
            self.number_parsers_used += 1
-            p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
+            p = ParserWithRecovery(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)

            end = line_offset + p.module.end_pos[0]
            used_lines = self._lines[line_offset:end - 1]
--- a/jedi/parser/pgen2/parse.py
+++ b/jedi/parser/pgen2/parse.py
@@ -60,7 +60,7 @@ class PgenParser(object):

    """

-    def __init__(self, grammar, convert_node, convert_leaf, error_recovery):
+    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
@@ -90,8 +90,6 @@ class PgenParser(object):
        self.convert_node = convert_node
        self.convert_leaf = convert_leaf

-        # Prepare for parsing.
-        start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, children),
        # where children is a list of nodes or None
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -149,7 +149,7 @@ ALWAYS_BREAK_TOKENS = (';', 'import', 'from', 'class', 'def', 'try', 'except',

 def source_tokens(source):
    """Generate tokens from a the source code (string)."""
-    source = source + '\n'  # end with \n, because the parser needs it
+    source = source
    readline = StringIO(source).readline
    return generate_tokens(readline)

@@ -165,6 +165,7 @@ def generate_tokens(readline):
    paren_level = 0  # count parentheses
    indents = [0]
    lnum = 0
+    max = 0
    numchars = '0123456789'
    contstr = ''
    contline = None
@@ -282,9 +283,12 @@ def generate_tokens(readline):
                    paren_level -= 1
                yield OP, token, spos, prefix

-    end_pos = (lnum, max - 1)
+    if new_line:
+        end_pos = lnum + 1, 0
+    else:
+        end_pos = lnum, max - 1
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        yield DEDENT, '', end_pos, ''
-    yield ENDMARKER, '', end_pos, prefix
+    yield ENDMARKER, '', end_pos, additional_prefix
--- a/jedi/parser/tree.py
+++ b/jedi/parser/tree.py
@@ -14,8 +14,8 @@ The easiest way to play with this module is to use :class:`parsing.Parser`.
 :attr:`parsing.Parser.module` holds an instance of :class:`Module`:

 >>> from jedi._compatibility import u
->>> from jedi.parser import Parser, load_grammar
->>> parser = Parser(load_grammar(), u('import os'), 'example.py')
+>>> from jedi.parser import ParserWithRecovery, load_grammar
+>>> parser = ParserWithRecovery(load_grammar(), u('import os'), 'example.py')
 >>> submodule = parser.module
 >>> submodule
 <Module: example.py@1-1>
--- a/jedi/parser/user_context.py
+++ b/jedi/parser/user_context.py
@@ -4,7 +4,7 @@ import keyword

 from jedi import cache
 from jedi import common
-from jedi.parser import tokenize, Parser
+from jedi.parser import tokenize, ParserWithRecovery
 from jedi._compatibility import u
 from jedi.parser.fast import FastParser
 from jedi.parser import tree
@@ -284,7 +284,7 @@ class UserContextParser(object):
            # Don't pickle that module, because the main module is changing quickly
            cache.save_parser(self._path, parser, pickling=False)
        else:
-            parser = Parser(self._grammar, self._source, self._path)
+            parser = ParserWithRecovery(self._grammar, self._source, self._path)
        self._parser_done_callback(parser)
        return parser

--- a/test/test_evaluate/test_absolute_import.py
+++ b/test/test_evaluate/test_absolute_import.py
@@ -4,7 +4,7 @@ Python 2.X)
 """
 import jedi
 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from .. import helpers


@@ -12,7 +12,7 @@ def test_explicit_absolute_imports():
    """
    Detect modules with ``from __future__ import absolute_import``.
    """
-    parser = Parser(load_grammar(), u("from __future__ import absolute_import"), "test.py")
+    parser = ParserWithRecovery(load_grammar(), u("from __future__ import absolute_import"), "test.py")
    assert parser.module.has_explicit_absolute_import


@@ -20,7 +20,7 @@ def test_no_explicit_absolute_imports():
    """
     Detect modules without ``from __future__ import absolute_import``.
    """
-    parser = Parser(load_grammar(), u("1"), "test.py")
+    parser = ParserWithRecovery(load_grammar(), u("1"), "test.py")
    assert not parser.module.has_explicit_absolute_import


@@ -30,7 +30,7 @@ def test_dont_break_imports_without_namespaces():
    assume that all imports have non-``None`` namespaces.
    """
    src = u("from __future__ import absolute_import\nimport xyzzy")
-    parser = Parser(load_grammar(), src, "test.py")
+    parser = ParserWithRecovery(load_grammar(), src, "test.py")
    assert parser.module.has_explicit_absolute_import


--- a/test/test_evaluate/test_annotations.py
+++ b/test/test_evaluate/test_annotations.py
@@ -35,3 +35,20 @@ def test_simple_annotations():
    annot('')""")

    assert [d.name for d in jedi.Script(source, ).goto_definitions()] == ['int']
+
+
+@pytest.mark.skipif('sys.version_info[0] < 3')
+@pytest.mark.parametrize('reference', [
+    'assert 1',
+    '1',
+    'lambda: 3',
+    'def x(): pass',
+    '1, 2',
+    r'1\n'
+])
+def test_illegal_forward_references(reference):
+    source = """
+    def foo(bar: "%s"):
+        bar""" % reference
+
+    assert not jedi.Script(source).goto_definitions()
--- a/test/test_evaluate/test_buildout_detection.py
+++ b/test/test_evaluate/test_buildout_detection.py
@@ -7,7 +7,7 @@ from jedi.evaluate.sys_path import (_get_parent_dir_with_file,
                                    sys_path_with_modifications,
                                    _check_module)
 from jedi.evaluate import Evaluator
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 from ..helpers import cwd_at

@@ -37,7 +37,7 @@ def test_append_on_non_sys_path():
        d = Dummy()
        d.path.append('foo')"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'foo' not in paths
@@ -48,7 +48,7 @@ def test_path_from_invalid_sys_path_assignment():
        import sys
        sys.path = 'invalid'"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'invalid' not in paths
@@ -60,7 +60,7 @@ def test_sys_path_with_modifications():
        import os
    """))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    p.module.path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    paths = sys_path_with_modifications(Evaluator(grammar), p.module)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
@@ -83,7 +83,7 @@ def test_path_from_sys_path_assignment():
        if __name__ == '__main__':
            sys.exit(important_package.main())"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert 1 not in paths
    assert '/home/test/.buildout/eggs/important_package.egg' in paths
--- a/test/test_evaluate/test_sys_path.py
+++ b/test/test_evaluate/test_sys_path.py
@@ -5,14 +5,14 @@ import sys
 import pytest

 from jedi._compatibility import unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.evaluate import sys_path, Evaluator


 def test_paths_from_assignment():
    def paths(src):
        grammar = load_grammar()
-        stmt = Parser(grammar, unicode(src)).module.statements[0]
+        stmt = ParserWithRecovery(grammar, unicode(src)).module.statements[0]
        return set(sys_path._paths_from_assignment(Evaluator(grammar), stmt))

    assert paths('sys.path[0:0] = ["a"]') == set(['a'])
--- a/test/test_new_parser.py
+++ b/test/test_new_parser.py
@@ -1,11 +1,11 @@
 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar


 def test_basic_parsing():
    def compare(string):
        """Generates the AST object and then regenerates the code."""
-        assert Parser(load_grammar(), string).module.get_code() == string
+        assert ParserWithRecovery(load_grammar(), string).module.get_code() == string

    compare(u('\na #pass\n'))
    compare(u('wblabla* 1\t\n'))
--- a/test/test_parser/test_get_code.py
+++ b/test/test_parser/test_get_code.py
@@ -3,7 +3,7 @@ import difflib
 import pytest

 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 code_basic_features = u('''
 """A mod docstring"""
@@ -44,7 +44,7 @@ def diff_code_assert(a, b, n=4):
 def test_basic_parsing():
    """Validate the parsing features"""

-    prs = Parser(load_grammar(), code_basic_features)
+    prs = ParserWithRecovery(load_grammar(), code_basic_features)
    diff_code_assert(
        code_basic_features,
        prs.module.get_code()
@@ -53,7 +53,7 @@ def test_basic_parsing():

 def test_operators():
    src = u('5  * 3')
-    prs = Parser(load_grammar(), src)
+    prs = ParserWithRecovery(load_grammar(), src)
    diff_code_assert(src, prs.module.get_code())


@@ -82,7 +82,7 @@ def method_with_docstring():
    """class docstr"""
    pass
 ''')
-    assert Parser(load_grammar(), s).module.get_code() == s
+    assert ParserWithRecovery(load_grammar(), s).module.get_code() == s


 def test_end_newlines():
@@ -92,7 +92,7 @@ def test_end_newlines():
    line the parser needs.
    """
    def test(source, end_pos):
-        module = Parser(load_grammar(), u(source)).module
+        module = ParserWithRecovery(load_grammar(), u(source)).module
        assert module.get_code() == source
        assert module.end_pos == end_pos

--- a/test/test_parser/test_parser.py
+++ b/test/test_parser/test_parser.py
@@ -3,7 +3,7 @@ import sys

 import jedi
 from jedi._compatibility import u, is_py3
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser.user_context import UserContextParser
 from jedi.parser import tree as pt
 from textwrap import dedent
@@ -23,7 +23,7 @@ def test_user_statement_on_import():
 class TestCallAndName():
    def get_call(self, source):
        # Get the simple_stmt and then the first one.
-        simple_stmt = Parser(load_grammar(), u(source)).module.children[0]
+        simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
        return simple_stmt.children[0]

    def test_name_and_call_positions(self):
@@ -58,7 +58,7 @@ class TestCallAndName():

 class TestSubscopes():
    def get_sub(self, source):
-        return Parser(load_grammar(), u(source)).module.subscopes[0]
+        return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]

    def test_subscope_names(self):
        name = self.get_sub('class Foo: pass').name
@@ -74,7 +74,7 @@ class TestSubscopes():

 class TestImports():
    def get_import(self, source):
-        return Parser(load_grammar(), source).module.imports[0]
+        return ParserWithRecovery(load_grammar(), source).module.imports[0]

    def test_import_names(self):
        imp = self.get_import(u('import math\n'))
@@ -89,13 +89,13 @@ class TestImports():


 def test_module():
-    module = Parser(load_grammar(), u('asdf'), 'example.py').module
+    module = ParserWithRecovery(load_grammar(), u('asdf'), 'example.py').module
    name = module.name
    assert str(name) == 'example'
    assert name.start_pos == (1, 0)
    assert name.end_pos == (1, 7)

-    module = Parser(load_grammar(), u('asdf')).module
+    module = ParserWithRecovery(load_grammar(), u('asdf')).module
    name = module.name
    assert str(name) == ''
    assert name.start_pos == (1, 0)
@@ -108,7 +108,7 @@ def test_end_pos():
                 def func():
                     y = None
                 '''))
-    parser = Parser(load_grammar(), s)
+    parser = ParserWithRecovery(load_grammar(), s)
    scope = parser.module.subscopes[0]
    assert scope.start_pos == (3, 0)
    assert scope.end_pos == (5, 0)
@@ -121,7 +121,7 @@ def test_carriage_return_statements():
        # this is a namespace package
    '''))
    source = source.replace('\n', '\r\n')
-    stmt = Parser(load_grammar(), source).module.statements[0]
+    stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
    assert '#' not in stmt.get_code()


@@ -129,7 +129,7 @@ def test_incomplete_list_comprehension():
    """ Shouldn't raise an error, same bug as #418. """
    # With the old parser this actually returned a statement. With the new
    # parser only valid statements generate one.
-    assert Parser(load_grammar(), u('(1 for def')).module.statements == []
+    assert ParserWithRecovery(load_grammar(), u('(1 for def')).module.statements == []


 def test_hex_values_in_docstring():
@@ -141,7 +141,7 @@ def test_hex_values_in_docstring():
            return 1
        '''

-    doc = Parser(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
+    doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
    if is_py3:
        assert doc == '\xff'
    else:
@@ -160,7 +160,7 @@ def test_error_correction_with():


 def test_newline_positions():
-    endmarker = Parser(load_grammar(), u('a\n')).module.children[-1]
+    endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
    assert endmarker.end_pos == (2, 0)
    new_line = endmarker.get_previous()
    assert new_line.start_pos == (1, 1)
@@ -174,7 +174,7 @@ def test_end_pos_error_correction():
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
-    m = Parser(load_grammar(), s).module
+    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    # This is not exactly correct, but ok, because it doesn't make a difference
@@ -191,7 +191,7 @@ def test_param_splitting():
    def check(src, result):
        # Python 2 tuple params should be ignored for now.
        grammar = load_grammar('grammar%s.%s' % sys.version_info[:2])
-        m = Parser(grammar, u(src)).module
+        m = ParserWithRecovery(grammar, u(src)).module
        if is_py3:
            assert not m.subscopes
        else:
@@ -211,5 +211,5 @@ def test_unicode_string():

 def test_backslash_dos_style():
    grammar = load_grammar()
-    m = Parser(grammar, u('\\\r\n')).module
+    m = ParserWithRecovery(grammar, u('\\\r\n')).module
    assert m
--- a/test/test_parser/test_parser_tree.py
+++ b/test/test_parser/test_parser_tree.py
@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest

 from jedi._compatibility import u, unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser import tree as pt


@@ -27,7 +27,7 @@ class TestsFunctionAndLambdaParsing(object):

    @pytest.fixture(params=FIXTURES)
    def node(self, request):
-        parsed = Parser(load_grammar(), dedent(u(request.param[0])))
+        parsed = ParserWithRecovery(load_grammar(), dedent(u(request.param[0])))
        request.keywords['expected'] = request.param[1]
        return parsed.module.subscopes[0]

--- a/test/test_parser/test_tokenize.py
+++ b/test/test_parser/test_tokenize.py
@@ -7,7 +7,7 @@ import pytest

 from jedi._compatibility import u, is_py3
 from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
-from jedi.parser import Parser, load_grammar, tokenize
+from jedi.parser import ParserWithRecovery, load_grammar, tokenize


 from ..helpers import unittest
@@ -15,7 +15,7 @@ from ..helpers import unittest

 class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = "huhu"
        ''')))
@@ -23,7 +23,7 @@ class TokenTest(unittest.TestCase):
        assert tok.end_pos == (3, 14)

    def test_end_pos_multi_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = """huhu
        asdfasdf""" + "h"
@@ -108,7 +108,7 @@ class TokenTest(unittest.TestCase):
        ]

        for s in string_tokens:
-            parsed = Parser(load_grammar(), u('''a = %s\n''' % s))
+            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
            simple_stmt = parsed.module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
--- a/test/test_regression.py
+++ b/test/test_regression.py
@@ -15,7 +15,7 @@ from jedi._compatibility import u
 from jedi import Script
 from jedi import api
 from jedi.evaluate import imports
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 #jedi.set_debug_function()

@@ -102,7 +102,7 @@ class TestRegression(TestCase):
    def test_end_pos_line(self):
        # jedi issue #150
        s = u("x()\nx( )\nx(  )\nx (  )")
-        parser = Parser(load_grammar(), s)
+        parser = ParserWithRecovery(load_grammar(), s)
        for i, s in enumerate(parser.module.statements):
            assert s.end_pos == (i + 1, i + 3)