Rework the parser so we can use arbitrary start symbols of the grammar.

This also includes a rework of error recovery in the parser. Error recovery is now only possible for file_input parsing, i.e. for full files.
It also includes a refactoring of the tokenizer: we no longer have to add an additional newline, because the tokenizer now works correctly (which removes a certain amount of confusion).
Dave Halter
2015-12-20 22:21:47 +01:00
parent 9a93d599da
commit c4906e0e3f
22 changed files with 246 additions and 198 deletions
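
In terms of usage, the new API looks roughly like this (a minimal sketch based on the diffs below; the snippet itself is illustrative and not part of the commit):

    from jedi._compatibility import u
    from jedi.parser import Parser, ParserWithRecovery, load_grammar

    grammar = load_grammar()

    # Parse a single expression by choosing a start symbol other than
    # 'file_input'. There is no error recovery on this path: if the snippet
    # does not parse, get_parsed_node() returns None.
    node = Parser(grammar, u('foo(bar)'), 'eval_input').get_parsed_node()

    # Whole files keep the old behaviour (error recovery, module attributes)
    # through the new ParserWithRecovery subclass.
    module = ParserWithRecovery(grammar, u('import os\n'), 'example.py').module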

View File

@@ -322,15 +322,12 @@ class Script(object):
     @memoize_default()
     def _get_under_cursor_stmt(self, cursor_txt, start_pos=None):
-        tokenizer = source_tokens(cursor_txt)
-        r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
-        try:
-            # Take the last statement available that is not an endmarker.
-            # And because it's a simple_stmt, we need to get the first child.
-            stmt = r.module.children[-2].children[0]
-        except (AttributeError, IndexError):
+        node = Parser(self._grammar, cursor_txt, 'eval_input').get_parsed_node()
+        if node is None:
             return None
+        stmt = node.children[0]

         user_stmt = self._parser.user_stmt()
         if user_stmt is None:
             # Set the start_pos to a pseudo position, that doesn't exist but

View File

@@ -8,7 +8,7 @@ import os
 import inspect

 from jedi._compatibility import is_py3, builtins, unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser import tree as pt
 from jedi.evaluate.helpers import FakeName
@@ -31,7 +31,7 @@ def _load_faked_module(module):
         modules[module_name] = None
         return
     grammar = load_grammar('grammar3.4')
-    module = Parser(grammar, unicode(source), module_name).module
+    module = ParserWithRecovery(grammar, unicode(source), module_name).module
     modules[module_name] = module

     if module_name == 'builtins' and not is_py3:

View File

@@ -20,7 +20,7 @@ from itertools import chain
 from textwrap import dedent

 from jedi.evaluate.cache import memoize_default
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.common import indent_block
 from jedi.evaluate.iterable import Array, FakeSequence, AlreadyEvaluated
@@ -130,7 +130,7 @@ def _evaluate_for_statement_string(evaluator, string, module):
     # Take the default grammar here, if we load the Python 2.7 grammar here, it
     # will be impossible to use `...` (Ellipsis) as a token. Docstring types
     # don't need to conform with the current grammar.
-    p = Parser(load_grammar(), code % indent_block(string))
+    p = ParserWithRecovery(load_grammar(), code % indent_block(string))
     try:
         pseudo_cls = p.module.subscopes[0]
         # First pick suite, then simple_stmt (-2 for DEDENT) and then the node,

View File

@@ -487,8 +487,8 @@ def global_names_dict_generator(evaluator, scope, position):
     the current scope is function:

     >>> from jedi._compatibility import u, no_unicode_pprint
-    >>> from jedi.parser import Parser, load_grammar
-    >>> parser = Parser(load_grammar(), u('''
+    >>> from jedi.parser import ParserWithRecovery, load_grammar
+    >>> parser = ParserWithRecovery(load_grammar(), u('''
     ... x = ['a', 'b', 'c']
     ... def func():
     ...     y = None

View File

@@ -19,9 +19,11 @@ x support for type hint comments `# type: (int, str) -> int`. See comment from
""" """
from itertools import chain from itertools import chain
from jedi.parser import Parser, load_grammar from jedi.parser import Parser, load_grammar
from jedi.evaluate.cache import memoize_default from jedi.evaluate.cache import memoize_default
from jedi.evaluate.compiled import CompiledObject from jedi.evaluate.compiled import CompiledObject
from jedi import debug
def _evaluate_for_annotation(evaluator, annotation): def _evaluate_for_annotation(evaluator, annotation):
@@ -30,13 +32,13 @@ def _evaluate_for_annotation(evaluator, annotation):
     for definition in evaluator.eval_element(annotation):
         if (isinstance(definition, CompiledObject) and
                 isinstance(definition.obj, str)):
-            p = Parser(load_grammar(), definition.obj)
-            try:
-                element = p.module.children[0].children[0]
-            except (AttributeError, IndexError):
-                continue
-            element.parent = annotation.parent
-            definitions |= evaluator.eval_element(element)
+            p = Parser(load_grammar(), definition.obj, start='expr')
+            element = p.get_parsed_node()
+            if element is None:
+                debug.warning('Annotation not parsed: %s' % definition.obj)
+            else:
+                element.parent = annotation.parent
+                definitions |= evaluator.eval_element(element)
         else:
             definitions.add(definition)
     return list(chain.from_iterable(

View File

@@ -5,7 +5,7 @@ from jedi.evaluate.site import addsitedir
 from jedi._compatibility import exec_function, unicode
 from jedi.parser import tree
-from jedi.parser import Parser
+from jedi.parser import ParserWithRecovery
 from jedi.evaluate.cache import memoize_default
 from jedi import debug
 from jedi import common
@@ -209,7 +209,7 @@ def _get_paths_from_buildout_script(evaluator, buildout_script):
         debug.dbg('Error trying to read buildout_script: %s', buildout_script)
         return

-    p = Parser(evaluator.grammar, source, buildout_script)
+    p = ParserWithRecovery(evaluator.grammar, source, buildout_script)
     cache.save_parser(buildout_script, p)
     return p.module

View File

@@ -81,92 +81,87 @@ class ParserSyntaxError(object):
 class Parser(object):
-    """
-    This class is used to parse a Python file, it then divides them into a
-    class structure of different scopes.
-
-    :param grammar: The grammar object of pgen2. Loaded by load_grammar.
-    :param source: The codebase for the parser. Must be unicode.
-    :param module_path: The path of the module in the file system, may be None.
-    :type module_path: str
-    :param top_module: Use this module as a parent instead of `self.module`.
-    """
-    def __init__(self, grammar, source, module_path=None, tokenizer=None):
-        self._ast_mapping = {
-            'expr_stmt': pt.ExprStmt,
-            'classdef': pt.Class,
-            'funcdef': pt.Function,
-            'file_input': pt.Module,
-            'import_name': pt.ImportName,
-            'import_from': pt.ImportFrom,
-            'break_stmt': pt.KeywordStatement,
-            'continue_stmt': pt.KeywordStatement,
-            'return_stmt': pt.ReturnStmt,
-            'raise_stmt': pt.KeywordStatement,
-            'yield_expr': pt.YieldExpr,
-            'del_stmt': pt.KeywordStatement,
-            'pass_stmt': pt.KeywordStatement,
-            'global_stmt': pt.GlobalStmt,
-            'nonlocal_stmt': pt.KeywordStatement,
-            'print_stmt': pt.KeywordStatement,
-            'assert_stmt': pt.AssertStmt,
-            'if_stmt': pt.IfStmt,
-            'with_stmt': pt.WithStmt,
-            'for_stmt': pt.ForStmt,
-            'while_stmt': pt.WhileStmt,
-            'try_stmt': pt.TryStmt,
-            'comp_for': pt.CompFor,
-            'decorator': pt.Decorator,
-            'lambdef': pt.Lambda,
-            'old_lambdef': pt.Lambda,
-            'lambdef_nocond': pt.Lambda,
-        }
-
-        self.syntax_errors = []
-
-        self._global_names = []
-        self._omit_dedent_list = []
-        self._indent_counter = 0
-        self._last_failed_start_pos = (0, 0)
-
-        # TODO do print absolute import detection here.
-        #try:
-        #    del python_grammar_no_print_statement.keywords["print"]
-        #except KeyError:
-        #    pass  # Doesn't exist in the Python 3 grammar.
-
-        #if self.options["print_function"]:
-        #    python_grammar = pygram.python_grammar_no_print_statement
-        #else:
+    AST_MAPPING = {
+        'expr_stmt': pt.ExprStmt,
+        'classdef': pt.Class,
+        'funcdef': pt.Function,
+        'file_input': pt.Module,
+        'import_name': pt.ImportName,
+        'import_from': pt.ImportFrom,
+        'break_stmt': pt.KeywordStatement,
+        'continue_stmt': pt.KeywordStatement,
+        'return_stmt': pt.ReturnStmt,
+        'raise_stmt': pt.KeywordStatement,
+        'yield_expr': pt.YieldExpr,
+        'del_stmt': pt.KeywordStatement,
+        'pass_stmt': pt.KeywordStatement,
+        'global_stmt': pt.GlobalStmt,
+        'nonlocal_stmt': pt.KeywordStatement,
+        'print_stmt': pt.KeywordStatement,
+        'assert_stmt': pt.AssertStmt,
+        'if_stmt': pt.IfStmt,
+        'with_stmt': pt.WithStmt,
+        'for_stmt': pt.ForStmt,
+        'while_stmt': pt.WhileStmt,
+        'try_stmt': pt.TryStmt,
+        'comp_for': pt.CompFor,
+        'decorator': pt.Decorator,
+        'lambdef': pt.Lambda,
+        'old_lambdef': pt.Lambda,
+        'lambdef_nocond': pt.Lambda,
+    }
+
+    class ParserError(Exception):
+        pass
+
+    def __init__(self, grammar, source, start, tokenizer=None):
+        start_number = grammar.symbol2number[start]
         self._used_names = {}
         self._scope_names_stack = [{}]
         self._error_statement_stacks = []
-
-        added_newline = False
-        # The Python grammar needs a newline at the end of each statement.
-        if not source.endswith('\n'):
-            source += '\n'
-            added_newline = True
+        self._last_failed_start_pos = (0, 0)
+        self._global_names = []

         # For the fast parser.
         self.position_modifier = pt.PositionModifier()
-        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
-                       self.error_recovery)
-        tokenizer = tokenizer or tokenize.source_tokens(source)
-        self.module = p.parse(self._tokenize(tokenizer))
-        if self.module.type != 'file_input':
-            # If there's only one statement, we get back a non-module. That's
-            # not what we want, we want a module, so we add it here:
-            self.module = self.convert_node(grammar,
-                                            grammar.symbol2number['file_input'],
-                                            [self.module])
-
-        if added_newline:
-            self.remove_last_newline()
-        self.module.used_names = self._used_names
-        self.module.path = module_path
-        self.module.global_names = self._global_names
-        self.module.error_statement_stacks = self._error_statement_stacks
+
+        added_newline = False
+        # The Python grammar needs a newline at the end of each statement.
+        if not source.endswith('\n') and start == 'file_input':
+            source += '\n'
+            added_newline = True
+
+        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
+                       self.error_recovery, start_number)
+        if tokenizer is None:
+            tokenizer = tokenize.source_tokens(source)
+        try:
+            self._parsed = p.parse(self._tokenize(tokenizer))
+        except Parser.ParserError:
+            self._parsed = None
+        else:
+            if start == 'file_input' != self._parsed.type:
+                # If there's only one statement, we get back a non-module. That's
+                # not what we want, we want a module, so we add it here:
+                self._parsed = self.convert_node(grammar,
+                                                 grammar.symbol2number['file_input'],
+                                                 [self._parsed])

+            if added_newline:
+                self.remove_last_newline()
+
+    def get_parsed_node(self):
+        return self._parsed
+
+    def _tokenize(self, tokenizer):
+        for typ, value, start_pos, prefix in tokenizer:
+            if typ == OP:
+                typ = token.opmap[value]
+            yield typ, value, prefix, start_pos
+
+    def error_recovery(self, *args, **kwargs):
+        raise Parser.ParserError

     def convert_node(self, grammar, type, children):
         """
@@ -178,7 +173,7 @@ class Parser(object):
""" """
symbol = grammar.number2symbol[type] symbol = grammar.number2symbol[type]
try: try:
new_node = self._ast_mapping[symbol](children) new_node = Parser.AST_MAPPING[symbol](children)
except KeyError: except KeyError:
new_node = pt.Node(symbol, children) new_node = pt.Node(symbol, children)
@@ -231,6 +226,83 @@ class Parser(object):
         else:
             return pt.Operator(self.position_modifier, value, start_pos, prefix)

+    def remove_last_newline(self):
+        """
+        In all of this we need to work with _start_pos, because if we worked
+        with start_pos, we would need to check the position_modifier as well
+        (which is accounted for in the start_pos property).
+        """
+        endmarker = self._parsed.children[-1]
+        # The newline is either in the endmarker as a prefix or the previous
+        # leaf as a newline token.
+        if endmarker.prefix.endswith('\n'):
+            endmarker.prefix = endmarker.prefix[:-1]
+            last_line = re.sub('.*\n', '', endmarker.prefix)
+            endmarker._start_pos = endmarker._start_pos[0] - 1, len(last_line)
+        else:
+            try:
+                newline = endmarker.get_previous()
+            except IndexError:
+                return  # This means that the parser is empty.
+            while True:
+                if newline.value == '':
+                    # Must be a DEDENT, just continue.
+                    try:
+                        newline = newline.get_previous()
+                    except IndexError:
+                        # If there's a statement that fails to be parsed, there
+                        # will be no previous leaf. So just ignore it.
+                        break
+                elif newline.value != '\n':
+                    # This may happen if error correction strikes and removes
+                    # a whole statement including '\n'.
+                    break
+                else:
+                    newline.value = ''
+                    if self._last_failed_start_pos > newline._start_pos:
+                        # It may be the case that there was a syntax error in a
+                        # function. In that case error correction removes the
+                        # right newline. So we use the previously assigned
+                        # _last_failed_start_pos variable to account for that.
+                        endmarker._start_pos = self._last_failed_start_pos
+                    else:
+                        endmarker._start_pos = newline._start_pos
+                    break
+
+
+class ParserWithRecovery(Parser):
+    """
+    This class is used to parse a Python file, it then divides them into a
+    class structure of different scopes.
+
+    :param grammar: The grammar object of pgen2. Loaded by load_grammar.
+    :param source: The codebase for the parser. Must be unicode.
+    :param module_path: The path of the module in the file system, may be None.
+    :type module_path: str
+    """
+    def __init__(self, grammar, source, module_path=None, tokenizer=None):
+        self.syntax_errors = []
+        self._omit_dedent_list = []
+        self._indent_counter = 0
+
+        # TODO do print absolute import detection here.
+        #try:
+        #    del python_grammar_no_print_statement.keywords["print"]
+        #except KeyError:
+        #    pass  # Doesn't exist in the Python 3 grammar.
+
+        #if self.options["print_function"]:
+        #    python_grammar = pygram.python_grammar_no_print_statement
+        #else:
+        super(ParserWithRecovery, self).__init__(grammar, source, 'file_input', tokenizer)
+
+        self.module = self._parsed
+        self.module.used_names = self._used_names
+        self.module.path = module_path
+        self.module.global_names = self._global_names
+        self.module.error_statement_stacks = self._error_statement_stacks
+
     def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
                        add_token_callback):
         """
@@ -349,46 +421,3 @@ class Parser(object):
     def __repr__(self):
         return "<%s: %s>" % (type(self).__name__, self.module)
-
-    def remove_last_newline(self):
-        """
-        In all of this we need to work with _start_pos, because if we worked
-        with start_pos, we would need to check the position_modifier as well
-        (which is accounted for in the start_pos property).
-        """
-        endmarker = self.module.children[-1]
-        # The newline is either in the endmarker as a prefix or the previous
-        # leaf as a newline token.
-        if endmarker.prefix.endswith('\n'):
-            endmarker.prefix = endmarker.prefix[:-1]
-            last_line = re.sub('.*\n', '', endmarker.prefix)
-            endmarker._start_pos = endmarker._start_pos[0] - 1, len(last_line)
-        else:
-            try:
-                newline = endmarker.get_previous()
-            except IndexError:
-                return  # This means that the parser is empty.
-            while True:
-                if newline.value == '':
-                    # Must be a DEDENT, just continue.
-                    try:
-                        newline = newline.get_previous()
-                    except IndexError:
-                        # If there's a statement that fails to be parsed, there
-                        # will be no previous leaf. So just ignore it.
-                        break
-                elif newline.value != '\n':
-                    # This may happen if error correction strikes and removes
-                    # a whole statement including '\n'.
-                    break
-                else:
-                    newline.value = ''
-                    if self._last_failed_start_pos > newline._start_pos:
-                        # It may be the case that there was a syntax error in a
-                        # function. In that case error correction removes the
-                        # right newline. So we use the previously assigned
-                        # _last_failed_start_pos variable to account for that.
-                        endmarker._start_pos = self._last_failed_start_pos
-                    else:
-                        endmarker._start_pos = newline._start_pos
-                    break
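
The practical effect of the split above, as a rough sketch (names taken from this diff; the concrete snippet is illustrative, not part of the commit):

    from jedi._compatibility import u
    from jedi.parser import Parser, ParserWithRecovery, load_grammar

    grammar = load_grammar()
    broken = u('def x():\n .')

    # The plain Parser no longer recovers: error_recovery() raises
    # Parser.ParserError, which __init__ catches and turns into None.
    assert Parser(grammar, broken, 'file_input').get_parsed_node() is None

    # ParserWithRecovery keeps the old behaviour and still produces a module.
    module = ParserWithRecovery(grammar, broken).module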

View File

@@ -8,7 +8,7 @@ from itertools import chain
 from jedi._compatibility import use_metaclass
 from jedi import settings
-from jedi.parser import Parser
+from jedi.parser import ParserWithRecovery
 from jedi.parser import tree
 from jedi import cache
 from jedi import debug
@@ -52,8 +52,9 @@ class FastModule(tree.Module):
return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name, return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
self.start_pos[0], self.end_pos[0]) self.start_pos[0], self.end_pos[0])
# To avoid issues with with the `parser.Parser`, we need setters that do # To avoid issues with with the `parser.ParserWithRecovery`, we need
# nothing, because if pickle comes along and sets those values. # setters that do nothing, because if pickle comes along and sets those
# values.
@global_names.setter @global_names.setter
def global_names(self, value): def global_names(self, value):
pass pass
@@ -99,10 +100,10 @@ class CachedFastParser(type):
""" This is a metaclass for caching `FastParser`. """ """ This is a metaclass for caching `FastParser`. """
def __call__(self, grammar, source, module_path=None): def __call__(self, grammar, source, module_path=None):
if not settings.fast_parser: if not settings.fast_parser:
return Parser(grammar, source, module_path) return ParserWithRecovery(grammar, source, module_path)
pi = cache.parser_cache.get(module_path, None) pi = cache.parser_cache.get(module_path, None)
if pi is None or isinstance(pi.parser, Parser): if pi is None or isinstance(pi.parser, ParserWithRecovery):
p = super(CachedFastParser, self).__call__(grammar, source, module_path) p = super(CachedFastParser, self).__call__(grammar, source, module_path)
else: else:
p = pi.parser # pi is a `cache.ParserCacheItem` p = pi.parser # pi is a `cache.ParserCacheItem`
@@ -432,7 +433,7 @@ class FastParser(use_metaclass(CachedFastParser)):
         else:
             tokenizer = FastTokenizer(parser_code)
         self.number_parsers_used += 1
-        p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
+        p = ParserWithRecovery(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)

         end = line_offset + p.module.end_pos[0]
         used_lines = self._lines[line_offset:end - 1]

View File

@@ -60,7 +60,7 @@ class PgenParser(object):
""" """
def __init__(self, grammar, convert_node, convert_leaf, error_recovery): def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
"""Constructor. """Constructor.
The grammar argument is a grammar.Grammar instance; see the The grammar argument is a grammar.Grammar instance; see the
@@ -90,8 +90,6 @@ class PgenParser(object):
         self.convert_node = convert_node
         self.convert_leaf = convert_leaf

-        # Prepare for parsing.
-        start = self.grammar.start
         # Each stack entry is a tuple: (dfa, state, node).
         # A node is a tuple: (type, children),
         # where children is a list of nodes or None

View File

@@ -149,7 +149,7 @@ ALWAYS_BREAK_TOKENS = (';', 'import', 'from', 'class', 'def', 'try', 'except',
 def source_tokens(source):
     """Generate tokens from a the source code (string)."""
-    source = source + '\n'  # end with \n, because the parser needs it
+    source = source
     readline = StringIO(source).readline
     return generate_tokens(readline)
@@ -165,6 +165,7 @@ def generate_tokens(readline):
     paren_level = 0  # count parentheses
     indents = [0]
     lnum = 0
+    max = 0
     numchars = '0123456789'
     contstr = ''
     contline = None
@@ -282,9 +283,12 @@ def generate_tokens(readline):
                 paren_level -= 1
             yield OP, token, spos, prefix

-    end_pos = (lnum, max - 1)
+    if new_line:
+        end_pos = lnum + 1, 0
+    else:
+        end_pos = lnum, max - 1
     # As the last position we just take the maximally possible position. We
     # remove -1 for the last new line.
     for indent in indents[1:]:
         yield DEDENT, '', end_pos, ''
-    yield ENDMARKER, '', end_pos, prefix
+    yield ENDMARKER, '', end_pos, additional_prefix
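
With source_tokens() no longer appending a newline, handling snippets without a trailing newline is now the tokenizer's own job; the parser only appends one for 'file_input'. A rough usage sketch (illustrative only):

    from jedi._compatibility import u
    from jedi.parser import tokenize

    # Each token is a (type, value, start_pos, prefix) tuple, in the order
    # consumed by Parser._tokenize() above.
    for typ, value, start_pos, prefix in tokenize.source_tokens(u('1 + 2')):
        print(typ, repr(value), start_pos)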

View File

@@ -14,8 +14,8 @@ The easiest way to play with this module is to use :class:`parsing.Parser`.
 :attr:`parsing.Parser.module` holds an instance of :class:`Module`:

 >>> from jedi._compatibility import u
->>> from jedi.parser import Parser, load_grammar
->>> parser = Parser(load_grammar(), u('import os'), 'example.py')
+>>> from jedi.parser import ParserWithRecovery, load_grammar
+>>> parser = ParserWithRecovery(load_grammar(), u('import os'), 'example.py')
 >>> submodule = parser.module
 >>> submodule
 <Module: example.py@1-1>

View File

@@ -4,7 +4,7 @@ import keyword
 from jedi import cache
 from jedi import common
-from jedi.parser import tokenize, Parser
+from jedi.parser import tokenize, ParserWithRecovery
 from jedi._compatibility import u
 from jedi.parser.fast import FastParser
 from jedi.parser import tree
@@ -284,7 +284,7 @@ class UserContextParser(object):
             # Don't pickle that module, because the main module is changing quickly
             cache.save_parser(self._path, parser, pickling=False)
         else:
-            parser = Parser(self._grammar, self._source, self._path)
+            parser = ParserWithRecovery(self._grammar, self._source, self._path)
             self._parser_done_callback(parser)
         return parser

View File

@@ -4,7 +4,7 @@ Python 2.X)
""" """
import jedi import jedi
from jedi._compatibility import u from jedi._compatibility import u
from jedi.parser import Parser, load_grammar from jedi.parser import ParserWithRecovery, load_grammar
from .. import helpers from .. import helpers
@@ -12,7 +12,7 @@ def test_explicit_absolute_imports():
""" """
Detect modules with ``from __future__ import absolute_import``. Detect modules with ``from __future__ import absolute_import``.
""" """
parser = Parser(load_grammar(), u("from __future__ import absolute_import"), "test.py") parser = ParserWithRecovery(load_grammar(), u("from __future__ import absolute_import"), "test.py")
assert parser.module.has_explicit_absolute_import assert parser.module.has_explicit_absolute_import
@@ -20,7 +20,7 @@ def test_no_explicit_absolute_imports():
""" """
Detect modules without ``from __future__ import absolute_import``. Detect modules without ``from __future__ import absolute_import``.
""" """
parser = Parser(load_grammar(), u("1"), "test.py") parser = ParserWithRecovery(load_grammar(), u("1"), "test.py")
assert not parser.module.has_explicit_absolute_import assert not parser.module.has_explicit_absolute_import
@@ -30,7 +30,7 @@ def test_dont_break_imports_without_namespaces():
     assume that all imports have non-``None`` namespaces.
     """
     src = u("from __future__ import absolute_import\nimport xyzzy")
-    parser = Parser(load_grammar(), src, "test.py")
+    parser = ParserWithRecovery(load_grammar(), src, "test.py")
     assert parser.module.has_explicit_absolute_import

View File

@@ -35,3 +35,20 @@ def test_simple_annotations():
annot('')""") annot('')""")
assert [d.name for d in jedi.Script(source, ).goto_definitions()] == ['int'] assert [d.name for d in jedi.Script(source, ).goto_definitions()] == ['int']
@pytest.mark.skipif('sys.version_info[0] < 3')
@pytest.mark.parametrize('reference', [
'assert 1',
'1',
'lambda: 3',
'def x(): pass',
'1, 2',
r'1\n'
])
def test_illegal_forward_references(reference):
source = """
def foo(bar: "%s"):
bar""" % reference
assert not jedi.Script(source).goto_definitions()

View File

@@ -7,7 +7,7 @@ from jedi.evaluate.sys_path import (_get_parent_dir_with_file,
                                     sys_path_with_modifications,
                                     _check_module)
 from jedi.evaluate import Evaluator
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from ..helpers import cwd_at
@@ -37,7 +37,7 @@ def test_append_on_non_sys_path():
         d = Dummy()
         d.path.append('foo')"""))
     grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
     paths = _check_module(Evaluator(grammar), p.module)
     assert len(paths) > 0
     assert 'foo' not in paths
@@ -48,7 +48,7 @@ def test_path_from_invalid_sys_path_assignment():
         import sys
         sys.path = 'invalid'"""))
     grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
     paths = _check_module(Evaluator(grammar), p.module)
     assert len(paths) > 0
     assert 'invalid' not in paths
@@ -60,7 +60,7 @@ def test_sys_path_with_modifications():
         import os
         """))
     grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
     p.module.path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
     paths = sys_path_with_modifications(Evaluator(grammar), p.module)
     assert '/tmp/.buildout/eggs/important_package.egg' in paths
@@ -83,7 +83,7 @@ def test_path_from_sys_path_assignment():
         if __name__ == '__main__':
             sys.exit(important_package.main())"""))
     grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
     paths = _check_module(Evaluator(grammar), p.module)
     assert 1 not in paths
     assert '/home/test/.buildout/eggs/important_package.egg' in paths

View File

@@ -5,14 +5,14 @@ import sys
 import pytest

 from jedi._compatibility import unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.evaluate import sys_path, Evaluator


 def test_paths_from_assignment():
     def paths(src):
         grammar = load_grammar()
-        stmt = Parser(grammar, unicode(src)).module.statements[0]
+        stmt = ParserWithRecovery(grammar, unicode(src)).module.statements[0]
         return set(sys_path._paths_from_assignment(Evaluator(grammar), stmt))

     assert paths('sys.path[0:0] = ["a"]') == set(['a'])

View File

@@ -1,11 +1,11 @@
 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar


 def test_basic_parsing():
     def compare(string):
         """Generates the AST object and then regenerates the code."""
-        assert Parser(load_grammar(), string).module.get_code() == string
+        assert ParserWithRecovery(load_grammar(), string).module.get_code() == string

     compare(u('\na #pass\n'))
     compare(u('wblabla* 1\t\n'))

View File

@@ -3,7 +3,7 @@ import difflib
 import pytest

 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 code_basic_features = u('''
 """A mod docstring"""
@@ -44,7 +44,7 @@ def diff_code_assert(a, b, n=4):
 def test_basic_parsing():
     """Validate the parsing features"""
-    prs = Parser(load_grammar(), code_basic_features)
+    prs = ParserWithRecovery(load_grammar(), code_basic_features)
     diff_code_assert(
         code_basic_features,
         prs.module.get_code()
@@ -53,7 +53,7 @@ def test_basic_parsing():
 def test_operators():
     src = u('5 * 3')
-    prs = Parser(load_grammar(), src)
+    prs = ParserWithRecovery(load_grammar(), src)
     diff_code_assert(src, prs.module.get_code())
@@ -82,7 +82,7 @@ def method_with_docstring():
"""class docstr""" """class docstr"""
pass pass
''') ''')
assert Parser(load_grammar(), s).module.get_code() == s assert ParserWithRecovery(load_grammar(), s).module.get_code() == s
def test_end_newlines(): def test_end_newlines():
@@ -92,7 +92,7 @@ def test_end_newlines():
     line the parser needs.
     """
     def test(source, end_pos):
-        module = Parser(load_grammar(), u(source)).module
+        module = ParserWithRecovery(load_grammar(), u(source)).module
         assert module.get_code() == source
         assert module.end_pos == end_pos

View File

@@ -3,7 +3,7 @@ import sys
 import jedi
 from jedi._compatibility import u, is_py3
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser.user_context import UserContextParser
 from jedi.parser import tree as pt
 from textwrap import dedent
@@ -23,7 +23,7 @@ def test_user_statement_on_import():
 class TestCallAndName():
     def get_call(self, source):
         # Get the simple_stmt and then the first one.
-        simple_stmt = Parser(load_grammar(), u(source)).module.children[0]
+        simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
         return simple_stmt.children[0]

     def test_name_and_call_positions(self):
@@ -58,7 +58,7 @@ class TestCallAndName():
 class TestSubscopes():
     def get_sub(self, source):
-        return Parser(load_grammar(), u(source)).module.subscopes[0]
+        return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]

     def test_subscope_names(self):
         name = self.get_sub('class Foo: pass').name
class TestImports(): class TestImports():
def get_import(self, source): def get_import(self, source):
return Parser(load_grammar(), source).module.imports[0] return ParserWithRecovery(load_grammar(), source).module.imports[0]
def test_import_names(self): def test_import_names(self):
imp = self.get_import(u('import math\n')) imp = self.get_import(u('import math\n'))
@@ -89,13 +89,13 @@ class TestImports():
def test_module(): def test_module():
module = Parser(load_grammar(), u('asdf'), 'example.py').module module = ParserWithRecovery(load_grammar(), u('asdf'), 'example.py').module
name = module.name name = module.name
assert str(name) == 'example' assert str(name) == 'example'
assert name.start_pos == (1, 0) assert name.start_pos == (1, 0)
assert name.end_pos == (1, 7) assert name.end_pos == (1, 7)
module = Parser(load_grammar(), u('asdf')).module module = ParserWithRecovery(load_grammar(), u('asdf')).module
name = module.name name = module.name
assert str(name) == '' assert str(name) == ''
assert name.start_pos == (1, 0) assert name.start_pos == (1, 0)
@@ -108,7 +108,7 @@ def test_end_pos():
     def func():
         y = None
     '''))
-    parser = Parser(load_grammar(), s)
+    parser = ParserWithRecovery(load_grammar(), s)
     scope = parser.module.subscopes[0]
     assert scope.start_pos == (3, 0)
     assert scope.end_pos == (5, 0)
@@ -121,7 +121,7 @@ def test_carriage_return_statements():
         # this is a namespace package
     '''))
     source = source.replace('\n', '\r\n')
-    stmt = Parser(load_grammar(), source).module.statements[0]
+    stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
     assert '#' not in stmt.get_code()
@@ -129,7 +129,7 @@ def test_incomplete_list_comprehension():
""" Shouldn't raise an error, same bug as #418. """ """ Shouldn't raise an error, same bug as #418. """
# With the old parser this actually returned a statement. With the new # With the old parser this actually returned a statement. With the new
# parser only valid statements generate one. # parser only valid statements generate one.
assert Parser(load_grammar(), u('(1 for def')).module.statements == [] assert ParserWithRecovery(load_grammar(), u('(1 for def')).module.statements == []
def test_hex_values_in_docstring(): def test_hex_values_in_docstring():
@@ -141,7 +141,7 @@ def test_hex_values_in_docstring():
         return 1
     '''

-    doc = Parser(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
+    doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
     if is_py3:
         assert doc == '\xff'
     else:
@@ -160,7 +160,7 @@ def test_error_correction_with():
 def test_newline_positions():
-    endmarker = Parser(load_grammar(), u('a\n')).module.children[-1]
+    endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
     assert endmarker.end_pos == (2, 0)
     new_line = endmarker.get_previous()
     assert new_line.start_pos == (1, 1)
@@ -174,7 +174,7 @@ def test_end_pos_error_correction():
     end_pos, even if something breaks in the parser (error correction).
     """
     s = u('def x():\n .')
-    m = Parser(load_grammar(), s).module
+    m = ParserWithRecovery(load_grammar(), s).module
     func = m.children[0]
     assert func.type == 'funcdef'
     # This is not exactly correct, but ok, because it doesn't make a difference
@@ -191,7 +191,7 @@ def test_param_splitting():
     def check(src, result):
         # Python 2 tuple params should be ignored for now.
         grammar = load_grammar('grammar%s.%s' % sys.version_info[:2])
-        m = Parser(grammar, u(src)).module
+        m = ParserWithRecovery(grammar, u(src)).module
         if is_py3:
             assert not m.subscopes
         else:
@@ -211,5 +211,5 @@ def test_unicode_string():
 def test_backslash_dos_style():
     grammar = load_grammar()
-    m = Parser(grammar, u('\\\r\n')).module
+    m = ParserWithRecovery(grammar, u('\\\r\n')).module
     assert m

View File

@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest

 from jedi._compatibility import u, unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser import tree as pt
@@ -27,7 +27,7 @@ class TestsFunctionAndLambdaParsing(object):
     @pytest.fixture(params=FIXTURES)
     def node(self, request):
-        parsed = Parser(load_grammar(), dedent(u(request.param[0])))
+        parsed = ParserWithRecovery(load_grammar(), dedent(u(request.param[0])))
         request.keywords['expected'] = request.param[1]
         return parsed.module.subscopes[0]

View File

@@ -7,7 +7,7 @@ import pytest
 from jedi._compatibility import u, is_py3
 from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
-from jedi.parser import Parser, load_grammar, tokenize
+from jedi.parser import ParserWithRecovery, load_grammar, tokenize
 from ..helpers import unittest
@@ -15,7 +15,7 @@ from ..helpers import unittest
 class TokenTest(unittest.TestCase):
     def test_end_pos_one_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
         def testit():
             a = "huhu"
         ''')))
@@ -23,7 +23,7 @@ class TokenTest(unittest.TestCase):
         assert tok.end_pos == (3, 14)

     def test_end_pos_multi_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
         def testit():
             a = """huhu
             asdfasdf""" + "h"
@@ -108,7 +108,7 @@ class TokenTest(unittest.TestCase):
         ]
         for s in string_tokens:
-            parsed = Parser(load_grammar(), u('''a = %s\n''' % s))
+            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
             simple_stmt = parsed.module.children[0]
             expr_stmt = simple_stmt.children[0]
             assert len(expr_stmt.children) == 3

View File

@@ -15,7 +15,7 @@ from jedi._compatibility import u
 from jedi import Script
 from jedi import api
 from jedi.evaluate import imports
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 #jedi.set_debug_function()
@@ -102,7 +102,7 @@ class TestRegression(TestCase):
     def test_end_pos_line(self):
         # jedi issue #150
         s = u("x()\nx( )\nx( )\nx ( )")
-        parser = Parser(load_grammar(), s)
+        parser = ParserWithRecovery(load_grammar(), s)
         for i, s in enumerate(parser.module.statements):
             assert s.end_pos == (i + 1, i + 3)