1
0
forked from VimPlug/jedi

The parser without error recovery now raises an error if it's not able to parse something.

This commit is contained in:
Dave Halter
2015-12-25 18:53:05 +01:00
parent 6bad5a924b
commit a373e34229
4 changed files with 41 additions and 34 deletions

View File

@@ -13,7 +13,7 @@ import sys
from itertools import chain from itertools import chain
from jedi._compatibility import unicode, builtins from jedi._compatibility import unicode, builtins
from jedi.parser import Parser, load_grammar from jedi.parser import Parser, load_grammar, ParseError
from jedi.parser.tokenize import source_tokens from jedi.parser.tokenize import source_tokens
from jedi.parser import tree from jedi.parser import tree
from jedi.parser.user_context import UserContext, UserContextParser from jedi.parser.user_context import UserContext, UserContextParser
@@ -322,8 +322,9 @@ class Script(object):
@memoize_default() @memoize_default()
def _get_under_cursor_stmt(self, cursor_txt, start_pos=None): def _get_under_cursor_stmt(self, cursor_txt, start_pos=None):
stmt = Parser(self._grammar, cursor_txt, 'eval_input').get_parsed_node() try:
if stmt is None: stmt = Parser(self._grammar, cursor_txt, 'eval_input').get_parsed_node()
except ParseError:
return None return None
user_stmt = self._parser.user_stmt() user_stmt = self._parser.user_stmt()

View File

@@ -20,7 +20,7 @@ x support for type hint comments `# type: (int, str) -> int`. See comment from
from itertools import chain from itertools import chain
from jedi.parser import Parser, load_grammar from jedi.parser import Parser, load_grammar, ParseError
from jedi.evaluate.cache import memoize_default from jedi.evaluate.cache import memoize_default
from jedi.evaluate.compiled import CompiledObject from jedi.evaluate.compiled import CompiledObject
from jedi import debug from jedi import debug
@@ -32,9 +32,10 @@ def _evaluate_for_annotation(evaluator, annotation):
for definition in evaluator.eval_element(annotation): for definition in evaluator.eval_element(annotation):
if (isinstance(definition, CompiledObject) and if (isinstance(definition, CompiledObject) and
isinstance(definition.obj, str)): isinstance(definition.obj, str)):
p = Parser(load_grammar(), definition.obj, start='eval_input') try:
element = p.get_parsed_node() p = Parser(load_grammar(), definition.obj, start='eval_input')
if element is None: element = p.get_parsed_node()
except ParseError:
debug.warning('Annotation not parsed: %s' % definition.obj) debug.warning('Annotation not parsed: %s' % definition.obj)
else: else:
module = annotation.get_parent_until() module = annotation.get_parent_until()

View File

@@ -35,6 +35,12 @@ STATEMENT_KEYWORDS = 'assert', 'del', 'global', 'nonlocal', 'raise', \
_loaded_grammars = {} _loaded_grammars = {}
class ParseError(Exception):
"""
Signals you that the code you fed the Parser was not correct Python code.
"""
def load_grammar(file='grammar3.4'): def load_grammar(file='grammar3.4'):
# For now we only support two different Python syntax versions: The latest # For now we only support two different Python syntax versions: The latest
# Python 3 and Python 2. This may change. # Python 3 and Python 2. This may change.
@@ -111,9 +117,6 @@ class Parser(object):
'lambdef_nocond': pt.Lambda, 'lambdef_nocond': pt.Lambda,
} }
class ParserError(Exception):
pass
def __init__(self, grammar, source, start, tokenizer=None): def __init__(self, grammar, source, start, tokenizer=None):
start_number = grammar.symbol2number[start] start_number = grammar.symbol2number[start]
@@ -136,20 +139,18 @@ class Parser(object):
self.error_recovery, start_number) self.error_recovery, start_number)
if tokenizer is None: if tokenizer is None:
tokenizer = tokenize.source_tokens(source) tokenizer = tokenize.source_tokens(source)
try:
self._parsed = p.parse(self._tokenize(tokenizer))
except Parser.ParserError:
self._parsed = None
else:
if start == 'file_input' != self._parsed.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
self._parsed = self.convert_node(grammar,
grammar.symbol2number['file_input'],
[self._parsed])
if added_newline: self._parsed = p.parse(self._tokenize(tokenizer))
self.remove_last_newline()
if start == 'file_input' != self._parsed.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
self._parsed = self.convert_node(grammar,
grammar.symbol2number['file_input'],
[self._parsed])
if added_newline:
self.remove_last_newline()
def get_parsed_node(self): def get_parsed_node(self):
return self._parsed return self._parsed
@@ -157,13 +158,13 @@ class Parser(object):
def _tokenize(self, tokenizer): def _tokenize(self, tokenizer):
for typ, value, start_pos, prefix in tokenizer: for typ, value, start_pos, prefix in tokenizer:
if typ == ERRORTOKEN: if typ == ERRORTOKEN:
raise Parser.ParserError raise ParseError
elif typ == OP: elif typ == OP:
typ = token.opmap[value] typ = token.opmap[value]
yield typ, value, prefix, start_pos yield typ, value, prefix, start_pos
def error_recovery(self, *args, **kwargs): def error_recovery(self, *args, **kwargs):
raise Parser.ParserError raise ParseError
def convert_node(self, grammar, type, children): def convert_node(self, grammar, type, children):
""" """

View File

@@ -18,8 +18,12 @@ how this parsing engine works.
from jedi.parser import tokenize from jedi.parser import tokenize
class ParseError(Exception): class InternalParseError(Exception):
"""Exception to signal the parser is stuck.""" """
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type, value, start_pos): def __init__(self, msg, type, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
@@ -38,7 +42,7 @@ class PgenParser(object):
p = Parser(grammar, [converter]) # create instance p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing p.setup([start]) # prepare for parsing
<for each input token>: <for each input token>:
if p.addtoken(...): # parse a token; may raise ParseError if p.addtoken(...): # parse a token
break break
root = p.rootnode # root of abstract syntax tree root = p.rootnode # root of abstract syntax tree
@@ -53,10 +57,10 @@ class PgenParser(object):
Parsing is complete when addtoken() returns True; the root of the Parsing is complete when addtoken() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, addtoken() raises instance variable. When a syntax error occurs, error_recovery()
the ParseError exception. There is no error recovery; the parser is called. There is no error recovery; the parser cannot be used
cannot be used after a syntax error was reported (but it can be after a syntax error was reported (but it can be reinitialized by
reinitialized by calling setup()). calling setup()).
""" """
@@ -109,7 +113,7 @@ class PgenParser(object):
start_pos, prefix, self.addtoken) start_pos, prefix, self.addtoken)
# Add the ENDMARKER again. # Add the ENDMARKER again.
if not self.addtoken(type, value, prefix, start_pos): if not self.addtoken(type, value, prefix, start_pos):
raise ParseError("incomplete input", type, value, start_pos) raise InternalParseError("incomplete input", type, value, start_pos)
return self.rootnode return self.rootnode
def addtoken(self, type, value, prefix, start_pos): def addtoken(self, type, value, prefix, start_pos):
@@ -162,7 +166,7 @@ class PgenParser(object):
self.pop() self.pop()
if not self.stack: if not self.stack:
# Done parsing, but another token is input # Done parsing, but another token is input
raise ParseError("too much input", type, value, start_pos) raise InternalParseError("too much input", type, value, start_pos)
else: else:
self.error_recovery(self.grammar, self.stack, type, self.error_recovery(self.grammar, self.stack, type,
value, start_pos, prefix, self.addtoken) value, start_pos, prefix, self.addtoken)