Mirror of https://github.com/davidhalter/jedi.git (synced 2025-12-11 08:11:50 +08:00)
parsing refactorings, because the errors were not caught the right way
parsing.py (73 changed lines)
@@ -1122,7 +1122,6 @@ class PyFuzzyParser(object):
 
         # Stuff to fix tokenize errors. The parser is pretty good in tolerating
         # any errors of tokenize and just parse ahead.
-        self._tokenize_start_pos = 0
         self._line_of_tokenize_restart = 0
 
         self.parse()
@@ -1384,6 +1383,7 @@ class PyFuzzyParser(object):
 
         tok_list = []
         while not (tok in always_break or tok in breaks and level <= 0):
+            try:
                 set_string = None
                 #print 'parse_stmt', tok, tokenize.tok_name[token_type]
                 tok_list.append(self.current + (self.start_pos,))
@@ -1400,13 +1400,13 @@ class PyFuzzyParser(object):
                 elif token_type == tokenize.NAME:
                     if tok in ['return', 'yield', 'del', 'raise', 'assert']:
                         if len(tok_list) > 1:
-                            # this happens, when a statement has opening brackets,
-                            # which are not closed again, here I just start a new
-                            # statement. This is a hack, but I could not come up
-                            # with a better solution.
+                            # this happens, when a statement has opening
+                            # brackets, which are not closed again, here I just
+                            # start a new statement. This is a hack, but I
+                            # could not come up with a better solution.
                             # This is basically a reset of the statement.
-                            debug.warning('keyword in statement %s@%s', tok_list,
-                                          self.start_pos[0])
+                            debug.warning('keyword in statement %s@%s',
+                                          tok_list, self.start_pos[0])
                             tok_list = [self.current + (self.start_pos,)]
                             set_vars = []
                             used_funcs = []
@@ -1417,7 +1417,8 @@ class PyFuzzyParser(object):
                             is_return = tok
                     elif tok == 'for':
                         # list comprehensions!
-                        middle, tok = self._parse_statement(added_breaks=['in'])
+                        middle, tok = self._parse_statement(
+                                                        added_breaks=['in'])
                         if tok != 'in' or middle is None:
                             if middle is None:
                                 level -= 1
@@ -1431,8 +1432,8 @@ class PyFuzzyParser(object):
                             if tok not in b or in_clause is None:
                                 if in_clause is None:
                                     self.gen.push_back(self._current_full)
-                                debug.warning('list comprehension in_clause %s@%s' %
-                                              (tok, self.start_pos[0]))
+                                debug.warning('list comprehension in_clause %s@%s'
+                                              % (tok, self.start_pos[0]))
                                 continue
                             other_level = 0
 
@@ -1446,12 +1447,14 @@ class PyFuzzyParser(object):
                                 if other_level > 0:
                                     break
                             else:
-                                i = 0  # could not detect brackets -> nested list comp
+                                # could not detect brackets -> nested list comp
+                                i = 0
 
                             tok_list, toks = tok_list[:-i], tok_list[-i:-1]
                             src = ''
                             for t in toks:
-                                src += t[1] if isinstance(t, tuple) else t.get_code()
+                                src += t[1] if isinstance(t, tuple) \
+                                            else t.get_code()
                             st = Statement(src, [], [], [], \
                                            toks, first_pos, self.end_pos)
 
@@ -1465,7 +1468,8 @@ class PyFuzzyParser(object):
                             continue
                 else:
                     n, token_type, tok = self._parsedotname(self.current)
-                    tok_list.pop()  # removed last entry, because we add Name
+                    # removed last entry, because we add Name
+                    tok_list.pop()
                     if n:
                         tok_list.append(n)
                         if tok == '(':
@@ -1492,6 +1496,10 @@ class PyFuzzyParser(object):
                 else:
                     string += tok
                 token_type, tok = self.next()
+            except StopIteration:
+                # comes from tokenizer
+                break
+
         if not string:
             return None, tok
         #print 'new_stat', string, set_vars, used_funcs, used_vars
@@ -1526,7 +1534,24 @@ class PyFuzzyParser(object):
 
     def next(self):
         """ Generate the next tokenize pattern. """
+        try:
             self._current_full = next(self.gen)
+        except tokenize.TokenError:
+            # We just ignore this error, I try to handle it earlier - as
+            # good as possible
+            debug.warning('parentheses not closed error')
+        except IndentationError:
+            # This is an error, that tokenize may produce, because the code
+            # is not indented as it should. Here it just ignores this line
+            # and restarts the parser.
+            # (This is a rather unlikely error message, for normal code,
+            # tokenize seems to be pretty tolerant)
+            debug.warning('indentation error on line %s, ignoring it' %
+                          (self.start_pos[0]))
+            self._line_of_tokenize_restart = self.start_pos[0] + 1
+            self.gen = PushBackIterator(tokenize_func(self.buf.readline))
+            return self.next()
+
         type, tok, self._tokenize_start_pos, self._tokenize_end_pos, \
             self.parserline = self._current_full
         if self.user_position and (self.start_pos[0] == self.user_position[0]
@@ -1550,8 +1575,8 @@ class PyFuzzyParser(object):
 
         :raises: IndentationError
         """
-        buf = BytesIO(self.code)
-        self.gen = PushBackIterator(tokenize_func(buf.readline))
+        self.buf = BytesIO(self.code)
+        self.gen = PushBackIterator(tokenize_func(self.buf.readline))
 
         extended_flow = ['else', 'elif', 'except', 'finally']
         statement_toks = ['{', '[', '(', '`']
@@ -1725,20 +1750,6 @@ class PyFuzzyParser(object):
                                   self.start_pos[0])
             except StopIteration: # thrown on EOF
                 break
-            except tokenize.TokenError:
-                # We just ignore this error, I try to handle it earlier - as
-                # good as possible
-                debug.warning('parentheses not closed error')
-            except IndentationError:
-                # This is an error, that tokenize may produce, because the code
-                # is not indented as it should. Here it just ignores this line
-                # and restarts the parser.
-                # (This is a rather unlikely error message, for normal code,
-                # tokenize seems to be pretty tolerant)
-                self._line_of_tokenize_restart = self.start_pos[0] + 1
-                self._tokenize_start_pos = (0, 0)
-                self._tokenize_end_pos = (0, 0)
-                debug.warning('indentation error on line %s, ignoring it' %
-                              (self.start_pos[0]))
-                self.gen = PushBackIterator(tokenize_func(buf.readline))
+        del self.buf
+
         return self.module
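
Taken together, the refactoring moves the tokenize.TokenError and IndentationError handling out of the parse() loop and into next(), where the token is actually produced, wraps the statement-parsing loop body in a try/except StopIteration, and keeps the input buffer around as self.buf so next() can restart the tokenizer after an indentation error.

A minimal standalone sketch of that pattern, using only the stdlib tokenize module (the TolerantTokens class and all of its names are illustrative, not jedi's actual API):

import tokenize
from io import StringIO

class TolerantTokens:
    """Token source that handles tokenizer errors itself, so a parse
    loop only ever has to catch StopIteration."""
    def __init__(self, code):
        self.buf = StringIO(code)
        self.gen = tokenize.generate_tokens(self.buf.readline)

    def next(self):
        try:
            return next(self.gen)
        except tokenize.TokenError:
            # e.g. "EOF in multi-line statement" for unclosed parentheses;
            # there is nothing left to read, so signal end of stream
            raise StopIteration
        except IndentationError:
            # tokenize rejects inconsistent dedents; the offending line is
            # already consumed, so restarting the tokenizer at the buffer's
            # current position effectively skips it
            self.gen = tokenize.generate_tokens(self.buf.readline)
            return self.next()

toks = TolerantTokens('x = (1 +\n')  # unclosed parenthesis
while True:
    try:
        print(toks.next())
    except StopIteration:  # thrown on EOF
        break

The consumer stays as simple as the new parse() above: every tokenizer failure either surfaces as StopIteration or is repaired inside next() before the parser ever sees it.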