Mirror of https://github.com/davidhalter/jedi.git
parsing refactorings, because the errors were not caught the right way
parsing.py
@@ -1122,7 +1122,6 @@ class PyFuzzyParser(object):
        # Stuff to fix tokenize errors. The parser is pretty good in tolerating
        # any errors of tokenize and just parse ahead.
        self._tokenize_start_pos = 0
        self._line_of_tokenize_restart = 0

        self.parse()
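
The two attributes above are the bookkeeping for tokenizer restarts: once the tokenizer is rebuilt in the middle of the file, it reports positions relative to the restart point, so the parser has to add the remembered offset back on. A minimal illustrative sketch of that arithmetic (the helper name is made up for this note, it is not part of jedi):

def absolute_position(token_start, line_of_tokenize_restart):
    # token_start is the (row, column) pair a freshly restarted tokenizer
    # reports; adding the remembered restart line maps it back onto the
    # original source.
    row, column = token_start
    return (row + line_of_tokenize_restart, column)

# Tokenization restarted after line 11 -> row 3 of the new stream is line 14.
assert absolute_position((3, 0), 11) == (14, 0)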
@@ -1384,114 +1383,123 @@ class PyFuzzyParser(object):

        tok_list = []
        while not (tok in always_break or tok in breaks and level <= 0):
            set_string = None
            #print 'parse_stmt', tok, tokenize.tok_name[token_type]
            tok_list.append(self.current + (self.start_pos,))
            if tok == 'as':
                string += " %s " % tok
                token_type, tok = self.next()
                if token_type == tokenize.NAME:
                    n, token_type, tok = self._parsedotname(self.current)
                    if n:
                        set_vars.append(n)
                    tok_list.append(n)
                    string += ".".join(n.names)
                continue
            elif token_type == tokenize.NAME:
                if tok in ['return', 'yield', 'del', 'raise', 'assert']:
                    if len(tok_list) > 1:
                        # this happens, when a statement has opening brackets,
                        # which are not closed again, here I just start a new
                        # statement. This is a hack, but I could not come up
                        # with a better solution.
                        # This is basically a reset of the statement.
                        debug.warning('keyword in statement %s@%s', tok_list,
                                      self.start_pos[0])
                        tok_list = [self.current + (self.start_pos,)]
                        set_vars = []
                        used_funcs = []
                        used_vars = []
                        level = 0
                    set_string = tok + ' '
                    if tok in ['return', 'yield']:
                        is_return = tok
                elif tok == 'for':
                    # list comprehensions!
                    middle, tok = self._parse_statement(added_breaks=['in'])
                    if tok != 'in' or middle is None:
                        if middle is None:
                            level -= 1
                        debug.warning('list comprehension formatting @%s' %
                                      self.start_pos[0])
                        continue

                    b = [')', ']']
                    in_clause, tok = self._parse_statement(added_breaks=b,
                                                           list_comp=True)
                    if tok not in b or in_clause is None:
                        if in_clause is None:
                            self.gen.push_back(self._current_full)
                        debug.warning('list comprehension in_clause %s@%s' %
                                      (tok, self.start_pos[0]))
                        continue
                    other_level = 0

                    for i, tok in enumerate(reversed(tok_list)):
                        if not isinstance(tok, Name):
                            tok = tok[1]
                        if tok in closing_brackets:
                            other_level -= 1
                        elif tok in opening_brackets:
                            other_level += 1
                        if other_level > 0:
                            break
                    else:
                        i = 0  # could not detect brackets -> nested list comp

                    tok_list, toks = tok_list[:-i], tok_list[-i:-1]
                    src = ''
                    for t in toks:
                        src += t[1] if isinstance(t, tuple) else t.get_code()
                    st = Statement(src, [], [], [], \
                                   toks, first_pos, self.end_pos)

                    for s in [st, middle, in_clause]:
                        s.parent = weakref.ref(self.scope)
                    tok = ListComprehension(st, middle, in_clause)
                    tok_list.append(tok)
                    if list_comp:
                        string = ''
                    string += tok.get_code()
                    continue
                else:
                    n, token_type, tok = self._parsedotname(self.current)
                    tok_list.pop()  # removed last entry, because we add Name
                    if n:
            try:
                set_string = None
                #print 'parse_stmt', tok, tokenize.tok_name[token_type]
                tok_list.append(self.current + (self.start_pos,))
                if tok == 'as':
                    string += " %s " % tok
                    token_type, tok = self.next()
                    if token_type == tokenize.NAME:
                        n, token_type, tok = self._parsedotname(self.current)
                        if n:
                            set_vars.append(n)
                        tok_list.append(n)
                        if tok == '(':
                            # it must be a function
                            used_funcs.append(n)
                        else:
                            used_vars.append(n)
                        if string and re.match(r'[\w\d\'"]', string[-1]):
                            string += ' '
                        string += ".".join(n.names)
                    continue
                elif '=' in tok and not tok in ['>=', '<=', '==', '!=']:
                    # there has been an assignement -> change vars
                    if level == 0:
                        set_vars = used_vars
                        used_vars = []
                elif tok in opening_brackets:
                    level += 1
                elif tok in closing_brackets:
                    level -= 1
                elif token_type == tokenize.NAME:
                    if tok in ['return', 'yield', 'del', 'raise', 'assert']:
                        if len(tok_list) > 1:
                            # this happens, when a statement has opening
                            # brackets, which are not closed again, here I just
                            # start a new statement. This is a hack, but I
                            # could not come up with a better solution.
                            # This is basically a reset of the statement.
                            debug.warning('keyword in statement %s@%s',
                                          tok_list, self.start_pos[0])
                            tok_list = [self.current + (self.start_pos,)]
                            set_vars = []
                            used_funcs = []
                            used_vars = []
                            level = 0
                        set_string = tok + ' '
                        if tok in ['return', 'yield']:
                            is_return = tok
                    elif tok == 'for':
                        # list comprehensions!
                        middle, tok = self._parse_statement(
                            added_breaks=['in'])
                        if tok != 'in' or middle is None:
                            if middle is None:
                                level -= 1
                            debug.warning('list comprehension formatting @%s' %
                                          self.start_pos[0])
                            continue

                        b = [')', ']']
                        in_clause, tok = self._parse_statement(added_breaks=b,
                                                               list_comp=True)
                        if tok not in b or in_clause is None:
                            if in_clause is None:
                                self.gen.push_back(self._current_full)
                            debug.warning('list comprehension in_clause %s@%s'
                                          % (tok, self.start_pos[0]))
                            continue
                        other_level = 0

                        for i, tok in enumerate(reversed(tok_list)):
                            if not isinstance(tok, Name):
                                tok = tok[1]
                            if tok in closing_brackets:
                                other_level -= 1
                            elif tok in opening_brackets:
                                other_level += 1
                            if other_level > 0:
                                break
                        else:
                            # could not detect brackets -> nested list comp
                            i = 0

                        tok_list, toks = tok_list[:-i], tok_list[-i:-1]
                        src = ''
                        for t in toks:
                            src += t[1] if isinstance(t, tuple) \
                                   else t.get_code()
                        st = Statement(src, [], [], [], \
                                       toks, first_pos, self.end_pos)

                        for s in [st, middle, in_clause]:
                            s.parent = weakref.ref(self.scope)
                        tok = ListComprehension(st, middle, in_clause)
                        tok_list.append(tok)
                        if list_comp:
                            string = ''
                        string += tok.get_code()
                        continue
                    else:
                        n, token_type, tok = self._parsedotname(self.current)
                        # removed last entry, because we add Name
                        tok_list.pop()
                        if n:
                            tok_list.append(n)
                            if tok == '(':
                                # it must be a function
                                used_funcs.append(n)
                            else:
                                used_vars.append(n)
                            if string and re.match(r'[\w\d\'"]', string[-1]):
                                string += ' '
                            string += ".".join(n.names)
                        continue
                elif '=' in tok and not tok in ['>=', '<=', '==', '!=']:
                    # there has been an assignement -> change vars
                    if level == 0:
                        set_vars = used_vars
                        used_vars = []
                elif tok in opening_brackets:
                    level += 1
                elif tok in closing_brackets:
                    level -= 1

                if set_string is not None:
                    string = set_string
                else:
                    string += tok
                token_type, tok = self.next()
            except StopIteration:
                # comes from tokenizer
                break

            if set_string is not None:
                string = set_string
            else:
                string += tok
            token_type, tok = self.next()
        if not string:
            return None, tok
        #print 'new_stat', string, set_vars, used_funcs, used_vars
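
The try/except StopIteration wrapped around the loop body above means that a token stream which runs dry simply ends the current statement instead of unwinding the whole parse. A self-contained sketch of that pattern (illustrative only, the function and its break conditions are not jedi's actual code):

import io
import tokenize

def collect_statement(source):
    # Gather the raw token strings of the first logical statement; if the
    # token stream ends unexpectedly, finish the statement instead of
    # letting StopIteration escape to the caller.
    gen = tokenize.generate_tokens(io.StringIO(source).readline)
    parts = []
    while True:
        try:
            token_type, tok, start, end, line = next(gen)
            if token_type in (tokenize.NEWLINE, tokenize.ENDMARKER) or tok == ';':
                break
            parts.append(tok)
        except StopIteration:
            # comes from the tokenizer -> end of input
            break
    return parts

print(collect_statement("a = foo(1, 2)\n"))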
@@ -1526,7 +1534,24 @@ class PyFuzzyParser(object):

    def next(self):
        """ Generate the next tokenize pattern. """
        self._current_full = next(self.gen)
        try:
            self._current_full = next(self.gen)
        except tokenize.TokenError:
            # We just ignore this error, I try to handle it earlier - as
            # good as possible
            debug.warning('parentheses not closed error')
        except IndentationError:
            # This is an error, that tokenize may produce, because the code
            # is not indented as it should. Here it just ignores this line
            # and restarts the parser.
            # (This is a rather unlikely error message, for normal code,
            # tokenize seems to be pretty tolerant)
            debug.warning('indentation error on line %s, ignoring it' %
                          (self.start_pos[0]))
            self._line_of_tokenize_restart = self.start_pos[0] + 1
            self.gen = PushBackIterator(tokenize_func(self.buf.readline))
            return self.next()

        type, tok, self._tokenize_start_pos, self._tokenize_end_pos, \
            self.parserline = self._current_full
        if self.user_position and (self.start_pos[0] == self.user_position[0]
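
With the new next(), tokenize errors are absorbed at the single point where tokens are fetched instead of in every consumer. A rough standalone sketch of the same recovery idea, using only the standard tokenize module (the function name is made up; jedi's tokenize_func and PushBackIterator wrappers are not reproduced here):

import io
import tokenize

def tolerant_tokens(source):
    # Yield tokens and recover from the two errors the diff cares about:
    # TokenError (e.g. unclosed parentheses at EOF) just ends the stream,
    # IndentationError rebuilds the generator on the same buffer, which
    # resumes reading after the offending line.
    buf = io.StringIO(source)
    gen = tokenize.generate_tokens(buf.readline)
    while True:
        try:
            yield next(gen)
        except StopIteration:
            return
        except tokenize.TokenError:
            return
        except IndentationError:
            gen = tokenize.generate_tokens(buf.readline)

print([t.string for t in tolerant_tokens("x = (1 +\n")])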
@@ -1550,8 +1575,8 @@ class PyFuzzyParser(object):

        :raises: IndentationError
        """
        buf = BytesIO(self.code)
        self.gen = PushBackIterator(tokenize_func(buf.readline))
        self.buf = BytesIO(self.code)
        self.gen = PushBackIterator(tokenize_func(self.buf.readline))

        extended_flow = ['else', 'elif', 'except', 'finally']
        statement_toks = ['{', '[', '(', '`']
@@ -1725,20 +1750,6 @@ class PyFuzzyParser(object):
                              self.start_pos[0])
            except StopIteration:  # thrown on EOF
                break
            except tokenize.TokenError:
                # We just ignore this error, I try to handle it earlier - as
                # good as possible
                debug.warning('parentheses not closed error')
            except IndentationError:
                # This is an error, that tokenize may produce, because the code
                # is not indented as it should. Here it just ignores this line
                # and restarts the parser.
                # (This is a rather unlikely error message, for normal code,
                # tokenize seems to be pretty tolerant)
                self._line_of_tokenize_restart = self.start_pos[0] + 1
                self._tokenize_start_pos = (0, 0)
                self._tokenize_end_pos = (0, 0)
                debug.warning('indentation error on line %s, ignoring it' %
                              (self.start_pos[0]))
                self.gen = PushBackIterator(tokenize_func(buf.readline))

        del self.buf
        return self.module
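
Throughout the diff the token generator is wrapped in a PushBackIterator so the parser can un-read a token it consumed too eagerly (see self.gen.push_back(self._current_full) in the list-comprehension handling). A minimal sketch of such a wrapper, assuming it only needs iteration plus a push_back() method:

class PushBackIterator:
    # Wrap any iterator and allow values to be pushed back; pushed-back
    # values are handed out again before the underlying iterator resumes.
    def __init__(self, iterator):
        self.iterator = iterator
        self.pushes = []

    def push_back(self, value):
        self.pushes.append(value)

    def __iter__(self):
        return self

    def __next__(self):
        if self.pushes:
            return self.pushes.pop()
        return next(self.iterator)

it = PushBackIterator(iter('abc'))
first = next(it)     # 'a'
it.push_back(first)  # un-read it
print(list(it))      # ['a', 'b', 'c']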