parsing refactorings, because the errors were not caught the right way

This commit is contained in:
David Halter
2012-09-14 21:00:57 +02:00
parent b073440369
commit fd37ae53f2


@@ -1122,7 +1122,6 @@ class PyFuzzyParser(object):
         # Stuff to fix tokenize errors. The parser is pretty good in tolerating
         # any errors of tokenize and just parse ahead.
-        self._tokenize_start_pos = 0
         self._line_of_tokenize_restart = 0
         self.parse()
@@ -1384,6 +1383,7 @@ class PyFuzzyParser(object):
         tok_list = []
         while not (tok in always_break or tok in breaks and level <= 0):
+            try:
                 set_string = None
                 #print 'parse_stmt', tok, tokenize.tok_name[token_type]
                 tok_list.append(self.current + (self.start_pos,))
@@ -1400,13 +1400,13 @@ class PyFuzzyParser(object):
                 elif token_type == tokenize.NAME:
                     if tok in ['return', 'yield', 'del', 'raise', 'assert']:
                         if len(tok_list) > 1:
-                            # this happens, when a statement has opening brackets,
-                            # which are not closed again, here I just start a new
-                            # statement. This is a hack, but I could not come up
-                            # with a better solution.
+                            # this happens, when a statement has opening
+                            # brackets, which are not closed again, here I just
+                            # start a new statement. This is a hack, but I
+                            # could not come up with a better solution.
                             # This is basically a reset of the statement.
-                            debug.warning('keyword in statement %s@%s', tok_list,
-                                          self.start_pos[0])
+                            debug.warning('keyword in statement %s@%s',
+                                          tok_list, self.start_pos[0])
                             tok_list = [self.current + (self.start_pos,)]
                             set_vars = []
                             used_funcs = []
@@ -1417,7 +1417,8 @@ class PyFuzzyParser(object):
                             is_return = tok
                     elif tok == 'for':
                         # list comprehensions!
-                        middle, tok = self._parse_statement(added_breaks=['in'])
+                        middle, tok = self._parse_statement(
+                            added_breaks=['in'])
                         if tok != 'in' or middle is None:
                             if middle is None:
                                 level -= 1
@@ -1431,8 +1432,8 @@ class PyFuzzyParser(object):
                         if tok not in b or in_clause is None:
                             if in_clause is None:
                                 self.gen.push_back(self._current_full)
-                            debug.warning('list comprehension in_clause %s@%s' %
-                                          (tok, self.start_pos[0]))
+                            debug.warning('list comprehension in_clause %s@%s'
+                                          % (tok, self.start_pos[0]))
                             continue
                         other_level = 0
@@ -1446,12 +1447,14 @@ class PyFuzzyParser(object):
                             if other_level > 0:
                                 break
                         else:
-                            i = 0  # could not detect brackets -> nested list comp
+                            # could not detect brackets -> nested list comp
+                            i = 0
                         tok_list, toks = tok_list[:-i], tok_list[-i:-1]
                         src = ''
                         for t in toks:
-                            src += t[1] if isinstance(t, tuple) else t.get_code()
+                            src += t[1] if isinstance(t, tuple) \
+                                else t.get_code()
                         st = Statement(src, [], [], [], \
                                        toks, first_pos, self.end_pos)
@@ -1465,7 +1468,8 @@ class PyFuzzyParser(object):
                             continue
                     else:
                         n, token_type, tok = self._parsedotname(self.current)
-                        tok_list.pop()  # removed last entry, because we add Name
+                        # removed last entry, because we add Name
+                        tok_list.pop()
                         if n:
                             tok_list.append(n)
                             if tok == '(':
@@ -1492,6 +1496,10 @@ class PyFuzzyParser(object):
                 else:
                     string += tok
                 token_type, tok = self.next()
+            except StopIteration:
+                # comes from tokenizer
+                break
         if not string:
             return None, tok
         #print 'new_stat', string, set_vars, used_funcs, used_vars
@@ -1526,7 +1534,24 @@ class PyFuzzyParser(object):
     def next(self):
         """ Generate the next tokenize pattern. """
-        self._current_full = next(self.gen)
+        try:
+            self._current_full = next(self.gen)
+        except tokenize.TokenError:
+            # We just ignore this error, I try to handle it earlier - as
+            # good as possible
+            debug.warning('parentheses not closed error')
+        except IndentationError:
+            # This is an error, that tokenize may produce, because the code
+            # is not indented as it should. Here it just ignores this line
+            # and restarts the parser.
+            # (This is a rather unlikely error message, for normal code,
+            # tokenize seems to be pretty tolerant)
+            debug.warning('indentation error on line %s, ignoring it' %
+                          (self.start_pos[0]))
+            self._line_of_tokenize_restart = self.start_pos[0] + 1
+            self.gen = PushBackIterator(tokenize_func(self.buf.readline))
+            return self.next()
         type, tok, self._tokenize_start_pos, self._tokenize_end_pos, \
             self.parserline = self._current_full
         if self.user_position and (self.start_pos[0] == self.user_position[0]
@@ -1550,8 +1575,8 @@ class PyFuzzyParser(object):
         :raises: IndentationError
         """
-        buf = BytesIO(self.code)
-        self.gen = PushBackIterator(tokenize_func(buf.readline))
+        self.buf = BytesIO(self.code)
+        self.gen = PushBackIterator(tokenize_func(self.buf.readline))
         extended_flow = ['else', 'elif', 'except', 'finally']
         statement_toks = ['{', '[', '(', '`']
@@ -1725,20 +1750,6 @@ class PyFuzzyParser(object):
                                   self.start_pos[0])
             except StopIteration:  # thrown on EOF
                 break
-            except tokenize.TokenError:
-                # We just ignore this error, I try to handle it earlier - as
-                # good as possible
-                debug.warning('parentheses not closed error')
-            except IndentationError:
-                # This is an error, that tokenize may produce, because the code
-                # is not indented as it should. Here it just ignores this line
-                # and restarts the parser.
-                # (This is a rather unlikely error message, for normal code,
-                # tokenize seems to be pretty tolerant)
-                self._line_of_tokenize_restart = self.start_pos[0] + 1
-                self._tokenize_start_pos = (0, 0)
-                self._tokenize_end_pos = (0, 0)
-                debug.warning('indentation error on line %s, ignoring it' %
-                              (self.start_pos[0]))
-                self.gen = PushBackIterator(tokenize_func(buf.readline))
+        del self.buf
         return self.module
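
The idea of the commit: instead of catching tokenize.TokenError and IndentationError in the outer parse loop (where the bad token has already derailed the current statement), the exceptions are caught inside next(), the single place where tokens are fetched, so every caller only has to deal with StopIteration. The following is a minimal, self-contained sketch of that pattern using only the stdlib tokenize module; the class and names (TolerantTokenStream, line_of_restart) are hypothetical stand-ins, not jedi's API, and the real commit logs the errors via debug.warning and rebuilds its PushBackIterator instead.

# A minimal sketch of the error-handling pattern, assuming Python 3 stdlib.
import tokenize
from io import StringIO


class TolerantTokenStream(object):
    """Yields tokens from possibly broken source instead of crashing."""

    def __init__(self, code):
        self.buf = StringIO(code)
        self.gen = tokenize.generate_tokens(self.buf.readline)
        self.line_of_restart = 0  # stand-in for _line_of_tokenize_restart

    def next(self):
        try:
            return next(self.gen)
        except tokenize.TokenError:
            # e.g. an unclosed bracket at EOF; treat it as end of input
            # (the commit logs a warning and lets the caller carry on).
            raise StopIteration
        except IndentationError:
            # tokenize raises this on a bad dedent; skip the offending
            # line and restart tokenizing from where the buffer stopped.
            self.line_of_restart += 1
            self.gen = tokenize.generate_tokens(self.buf.readline)
            return self.next()


# The parse loop then stays simple: StopIteration is the only exit path.
stream = TolerantTokenStream('x = (1 +\n')   # unclosed parenthesis
while True:
    try:
        tok = stream.next()
    except StopIteration:  # thrown on EOF, or on a swallowed TokenError
        break
    print(tokenize.tok_name[tok.type], repr(tok.string))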