the parser should now be able to ignore single opening parentheses

David Halter
2012-04-09 17:44:35 +02:00
parent 1c8438cc00
commit b8f6f2267b
5 changed files with 60 additions and 21 deletions
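
For context: Python's tokenize module raises tokenize.TokenError when the input ends while a parenthesis is still open, which is exactly the situation this commit teaches the parser to survive. A minimal reproduction (Python 3 syntax here; the code below is from the Python 2 era):

    import io
    import tokenize

    # A single opening parenthesis that is never closed.
    source = "x = (1 +\n"

    try:
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            print(tok)
    except tokenize.TokenError as e:
        # tokenize only notices the problem at EOF:
        # 'EOF in multi-line statement'
        print("TokenError:", e)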


@@ -30,8 +30,6 @@ Ignored statements:
TODO take special care for future imports
TODO check meta classes
-TODO evaluate options to either replace tokenize or change its behavior for
-multiline parentheses (if they don't close, there must be a break somewhere)
"""
import tokenize
@@ -830,11 +828,18 @@ class PyFuzzyParser(object):
self.scope = self.top
self.current = (None, None, None)
self._tokenize_line_nr = 0
+self._line_of_tokenize_restart = 0
self.parse()
# delete code again, only the parser needs it
del self.code
+@property
+def line_nr(self):
+    return self._line_of_tokenize_restart + self._tokenize_line_nr
def _parsedotname(self, pre_used_token=None):
"""
The dot name parser parses a name, variable or function and returns
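
The new line_nr property exists because of the tokenize restarts added further down: a freshly created generator reports token rows starting from 1 again, so the parser's real line number is the restart offset plus the row tokenize reports. A tiny sketch of that arithmetic, reusing the diff's attribute names:

    # Suppose tokenize was restarted after line 12 of the file. The new
    # generator then reports row 1 for what is really line 13.
    _line_of_tokenize_restart = 12
    _tokenize_line_nr = 1

    line_nr = _line_of_tokenize_restart + _tokenize_line_nr
    assert line_nr == 13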
@@ -923,7 +928,7 @@ class PyFuzzyParser(object):
name2 = Name(name2, start_indent2, start_line, self.line_nr)
i = Name(name, start_indent, start_line, self.line_nr)
imports.append((i, name2))
while tok != "," and "\n" not in tok:
while tok not in [",", ";", "\n"]:
token_type, tok, indent = self.next()
if tok != ",":
break
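
With the loop above now also stopping at ';', two statements on one line are no longer swallowed into a single import, e.g. for an input line like this (illustrative):

    import os; import sys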
@@ -1041,7 +1046,7 @@ class PyFuzzyParser(object):
# in a statement.
breaks = ['\n', ':', ')']
always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
-'finally']
+'finally', 'while']
if added_breaks:
breaks += added_breaks
@@ -1064,6 +1069,19 @@ class PyFuzzyParser(object):
elif token_type == tokenize.NAME:
#print 'is_name', tok
if tok in ['return', 'yield', 'del', 'raise', 'assert']:
+if len(tok_list) > 1:
+    # this happens when a statement has opening brackets
+    # that are not closed again; here I just start a new
+    # statement. This is a hack, but I could not come up
+    # with a better solution.
+    # This is basically a reset of the statement.
+    debug.warning('return in statement @%s', tok_list,
+                  self.line_nr)
+    tok_list = [tok]
+    set_vars = []
+    used_funcs = []
+    used_vars = []
+    level = 0
set_string = tok + ' '
if tok in ['return', 'yield']:
is_return = tok
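
The reset above fires when a keyword such as return shows up while the statement parser is still collecting tokens from an earlier, unclosed bracket. A toy collector, not jedi's actual code, that shows the idea on a concrete input:

    import io
    import tokenize

    # 'x = (1 +' never closes its parenthesis, so a naive statement
    # collector is still inside that statement when 'return' arrives.
    source = "x = (1 +\nreturn 2\n"

    tok_list = []
    try:
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if tok.string in ('return', 'yield', 'del', 'raise', 'assert') \
                    and tok_list:
                print('resetting, dropping:', tok_list)
                tok_list = []  # start a new statement, as in the diff
            if tok.type in (tokenize.NAME, tokenize.NUMBER, tokenize.OP):
                tok_list.append(tok.string)
    except tokenize.TokenError:
        pass  # the unclosed parenthesis only surfaces at EOF
    print('collected:', tok_list)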
@@ -1124,7 +1142,7 @@ class PyFuzzyParser(object):
def next(self):
""" Generate the next tokenize pattern. """
type, tok, position, dummy, self.parserline = self.gen.next()
-(self.line_nr, indent) = position
+(self._tokenize_line_nr, indent) = position
if self.line_nr == self.user_line:
debug.dbg('user scope found [%s] =%s' % \
(self.parserline.replace('\n', ''), repr(self.scope)))
@@ -1170,7 +1188,7 @@ class PyFuzzyParser(object):
# errors. only check for names, because that's relevant here. If
# some docstrings are not indented, I don't care.
while indent <= self.scope.indent \
-and token_type in [tokenize.NAME] \
+and (token_type == tokenize.NAME or tok in ['(', '['])\
and self.scope != self.top:
debug.warning('syntax error: dedent @%s - %s<=%s', \
(self.line_nr, indent, self.scope.indent))
@@ -1204,7 +1222,7 @@ class PyFuzzyParser(object):
for m, alias in imports:
i = Import(indent, start_line, self.line_nr, m, alias)
self.scope.add_import(i)
debug.dbg("new import: %s" % (i))
debug.dbg("new import: %s" % (i), self.current)
self.freshscope = False
elif tok == 'from':
# take care for relative imports
@@ -1293,6 +1311,7 @@ class PyFuzzyParser(object):
# by the statement parser.
stmt, tok = self._parse_statement(self.current)
if stmt:
+debug.dbg('new stmt', stmt)
self.scope.add_statement(stmt)
self.freshscope = False
else:
@@ -1303,6 +1322,21 @@ class PyFuzzyParser(object):
self.line_nr)
except StopIteration: # thrown on EOF
break
+except tokenize.TokenError:
+    # We just ignore this error; I try to handle it earlier, as
+    # well as possible.
+    debug.warning('parentheses not closed error')
+except IndentationError:
+    # This is an error that tokenize may produce, because the code
+    # is not indented as it should be. Here it just ignores this
+    # line and restarts the parser.
+    # (This is a rather unlikely error; for normal code, tokenize
+    # seems to be pretty tolerant.)
+    self._line_of_tokenize_restart = self.line_nr + 1
+    self._tokenize_line_nr = 0
+    debug.warning('indentation error on line %s, ignoring it' %
+                  (self.line_nr))
+    self.gen = tokenize.generate_tokens(buf.readline)
#except:
# debug.dbg("parse error: %s, %s @ %s" %
# (sys.exc_info()[0], sys.exc_info()[1], self.parserline))
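
The IndentationError branch relies on a useful property of tokenize: after the exception the old generator is dead, but the buffer has already consumed the offending line, so creating a fresh generator on the same readline resumes right after it. A standalone sketch of that recovery loop (Python 3; buf stands in for the buffer the parser reads from):

    import io
    import tokenize

    source = ("if x:\n"
              "    a = 1\n"
              "  b = 2\n"   # bad dedent, matches no indentation level
              "c = 3\n")
    buf = io.StringIO(source)
    gen = tokenize.generate_tokens(buf.readline)
    while True:
        try:
            token_type, tok = next(gen)[:2]
        except StopIteration:
            break
        except tokenize.TokenError:
            break  # e.g. unclosed parentheses at EOF
        except IndentationError:
            # The generator is unusable now; restarting it on the same
            # buffer continues with the line after the bad one.
            gen = tokenize.generate_tokens(buf.readline)
            continue
        print(token_type, repr(tok))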