the parser should now be able to ignore single opening parentheses

David Halter
2012-04-09 17:44:35 +02:00
parent 1c8438cc00
commit b8f6f2267b
5 changed files with 60 additions and 21 deletions

View File

@@ -3,6 +3,8 @@ follow_statement -> follow_call -> follow_paths -> follow_path
 'follow_import'
 `get_names_for_scope` and `get_scopes_for_name` are search functions
+TODO include super classes
 """
 import itertools

View File

@@ -2,8 +2,9 @@
 import functions
-#functions.debug.debug_function = functions.debug.print_to_stdout
+functions.debug.debug_function = functions.debug.print_to_stdout
 #functions.debug.ignored_modules += ['parsing', 'builtin']
+functions.debug.ignored_modules += ['parsing', 'builtin', 'evaluate', 'modules']
 functions.modules.module_find_path.insert(0, '.')
 f_name = 'test.py'

View File

@@ -58,10 +58,11 @@ class FileWithCursor(modules.File):
         gen = tokenize.generate_tokens(fetch_line)
         # TODO can happen: raise TokenError, ("EOF in multi-line statement"
+        # where???
         string = ''
         level = 0
         for token_type, tok, start, end, line in gen:
-            #print token_type, tok, line
+            #print token_type, tok, force_point
             if level > 0:
                 if tok in close_brackets:
                     level += 1
@@ -70,12 +71,13 @@ class FileWithCursor(modules.File):
             elif tok == '.':
                 force_point = False
             elif force_point:
-                if tok != '.':
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if not (token_type == tokenize.NUMBER and tok[0] == '.'):
-                        #print 'break2', token_type, tok
-                        break
+                # it is reversed, therefore a number is getting recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
+                else:
+                    #print 'break2', token_type, tok
+                    break
             elif tok in close_brackets:
                 level += 1
             elif token_type in [tokenize.NAME, tokenize.STRING]:
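A note on the force_point branch above: the code before the cursor is fed to tokenize back to front (that is what the "it is reversed" comment refers to), so a trailing float attribute access such as 1.0.fromhex arrives as xehmorf.0.1 and the dots become the start of NUMBER tokens. A quick standalone check of that claim, written against Python 3's tokenize (illustration only, not code from this commit; the Python 2 tokenizer the parser uses behaves the same way for this input):

import tokenize
from io import StringIO

# Tokenize "1.0.fromhex" the way the reversed cursor scan would see it.
reversed_text = "1.0.fromhex"[::-1]        # 'xehmorf.0.1'
for tok in tokenize.generate_tokens(StringIO(reversed_text + "\n").readline):
    if tok.type in (tokenize.NAME, tokenize.NUMBER):
        print(tokenize.tok_name[tok.type], repr(tok.string))
# Prints NAME 'xehmorf', then NUMBER '.0' and NUMBER '.1': numbers that start
# with '.', which is exactly the case the new check keeps scanning instead of
# breaking on.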

View File

@@ -30,8 +30,6 @@ Ignored statements:
 TODO take special care for future imports
 TODO check meta classes
-TODO evaluate options to either replace tokenize or change its behavior for
-multiline parentheses (if they don't close, there must be a break somewhere)
 """
 import tokenize
@@ -830,11 +828,18 @@ class PyFuzzyParser(object):
         self.scope = self.top
         self.current = (None, None, None)
+        self._tokenize_line_nr = 0
+        self._line_of_tokenize_restart = 0
         self.parse()
         # delete code again, only the parser needs it
         del self.code

+    @property
+    def line_nr(self):
+        return self._line_of_tokenize_restart + self._tokenize_line_nr
+
     def _parsedotname(self, pre_used_token=None):
         """
         The dot name parser parses a name, variable or function and returns
@@ -923,7 +928,7 @@ class PyFuzzyParser(object):
             name2 = Name(name2, start_indent2, start_line, self.line_nr)
             i = Name(name, start_indent, start_line, self.line_nr)
             imports.append((i, name2))
-            while tok != "," and "\n" not in tok:
+            while tok not in [",", ";", "\n"]:
                 token_type, tok, indent = self.next()
             if tok != ",":
                 break
@@ -1041,7 +1046,7 @@ class PyFuzzyParser(object):
         # in a statement.
         breaks = ['\n', ':', ')']
         always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
-                        'finally']
+                        'finally', 'while']
         if added_breaks:
             breaks += added_breaks
@@ -1064,6 +1069,19 @@ class PyFuzzyParser(object):
             elif token_type == tokenize.NAME:
                 #print 'is_name', tok
                 if tok in ['return', 'yield', 'del', 'raise', 'assert']:
+                    if len(tok_list) > 1:
+                        # this happens, when a statement has opening brackets,
+                        # which are not closed again, here I just start a new
+                        # statement. This is a hack, but I could not come up
+                        # with a better solution.
+                        # This is basically a reset of the statement.
+                        debug.warning('return in statement @%s', tok_list,
+                                      self.line_nr)
+                        tok_list = [tok]
+                        set_vars = []
+                        used_funcs = []
+                        used_vars = []
+                        level = 0
                     set_string = tok + ' '
                     if tok in ['return', 'yield']:
                         is_return = tok
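The block added above is the core of the commit: when a keyword like return shows up while an earlier bracket is still open, the half-finished statement is thrown away and parsing restarts from the keyword. A rough standalone sketch of the same idea, using my own names and heavy simplification (not the parser's real code; Python 3 tokenize):

import tokenize
from io import StringIO

STATEMENT_KEYWORDS = {'return', 'yield', 'del', 'raise', 'assert'}

def rough_statements(source):
    # Group token strings into statements, abandoning a statement whose
    # bracket was never closed as soon as a statement keyword appears.
    current, statements = [], []
    try:
        for tok in tokenize.generate_tokens(StringIO(source).readline):
            if tok.type == tokenize.NAME and tok.string in STATEMENT_KEYWORDS \
                    and current:
                statements.append(current)      # reset, like tok_list = [tok]
                current = []
            if tok.string.strip():
                current.append(tok.string)
    except tokenize.TokenError:
        pass    # EOF in multi-line statement, caused by the single open '('
    if current:
        statements.append(current)
    return statements

# 'return x' is still seen as its own statement although '(1, 2' never closes:
print(rough_statements("x = (1, 2\nreturn x\n"))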
@@ -1124,7 +1142,7 @@ class PyFuzzyParser(object):
     def next(self):
         """ Generate the next tokenize pattern. """
         type, tok, position, dummy, self.parserline = self.gen.next()
-        (self.line_nr, indent) = position
+        (self._tokenize_line_nr, indent) = position
         if self.line_nr == self.user_line:
             debug.dbg('user scope found [%s] =%s' % \
                 (self.parserline.replace('\n', ''), repr(self.scope)))
@@ -1170,7 +1188,7 @@ class PyFuzzyParser(object):
             # errors. only check for names, because thats relevant here. If
             # some docstrings are not indented, I don't care.
             while indent <= self.scope.indent \
-                    and token_type in [tokenize.NAME] \
+                    and (token_type == tokenize.NAME or tok in ['(', '['])\
                     and self.scope != self.top:
                 debug.warning('syntax error: dedent @%s - %s<=%s', \
                     (self.line_nr, indent, self.scope.indent))
@@ -1204,7 +1222,7 @@ class PyFuzzyParser(object):
                 for m, alias in imports:
                     i = Import(indent, start_line, self.line_nr, m, alias)
                     self.scope.add_import(i)
-                    debug.dbg("new import: %s" % (i))
+                    debug.dbg("new import: %s" % (i), self.current)
                 self.freshscope = False
             elif tok == 'from':
                 # take care for relative imports
@@ -1293,6 +1311,7 @@ class PyFuzzyParser(object):
                 # by the statement parser.
                 stmt, tok = self._parse_statement(self.current)
                 if stmt:
+                    debug.dbg('new stmt', stmt)
                     self.scope.add_statement(stmt)
                 self.freshscope = False
             else:
@@ -1303,6 +1322,21 @@ class PyFuzzyParser(object):
                                   self.line_nr)
             except StopIteration: # thrown on EOF
                 break
+            except tokenize.TokenError:
+                # We just ignore this error, I try to handle it earlier - as
+                # good as possible
+                debug.warning('parentheses not closed error')
+            except IndentationError:
+                # This is an error, that tokenize may produce, because the code
+                # is not indented as it should. Here it just ignores this line
+                # and restarts the parser.
+                # (This is a rather unlikely error message, for normal code,
+                # tokenize seems to be pretty tolerant)
+                self._line_of_tokenize_restart = self.line_nr + 1
+                self._tokenize_line_nr = 0
+                debug.warning('indentation error on line %s, ignoring it' %
+                              (self.line_nr))
+                self.gen = tokenize.generate_tokens(buf.readline)
             #except:
             #    debug.dbg("parse error: %s, %s @ %s" %
             #        (sys.exc_info()[0], sys.exc_info()[1], self.parserline))
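The two new except clauses cover the ways tokenize itself gives up: an unclosed bracket at end of file (TokenError) and a dedent to an indentation level that was never opened (IndentationError). In the second case the token generator is rebuilt, and _line_of_tokenize_restart keeps the count of already-consumed lines so the line_nr property still reports absolute line numbers (a restart after line 40 plus tokenize's local line 1 gives 41). A small demonstration of the two errors, with example inputs of my own; exact messages can vary between Python versions:

import tokenize
from io import StringIO

def consume(source):
    # Run the token generator to exhaustion and report how it failed, if at all.
    try:
        for _ in tokenize.generate_tokens(StringIO(source).readline):
            pass
        return 'ok'
    except IndentationError as e:
        return 'IndentationError: %s' % e.msg
    except tokenize.TokenError as e:
        return 'TokenError: %s' % e.args[0]

print(consume("x = (1, 2\n"))                        # unclosed '(' at EOF
print(consume("if x:\n        a = 1\n    b = 2\n"))  # dedent to unknown level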

test.py
View File

@@ -131,11 +131,11 @@ class c1():
     c2,c5 = c1(), c1().c3()
     def c3(self):
         import os as c4 #from parsing import Scope as c4
-        c5 = c4
-        c5 = 1
-        if 1:
-            print 1
+        c5 = 1
+        c5 = c4(
         return c5+'asdf'
 (c1().c2.\
 c, 1, c3()) [0].pop()
@@ -146,5 +146,5 @@ asdf = c1; asdf2 = asdf
 b= asdf2
 #import parsing as test
 c = b().c3()
-1.0.fromhex(); import flask ; flsk = flask.Flask + flask.Request;
+1.0.fromhex(); from flask import Flask; Flask. c2(flask().