diff --git a/evaluate.py b/evaluate.py
index 4df3c51e..2a63ee4d 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -3,6 +3,8 @@
 follow_statement -> follow_call -> follow_paths -> follow_path
 'follow_import'
 
 `get_names_for_scope` and `get_scopes_for_name` are search functions
+
+TODO include super classes
 """
 import itertools
diff --git a/ftest.py b/ftest.py
index 2a810715..7207d053 100755
--- a/ftest.py
+++ b/ftest.py
@@ -2,8 +2,9 @@
 
 import functions
 
-#functions.debug.debug_function = functions.debug.print_to_stdout
+functions.debug.debug_function = functions.debug.print_to_stdout
 #functions.debug.ignored_modules += ['parsing', 'builtin']
+functions.debug.ignored_modules += ['parsing', 'builtin', 'evaluate', 'modules']
 functions.modules.module_find_path.insert(0, '.')
 
 f_name = 'test.py'
diff --git a/functions.py b/functions.py
index 1be4af48..4dcd45e2 100644
--- a/functions.py
+++ b/functions.py
@@ -58,10 +58,11 @@ class FileWithCursor(modules.File):
         gen = tokenize.generate_tokens(fetch_line)
 
         # TODO can happen: raise TokenError, ("EOF in multi-line statement"
+        # where???
        string = ''
        level = 0
        for token_type, tok, start, end, line in gen:
-            #print token_type, tok, line
+            #print token_type, tok, force_point
            if level > 0:
                if tok in close_brackets:
                    level += 1
@@ -70,12 +71,13 @@ class FileWithCursor(modules.File):
            elif tok == '.':
                force_point = False
            elif force_point:
-                if tok != '.':
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if not (token_type == tokenize.NUMBER and tok[0] == '.'):
-                        #print 'break2', token_type, tok
-                        break
+                # the line is reversed, therefore a number gets recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
+                else:
+                    #print 'break2', token_type, tok
+                    break
            elif tok in close_brackets:
                level += 1
            elif token_type in [tokenize.NAME, tokenize.STRING]:
diff --git a/parsing.py b/parsing.py
index e9037a46..7d3146fc 100644
--- a/parsing.py
+++ b/parsing.py
@@ -30,8 +30,6 @@ Ignored statements:
 
 TODO take special care for future imports
 TODO check meta classes
-TODO evaluate options to either replace tokenize or change its behavior for
-multiline parentheses (if they don't close, there must be a break somewhere)
 """
 
 import tokenize
@@ -830,11 +828,18 @@ class PyFuzzyParser(object):
         self.scope = self.top
         self.current = (None, None, None)
 
+        self._tokenize_line_nr = 0
+        self._line_of_tokenize_restart = 0
+
         self.parse()
 
         # delete code again, only the parser needs it
         del self.code
 
+    @property
+    def line_nr(self):
+        return self._line_of_tokenize_restart + self._tokenize_line_nr
+
     def _parsedotname(self, pre_used_token=None):
         """
         The dot name parser parses a name, variable or function and returns
@@ -923,7 +928,7 @@
                name2 = Name(name2, start_indent2, start_line, self.line_nr)
            i = Name(name, start_indent, start_line, self.line_nr)
            imports.append((i, name2))
-            while tok != "," and "\n" not in tok:
+            while tok not in [",", ";", "\n"]:
                token_type, tok, indent = self.next()
            if tok != ",":
                break
@@ -1041,7 +1046,7 @@
        # in a statement.
        breaks = ['\n', ':', ')']
        always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
-                        'finally']
+                        'finally', 'while']
        if added_breaks:
            breaks += added_breaks
 
@@ -1064,6 +1069,19 @@
            elif token_type == tokenize.NAME:
                #print 'is_name', tok
                if tok in ['return', 'yield', 'del', 'raise', 'assert']:
+                    if len(tok_list) > 1:
+                        # This happens when a statement has opening brackets
+                        # which are never closed; here I just start a new
+                        # statement. This is a hack, but I could not come up
+                        # with a better solution.
+                        # This is basically a reset of the statement.
+                        debug.warning('return in statement @%s', tok_list,
+                                      self.line_nr)
+                        tok_list = [tok]
+                        set_vars = []
+                        used_funcs = []
+                        used_vars = []
+                        level = 0
                    set_string = tok + ' '
                    if tok in ['return', 'yield']:
                        is_return = tok
@@ -1124,7 +1142,7 @@
    def next(self):
        """ Generate the next tokenize pattern. """
        type, tok, position, dummy, self.parserline = self.gen.next()
-        (self.line_nr, indent) = position
+        (self._tokenize_line_nr, indent) = position
        if self.line_nr == self.user_line:
            debug.dbg('user scope found [%s] =%s' % \
                (self.parserline.replace('\n', ''), repr(self.scope)))
@@ -1170,7 +1188,7 @@
                # errors. only check for names, because thats relevant here. If
                # some docstrings are not indented, I don't care.
                while indent <= self.scope.indent \
-                        and token_type in [tokenize.NAME] \
+                        and (token_type == tokenize.NAME or tok in ['(', '['])\
                        and self.scope != self.top:
                    debug.warning('syntax error: dedent @%s - %s<=%s', \
                        (self.line_nr, indent, self.scope.indent))
@@ -1204,7 +1222,7 @@
                    for m, alias in imports:
                        i = Import(indent, start_line, self.line_nr, m, alias)
                        self.scope.add_import(i)
-                        debug.dbg("new import: %s" % (i))
+                        debug.dbg("new import: %s" % (i), self.current)
                    self.freshscope = False
                elif tok == 'from':
                    # take care for relative imports
@@ -1293,6 +1311,7 @@
                    # by the statement parser.
                    stmt, tok = self._parse_statement(self.current)
                    if stmt:
+                        debug.dbg('new stmt', stmt)
                        self.scope.add_statement(stmt)
                        self.freshscope = False
                else:
@@ -1303,6 +1322,21 @@
                            self.line_nr)
            except StopIteration:  # thrown on EOF
                break
+            except tokenize.TokenError:
+                # We just ignore this error; I try to handle it earlier, as
+                # well as possible.
+                debug.warning('parentheses not closed error')
+            except IndentationError:
+                # This is an error that tokenize may produce, because the code
+                # is not indented as it should be. Here we just ignore this
+                # line and restart the parser.
+                # (This is a rather unlikely error; for normal code, tokenize
+                # seems to be pretty tolerant.)
+                self._line_of_tokenize_restart = self.line_nr + 1
+                self._tokenize_line_nr = 0
+                debug.warning('indentation error on line %s, ignoring it' %
+                              (self.line_nr))
+                self.gen = tokenize.generate_tokens(buf.readline)
        #except:
        #    debug.dbg("parse error: %s, %s @ %s" %
        #              (sys.exc_info()[0], sys.exc_info()[1], self.parserline))
diff --git a/test.py b/test.py
index a4eee565..9516d4e8 100644
--- a/test.py
+++ b/test.py
@@ -131,11 +131,11 @@ class c1():
     c2,c5 = c1(), c1().c3()
     def c3(self):
         import os as c4
         #from parsing import Scope as c4
-        c5 = c4
-        c5 = 1
+        c5 = 1
+        c5 = c4(
+        if 1:
+            print 1
         return c5+'asdf'
-
-        (c1().c2.\
         c, 1, c3())
         [0].pop()
@@ -146,5 +146,5 @@ asdf = c1; asdf2 = asdf
 b= asdf2
 #import parsing as test
 c = b().c3()
-
-1.0.fromhex(); from flask import Flask; Flask.
+1.0.fromhex(); import flask ; flsk = flask.Flask + flask.Request;
+c2(flask().
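
A note on the parsing.py changes above, sketched in modern Python 3: `next()` now writes the tokenizer's position into `_tokenize_line_nr`, and the `line_nr` property adds `_line_of_tokenize_restart`, so that when an `IndentationError` forces `tokenize.generate_tokens` to be restarted mid-file, reported line numbers stay global. The following self-contained sketch shows the same bookkeeping outside the parser; the names `RestartingTokenizer` and `tokens` are mine for illustration, not jedi's API, and the recovery is approximate in the same way the diff's is.

import tokenize
from io import StringIO

class RestartingTokenizer:
    """Keep global line numbers valid across tokenize restarts."""

    def __init__(self, source):
        self.buf = StringIO(source)
        self.restart_offset = 0   # plays the role of _line_of_tokenize_restart
        self.local_line = 0       # plays the role of _tokenize_line_nr
        self.gen = tokenize.generate_tokens(self.buf.readline)

    @property
    def line_nr(self):
        # global line number = lines consumed before the restart + local count
        return self.restart_offset + self.local_line

    def tokens(self):
        while True:
            try:
                for typ, tok, (line, _col), _end, _raw in self.gen:
                    self.local_line = line
                    yield typ, tok, self.line_nr
                return  # clean EOF
            except (tokenize.TokenError, IndentationError):
                # tokenize has already consumed the offending line when it
                # raises, so a fresh generator on the same buffer continues
                # one line further down; the +1 accounts for that
                self.restart_offset = self.line_nr + 1
                self.local_line = 0
                self.gen = tokenize.generate_tokens(self.buf.readline)

source = "def f():\n    return 1\n   broken\nlast = 1\n"
for typ, tok, line in RestartingTokenizer(source).tokens():
    if typ == tokenize.NAME:
        print(line, repr(tok))  # 'last' is still reported on line 4

The `+ 1` mirrors `_line_of_tokenize_restart = self.line_nr + 1` in the diff: since the bad line was already read, the restarted generator picks up below it.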
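
The functions.py hunk and the new `1.0.fromhex()` line in test.py target the same corner case: the text left of the cursor is tokenized in reverse, so '1.0.fromhex' arrives as 'xehmorf.0.1', and the dot separating the name from the float is swallowed into a NUMBER token ('.0'). The `force_point` check now accepts such a token instead of breaking. Below is a minimal sketch of that loop; `path_before_cursor` is a made-up name, and NUMBER is accepted alongside NAME where the real code also handles strings and brackets.

import tokenize
from io import StringIO

def path_before_cursor(text):
    """Return the dotted access path that ends at the cursor."""
    reversed_line = text[::-1]
    gen = tokenize.generate_tokens(StringIO(reversed_line + '\n').readline)
    string = ''
    force_point = False
    for token_type, tok, _start, _end, _line in gen:
        if tok == '.':
            force_point = False
        elif force_point:
            if token_type == tokenize.NUMBER and tok[0] == '.':
                # reversed reading glued the path separator onto the number
                force_point = False
            else:
                break
        elif token_type in (tokenize.NAME, tokenize.NUMBER):
            # after a name-like token, only a '.' may continue the path
            force_point = True
        else:
            break
        string += tok
    return string[::-1]

print(path_before_cursor('1.0.fromhex'))         # -> 1.0.fromhex
print(path_before_cursor('flsk = flask.Flask'))  # -> flask.Flask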