Mirror of https://github.com/davidhalter/jedi.git, synced 2025-12-06 22:14:27 +08:00
the parser should now be able to ignore single opening parentheses
@@ -3,6 +3,8 @@ follow_statement -> follow_call -> follow_paths -> follow_path
 'follow_import'
 
 `get_names_for_scope` and `get_scopes_for_name` are search functions
 
+TODO include super classes
 """
 import itertools
+

ftest.py (3 changes)

@@ -2,8 +2,9 @@
 
 import functions
 
-#functions.debug.debug_function = functions.debug.print_to_stdout
+functions.debug.debug_function = functions.debug.print_to_stdout
 #functions.debug.ignored_modules += ['parsing', 'builtin']
+functions.debug.ignored_modules += ['parsing', 'builtin', 'evaluate', 'modules']
 functions.modules.module_find_path.insert(0, '.')
 
 f_name = 'test.py'
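
The edited lines switch on jedi's debug output: a module-level hook is assigned a printer function, and more modules are added to the ignore list. A minimal sketch of that callback-hook pattern, with hypothetical stand-ins for the real functions.debug module:

debug_function = None                      # the hook; None means debugging is off
ignored_modules = ['parsing', 'builtin']   # sources whose messages are dropped

def print_to_stdout(*args):
    """Default sink: write debug messages to stdout."""
    print(*args)

def dbg(module, *args):
    # fire the installed hook, unless the calling module is ignored
    if debug_function is not None and module not in ignored_modules:
        debug_function(*args)

# a consumer enables debugging exactly the way ftest.py does:
debug_function = print_to_stdout
dbg('evaluate', 'new import: os')   # printed
dbg('parsing', 'details...')        # suppressed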

@@ -58,10 +58,11 @@ class FileWithCursor(modules.File):
 
         gen = tokenize.generate_tokens(fetch_line)
         # TODO can happen: raise TokenError, ("EOF in multi-line statement"
+        # where???
         string = ''
         level = 0
         for token_type, tok, start, end, line in gen:
-            #print token_type, tok, line
+            #print token_type, tok, force_point
             if level > 0:
                 if tok in close_brackets:
                     level += 1
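
The TODO above points at a real tokenize failure mode: if the text handed to generate_tokens contains a bracket that never closes, exhausting the generator raises tokenize.TokenError ("EOF in multi-line statement"). A quick Python 3 demonstration (the code in the diff itself is Python 2 era):

import io
import tokenize

source = "c5 = c4(\n"   # an opening bracket that is never closed
gen = tokenize.generate_tokens(io.StringIO(source).readline)
try:
    for token_type, tok, start, end, line in gen:
        print(tokenize.tok_name[token_type], repr(tok))
except tokenize.TokenError as err:
    print('TokenError:', err)   # EOF in multi-line statement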
@@ -70,10 +71,11 @@ class FileWithCursor(modules.File):
             elif tok == '.':
                 force_point = False
             elif force_point:
-                if tok != '.':
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if not (token_type == tokenize.NUMBER and tok[0] == '.'):
-                        #print 'break2', token_type, tok
-                        break
+                # it is reversed, therefore a number is getting recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
+                else:
+                    #print 'break2', token_type, tok
+                    break
             elif tok in close_brackets:
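
Context for the force_point logic: this loop walks the tokens of the line in reverse, so for source like 1.0.fromhex() the fractional part of the float arrives first, and tokenize reports a NUMBER token whose text starts with a dot, which is exactly what the rewritten condition tests. The tokenizer behaviour can be checked directly in Python 3:

import io
import tokenize

for tok in tokenize.generate_tokens(io.StringIO(".0 x").readline):
    if tok.type == tokenize.NUMBER:
        # prints '.0' True: a NUMBER token may begin with the decimal point
        print(repr(tok.string), tok.string[0] == '.')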

parsing.py (48 changes)

@@ -30,8 +30,6 @@ Ignored statements:
 
 TODO take special care for future imports
 TODO check meta classes
-TODO evaluate options to either replace tokenize or change its behavior for
-multiline parentheses (if they don't close, there must be a break somewhere)
 """
 
 import tokenize
@@ -830,11 +828,18 @@ class PyFuzzyParser(object):
         self.scope = self.top
         self.current = (None, None, None)
 
+        self._tokenize_line_nr = 0
+        self._line_of_tokenize_restart = 0
+
         self.parse()
 
         # delete code again, only the parser needs it
         del self.code
 
+    @property
+    def line_nr(self):
+        return self._line_of_tokenize_restart + self._tokenize_line_nr
+
     def _parsedotname(self, pre_used_token=None):
         """
         The dot name parser parses a name, variable or function and returns
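
The two new attributes split the line counter: tokenize reports positions relative to wherever its generator was last (re)started, so the absolute line is the restart offset plus the tokenizer-relative number. A standalone sketch of that bookkeeping (attribute names mirror the diff; the class around them is hypothetical):

class LineCounter:
    def __init__(self):
        self._tokenize_line_nr = 0          # line as reported by tokenize
        self._line_of_tokenize_restart = 0  # absolute line of the last restart

    @property
    def line_nr(self):
        return self._line_of_tokenize_restart + self._tokenize_line_nr

c = LineCounter()
c._line_of_tokenize_restart = 12   # the tokenizer was restarted after line 12
c._tokenize_line_nr = 3            # it is now on its third line
print(c.line_nr)                   # 15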
@@ -923,7 +928,7 @@ class PyFuzzyParser(object):
                 name2 = Name(name2, start_indent2, start_line, self.line_nr)
             i = Name(name, start_indent, start_line, self.line_nr)
             imports.append((i, name2))
-            while tok != "," and "\n" not in tok:
+            while tok not in [",", ";", "\n"]:
                 token_type, tok, indent = self.next()
             if tok != ",":
                 break
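
The rewritten condition additionally stops on ';', which matters once several statements share one line (compare the `1.0.fromhex(); import flask ; ...` line added to test.py below). The two predicates differ exactly there:

tok = ';'
print(tok != "," and "\n" not in tok)   # True  -> the old loop kept scanning
print(tok not in [",", ";", "\n"])      # False -> the new loop stops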
@@ -1041,7 +1046,7 @@ class PyFuzzyParser(object):
         # in a statement.
         breaks = ['\n', ':', ')']
         always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
-                        'finally']
+                        'finally', 'while']
         if added_breaks:
             breaks += added_breaks
 
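
The distinction between the two lists: a token in breaks only ends the statement at bracket level zero, while an always_break token (now including 'while') terminates it even inside an unclosed bracket. A minimal, hypothetical sketch of that rule, not the parser's actual _parse_statement:

def split_statement(tokens, added_breaks=()):
    breaks = {'\n', ':', ')'} | set(added_breaks)
    always_break = {';', 'import', 'from', 'class', 'def', 'try',
                    'except', 'finally', 'while'}
    level, stmt = 0, []
    for tok in tokens:
        if tok in always_break:
            break                      # ends the statement even inside brackets
        if tok in ('(', '[', '{'):
            level += 1
        elif tok in (')', ']', '}'):
            level -= 1
        if level <= 0 and tok in breaks:
            break
        stmt.append(tok)
    return stmt

# a 'while' following an unclosed bracket still ends the statement:
print(split_statement(['x', '=', 'f', '(', 'while', 'True']))   # ['x', '=', 'f', '(']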
@@ -1064,6 +1069,19 @@ class PyFuzzyParser(object):
             elif token_type == tokenize.NAME:
                 #print 'is_name', tok
                 if tok in ['return', 'yield', 'del', 'raise', 'assert']:
+                    if len(tok_list) > 1:
+                        # this happens, when a statement has opening brackets,
+                        # which are not closed again, here I just start a new
+                        # statement. This is a hack, but I could not come up
+                        # with a better solution.
+                        # This is basically a reset of the statement.
+                        debug.warning('return in statement @%s', tok_list,
+                                      self.line_nr)
+                        tok_list = [tok]
+                        set_vars = []
+                        used_funcs = []
+                        used_vars = []
+                        level = 0
                     set_string = tok + ' '
                     if tok in ['return', 'yield']:
                         is_return = tok
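
Why this reset is needed: after an unclosed bracket, tokenize treats the following physical lines as a continuation of the same logical line, so a keyword like return arrives as an ordinary NAME token in the middle of the half-finished statement. Observable in Python 3:

import io
import tokenize

src = "c5 = c4(\nreturn c5\n"
names = []
try:
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        if tok.type == tokenize.NAME:
            names.append(tok.string)
except tokenize.TokenError:
    pass               # EOF arrives with the bracket still open
print(names)           # ['c5', 'c4', 'return', 'c5'], 'return' mid-statement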
@@ -1124,7 +1142,7 @@ class PyFuzzyParser(object):
     def next(self):
         """ Generate the next tokenize pattern. """
         type, tok, position, dummy, self.parserline = self.gen.next()
-        (self.line_nr, indent) = position
+        (self._tokenize_line_nr, indent) = position
         if self.line_nr == self.user_line:
             debug.dbg('user scope found [%s] =%s' % \
                       (self.parserline.replace('\n', ''), repr(self.scope)))
@@ -1170,7 +1188,7 @@ class PyFuzzyParser(object):
             # errors. only check for names, because thats relevant here. If
             # some docstrings are not indented, I don't care.
             while indent <= self.scope.indent \
-                    and token_type in [tokenize.NAME] \
+                    and (token_type == tokenize.NAME or tok in ['(', '['])\
                     and self.scope != self.top:
                 debug.warning('syntax error: dedent @%s - %s<=%s', \
                               (self.line_nr, indent, self.scope.indent))
@@ -1204,7 +1222,7 @@ class PyFuzzyParser(object):
                 for m, alias in imports:
                     i = Import(indent, start_line, self.line_nr, m, alias)
                     self.scope.add_import(i)
-                    debug.dbg("new import: %s" % (i))
+                    debug.dbg("new import: %s" % (i), self.current)
                 self.freshscope = False
             elif tok == 'from':
                 # take care for relative imports
@@ -1293,6 +1311,7 @@ class PyFuzzyParser(object):
                 # by the statement parser.
                 stmt, tok = self._parse_statement(self.current)
                 if stmt:
+                    debug.dbg('new stmt', stmt)
                     self.scope.add_statement(stmt)
                     self.freshscope = False
                 else:
@@ -1303,6 +1322,21 @@ class PyFuzzyParser(object):
                                   self.line_nr)
         except StopIteration: # thrown on EOF
             break
+        except tokenize.TokenError:
+            # We just ignore this error, I try to handle it earlier - as
+            # good as possible
+            debug.warning('parentheses not closed error')
+        except IndentationError:
+            # This is an error, that tokenize may produce, because the code
+            # is not indented as it should. Here it just ignores this line
+            # and restarts the parser.
+            # (This is a rather unlikely error message, for normal code,
+            # tokenize seems to be pretty tolerant)
+            self._line_of_tokenize_restart = self.line_nr + 1
+            self._tokenize_line_nr = 0
+            debug.warning('indentation error on line %s, ignoring it' %
+                          (self.line_nr))
+            self.gen = tokenize.generate_tokens(buf.readline)
         #except:
         #    debug.dbg("parse error: %s, %s @ %s" %
         #        (sys.exc_info()[0], sys.exc_info()[1], self.parserline))
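
The second new handler covers an error tokenize itself raises: dedenting to a column that never opened a block produces an IndentationError, after which the parser records the offset and rebuilds its token generator. Reproducing the error in Python 3 (recent versions still raise it, though the tokenizer internals changed in 3.12):

import io
import tokenize

src = "if x:\n    a = 1\n  b = 2\n"   # dedents to a level that never existed
try:
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        pass
except IndentationError as err:
    print('IndentationError:', err)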

test.py (10 changes)

@@ -131,11 +131,11 @@ class c1():
     c2,c5 = c1(), c1().c3()
     def c3(self):
         import os as c4 #from parsing import Scope as c4
-        c5 = c4
         c5 = 1
+        c5 = c4(
+        if 1:
+            print 1
         return c5+'asdf'
-
-
 (c1().c2.\
 c, 1, c3()) [0].pop()
 
@@ -146,5 +146,5 @@ asdf = c1; asdf2 = asdf
 b= asdf2
 #import parsing as test
 c = b().c3()
-
-1.0.fromhex(); from flask import Flask; Flask.
+1.0.fromhex(); import flask ; flsk = flask.Flask + flask.Request;
+c2(flask().
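
The 1.0.fromhex() calls in the test feed the reversed scan shown earlier: walking backwards from the cursor, the parser has to recognize that the name chain starts at a float literal instead of breaking on the dots. The construct itself is plain Python, since fromhex is a float classmethod and may be called on a literal:

print(1.0.fromhex('0x1.8p1'))   # 3.0, a method call on a float literal is valid syntax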