parsing refactorings, because the errors were not caught the right way

This commit is contained in:
David Halter
2012-09-14 21:00:57 +02:00
parent b073440369
commit fd37ae53f2


@@ -1122,7 +1122,6 @@ class PyFuzzyParser(object):
         # Stuff to fix tokenize errors. The parser is pretty good in tolerating
         # any errors of tokenize and just parse ahead.
-        self._tokenize_start_pos = 0
         self._line_of_tokenize_restart = 0
         self.parse()
@@ -1384,6 +1383,7 @@ class PyFuzzyParser(object):
         tok_list = []
         while not (tok in always_break or tok in breaks and level <= 0):
+            try:
                 set_string = None
                 #print 'parse_stmt', tok, tokenize.tok_name[token_type]
                 tok_list.append(self.current + (self.start_pos,))
@@ -1400,13 +1400,13 @@ class PyFuzzyParser(object):
                 elif token_type == tokenize.NAME:
                     if tok in ['return', 'yield', 'del', 'raise', 'assert']:
                         if len(tok_list) > 1:
-                            # this happens, when a statement has opening brackets,
-                            # which are not closed again, here I just start a new
-                            # statement. This is a hack, but I could not come up
-                            # with a better solution.
+                            # this happens, when a statement has opening
+                            # brackets, which are not closed again, here I just
+                            # start a new statement. This is a hack, but I
+                            # could not come up with a better solution.
                             # This is basically a reset of the statement.
-                            debug.warning('keyword in statement %s@%s', tok_list,
-                                          self.start_pos[0])
+                            debug.warning('keyword in statement %s@%s',
+                                          tok_list, self.start_pos[0])
                             tok_list = [self.current + (self.start_pos,)]
                             set_vars = []
                             used_funcs = []
@@ -1417,7 +1417,8 @@ class PyFuzzyParser(object):
                             is_return = tok
                     elif tok == 'for':
                         # list comprehensions!
-                        middle, tok = self._parse_statement(added_breaks=['in'])
+                        middle, tok = self._parse_statement(
+                            added_breaks=['in'])
                         if tok != 'in' or middle is None:
                             if middle is None:
                                 level -= 1
@@ -1431,8 +1432,8 @@ class PyFuzzyParser(object):
                         if tok not in b or in_clause is None:
                             if in_clause is None:
                                 self.gen.push_back(self._current_full)
-                            debug.warning('list comprehension in_clause %s@%s' %
-                                          (tok, self.start_pos[0]))
+                            debug.warning('list comprehension in_clause %s@%s'
+                                          % (tok, self.start_pos[0]))
                             continue
                         other_level = 0
@@ -1446,12 +1447,14 @@ class PyFuzzyParser(object):
                             if other_level > 0:
                                 break
                         else:
-                            i = 0  # could not detect brackets -> nested list comp
+                            # could not detect brackets -> nested list comp
+                            i = 0
                         tok_list, toks = tok_list[:-i], tok_list[-i:-1]
                         src = ''
                         for t in toks:
-                            src += t[1] if isinstance(t, tuple) else t.get_code()
+                            src += t[1] if isinstance(t, tuple) \
+                                else t.get_code()
                         st = Statement(src, [], [], [], \
                                        toks, first_pos, self.end_pos)
@@ -1465,7 +1468,8 @@ class PyFuzzyParser(object):
                             continue
                     else:
                         n, token_type, tok = self._parsedotname(self.current)
-                        tok_list.pop()  # removed last entry, because we add Name
+                        # removed last entry, because we add Name
+                        tok_list.pop()
                         if n:
                             tok_list.append(n)
                             if tok == '(':
@@ -1492,6 +1496,10 @@ class PyFuzzyParser(object):
                 else:
                     string += tok
                 token_type, tok = self.next()
+            except StopIteration:
+                # comes from tokenizer
+                break
         if not string:
             return None, tok
         #print 'new_stat', string, set_vars, used_funcs, used_vars
@@ -1526,7 +1534,24 @@ class PyFuzzyParser(object):
     def next(self):
         """ Generate the next tokenize pattern. """
-        self._current_full = next(self.gen)
+        try:
+            self._current_full = next(self.gen)
+        except tokenize.TokenError:
+            # We just ignore this error, I try to handle it earlier - as
+            # good as possible
+            debug.warning('parentheses not closed error')
+        except IndentationError:
+            # This is an error, that tokenize may produce, because the code
+            # is not indented as it should. Here it just ignores this line
+            # and restarts the parser.
+            # (This is a rather unlikely error message, for normal code,
+            # tokenize seems to be pretty tolerant)
+            debug.warning('indentation error on line %s, ignoring it' %
+                          (self.start_pos[0]))
+            self._line_of_tokenize_restart = self.start_pos[0] + 1
+            self.gen = PushBackIterator(tokenize_func(self.buf.readline))
+            return self.next()
         type, tok, self._tokenize_start_pos, self._tokenize_end_pos, \
             self.parserline = self._current_full
         if self.user_position and (self.start_pos[0] == self.user_position[0]
@@ -1550,8 +1575,8 @@ class PyFuzzyParser(object):
         :raises: IndentationError
         """
-        buf = BytesIO(self.code)
-        self.gen = PushBackIterator(tokenize_func(buf.readline))
+        self.buf = BytesIO(self.code)
+        self.gen = PushBackIterator(tokenize_func(self.buf.readline))
         extended_flow = ['else', 'elif', 'except', 'finally']
         statement_toks = ['{', '[', '(', '`']
@@ -1725,20 +1750,6 @@ class PyFuzzyParser(object):
                                   self.start_pos[0])
             except StopIteration:  # thrown on EOF
                 break
-            except tokenize.TokenError:
-                # We just ignore this error, I try to handle it earlier - as
-                # good as possible
-                debug.warning('parentheses not closed error')
-            except IndentationError:
-                # This is an error, that tokenize may produce, because the code
-                # is not indented as it should. Here it just ignores this line
-                # and restarts the parser.
-                # (This is a rather unlikely error message, for normal code,
-                # tokenize seems to be pretty tolerant)
-                self._line_of_tokenize_restart = self.start_pos[0] + 1
-                self._tokenize_start_pos = (0, 0)
-                self._tokenize_end_pos = (0, 0)
-                debug.warning('indentation error on line %s, ignoring it' %
-                              (self.start_pos[0]))
-                self.gen = PushBackIterator(tokenize_func(buf.readline))
+        del self.buf
         return self.module
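
The idea of the commit: instead of catching tokenize.TokenError and IndentationError in the outer parse loop (where the bad token has already derailed the current statement), the exceptions are caught inside next(), the single place where tokens are fetched, so every caller only has to deal with StopIteration. The following is a minimal, self-contained sketch of that pattern using only the stdlib tokenize module; the class and names (TolerantTokenStream, line_of_restart) are hypothetical stand-ins, not jedi's API, and the real commit logs the errors via debug.warning and rebuilds its PushBackIterator instead.

# A minimal sketch of the error-handling pattern, assuming Python 3 stdlib.
import tokenize
from io import StringIO


class TolerantTokenStream(object):
    """Yields tokens from possibly broken source instead of crashing."""

    def __init__(self, code):
        self.buf = StringIO(code)
        self.gen = tokenize.generate_tokens(self.buf.readline)
        self.line_of_restart = 0  # stand-in for _line_of_tokenize_restart

    def next(self):
        try:
            return next(self.gen)
        except tokenize.TokenError:
            # e.g. an unclosed bracket at EOF; treat it as end of input
            # (the commit logs a warning and lets the caller carry on).
            raise StopIteration
        except IndentationError:
            # tokenize raises this on a bad dedent; skip the offending
            # line and restart tokenizing from where the buffer stopped.
            self.line_of_restart += 1
            self.gen = tokenize.generate_tokens(self.buf.readline)
            return self.next()


# The parse loop then stays simple: StopIteration is the only exit path.
stream = TolerantTokenStream('x = (1 +\n')   # unclosed parenthesis
while True:
    try:
        tok = stream.next()
    except StopIteration:  # thrown on EOF, or on a swallowed TokenError
        break
    print(tokenize.tok_name[tok.type], repr(tok.string))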