moved NoErrorTokenizer to fast.FastTokenizer

Dave Halter
2014-02-24 11:05:31 +01:00
parent 9257062910
commit 7db090a48a
3 changed files with 87 additions and 82 deletions

jedi/api.py

@@ -15,7 +15,7 @@ from itertools import chain
 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
-from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
+from jedi.parser.tokenize import source_tokens
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -229,7 +229,6 @@ class Script(object):
     def _get_under_cursor_stmt(self, cursor_txt):
-        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
+        tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
         r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
            stmt = r.module.statements[0]
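
With NoErrorTokenizer gone from jedi.parser.tokenize, the plain parsing path in api.py hands the raw generator from source_tokens() straight to Parser. A minimal sketch of that call shape (parse_snippet is a hypothetical helper name, not part of this commit):

    from jedi.parser import Parser
    from jedi.parser.tokenize import source_tokens

    def parse_snippet(code, line_offset=0):
        # source_tokens() yields TokenInfo tuples; Parser consumes any such
        # iterable passed through its `tokenizer` keyword argument.
        tokens = source_tokens(code, line_offset)
        return Parser(code, no_docstr=True, tokenizer=tokens)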

jedi/parser/fast.py

@@ -12,6 +12,8 @@ from jedi.parser import Parser
 from jedi.parser import representation as pr
 from jedi.parser import tokenize
 from jedi import cache
+from jedi.parser.tokenize import (source_tokens, TokenInfo, FLOWS, NEWLINE,
+                                  COMMENT, ENDMARKER)


 class Module(pr.Simple, pr.Module):
@@ -362,7 +364,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             if nodes[index].code != code:
                 raise ValueError()
         except ValueError:
-            tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+            tokenizer = FastTokenizer(parser_code, line_offset, True)
             p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                        top_module=self.module, no_docstr=no_docstr,
                        is_fast=True, offset=line_offset)
@@ -382,3 +384,80 @@ class FastParser(use_metaclass(CachedFastParser)):
         self.module.reset_caches()
         if self.current_node is not None:
             self.current_node.reset_contents()
+
+
+class FastTokenizer(object):
+    """
+    Breaks when certain conditions are met, i.e. a new function or class opens.
+    """
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
+        self.closed = False
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
+
+    def next(self):
+        """ Python 2 Compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+
+        current = next(self.gen)
+        if current[0] == ENDMARKER:
+            raise common.MultiLevelStopIteration()
+
+        self.previous = self.current
+        self.current = current
+
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+
+        # ignore comments / newlines
+        if self.is_fast_parser \
+                and self.previous[0] in (None, NEWLINE) \
+                and current[0] not in (COMMENT, NEWLINE):
+            # print c, tok_name[c[0]]
+            tok = current[1]
+            indent = current[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+
+        return current
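
A rough usage sketch (not part of the commit): with is_fast_parser=True, FastTokenizer yields tokens only until a new top-level def or class begins, or the source dedents out of the current scope, and then raises common.MultiLevelStopIteration so that FastParser can split a module into independently parsed and cached chunks.

    from jedi import common
    from jedi.parser.fast import FastTokenizer

    code = "def first():\n    pass\ndef second():\n    pass\n"
    tok = FastTokenizer(code, is_fast_parser=True)
    chunk = []
    try:
        while True:
            chunk.append(next(tok))
    except common.MultiLevelStopIteration:
        # Stopped at `def second`: chunk holds only the tokens of `first`.
        pass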

jedi/parser/tokenize.py

@@ -19,6 +19,12 @@ cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 from jedi import common
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
 namechars = string.ascii_letters + '_'
@@ -230,82 +236,3 @@ def generate_tokens(readline, line_offset=0):
             yield TokenInfo(OP, token, spos, epos)

     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0))
-
-
-# From here on we have custom stuff (everything before was originally Python
-# internal code).
-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, source, line_offset=0, is_fast_parser=False):
-        self.source = source
-        self.gen = source_tokens(source, line_offset)
-        self.closed = False
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-
-        current = next(self.gen)
-        if current[0] == ENDMARKER:
-            raise common.MultiLevelStopIteration()
-
-        self.previous = self.current
-        self.current = current
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-
-        # ignore comments / newlines
-        if self.is_fast_parser \
-                and self.previous[0] in (None, NEWLINE) \
-                and current[0] not in (COMMENT, NEWLINE):
-            # print c, tok_name[c[0]]
-            tok = current[1]
-            indent = current[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-
-        return current
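
The inline comment above ("exactly the same check as in fast_parser, but this time with tokenize and therefore precise") refers to detecting scope boundaries from token positions instead of raw source lines. A standalone illustration of that idea using only the stdlib tokenize module (the helper name is mine, not jedi's):

    import io
    import tokenize

    def top_level_boundaries(source):
        # A `def`/`class` NAME token that starts at column 0 marks the
        # beginning of a new top-level scope.
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if tok.type == tokenize.NAME and tok.string in ('def', 'class') \
                    and tok.start[1] == 0:
                yield tok.start[0]  # 1-based line number of the boundary

    print(list(top_level_boundaries("def a():\n    pass\nclass B:\n    pass\n")))
    # -> [1, 3]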