move NoErrorTokenizer to the tokenizer module, where it more or less belongs.

2014-01-05 13:34:29 +01:00
parent e115689b7f
commit 261f49d3e2
3 changed files with 105 additions and 108 deletions
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -27,9 +27,6 @@ from jedi.parser import token as token_pr
 from jedi.parser import tokenizer as tokenize


-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
 class Parser(object):
    """
    This class is used to parse a Python file, it then divides them into a
@@ -61,7 +58,7 @@ class Parser(object):

        source = source + '\n'  # end with \n, because the parser needs it
        buf = StringIO(source)
-        self._gen = NoErrorTokenizer(buf.readline, offset, is_fast_parser)
+        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
        self.top_module = top_module or self.module
        try:
            self._parse()
@@ -694,104 +691,3 @@ class Parser(object):
                                  self.start_pos[0])
                continue
            self.no_docstr = False
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = tokenize.generate_tokens(readline)
-        self.offset = offset
-        self.closed = False
-        self.is_first = True
-        self.push_backs = []
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def push_last_back(self):
-        self.push_backs.append(self.current)
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-        if self.push_backs:
-            return self.push_backs.pop(0)
-
-        self.last_previous = self.previous
-        self.previous = self.current
-        self.current = next(self.gen)
-        c = list(self.current)
-
-        if c[0] == tokenize.ENDMARKER:
-            self.current = self.previous
-            self.previous = self.last_previous
-            raise common.MultiLevelStopIteration()
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        if self.is_first:
-            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
-            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
-            self.is_first = False
-        else:
-            c[2] = self.offset[0] + c[2][0], c[2][1]
-            c[3] = self.offset[0] + c[3][0], c[3][1]
-        self.current = c
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-        # ignore indents/comments
-        if self.is_fast_parser \
-                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
-                                         tokenize.NEWLINE, tokenize.DEDENT) \
-                and c[0] not in (
-                    tokenize.COMMENT,
-                    tokenize.INDENT,
-                    tokenize.NL,
-                    tokenize.NEWLINE,
-                    tokenize.DEDENT
-                ):
-            # print c, tokenize.tok_name[c[0]]
-
-            tok = c[1]
-            indent = c[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-        return c
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -9,8 +9,8 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.parser import Parser
 from jedi.parser import representation as pr
+from jedi.parser import tokenizer as tokenize
 from jedi import cache
-from jedi import common


 SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
@@ -256,7 +256,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                    parts.append(txt)
                current_lines[:] = []

-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
+        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)

        self._lines = code.splitlines()
        current_lines = []
@@ -291,7 +291,7 @@ class FastParser(use_metaclass(CachedFastParser)):
            if not in_flow:
                m = re.match(r_keyword, l)
                if m:
-                    in_flow = m.group(1) in common.FLOWS
+                    in_flow = m.group(1) in tokenize.FLOWS
                    if not is_decorator and not in_flow:
                        add_part()
                        add_to_last = False
--- a/jedi/parser/tokenizer.py
+++ b/jedi/parser/tokenizer.py
@@ -15,6 +15,8 @@ from token import *
 import collections
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")

+from jedi import common
+
 namechars = string.ascii_letters + '_'


@@ -284,3 +286,102 @@ def generate_tokens(readline):
    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
+class NoErrorTokenizer(object):  # iterator wrapping generate_tokens()
+    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
+        self.readline = readline
+        self.gen = generate_tokens(readline)  # underlying token stream
+        self.offset = offset  # (line, column) added to token positions
+        self.closed = False  # True once close() has ended the stream
+        self.is_first = True  # column offset is applied to the first token only
+        self.push_backs = []  # tokens queued to be returned again
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
+        self.in_flow = False  # last FLOWS/break token seen was in FLOWS
+        self.new_indent = False  # next checked token sets parser_indent
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False  # last FLOWS/break token seen was '@'
+        self.first_stmt = True  # no checked token other than '@' seen yet
+
+    def push_last_back(self):
+        self.push_backs.append(self.current)  # handed out again by __next__
+
+    def next(self):
+        """ Python 2 Compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+        if self.push_backs:
+            return self.push_backs.pop(0)  # oldest pushed-back token first
+
+        self.last_previous = self.previous
+        self.previous = self.current
+        self.current = next(self.gen)
+        c = list(self.current)  # mutable copy so the positions can be rewritten
+
+        if c[0] == ENDMARKER:  # end of input: undo the shift above and stop
+            self.current = self.previous
+            self.previous = self.last_previous
+            raise common.MultiLevelStopIteration()
+
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        if self.is_first:  # first token: shift both line and column
+            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
+            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
+            self.is_first = False
+        else:  # later tokens: shift the line only
+            c[2] = self.offset[0] + c[2][0], c[2][1]
+            c[3] = self.offset[0] + c[3][0], c[3][1]
+        self.current = c
+
+        def close():  # ends the stream, except while first_stmt is still set
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+        # ignore indents/comments
+        if self.is_fast_parser \
+                and self.previous[0] in (INDENT, NL, None, NEWLINE, DEDENT) \
+                and c[0] not in (COMMENT, INDENT, NL, NEWLINE, DEDENT):
+            # print c, tok_name[c[0]]
+
+            tok = c[1]  # token string
+            indent = c[2][1]  # column part of the (already shifted) c[2] position
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+        return c