From 7db090a48a1ff25c48acfae3ea4c017a9484c348 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Mon, 24 Feb 2014 11:05:31 +0100
Subject: [PATCH] moved NoErrorTokenizer to fast.FastTokenizer

---
 jedi/api/__init__.py    |  3 +-
 jedi/parser/fast.py     | 81 ++++++++++++++++++++++++++++++++++++++-
 jedi/parser/tokenize.py | 85 +++--------------------------------------
 3 files changed, 87 insertions(+), 82 deletions(-)

diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py
index 42dd09a2..ef51298c 100644
--- a/jedi/api/__init__.py
+++ b/jedi/api/__init__.py
@@ -15,7 +15,7 @@ from itertools import chain
 
 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
-from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
+from jedi.parser.tokenize import source_tokens
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -229,7 +229,6 @@ class Script(object):
 
     def _get_under_cursor_stmt(self, cursor_txt):
         tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
-        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
         r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
             stmt = r.module.statements[0]
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index ced65ce8..c5ff70ea 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -12,6 +12,8 @@ from jedi.parser import Parser
 from jedi.parser import representation as pr
 from jedi.parser import tokenize
 from jedi import cache
+from jedi.parser.tokenize import (source_tokens, TokenInfo, FLOWS, NEWLINE,
+                                  COMMENT, ENDMARKER)
 
 
 class Module(pr.Simple, pr.Module):
@@ -362,7 +364,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                 if nodes[index].code != code:
                     raise ValueError()
             except ValueError:
-                tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+                tokenizer = FastTokenizer(parser_code, line_offset, True)
                 p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                            top_module=self.module, no_docstr=no_docstr,
                            is_fast=True, offset=line_offset)
@@ -382,3 +384,80 @@ class FastParser(use_metaclass(CachedFastParser)):
         self.module.reset_caches()
         if self.current_node is not None:
             self.current_node.reset_contents()
+
+
+class FastTokenizer(object):
+    """
+    Breaks when certain conditions are met, e.g. a new function or class opens.
+    """
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
+        self.closed = False
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
+
+    def next(self):
+        """ Python 2 compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+
+        current = next(self.gen)
+        if current[0] == ENDMARKER:
+            raise common.MultiLevelStopIteration()
+
+        self.previous = self.current
+        self.current = current
+
+        # This is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+
+        # ignore comments / newlines
+        if self.is_fast_parser \
+                and self.previous[0] in (None, NEWLINE) \
+                and current[0] not in (COMMENT, NEWLINE):
+            # print c, tok_name[c[0]]
+
+            tok = current[1]
+            indent = current[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+        return current
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index ace07698..44cb22d0 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -19,6 +19,12 @@ cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 from jedi import common
 
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
 namechars = string.ascii_letters + '_'
@@ -230,82 +236,3 @@ def generate_tokens(readline, line_offset=0):
             yield TokenInfo(OP, token, spos, epos)
 
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0))
-
-
-# From here on we have custom stuff (everything before was originally Python
-# internal code).
-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, source, line_offset=0, is_fast_parser=False):
-        self.source = source
-        self.gen = source_tokens(source, line_offset)
-        self.closed = False
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-
-        current = next(self.gen)
-        if current[0] == ENDMARKER:
-            raise common.MultiLevelStopIteration()
-
-        self.previous = self.current
-        self.current = current
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-
-        # ignore comments/ newlines
-        if self.is_fast_parser \
-                and self.previous[0] in (None, NEWLINE) \
-                and current[0] not in (COMMENT, NEWLINE):
-            # print c, tok_name[c[0]]
-
-            tok = current[1]
-            indent = current[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-        return current
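
Reviewer note (not part of the patch): the break behaviour that moved into
FastTokenizer is easiest to see by driving the tokenizer by hand. The sketch
below is illustrative only; it assumes the post-patch layout (FastTokenizer in
jedi.parser.fast, MultiLevelStopIteration in jedi.common, both as used in the
diff above) and a made-up two-function source string.

from jedi import common
from jedi.parser.fast import FastTokenizer

# Hypothetical input: two top-level functions, which the fast parser
# wants to treat as two independent chunks.
source = 'def first():\n    return 1\n\n\ndef second():\n    return 2\n'

# With is_fast_parser=True the wrapper stops at the next top-level
# def/class/decorator instead of tokenizing the whole file.
tok = FastTokenizer(source, line_offset=0, is_fast_parser=True)
tokens = []
try:
    while True:
        tokens.append(next(tok))
except common.MultiLevelStopIteration:
    pass  # raised when `def second` opens a new chunk (or at ENDMARKER)

print([t[1] for t in tokens])  # token strings belonging to `first` only

The point of close() raising MultiLevelStopIteration is that the Parser wrapped
around this tokenizer never sees `def second`; FastParser then starts a fresh
Parser/FastTokenizer pair on the remaining code, which is what lets it reuse
cached parser nodes for unchanged code (the nodes[index].code != code check in
the hunk above) and re-parse only the parts that changed.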