From e115689b7f6fc0b241c36ec028061e19702d5140 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 5 Jan 2014 13:25:11 +0100 Subject: [PATCH] move NoErrorTokenizer to the parser, where it more or less belongs. --- jedi/common.py | 107 +--------------------------------------- jedi/parser/__init__.py | 107 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 108 deletions(-) diff --git a/jedi/common.py b/jedi/common.py index 54974344..9aa78dfe 100644 --- a/jedi/common.py +++ b/jedi/common.py @@ -5,13 +5,9 @@ import functools import re from ast import literal_eval -from jedi._compatibility import unicode -from jedi.parser import tokenizer as tokenize -from jedi._compatibility import next, reraise +from jedi._compatibility import unicode, next, reraise from jedi import settings -FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally'] - class MultiLevelStopIteration(Exception): """ @@ -87,107 +83,6 @@ class PushBackIterator(object): return self.current -class NoErrorTokenizer(object): - def __init__(self, readline, offset=(0, 0), is_fast_parser=False): - self.readline = readline - self.gen = tokenize.generate_tokens(readline) - self.offset = offset - self.closed = False - self.is_first = True - self.push_backs = [] - - # fast parser options - self.is_fast_parser = is_fast_parser - self.current = self.previous = [None, None, (0, 0), (0, 0), ''] - self.in_flow = False - self.new_indent = False - self.parser_indent = self.old_parser_indent = 0 - self.is_decorator = False - self.first_stmt = True - - def push_last_back(self): - self.push_backs.append(self.current) - - def next(self): - """ Python 2 Compatibility """ - return self.__next__() - - def __next__(self): - if self.closed: - raise MultiLevelStopIteration() - if self.push_backs: - return self.push_backs.pop(0) - - self.last_previous = self.previous - self.previous = self.current - self.current = next(self.gen) - c = list(self.current) - - if c[0] == tokenize.ENDMARKER: - self.current = self.previous - self.previous = self.last_previous - raise MultiLevelStopIteration() - - # this is exactly the same check as in fast_parser, but this time with - # tokenize and therefore precise. - breaks = ['def', 'class', '@'] - - if self.is_first: - c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1] - c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1] - self.is_first = False - else: - c[2] = self.offset[0] + c[2][0], c[2][1] - c[3] = self.offset[0] + c[3][0], c[3][1] - self.current = c - - def close(): - if not self.first_stmt: - self.closed = True - raise MultiLevelStopIteration() - # ignore indents/comments - if self.is_fast_parser \ - and self.previous[0] in (tokenize.INDENT, tokenize.NL, None, - tokenize.NEWLINE, tokenize.DEDENT) \ - and c[0] not in ( - tokenize.COMMENT, - tokenize.INDENT, - tokenize.NL, - tokenize.NEWLINE, - tokenize.DEDENT - ): - # print c, tokenize.tok_name[c[0]] - - tok = c[1] - indent = c[2][1] - if indent < self.parser_indent: # -> dedent - self.parser_indent = indent - self.new_indent = False - if not self.in_flow or indent < self.old_parser_indent: - close() - self.in_flow = False - elif self.new_indent: - self.parser_indent = indent - self.new_indent = False - - if not self.in_flow: - if tok in FLOWS or tok in breaks: - self.in_flow = tok in FLOWS - if not self.is_decorator and not self.in_flow: - close() - self.is_decorator = '@' == tok - if not self.is_decorator: - self.old_parser_indent = self.parser_indent - self.parser_indent += 1 # new scope: must be higher - self.new_indent = True - - if tok != '@': - if self.first_stmt and not self.new_indent: - self.parser_indent = indent - self.first_stmt = False - return c - - @contextlib.contextmanager def scale_speed_settings(factor): a = settings.max_executions diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 93b6ecd0..5801d369 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -27,6 +27,9 @@ from jedi.parser import token as token_pr from jedi.parser import tokenizer as tokenize +FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally'] + + class Parser(object): """ This class is used to parse a Python file, it then divides them into a @@ -58,8 +61,7 @@ class Parser(object): source = source + '\n' # end with \n, because the parser needs it buf = StringIO(source) - self._gen = common.NoErrorTokenizer(buf.readline, offset, - is_fast_parser) + self._gen = NoErrorTokenizer(buf.readline, offset, is_fast_parser) self.top_module = top_module or self.module try: self._parse() @@ -692,3 +694,104 @@ class Parser(object): self.start_pos[0]) continue self.no_docstr = False + + +class NoErrorTokenizer(object): + def __init__(self, readline, offset=(0, 0), is_fast_parser=False): + self.readline = readline + self.gen = tokenize.generate_tokens(readline) + self.offset = offset + self.closed = False + self.is_first = True + self.push_backs = [] + + # fast parser options + self.is_fast_parser = is_fast_parser + self.current = self.previous = [None, None, (0, 0), (0, 0), ''] + self.in_flow = False + self.new_indent = False + self.parser_indent = self.old_parser_indent = 0 + self.is_decorator = False + self.first_stmt = True + + def push_last_back(self): + self.push_backs.append(self.current) + + def next(self): + """ Python 2 Compatibility """ + return self.__next__() + + def __next__(self): + if self.closed: + raise common.MultiLevelStopIteration() + if self.push_backs: + return self.push_backs.pop(0) + + self.last_previous = self.previous + self.previous = self.current + self.current = next(self.gen) + c = list(self.current) + + if c[0] == tokenize.ENDMARKER: + self.current = self.previous + self.previous = self.last_previous + raise common.MultiLevelStopIteration() + + # this is exactly the same check as in fast_parser, but this time with + # tokenize and therefore precise. + breaks = ['def', 'class', '@'] + + if self.is_first: + c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1] + c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1] + self.is_first = False + else: + c[2] = self.offset[0] + c[2][0], c[2][1] + c[3] = self.offset[0] + c[3][0], c[3][1] + self.current = c + + def close(): + if not self.first_stmt: + self.closed = True + raise common.MultiLevelStopIteration() + # ignore indents/comments + if self.is_fast_parser \ + and self.previous[0] in (tokenize.INDENT, tokenize.NL, None, + tokenize.NEWLINE, tokenize.DEDENT) \ + and c[0] not in ( + tokenize.COMMENT, + tokenize.INDENT, + tokenize.NL, + tokenize.NEWLINE, + tokenize.DEDENT + ): + # print c, tokenize.tok_name[c[0]] + + tok = c[1] + indent = c[2][1] + if indent < self.parser_indent: # -> dedent + self.parser_indent = indent + self.new_indent = False + if not self.in_flow or indent < self.old_parser_indent: + close() + self.in_flow = False + elif self.new_indent: + self.parser_indent = indent + self.new_indent = False + + if not self.in_flow: + if tok in FLOWS or tok in breaks: + self.in_flow = tok in FLOWS + if not self.is_decorator and not self.in_flow: + close() + self.is_decorator = '@' == tok + if not self.is_decorator: + self.old_parser_indent = self.parser_indent + self.parser_indent += 1 # new scope: must be higher + self.new_indent = True + + if tok != '@': + if self.first_stmt and not self.new_indent: + self.parser_indent = indent + self.first_stmt = False + return c