diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 5801d369..afb438ce 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -27,9 +27,6 @@
 from jedi.parser import token as token_pr
 from jedi.parser import tokenizer as tokenize
 
-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
 class Parser(object):
     """
     This class is used to parse a Python file, it then divides them into a
@@ -61,7 +58,7 @@ class Parser(object):
 
         source = source + '\n'  # end with \n, because the parser needs it
         buf = StringIO(source)
-        self._gen = NoErrorTokenizer(buf.readline, offset, is_fast_parser)
+        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
         self.top_module = top_module or self.module
         try:
             self._parse()
@@ -694,104 +691,3 @@ class Parser(object):
                               self.start_pos[0])
                 continue
             self.no_docstr = False
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = tokenize.generate_tokens(readline)
-        self.offset = offset
-        self.closed = False
-        self.is_first = True
-        self.push_backs = []
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def push_last_back(self):
-        self.push_backs.append(self.current)
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-        if self.push_backs:
-            return self.push_backs.pop(0)
-
-        self.last_previous = self.previous
-        self.previous = self.current
-        self.current = next(self.gen)
-        c = list(self.current)
-
-        if c[0] == tokenize.ENDMARKER:
-            self.current = self.previous
-            self.previous = self.last_previous
-            raise common.MultiLevelStopIteration()
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        if self.is_first:
-            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
-            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
-            self.is_first = False
-        else:
-            c[2] = self.offset[0] + c[2][0], c[2][1]
-            c[3] = self.offset[0] + c[3][0], c[3][1]
-        self.current = c
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-        # ignore indents/comments
-        if self.is_fast_parser \
-                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
-                                         tokenize.NEWLINE, tokenize.DEDENT) \
-                and c[0] not in (
-                    tokenize.COMMENT,
-                    tokenize.INDENT,
-                    tokenize.NL,
-                    tokenize.NEWLINE,
-                    tokenize.DEDENT
-                ):
-            # print c, tokenize.tok_name[c[0]]
-
-            tok = c[1]
-            indent = c[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-        return c
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index a2a23370..9935acc3 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -9,8 +9,8 @@
 from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.parser import Parser
 from jedi.parser import representation as pr
+from jedi.parser import tokenizer as tokenize
 from jedi import cache
-from jedi import common
 
 SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
@@ -256,7 +256,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                 parts.append(txt)
             current_lines[:] = []
 
-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
+        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
 
         self._lines = code.splitlines()
         current_lines = []
@@ -291,7 +291,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             if not in_flow:
                 m = re.match(r_keyword, l)
                 if m:
-                    in_flow = m.group(1) in common.FLOWS
+                    in_flow = m.group(1) in tokenize.FLOWS
                     if not is_decorator and not in_flow:
                         add_part()
                         add_to_last = False
diff --git a/jedi/parser/tokenizer.py b/jedi/parser/tokenizer.py
index 689a3e43..36e4da84 100644
--- a/jedi/parser/tokenizer.py
+++ b/jedi/parser/tokenizer.py
@@ -15,6 +15,8 @@
 from token import *
 
 import collections
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
+from jedi import common
+
 namechars = string.ascii_letters + '_'
@@ -284,3 +286,102 @@ def generate_tokens(readline):
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
+class NoErrorTokenizer(object):
+    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
+        self.readline = readline
+        self.gen = generate_tokens(readline)
+        self.offset = offset
+        self.closed = False
+        self.is_first = True
+        self.push_backs = []
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
+
+    def push_last_back(self):
+        self.push_backs.append(self.current)
+
+    def next(self):
+        """ Python 2 Compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+        if self.push_backs:
+            return self.push_backs.pop(0)
+
+        self.last_previous = self.previous
+        self.previous = self.current
+        self.current = next(self.gen)
+        c = list(self.current)
+
+        if c[0] == ENDMARKER:
+            self.current = self.previous
+            self.previous = self.last_previous
+            raise common.MultiLevelStopIteration()
+
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        if self.is_first:
+            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
+            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
+            self.is_first = False
+        else:
+            c[2] = self.offset[0] + c[2][0], c[2][1]
+            c[3] = self.offset[0] + c[3][0], c[3][1]
+        self.current = c
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+        # ignore indents/comments
+        if self.is_fast_parser \
+                and self.previous[0] in (INDENT, NL, None, NEWLINE, DEDENT) \
+                and c[0] not in (COMMENT, INDENT, NL, NEWLINE, DEDENT):
+            # print c, tok_name[c[0]]
+
+            tok = c[1]
+            indent = c[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+        return c
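
For anyone reviewing the move, here is a minimal usage sketch (Python 3 syntax, not part of the patch): it drives `tokenize.NoErrorTokenizer` at its new location and shows the `offset` parameter shifting reported token positions, with end-of-input surfacing as `common.MultiLevelStopIteration` rather than a plain `StopIteration`. The sample source string and the printing loop are hypothetical; the imports are exactly the module paths this diff establishes.

```python
from io import StringIO
from token import tok_name

from jedi import common
from jedi.parser import tokenizer as tokenize

# Hypothetical input; any small Python snippet works.
source = "def f():\n    return 1\n"

# offset=(10, 0) pretends the snippet starts at line 10, the way the fast
# parser re-tokenizes a single block of a larger file. Per __next__ above,
# the first token gets both row and column shifted, later tokens only the row.
gen = tokenize.NoErrorTokenizer(StringIO(source).readline, offset=(10, 0))

try:
    while True:
        # Each token is a 5-element sequence: type, string, start, end, line.
        typ, string, start, end, line = gen.next()
        print('%-10s %-12r @ %s' % (tok_name[typ], string, start))
except common.MultiLevelStopIteration:
    # Raised at ENDMARKER (and by close() in fast-parser mode) instead of a
    # bare StopIteration, so it is not silently swallowed by intermediate
    # for loops in the parser.
    pass
```

Since `FLOWS`, `generate_tokens` and `NoErrorTokenizer` now live in the same module, the class can drop the `tokenize.` prefixes, which is why the moved body references `ENDMARKER`, `NL` etc. directly.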