moved NoErrorTokenizer to fast.FastTokenizer

Dave Halter
2014-02-24 11:05:31 +01:00
parent 9257062910
commit 7db090a48a
3 changed files with 87 additions and 82 deletions

View File

@@ -15,7 +15,7 @@ from itertools import chain
 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
-from jedi.parser.tokenize import source_tokens, NoErrorTokenizer
+from jedi.parser.tokenize import source_tokens
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -229,7 +229,6 @@ class Script(object):
     def _get_under_cursor_stmt(self, cursor_txt):
         tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
-        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
         r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
             stmt = r.module.statements[0]

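Note: after this hunk, _get_under_cursor_stmt passes the generator returned by source_tokens straight to Parser; the deleted second assignment had immediately shadowed that generator with a NoErrorTokenizer instance, leaving the first one dead. A minimal sketch of the duck typing this relies on, using the stdlib tokenizer and invented names (make_tokens, toy_parse) rather than jedi's internals:

    import io
    import tokenize

    def make_tokens(source):
        # stand-in for jedi.parser.tokenize.source_tokens: a plain generator
        return tokenize.generate_tokens(io.StringIO(source).readline)

    def toy_parse(token_iter):
        # a parser only needs something it can pull tokens from,
        # not a dedicated tokenizer class
        return [tok.string for tok in token_iter if tok.type == tokenize.NAME]

    print(toy_parse(make_tokens("x = 1\ny = 2\n")))  # -> ['x', 'y']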
View File

@@ -12,6 +12,8 @@ from jedi.parser import Parser
 from jedi.parser import representation as pr
 from jedi.parser import tokenize
 from jedi import cache
+from jedi.parser.tokenize import (source_tokens, TokenInfo, FLOWS, NEWLINE,
+                                  COMMENT, ENDMARKER)
 
 
 class Module(pr.Simple, pr.Module):
@@ -362,7 +364,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                 if nodes[index].code != code:
                     raise ValueError()
             except ValueError:
-                tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+                tokenizer = FastTokenizer(parser_code, line_offset, True)
                 p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                            top_module=self.module, no_docstr=no_docstr,
                            is_fast=True, offset=line_offset)
@@ -382,3 +384,80 @@ class FastParser(use_metaclass(CachedFastParser)):
         self.module.reset_caches()
         if self.current_node is not None:
             self.current_node.reset_contents()
+
+
+class FastTokenizer(object):
+    """
+    Breaks when certain conditions are met, i.e. a new function or class opens.
+    """
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
+        self.closed = False
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
+
+    def next(self):
+        """ Python 2 Compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+
+        current = next(self.gen)
+        if current[0] == ENDMARKER:
+            raise common.MultiLevelStopIteration()
+
+        self.previous = self.current
+        self.current = current
+
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+
+        # ignore comments/ newlines
+        if self.is_fast_parser \
+                and self.previous[0] in (None, NEWLINE) \
+                and current[0] not in (COMMENT, NEWLINE):
+            # print c, tok_name[c[0]]
+            tok = current[1]
+            indent = current[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+
+        return current

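Note: the class moved here, FastTokenizer, wraps the raw token stream and raises common.MultiLevelStopIteration as soon as a new top-level def, class or decorator opens (tracked via the tokens' start columns), so the fast parser can re-parse the module one top-level block at a time. Below is a minimal, self-contained sketch of that break condition; it uses the stdlib tokenizer and a deliberately simplified rule (stop at the second column-0 def/class), so it illustrates the idea rather than reproducing jedi's exact logic:

    import io
    import tokenize

    def first_block_tokens(source):
        # yield tokens until a *second* top-level def/class begins
        seen_stmt = False
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            if (tok.type == tokenize.NAME and tok.start[1] == 0
                    and tok.string in ('def', 'class')):
                if seen_stmt:
                    return  # a new top-level scope opens: stop here
                seen_stmt = True
            yield tok

    src = "def a():\n    pass\n\ndef b():\n    pass\n"
    print([t.string for t in first_block_tokens(src)
           if t.type == tokenize.NAME])
    # -> ['def', 'a', 'pass'] (tokens of the first block only)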
View File

@@ -19,6 +19,12 @@ cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 from jedi import common
 
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
 namechars = string.ascii_letters + '_'
@@ -230,82 +236,3 @@ def generate_tokens(readline, line_offset=0):
             yield TokenInfo(OP, token, spos, epos)
 
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0))
-
-
-# From here on we have custom stuff (everything before was originally Python
-# internal code).
-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, source, line_offset=0, is_fast_parser=False):
-        self.source = source
-        self.gen = source_tokens(source, line_offset)
-        self.closed = False
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = TokenInfo(None, None, (0, 0), (0, 0))
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-
-        current = next(self.gen)
-        if current[0] == ENDMARKER:
-            raise common.MultiLevelStopIteration()
-
-        self.previous = self.current
-        self.current = current
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-
-        # ignore comments/ newlines
-        if self.is_fast_parser \
-                and self.previous[0] in (None, NEWLINE) \
-                and current[0] not in (COMMENT, NEWLINE):
-            # print c, tok_name[c[0]]
-            tok = current[1]
-            indent = current[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-
-        return current
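
Note: in both the old and the new location, close() raises common.MultiLevelStopIteration instead of plain StopIteration, presumably so the parser, which consumes tokens in nested loops, can be unwound in one step. A toy illustration of that pattern; the exception class below is a local stand-in for jedi's common.MultiLevelStopIteration:

    class MultiLevelStopIteration(Exception):
        # local stand-in for jedi.common.MultiLevelStopIteration
        pass

    def tokens():
        yield 'def'
        yield 'f'
        raise MultiLevelStopIteration()

    def parse():
        gen = tokens()
        try:
            while True:          # outer loop: one statement per pass
                for tok in gen:  # inner loop: tokens of one statement
                    print('token:', tok)
        except MultiLevelStopIteration:
            print('tokenizer closed; both loops unwound at once')

    parse()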