1
0
forked from VimPlug/jedi

move NoErrorTokenizer to the tokenizer module, where it more or less belongs.

This commit is contained in:
Dave Halter
2014-01-05 13:34:29 +01:00
parent e115689b7f
commit 261f49d3e2
3 changed files with 105 additions and 108 deletions

View File

@@ -27,9 +27,6 @@ from jedi.parser import token as token_pr
from jedi.parser import tokenizer as tokenize from jedi.parser import tokenizer as tokenize
FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class Parser(object): class Parser(object):
""" """
This class is used to parse a Python file, it then divides them into a This class is used to parse a Python file, it then divides them into a
@@ -61,7 +58,7 @@ class Parser(object):
source = source + '\n' # end with \n, because the parser needs it source = source + '\n' # end with \n, because the parser needs it
buf = StringIO(source) buf = StringIO(source)
self._gen = NoErrorTokenizer(buf.readline, offset, is_fast_parser) self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
self.top_module = top_module or self.module self.top_module = top_module or self.module
try: try:
self._parse() self._parse()
@@ -694,104 +691,3 @@ class Parser(object):
self.start_pos[0]) self.start_pos[0])
continue continue
self.no_docstr = False self.no_docstr = False
class NoErrorTokenizer(object):
    """Adapter around ``tokenize.generate_tokens`` used by the parser.

    It shifts every token position by *offset* (so a sub-source can be
    parsed as if it started somewhere inside a bigger file), supports
    pushing the last token back onto the stream, and — when
    *is_fast_parser* is set — stops early (by raising
    ``common.MultiLevelStopIteration``) once the token stream leaves the
    statement/scope the fast parser is interested in.
    """

    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
        # readline: a file-like ``readline`` callable fed to the tokenizer.
        # offset: (line, column) pair added to every token position.
        self.readline = readline
        self.gen = tokenize.generate_tokens(readline)
        self.offset = offset
        # Set once the fast-parser logic decides no more tokens are needed.
        self.closed = False
        # True until the first token is returned; the column offset is only
        # applied to that first token (later lines start at column 0).
        self.is_first = True
        # Tokens re-queued via push_last_back(); served FIFO before ``gen``.
        self.push_backs = []
        # fast parser options
        self.is_fast_parser = is_fast_parser
        # ``current``/``previous`` mimic the 5-element token layout:
        # (type, string, start_pos, end_pos, line).
        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
        # State machine tracking whether we are inside a flow (if/while/...),
        # whether a new indent level was just opened, and decorator handling.
        self.in_flow = False
        self.new_indent = False
        self.parser_indent = self.old_parser_indent = 0
        self.is_decorator = False
        self.first_stmt = True

    def push_last_back(self):
        # Re-queue the token that was just returned; it will be yielded
        # again by the next ``__next__`` call.
        self.push_backs.append(self.current)

    def next(self):
        """ Python 2 Compatibility """
        return self.__next__()

    def __next__(self):
        if self.closed:
            raise common.MultiLevelStopIteration()
        if self.push_backs:
            # Serve pushed-back tokens first, oldest first.
            return self.push_backs.pop(0)

        self.last_previous = self.previous
        self.previous = self.current
        self.current = next(self.gen)
        c = list(self.current)

        if c[0] == tokenize.ENDMARKER:
            # ENDMARKER is never handed to the caller; roll back the
            # current/previous bookkeeping and signal the end of the stream.
            self.current = self.previous
            self.previous = self.last_previous
            raise common.MultiLevelStopIteration()

        # this is exactly the same check as in fast_parser, but this time with
        # tokenize and therefore precise.
        breaks = ['def', 'class', '@']

        if self.is_first:
            # Only the very first token gets the column offset as well.
            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
            self.is_first = False
        else:
            c[2] = self.offset[0] + c[2][0], c[2][1]
            c[3] = self.offset[0] + c[3][0], c[3][1]
        self.current = c

        def close():
            # Stop the tokenizer — but never before at least one statement
            # has been produced.
            if not self.first_stmt:
                self.closed = True
                raise common.MultiLevelStopIteration()

        # ignore indents/comments
        if self.is_fast_parser \
                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
                                         tokenize.NEWLINE, tokenize.DEDENT) \
                and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
                                 tokenize.NL, tokenize.NEWLINE,
                                 tokenize.DEDENT):
            # print c, tokenize.tok_name[c[0]]
            tok = c[1]
            indent = c[2][1]
            if indent < self.parser_indent:  # -> dedent
                # Dropped below the tracked indentation: unless we are still
                # inside a flow at a deeper level, the scope is finished.
                self.parser_indent = indent
                self.new_indent = False
                if not self.in_flow or indent < self.old_parser_indent:
                    close()
                self.in_flow = False
            elif self.new_indent:
                # First statement of a freshly opened scope fixes its indent.
                self.parser_indent = indent
                self.new_indent = False

            if not self.in_flow:
                if tok in FLOWS or tok in breaks:
                    self.in_flow = tok in FLOWS
                    # A new def/class (not preceded by a decorator) ends the
                    # current parse unit.
                    if not self.is_decorator and not self.in_flow:
                        close()
                    self.is_decorator = '@' == tok
                    if not self.is_decorator:
                        self.old_parser_indent = self.parser_indent
                        self.parser_indent += 1  # new scope: must be higher
                        self.new_indent = True

            if tok != '@':
                if self.first_stmt and not self.new_indent:
                    self.parser_indent = indent
                self.first_stmt = False

        return c

View File

@@ -9,8 +9,8 @@ from jedi._compatibility import use_metaclass
from jedi import settings from jedi import settings
from jedi.parser import Parser from jedi.parser import Parser
from jedi.parser import representation as pr from jedi.parser import representation as pr
from jedi.parser import tokenizer as tokenize
from jedi import cache from jedi import cache
from jedi import common
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns'] SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
@@ -256,7 +256,7 @@ class FastParser(use_metaclass(CachedFastParser)):
parts.append(txt) parts.append(txt)
current_lines[:] = [] current_lines[:] = []
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS) r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
self._lines = code.splitlines() self._lines = code.splitlines()
current_lines = [] current_lines = []
@@ -291,7 +291,7 @@ class FastParser(use_metaclass(CachedFastParser)):
if not in_flow: if not in_flow:
m = re.match(r_keyword, l) m = re.match(r_keyword, l)
if m: if m:
in_flow = m.group(1) in common.FLOWS in_flow = m.group(1) in tokenize.FLOWS
if not is_decorator and not in_flow: if not is_decorator and not in_flow:
add_part() add_part()
add_to_last = False add_to_last = False

View File

@@ -15,6 +15,8 @@ from token import *
import collections import collections
cookie_re = re.compile("coding[:=]\s*([-\w.]+)") cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
from jedi import common
namechars = string.ascii_letters + '_' namechars = string.ascii_letters + '_'
@@ -284,3 +286,102 @@ def generate_tokens(readline):
for indent in indents[1:]: # pop remaining indent levels for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
# From here on we have custom stuff (everything before was originally Python
# internal code).
FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class NoErrorTokenizer(object):
    """Adapter around :func:`generate_tokens` used by the parser.

    It shifts every token position by *offset* (so a sub-source can be
    parsed as if it started somewhere inside a bigger file), supports
    pushing the last token back onto the stream, and — when
    *is_fast_parser* is set — stops early (by raising
    ``common.MultiLevelStopIteration``) once the token stream leaves the
    statement/scope the fast parser is interested in.
    """

    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
        # readline: a file-like ``readline`` callable fed to the tokenizer.
        # offset: (line, column) pair added to every token position.
        self.readline = readline
        self.gen = generate_tokens(readline)
        self.offset = offset
        # Set once the fast-parser logic decides no more tokens are needed.
        self.closed = False
        # True until the first token is returned; the column offset is only
        # applied to that first token (later lines start at column 0).
        self.is_first = True
        # Tokens re-queued via push_last_back(); served FIFO before ``gen``.
        self.push_backs = []
        # fast parser options
        self.is_fast_parser = is_fast_parser
        # ``current``/``previous`` mimic the 5-element token layout:
        # (type, string, start_pos, end_pos, line).
        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
        # State machine tracking whether we are inside a flow (if/while/...),
        # whether a new indent level was just opened, and decorator handling.
        self.in_flow = False
        self.new_indent = False
        self.parser_indent = self.old_parser_indent = 0
        self.is_decorator = False
        self.first_stmt = True

    def push_last_back(self):
        # Re-queue the token that was just returned; it will be yielded
        # again by the next ``__next__`` call.
        self.push_backs.append(self.current)

    def next(self):
        """ Python 2 Compatibility """
        return self.__next__()

    def __next__(self):
        if self.closed:
            raise common.MultiLevelStopIteration()
        if self.push_backs:
            # Serve pushed-back tokens first, oldest first.
            return self.push_backs.pop(0)

        self.last_previous = self.previous
        self.previous = self.current
        self.current = next(self.gen)
        c = list(self.current)

        if c[0] == ENDMARKER:
            # ENDMARKER is never handed to the caller; roll back the
            # current/previous bookkeeping and signal the end of the stream.
            self.current = self.previous
            self.previous = self.last_previous
            raise common.MultiLevelStopIteration()

        # this is exactly the same check as in fast_parser, but this time with
        # tokenize and therefore precise.
        breaks = ['def', 'class', '@']

        if self.is_first:
            # Only the very first token gets the column offset as well.
            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
            self.is_first = False
        else:
            c[2] = self.offset[0] + c[2][0], c[2][1]
            c[3] = self.offset[0] + c[3][0], c[3][1]
        self.current = c

        def close():
            # Stop the tokenizer — but never before at least one statement
            # has been produced.
            if not self.first_stmt:
                self.closed = True
                raise common.MultiLevelStopIteration()

        # ignore indents/comments
        if self.is_fast_parser \
                and self.previous[0] in (INDENT, NL, None, NEWLINE, DEDENT) \
                and c[0] not in (COMMENT, INDENT, NL, NEWLINE, DEDENT):
            # print c, tok_name[c[0]]
            tok = c[1]
            indent = c[2][1]
            if indent < self.parser_indent:  # -> dedent
                # Dropped below the tracked indentation: unless we are still
                # inside a flow at a deeper level, the scope is finished.
                self.parser_indent = indent
                self.new_indent = False
                if not self.in_flow or indent < self.old_parser_indent:
                    close()
                self.in_flow = False
            elif self.new_indent:
                # First statement of a freshly opened scope fixes its indent.
                self.parser_indent = indent
                self.new_indent = False

            if not self.in_flow:
                if tok in FLOWS or tok in breaks:
                    self.in_flow = tok in FLOWS
                    # A new def/class (not preceded by a decorator) ends the
                    # current parse unit.
                    if not self.is_decorator and not self.in_flow:
                        close()
                    self.is_decorator = '@' == tok
                    if not self.is_decorator:
                        self.old_parser_indent = self.parser_indent
                        self.parser_indent += 1  # new scope: must be higher
                        self.new_indent = True

            if tok != '@':
                if self.first_stmt and not self.new_indent:
                    self.parser_indent = indent
                self.first_stmt = False

        return c