move NoErrorTokenizer to the tokenizer module, where it more or less belongs.
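
In practice the move only changes import paths: `NoErrorTokenizer` (and the `FLOWS` keyword list) now live in the tokenizer module, and both the parser and the fast parser reach them through it. A minimal sketch of the new spelling, assuming the module layout shown in the hunks below:

    # Before: the wrapper sat next to Parser, referenced by its bare name.
    # After: it is reached through the tokenizer module.
    from jedi.parser import tokenizer as tokenize

    wrapper_cls = tokenize.NoErrorTokenizer  # FLOWS also lives here now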
@@ -27,9 +27,6 @@ from jedi.parser import token as token_pr
 from jedi.parser import tokenizer as tokenize
 
 
-FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
-
-
 class Parser(object):
     """
     This class is used to parse a Python file, it then divides them into a
@@ -61,7 +58,7 @@ class Parser(object):
 
         source = source + '\n'  # end with \n, because the parser needs it
         buf = StringIO(source)
-        self._gen = NoErrorTokenizer(buf.readline, offset, is_fast_parser)
+        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
         self.top_module = top_module or self.module
         try:
             self._parse()
@@ -694,104 +691,3 @@ class Parser(object):
                               self.start_pos[0])
                 continue
             self.no_docstr = False
-
-
-class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = tokenize.generate_tokens(readline)
-        self.offset = offset
-        self.closed = False
-        self.is_first = True
-        self.push_backs = []
-
-        # fast parser options
-        self.is_fast_parser = is_fast_parser
-        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
-        self.in_flow = False
-        self.new_indent = False
-        self.parser_indent = self.old_parser_indent = 0
-        self.is_decorator = False
-        self.first_stmt = True
-
-    def push_last_back(self):
-        self.push_backs.append(self.current)
-
-    def next(self):
-        """ Python 2 Compatibility """
-        return self.__next__()
-
-    def __next__(self):
-        if self.closed:
-            raise common.MultiLevelStopIteration()
-        if self.push_backs:
-            return self.push_backs.pop(0)
-
-        self.last_previous = self.previous
-        self.previous = self.current
-        self.current = next(self.gen)
-        c = list(self.current)
-
-        if c[0] == tokenize.ENDMARKER:
-            self.current = self.previous
-            self.previous = self.last_previous
-            raise common.MultiLevelStopIteration()
-
-        # this is exactly the same check as in fast_parser, but this time with
-        # tokenize and therefore precise.
-        breaks = ['def', 'class', '@']
-
-        if self.is_first:
-            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
-            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
-            self.is_first = False
-        else:
-            c[2] = self.offset[0] + c[2][0], c[2][1]
-            c[3] = self.offset[0] + c[3][0], c[3][1]
-        self.current = c
-
-        def close():
-            if not self.first_stmt:
-                self.closed = True
-                raise common.MultiLevelStopIteration()
-        # ignore indents/comments
-        if self.is_fast_parser \
-                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
-                                         tokenize.NEWLINE, tokenize.DEDENT) \
-                and c[0] not in (
-                    tokenize.COMMENT,
-                    tokenize.INDENT,
-                    tokenize.NL,
-                    tokenize.NEWLINE,
-                    tokenize.DEDENT
-                ):
-            # print c, tokenize.tok_name[c[0]]
-
-            tok = c[1]
-            indent = c[2][1]
-            if indent < self.parser_indent:  # -> dedent
-                self.parser_indent = indent
-                self.new_indent = False
-                if not self.in_flow or indent < self.old_parser_indent:
-                    close()
-                self.in_flow = False
-            elif self.new_indent:
-                self.parser_indent = indent
-                self.new_indent = False
-
-            if not self.in_flow:
-                if tok in FLOWS or tok in breaks:
-                    self.in_flow = tok in FLOWS
-                    if not self.is_decorator and not self.in_flow:
-                        close()
-                    self.is_decorator = '@' == tok
-                    if not self.is_decorator:
-                        self.old_parser_indent = self.parser_indent
-                        self.parser_indent += 1  # new scope: must be higher
-                        self.new_indent = True
-
-            if tok != '@':
-                if self.first_stmt and not self.new_indent:
-                    self.parser_indent = indent
-                self.first_stmt = False
-
-        return c
@@ -9,8 +9,8 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.parser import Parser
 from jedi.parser import representation as pr
+from jedi.parser import tokenizer as tokenize
 from jedi import cache
-from jedi import common
 
 
 SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
@@ -256,7 +256,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                     parts.append(txt)
                 current_lines[:] = []
 
-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
+        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
 
         self._lines = code.splitlines()
         current_lines = []
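
For reference, the pattern built here is unchanged by the move; with `FLOWS` as defined in the tokenizer hunk further down, `'|'.join(tokenize.FLOWS)` interpolates to:

    # r_keyword after % interpolation:
    '^[ \t]*(def|class|@|if|else|elif|while|with|try|except|finally)'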
@@ -291,7 +291,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             if not in_flow:
                 m = re.match(r_keyword, l)
                 if m:
-                    in_flow = m.group(1) in common.FLOWS
+                    in_flow = m.group(1) in tokenize.FLOWS
                     if not is_decorator and not in_flow:
                         add_part()
                         add_to_last = False
@@ -15,6 +15,8 @@ from token import *
 import collections
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
+from jedi import common
+
 namechars = string.ascii_letters + '_'
 
 
@@ -284,3 +286,102 @@ def generate_tokens(readline):
     for indent in indents[1:]:  # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+
+# From here on we have custom stuff (everything before was originally Python
+# internal code).
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
+
+class NoErrorTokenizer(object):
+    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
+        self.readline = readline
+        self.gen = generate_tokens(readline)
+        self.offset = offset
+        self.closed = False
+        self.is_first = True
+        self.push_backs = []
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = self.old_parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
+
+    def push_last_back(self):
+        self.push_backs.append(self.current)
+
+    def next(self):
+        """ Python 2 Compatibility """
+        return self.__next__()
+
+    def __next__(self):
+        if self.closed:
+            raise common.MultiLevelStopIteration()
+        if self.push_backs:
+            return self.push_backs.pop(0)
+
+        self.last_previous = self.previous
+        self.previous = self.current
+        self.current = next(self.gen)
+        c = list(self.current)
+
+        if c[0] == ENDMARKER:
+            self.current = self.previous
+            self.previous = self.last_previous
+            raise common.MultiLevelStopIteration()
+
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        if self.is_first:
+            c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
+            c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
+            self.is_first = False
+        else:
+            c[2] = self.offset[0] + c[2][0], c[2][1]
+            c[3] = self.offset[0] + c[3][0], c[3][1]
+        self.current = c
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise common.MultiLevelStopIteration()
+        # ignore indents/comments
+        if self.is_fast_parser \
+                and self.previous[0] in (INDENT, NL, None, NEWLINE, DEDENT) \
+                and c[0] not in (COMMENT, INDENT, NL, NEWLINE, DEDENT):
+            # print c, tok_name[c[0]]
+
+            tok = c[1]
+            indent = c[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow or indent < self.old_parser_indent:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+                self.new_indent = False
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.old_parser_indent = self.parser_indent
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                if self.first_stmt and not self.new_indent:
+                    self.parser_indent = indent
+                self.first_stmt = False
+
+        return c
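
To close, a minimal driver for the moved class, mirroring how `Parser.__init__` feeds it (second hunk above). The sample `source` and the `io.StringIO` import are assumptions for the sketch; note that exhaustion is signalled by `common.MultiLevelStopIteration` rather than `StopIteration`, and that only the first token gets the column offset applied (later tokens only shift by rows, since embedded code only changes the first line's columns):

    from io import StringIO  # assumption; the Python 2 code of this era would use its own shims

    from jedi import common
    from jedi.parser import tokenizer as tokenize

    source = "def f():\n    pass"             # hypothetical input
    buf = StringIO(source + '\n')              # the parser appends '\n' before tokenizing
    gen = tokenize.NoErrorTokenizer(buf.readline, offset=(10, 0))
    try:
        while True:
            typ, tok, start, end, line = next(gen)  # same 5-tuple shape as generate_tokens
            print(start, tok)                       # rows come back shifted by the offset
    except common.MultiLevelStopIteration:
        pass                                        # raised at ENDMARKER instead of StopIteration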