forked from VimPlug/jedi
on the way to a better fast_parser - improved a lot of the positioning stuff
@@ -6,6 +6,8 @@ from _compatibility import next
 import debug
 import settings
 
+FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
+
 
 class MultiLevelStopIteration(Exception):
     """
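The new module-level FLOWS list names the keywords that continue a surrounding scope rather than open an independent one; the second file below reuses it as common.FLOWS, so this first file is presumably jedi's common.py. The MultiLevelStopIteration class visible in the context exists because, under the Python 2 semantics of the time, a plain StopIteration raised inside the token generator would merely end the innermost loop consuming it; a distinct exception type escapes every nesting level until the chunk driver catches it. A minimal sketch of that idea (illustrative names, not the jedi API):

class MultiLevelStopIteration(Exception):
    """Ends iteration across all nesting levels, unlike StopIteration."""

def token_stream():
    yield 'def'
    yield 'f'
    # Under Python 2 semantics a StopIteration here would just end the
    # inner for-loop silently; a distinct type keeps propagating through
    # every enclosing loop until somebody catches it explicitly.
    raise MultiLevelStopIteration()

try:
    for attempt in range(3):
        for tok in token_stream():
            print(attempt, tok)
except MultiLevelStopIteration:
    print('chunk finished')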
@@ -56,14 +58,21 @@ class PushBackIterator(object):
 
 
 class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), stop_on_scope=False):
+    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
         self.readline = readline
         self.gen = PushBackIterator(tokenize.generate_tokens(readline))
         self.offset = offset
-        self.stop_on_scope = stop_on_scope
-        self.first_scope = False
         self.closed = False
-        self.first = True
+        self.is_first = True
+
+        # fast parser options
+        self.is_fast_parser = is_fast_parser
+        self.current = self.previous = [None, None, (0, 0), (0, 0), '']
+        self.in_flow = False
+        self.new_indent = False
+        self.parser_indent = 0
+        self.is_decorator = False
+        self.first_stmt = True
 
     def push_last_back(self):
        self.gen.push_back(self.current)
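The replaced stop_on_scope machinery gives way to a richer set of fast-parser fields. The offset pair is the heart of the "positioning stuff" from the commit message: each code chunk is tokenized in isolation, so tokenize reports rows starting at 1 and columns relative to the chunk, and the stored offset shifts those back into whole-file coordinates. Only the very first token needs the column shift, exactly as the is_first branch further down does. A rough standalone model of that mapping (not the jedi implementation; Python 3 spelling):

import io
import tokenize

def shifted_tokens(chunk, offset=(0, 0)):
    """Yield (name, string, start) with positions mapped into file coords."""
    is_first = True
    for tok in tokenize.generate_tokens(io.StringIO(chunk).readline):
        row, col = tok.start
        if is_first:
            pos = offset[0] + row, offset[1] + col  # column shift only once
            is_first = False
        else:
            pos = offset[0] + row, col
        yield tokenize.tok_name[tok.type], tok.string, pos

# A chunk that really starts at line 41 of the surrounding file:
for name, string, pos in shifted_tokens('def f():\n    return 1\n', (40, 0)):
    print(name, repr(string), pos)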
@@ -76,6 +85,8 @@ class NoErrorTokenizer(object):
         if self.closed:
             raise MultiLevelStopIteration()
         try:
+            last_previous = self.previous
+            self.previous = self.current
             self.current = next(self.gen)
         except tokenize.TokenError:
             # We just ignore this error, I try to handle it earlier - as
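last_previous keeps one extra step of token history: when the ENDMARKER arrives (next hunk), both current and previous are rolled back to the last real token, so the artificial end-of-chunk token never leaks into the stream a parser might push back.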
@@ -99,22 +110,60 @@ class NoErrorTokenizer(object):
 
         c = list(self.current)
 
-        # stop if a new class or definition is started at position zero.
-        breaks = ['def', 'class', '@']
-        if self.stop_on_scope and c[1] in breaks and c[2][1] == 0:
-            if self.first_scope:
-                self.closed = True
-                raise MultiLevelStopIteration()
-            elif c[1] != '@':
-                self.first_scope = True
+        if c[0] == tokenize.ENDMARKER:
+            self.current = self.previous
+            self.previous = last_previous
+            raise MultiLevelStopIteration()
 
-        if self.first:
+        # this is exactly the same check as in fast_parser, but this time with
+        # tokenize and therefore precise.
+        breaks = ['def', 'class', '@']
+
+        if self.is_first:
             c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
             c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
-            self.first = False
+            self.is_first = False
         else:
             c[2] = self.offset[0] + c[2][0], c[2][1]
             c[3] = self.offset[0] + c[3][0], c[3][1]
+        print 'h', c, tokenize.tok_name[c[0]], self.current[2:4]
+        self.current = c
+
+        def close():
+            if not self.first_stmt:
+                self.closed = True
+                raise MultiLevelStopIteration()
+
+        # ignore indents/comments
+        if self.is_fast_parser \
+                and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
+                                         tokenize.NEWLINE, tokenize.DEDENT) \
+                and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
+                                 tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
+            print c, tokenize.tok_name[c[0]]
+
+            tok = c[1]
+            indent = c[2][1]
+            if indent < self.parser_indent:  # -> dedent
+                self.parser_indent = indent
+                self.new_indent = False
+                if not self.in_flow:
+                    close()
+                self.in_flow = False
+            elif self.new_indent:
+                self.parser_indent = indent
+
+            if not self.in_flow:
+                if tok in FLOWS or tok in breaks:
+                    self.in_flow = tok in FLOWS
+                    if not self.is_decorator and not self.in_flow:
+                        close()
+                    self.is_decorator = '@' == tok
+                    if not self.is_decorator:
+                        self.parser_indent += 1  # new scope: must be higher
+                        self.new_indent = True
+
+            if tok != '@':
+                self.first_stmt = False
         return c
 
 
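The new comment calls this "the same check as in fast_parser, but … precise": the regex in the next file matches a raw source line and can fire on a mere prefix, while here c[1] is a complete token from tokenize. A small demonstration of the difference, reusing the regex from the hunk further down and the FLOWS list from above:

import re

FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(FLOWS)

line = 'iffy = 1'                    # starts with 'if' but is an identifier
m = re.match(r_keyword, line)
print(m and m.group(1))              # 'if'  -> false positive of the regex

tok = 'iffy'                         # what tokenize would hand over as c[1]
print(tok in FLOWS or tok in ['def', 'class', '@'])  # False -> precise

The hunks that follow belong to the fast parser module itself (presumably fast.py: it imports parsing, cache, and now common).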
@@ -11,6 +11,7 @@ import settings
 import parsing
 import parsing_representation as pr
 import cache
+import common
 
 
 class Module(pr.Simple, pr.Module):
@@ -219,17 +220,15 @@ class FastParser(use_metaclass(CachedFastParser)):
             parts.append(txt)
             current_lines[:] = []
 
-        flows = ['if', 'else', 'elif', 'while', 'with', 'try', 'except',
-                 'finally']
-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(flows)
+        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
 
         lines = code.splitlines()
         current_lines = []
         parts = []
-        is_generator = False
+        is_decorator = False
         current_indent = 0
         new_indent = False
-        is_flow = False
+        in_flow = False
         # All things within flows are simply being ignored.
         for i, l in enumerate(lines):
             # check for dedents
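With the duplicated keyword list gone, both files share one definition of the flow keywords; the rename from is_generator to is_decorator also fixes a misleading name (the flag tracks '@' lines). A quick look at what r_keyword selects as a split candidate:

import re

FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(FLOWS)

for line in ['def f():', '    class C:', '@decorator', 'try:', 'x = 1']:
    m = re.match(r_keyword, line)
    print('%-12r -> %r' % (line, m and m.group(1)))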
@@ -242,29 +241,35 @@ class FastParser(use_metaclass(CachedFastParser)):
             if indent < current_indent:  # -> dedent
                 current_indent = indent
                 new_indent = False
-                if not is_flow:
+                if not in_flow:
                     add_part()
-                is_flow = False
+                in_flow = False
             elif new_indent:
                 current_indent = indent
                 new_indent = False
 
             # Check lines for functions/classes and split the code there.
-            if not is_flow:
+            if not in_flow:
                 m = re.match(r_keyword, l)
                 if m:
-                    is_flow = m.group(1) in flows
-                    if not is_generator and not is_flow:
+                    in_flow = m.group(1) in common.FLOWS
+                    if not is_decorator and not in_flow:
                         add_part()
                         current_lines = []
-                    is_generator = '@' == m.group(1)
-                    if not is_generator:
+                    is_decorator = '@' == m.group(1)
+                    if not is_decorator:
                         current_indent += 1  # it must be higher
                         new_indent = True
 
             current_lines.append(l)
         add_part()
 
+        for p in parts:
+            #print '#####################################'
+            #print p
+            #print len(p.splitlines())
+            pass
+
         return parts
 
     def _parse(self, code):
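A condensed, runnable model of the loop above (toy code with simplified indent handling, not the FastParser API): a chunk ends when a new def/class/decorator starts outside a flow, flow suites stay attached to the chunk that precedes them, and decorators stay glued to their function:

import re

FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(FLOWS)

def split_parts(code):
    parts, current = [], []

    def add_part():
        if current:
            parts.append('\n'.join(current))
            current[:] = []

    current_indent = 0
    in_flow = is_decorator = False
    for line in code.splitlines():
        if line.strip():
            indent = len(line) - len(line.lstrip())
            if indent < current_indent:  # dedent: the open suite has ended
                current_indent = indent
                if not in_flow:
                    add_part()
                in_flow = False
        m = re.match(r_keyword, line)
        if m and not in_flow:
            in_flow = m.group(1) in FLOWS
            if not is_decorator and not in_flow:
                add_part()
            is_decorator = m.group(1) == '@'
            if not is_decorator:
                current_indent += 1  # the suite must be indented deeper
        current.append(line)
    add_part()
    return parts

code = "import os\n\nif os.name == 'nt':\n    import nt\n\ndef f():\n    return 1\n"
for part in split_parts(code):
    print('--- chunk ---')
    print(part)

Running this yields two chunks: the if-suite stays in the first chunk together with the imports, and only the top-level def opens a new one.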
@@ -280,11 +285,12 @@ class FastParser(use_metaclass(CachedFastParser)):
             el = module.imports[0]
             return el.start_pos[1]
 
-        if self.parsers:
+        if self.parsers and False:
             new_indent = get_indent(module)
             old_indent = get_indent(self.parsers[-1].module)
             if old_indent < new_indent:
-                module.parent = self.parsers[-1].module.subscopes[0]
+                #module.parent = self.parsers[-1].module.subscopes[0]
+                # TODO set parents + add to subscopes
                 return
         p.module.parent = self.module
 
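The `and False` short-circuit disables the re-parenting branch outright while it is reworked; together with the commented-out assignment and the new TODO, the dead branch documents the intended design (hang a nested chunk's module below the previous chunk's subscope) without executing it yet.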
@@ -301,7 +307,7 @@ class FastParser(use_metaclass(CachedFastParser)):
         p = None
         parser_order = 0
         for code_part in parts:
-            lines = code_part.count('\n')
+            lines = code_part.count('\n') + 1
             # the parser is using additional newlines, therefore substract
             if p is None or line_offset >= p.end_pos[0] - 2:
                 # check if code_part has already been parsed
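The off-by-one fix in the line bookkeeping: chunks produced by the splitter carry no trailing newline, so counting '\n' undercounts by one line per chunk and line_offset drifted downward. For example:

code_part = 'def f():\n    return 1'   # a two-line chunk, no trailing \n
print(code_part.count('\n'))           # 1
print(len(code_part.splitlines()))     # 2 == count('\n') + 1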
@@ -336,8 +342,13 @@ class FastParser(use_metaclass(CachedFastParser)):
 
             parser_order += 1
             line_offset += lines
-            start += len(code_part)
+            print line_offset
+            start += len(code_part) + 1  # +1 for newline
         self.parsers[parser_order + 1:] = []
+        for p in self.parsers:
+            print(p.module.get_code())
+            print(p.module.start_pos, p.module.end_pos)
+        exit()
 
     def reset_caches(self):
         self._user_scope = None
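The print of line_offset, the per-parser dump of get_code() and positions, and the hard exit() read as temporary instrumentation for verifying exactly this bookkeeping; with exit() in place the method cannot complete, which fits the commit message's "on the way to" framing. The remaining hunks move to the Parser class itself (presumably parsing.py, which the fast parser imports as parsing).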
@@ -52,13 +52,11 @@ class Parser(object):
         self.user_stmt = None
         self.no_docstr = no_docstr
 
+        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
         # initialize global Scope
-        self.module = pr.SubModule(module_path, (offset[0] + 1, offset[1]),
-                                   top_module)
+        self.module = pr.SubModule(module_path, self.start_pos, top_module)
         self.scope = self.module
         self.current = (None, None)
-        self.start_pos = 1, 0
-        self.end_pos = 1, 0
 
         source = source + '\n'  # end with \n, because the parser needs it
         buf = StringIO(source)
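The two hard-coded (1, 0) positions collapse into one offset-aware expression shared with the SubModule, so the parser and its module can no longer disagree about where the chunk starts. Since tokenize rows are 1-based:

offset = (41, 0)     # chunk begins after the 41st line of the file
start_pos = 1 + offset[0], offset[1]
print(start_pos)     # (42, 0): the chunk's first line in file coordinates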
@@ -79,6 +77,10 @@ class Parser(object):
             # because of `self.module.used_names`.
             d.parent = self.module
 
+        if self.current[0] in (tokenize.NL, tokenize.NEWLINE):
+            # we added a newline before, so we need to "remove" it again.
+            self.end_pos = self._gen.previous[2]
+
         self.start_pos = self.module.start_pos
         self.module.end_pos = self.end_pos
         del self._gen
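This compensates for the newline the parser appends to every chunk (source = source + '\n' in the previous hunk): when the final token is only that padding's NL/NEWLINE, the module's end position is pulled back to the start of the previous real token instead of spilling onto the line after the chunk.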
@@ -170,8 +172,6 @@ class Parser(object):
         while True:
             defunct = False
             token_type, tok = self.next()
-            if token_type == tokenize.ENDMARKER:
-                break
             if brackets and tok == '\n':
                 self.next()
             if tok == '(':  # python allows only one `(` in the statement.
@@ -421,8 +421,10 @@ class Parser(object):
     def __next__(self):
         """ Generate the next tokenize pattern. """
         try:
-            typ, tok, self.start_pos, self.end_pos, \
-                self.parserline = next(self._gen)
+            typ, tok, start_pos, end_pos, self.parserline = next(self._gen)
+            # dedents shouldn't change positions
+            if typ != tokenize.DEDENT:
+                self.start_pos, self.end_pos = start_pos, end_pos
         except (StopIteration, common.MultiLevelStopIteration):
             # on finish, set end_pos correctly
             s = self.scope
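Why DEDENT must not move the parser's positions: tokenize reports a DEDENT at the beginning of the first line after the dedented block, so letting it through would push end_pos past the scope's real last statement. Observable with plain tokenize (Python 3 spelling here; jedi at the time ran on Python 2):

import io
import tokenize

code = 'def f():\n    return 1\nx = 2\n'
for tok in tokenize.generate_tokens(io.StringIO(code).readline):
    if tok.type == tokenize.DEDENT:
        print(tok.start)   # (3, 0): line 3, although f() ends on line 2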
@@ -662,7 +664,6 @@ class Parser(object):
                 self.freshscope = False
             else:
                 if token_type not in [tokenize.COMMENT, tokenize.INDENT,
-                                      tokenize.NEWLINE, tokenize.NL,
-                                      tokenize.ENDMARKER]:
+                                      tokenize.NEWLINE, tokenize.NL]:
                     debug.warning('token not classified', tok, token_type,
                                   self.start_pos[0])
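ENDMARKER can drop out of this not-classified filter because the tokenizer wrapper now intercepts it itself (first file of this commit) and raises MultiLevelStopIteration before the token ever reaches the parser; the explicit ENDMARKER break removed from the statement loop above is redundant for the same reason.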