forked from VimPlug/jedi
Simplify the fast parser tokenizer more. Now it is more readable and less buggy (+bugfixes).
This commit is contained in:
@@ -177,6 +177,7 @@ class Instance(use_metaclass(CachedMetaClass, Executed)):
|
||||
return names
|
||||
|
||||
def get_subscope_by_name(self, name):
|
||||
print(name)
|
||||
sub = self.base.get_subscope_by_name(name)
|
||||
return get_instance_el(self._evaluator, self, sub, True)
|
||||
|
||||
|
||||
@@ -301,13 +301,13 @@ class Parser(object):
|
||||
|
||||
def _tokenize(self, tokenizer):
|
||||
for typ, value, start_pos, prefix in tokenizer:
|
||||
# print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||
if self._omit_dedent and typ == token.DEDENT:
|
||||
self._omit_dedent -= 1
|
||||
continue
|
||||
|
||||
if typ == token.OP:
|
||||
typ = token.opmap[value]
|
||||
print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||
yield typ, value, prefix, start_pos
|
||||
|
||||
def __repr__(self):
|
||||
@@ -322,7 +322,6 @@ class Parser(object):
|
||||
endmarker = self.module.children[-1]
|
||||
# The newline is either in the endmarker as a prefix or the previous
|
||||
# leaf as a newline token.
|
||||
print('REMOVE', endmarker.start_pos)
|
||||
if endmarker.prefix.endswith('\n'):
|
||||
endmarker.prefix = endmarker.prefix[:-1]
|
||||
last_line = re.sub('.*\n', '', endmarker.prefix)
|
||||
|
||||
@@ -12,6 +12,7 @@ from jedi.parser import Parser
|
||||
from jedi.parser import tree as pr
|
||||
from jedi.parser import tokenize
|
||||
from jedi import cache
|
||||
from jedi import debug
|
||||
from jedi.parser.tokenize import (source_tokens, NEWLINE,
|
||||
ENDMARKER, INDENT, DEDENT)
|
||||
|
||||
@@ -155,7 +156,6 @@ class ParserNode(object):
|
||||
self._node_children = []
|
||||
scope = self._content_scope
|
||||
scope.children = list(self._old_children)
|
||||
print('reset', scope.children)
|
||||
try:
|
||||
# This works if it's a MergedNamesDict.
|
||||
# We are correcting it, because the MergedNamesDicts are artificial
|
||||
@@ -187,14 +187,11 @@ class ParserNode(object):
|
||||
nodes should be added anymore.
|
||||
"""
|
||||
print('CLOSE NODE', id(self), self.parent, self._node_children)
|
||||
print(self.parser.module.names_dict, [p.parser.module.names_dict for p in
|
||||
self._node_children])
|
||||
# We only need to replace the dict if multiple dictionaries are used:
|
||||
if self._node_children:
|
||||
dcts = [n.parser.module.names_dict for n in self._node_children]
|
||||
# Need to insert the own node as well.
|
||||
dcts.insert(0, self._content_scope.names_dict)
|
||||
print('DCTS', self.parser, dcts, self._node_children)
|
||||
self._content_scope.names_dict = MergedNamesDict(dcts)
|
||||
|
||||
def parent_until_indent(self, indent=None):
|
||||
@@ -411,19 +408,15 @@ class FastParser(use_metaclass(CachedFastParser)):
|
||||
print('OFF', line_offset + 1, self.current_node.parser.module.end_pos)
|
||||
#import pdb; pdb.set_trace()
|
||||
if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
|
||||
print(repr(code_part))
|
||||
|
||||
indent = len(code_part) - len(code_part.lstrip('\t '))
|
||||
self.current_node = self.current_node.parent_until_indent(indent)
|
||||
|
||||
print('cur', id(self.current_node))
|
||||
# print '#'*45,line_offset, p.module.end_pos, '\n', code_part
|
||||
# check if code_part has already been parsed
|
||||
# print '#'*45,line_offset, p and p.module.end_pos, '\n', code_part
|
||||
self.current_node = self._get_node(code_part, source[start:],
|
||||
line_offset, nodes, not is_first)
|
||||
|
||||
if False and is_first and self.current_node.parser.module.subscopes:
|
||||
print('NOXXXX')
|
||||
raise NotImplementedError
|
||||
# Special case, we cannot use a function subscope as a
|
||||
# base scope, subscopes would save all the other contents
|
||||
@@ -474,6 +467,9 @@ class FastParser(use_metaclass(CachedFastParser)):
|
||||
""" TODO used?
|
||||
self.module.end_pos = self.parsers[-1].module.end_pos
|
||||
"""
|
||||
debug.dbg('Parsed %s, with %s parsers in %s splits.'
|
||||
% (self.module_path, self.number_parsers_used,
|
||||
self.number_of_splits))
|
||||
|
||||
# print(self.parsers[0].module.get_code())
|
||||
|
||||
@@ -483,7 +479,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
||||
"""
|
||||
h = hash(source)
|
||||
for index, node in enumerate(nodes):
|
||||
print('EQ', node, repr(node.source), repr(source))
|
||||
#print('EQ', node, repr(node.source), repr(source))
|
||||
if node.hash == h and node.source == source:
|
||||
node.reset_node()
|
||||
nodes.remove(node)
|
||||
@@ -497,8 +493,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
||||
node = ParserNode(self.module)
|
||||
|
||||
end = line_offset + p.module.end_pos[0]
|
||||
print('\nACTUALLY PARSING', p.module.end_pos, repr(source),
|
||||
len(self._lines), line_offset)
|
||||
print('\nACTUALLY PARSING', p.module.end_pos, repr(source), len(self._lines), line_offset)
|
||||
if not (len(self._lines) == end):
|
||||
# We don't keep the last line, except if were done. A newline
|
||||
# ends on the next line, which is part of the next parser. But
|
||||
@@ -525,15 +520,12 @@ class FastTokenizer(object):
|
||||
# fast parser options
|
||||
self.current = self.previous = NEWLINE, '', (0, 0)
|
||||
self._in_flow = False
|
||||
self._new_indent = False
|
||||
self._parser_indent = self._old_parser_indent = 0
|
||||
self._is_decorator = False
|
||||
self._first_stmt = True
|
||||
self._parentheses_level = 0
|
||||
self._indent_counter = 0
|
||||
self._flow_indent_counter = 0
|
||||
self._returned_endmarker = False
|
||||
self._next_dedent_noclose = False
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
@@ -555,65 +547,35 @@ class FastTokenizer(object):
|
||||
self.previous = self.current
|
||||
self.current = current
|
||||
|
||||
# this is exactly the same check as in fast_parser, but this time with
|
||||
# tokenize and therefore precise.
|
||||
breaks = ['def', 'class', '@']
|
||||
|
||||
if typ == INDENT:
|
||||
self._indent_counter += 1
|
||||
elif typ == DEDENT:
|
||||
self._indent_counter -= 1
|
||||
print('DEDENT', self._flow_indent_counter, start_pos, self._indent_counter)
|
||||
print(self._in_flow, self._indent_counter, self._flow_indent_counter)
|
||||
if self._in_flow and self._indent_counter == self._flow_indent_counter:
|
||||
self._in_flow = False
|
||||
self._next_dedent_noclose = True
|
||||
elif not self._in_flow:
|
||||
self._closed = True
|
||||
return current
|
||||
|
||||
if self.previous[0] in (NEWLINE, INDENT, DEDENT):
|
||||
if self.previous[0] == DEDENT:
|
||||
if not self._in_flow:
|
||||
if not self._next_dedent_noclose:
|
||||
self._first_stmt = False
|
||||
return self._close()
|
||||
|
||||
self._next_dedent_noclose = False
|
||||
# Parentheses ignore the indentation rules. The other three stand for
|
||||
# new lines.
|
||||
if self.previous[0] in (NEWLINE, INDENT, DEDENT) \
|
||||
and not self._parentheses_level:
|
||||
# Check for NEWLINE, which symbolizes the indent.
|
||||
#print('X', repr(value), tokenize.tok_name[typ])
|
||||
indent = start_pos[1]
|
||||
#print(indent, self._parser_indent)
|
||||
if self._parentheses_level:
|
||||
# Parentheses ignore the indentation rules.
|
||||
pass
|
||||
elif False and indent < self._parser_indent: # -> dedent
|
||||
raise NotImplementedError
|
||||
self._parser_indent = indent
|
||||
self._new_indent = False
|
||||
print(self._in_flow, indent, self._old_parser_indent)
|
||||
if not self._in_flow or indent < self._old_parser_indent:
|
||||
return self._close()
|
||||
|
||||
self._in_flow = False
|
||||
elif self._new_indent:
|
||||
self._parser_indent = indent
|
||||
self._new_indent = False
|
||||
|
||||
if not self._in_flow:
|
||||
self._in_flow = value in FLOWS
|
||||
if self._in_flow:
|
||||
print('INFLOW', self._indent_counter)
|
||||
self._flow_indent_counter = self._indent_counter
|
||||
#self._old_parser_indent = self._parser_indent
|
||||
#self._parser_indent += 1 # new scope: must be higher
|
||||
#self._new_indent = True
|
||||
elif value in breaks:
|
||||
elif value in ('def', 'class', '@'):
|
||||
# The values here are exactly the same check as in
|
||||
# _split_parts, but this time with tokenize and therefore
|
||||
# precise.
|
||||
if not self._is_decorator:
|
||||
return self._close()
|
||||
|
||||
self._is_decorator = '@' == value
|
||||
#if not self._is_decorator:
|
||||
#self._old_parser_indent = self._parser_indent
|
||||
#self._parser_indent += 1 # new scope: must be higher
|
||||
#self._new_indent = True
|
||||
|
||||
if value in '([{' and value:
|
||||
self._parentheses_level += 1
|
||||
@@ -628,8 +590,6 @@ class FastTokenizer(object):
|
||||
# Continue like nothing has happened, because we want to enter
|
||||
# the first class/function.
|
||||
if self.current[1] != '@':
|
||||
#if self._first_stmt and not self._new_indent:
|
||||
#self._parser_indent = indent
|
||||
self._first_stmt = False
|
||||
return self.current
|
||||
else:
|
||||
@@ -637,18 +597,19 @@ class FastTokenizer(object):
|
||||
return self._finish_dedents()
|
||||
|
||||
def _finish_dedents(self):
|
||||
start_pos = self.current[2]
|
||||
print('FINISH', self._indent_counter)
|
||||
if self._indent_counter:
|
||||
self._indent_counter -= 1
|
||||
return tokenize.DEDENT, '', start_pos, ''
|
||||
return tokenize.DEDENT, '', self.current[2], ''
|
||||
elif not self._returned_endmarker:
|
||||
self._returned_endmarker = True
|
||||
# We're using the current prefix for the endmarker to not loose any
|
||||
# information. However we care about "lost" lines. The prefix of
|
||||
# the current line (indent) will always be included in the current
|
||||
# line.
|
||||
prefix = re.sub('[^\n]+$', '', self.current[3])
|
||||
t, _, start_pos, prefix = next(self._gen)
|
||||
# \Z for the end of the string. $ is bugged, because it has the
|
||||
# same behavior with or without re.MULTILINE.
|
||||
prefix = re.sub(r'[^\n]+\Z', '', prefix)
|
||||
return ENDMARKER, '', start_pos, prefix
|
||||
else:
|
||||
raise StopIteration
|
||||
|
||||
@@ -706,7 +706,7 @@ class Class(ClassOrFunc):
|
||||
super(Class, self).__init__(children)
|
||||
|
||||
def get_super_arglist(self):
|
||||
if len(self.children) == 4: # Has no parentheses
|
||||
if self.children[2] != '(': # Has no parentheses
|
||||
return None
|
||||
else:
|
||||
if self.children[3] == ')': # Empty parentheses
|
||||
|
||||
@@ -180,6 +180,24 @@ def test_nested_funcs():
|
||||
check_fp(src, 3)
|
||||
|
||||
|
||||
def test_class_and_if():
|
||||
src = dedent("""\
|
||||
class V:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
if 1:
|
||||
c = 3
|
||||
|
||||
def a_func():
|
||||
return 1
|
||||
|
||||
# COMMENT
|
||||
a_func()""")
|
||||
check_fp(src, 5, 5)
|
||||
assert [d.name for d in jedi.Script(src).goto_definitions()] == ['int']
|
||||
|
||||
|
||||
def test_func_with_for_and_comment():
|
||||
# The first newline is important, leave it. It should not trigger another
|
||||
# parser split.
|
||||
|
||||
Reference in New Issue
Block a user