forked from VimPlug/jedi
Trying to restructure the fast parser.
This commit is contained in:
@@ -20,9 +20,9 @@ from jedi.parser.tokenize import (source_tokens, FLOWS, NEWLINE, COMMENT,
|
|||||||
class FastModule(pr.Module, pr.Simple):
|
class FastModule(pr.Module, pr.Simple):
|
||||||
type = 'file_input'
|
type = 'file_input'
|
||||||
|
|
||||||
def __init__(self, parsers):
|
def __init__(self):
|
||||||
super(FastModule, self).__init__([])
|
super(FastModule, self).__init__([])
|
||||||
self.parsers = parsers
|
self.modules = []
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
|
|
||||||
def reset_caches(self):
|
def reset_caches(self):
|
||||||
@@ -35,21 +35,21 @@ class FastModule(pr.Module, pr.Simple):
|
|||||||
if name.startswith('__'):
|
if name.startswith('__'):
|
||||||
raise AttributeError('Not available!')
|
raise AttributeError('Not available!')
|
||||||
else:
|
else:
|
||||||
return getattr(self.parsers[0].module, name)
|
return getattr(self.modules[0], name)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@cache.underscore_memoization
|
@cache.underscore_memoization
|
||||||
def used_names(self):
|
def used_names(self):
|
||||||
"""
|
"""
|
||||||
used_names = {}
|
used_names = {}
|
||||||
for p in self.parsers:
|
for m in self.modules:
|
||||||
for k, statement_set in p.module.used_names.items():
|
for k, statement_set in m.used_names.items():
|
||||||
if k in used_names:
|
if k in used_names:
|
||||||
used_names[k] |= statement_set
|
used_names[k] |= statement_set
|
||||||
else:
|
else:
|
||||||
used_names[k] = set(statement_set)
|
used_names[k] = set(statement_set)
|
||||||
"""
|
"""
|
||||||
return MergedNamesDict([p.module.used_names for p in self.parsers])
|
return MergedNamesDict([m.used_names for m in self.modules])
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
|
return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
|
||||||
@@ -87,16 +87,23 @@ class CachedFastParser(type):
|
|||||||
|
|
||||||
|
|
||||||
class ParserNode(object):
|
class ParserNode(object):
|
||||||
def __init__(self, fast_module, parser, code, parent=None):
|
def __init__(self, fast_module, parent=None):
|
||||||
self._fast_module = fast_module
|
self._fast_module = fast_module
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
self.parser_children = []
|
self.node_children = []
|
||||||
# must be created before new things are added to it.
|
self.code = None
|
||||||
self.save_contents(parser, code)
|
self.hash = None
|
||||||
|
self.parser = None
|
||||||
|
|
||||||
def save_contents(self, parser, code):
|
def __repr__(self):
|
||||||
print('SAVE')
|
if self.parser is None:
|
||||||
|
return '<%s: empty>' % type(self).__name__
|
||||||
|
|
||||||
|
module = self.parser.module
|
||||||
|
return '<%s: %s-%s>' % (type(self).__name__, module.start_pos, module.end_pos)
|
||||||
|
|
||||||
|
def set_parser(self, parser, code):
|
||||||
self.code = code
|
self.code = code
|
||||||
self.hash = hash(code)
|
self.hash = hash(code)
|
||||||
self.parser = parser
|
self.parser = parser
|
||||||
@@ -116,8 +123,7 @@ class ParserNode(object):
|
|||||||
self._is_generator = scope.is_generator
|
self._is_generator = scope.is_generator
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.old_children = self.parser_children
|
self.node_children = []
|
||||||
self.parser_children = []
|
|
||||||
|
|
||||||
def reset_contents(self):
|
def reset_contents(self):
|
||||||
"""
|
"""
|
||||||
@@ -133,34 +139,31 @@ class ParserNode(object):
|
|||||||
# they make no sense.
|
# they make no sense.
|
||||||
self.parser.module.global_vars = []
|
self.parser.module.global_vars = []
|
||||||
"""
|
"""
|
||||||
|
# TODO REMOVE
|
||||||
for c in self.parser_children:
|
|
||||||
c.reset_contents()
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""
|
"""
|
||||||
Closes the current parser node. This means that after this no further
|
Closes the current parser node. This means that after this no further
|
||||||
nodes should be added anymore.
|
nodes should be added anymore.
|
||||||
"""
|
"""
|
||||||
print('CLOSE NODE', self.parent, self.parser_children)
|
print('CLOSE NODE', self.parent, self.node_children)
|
||||||
print(self.parser.module.names_dict, [p.parser.module.names_dict for p in
|
print(self.parser.module.names_dict, [p.parser.module.names_dict for p in
|
||||||
self.parser_children])
|
self.node_children])
|
||||||
# We only need to replace the dict if multiple dictionaries are used:
|
# We only need to replace the dict if multiple dictionaries are used:
|
||||||
if self.parser_children:
|
if self.node_children:
|
||||||
dcts = [n.parser.module.names_dict for n in self.parser_children]
|
dcts = [n.parser.module.names_dict for n in self.node_children]
|
||||||
dct = MergedNamesDict([self._names_dict_scope.names_dict] + dcts)
|
dct = MergedNamesDict([self._names_dict_scope.names_dict] + dcts)
|
||||||
self._content_scope.names_dict = dct
|
self._content_scope.names_dict = dct
|
||||||
|
|
||||||
def parent_until_indent(self, indent=None):
|
def parent_until_indent(self, indent=None):
|
||||||
if indent is None or self.indent >= indent and self.parent:
|
if indent is None or self._indent >= indent and self.parent:
|
||||||
self.old_children = []
|
|
||||||
if self.parent is not None:
|
if self.parent is not None:
|
||||||
self.close()
|
self.close()
|
||||||
return self.parent.parent_until_indent(indent)
|
return self.parent.parent_until_indent(indent)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def indent(self):
|
def _indent(self):
|
||||||
if not self.parent:
|
if not self.parent:
|
||||||
return 0
|
return 0
|
||||||
module = self.parser.module
|
module = self.parser.module
|
||||||
@@ -202,13 +205,17 @@ class ParserNode(object):
|
|||||||
scope.is_generator |= parser.module.is_generator
|
scope.is_generator |= parser.module.is_generator
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def add_node(self, node, set_parent=False):
|
def add_node(self, node, line_offset):
|
||||||
"""Adding a node means adding a node that was already added earlier"""
|
"""Adding a node means adding a node that was already added earlier"""
|
||||||
print('ADD')
|
print('ADD')
|
||||||
self.parser_children.append(node)
|
# Changing the line offsets is very important, because if they don't
|
||||||
self._set_items(node.parser, set_parent=set_parent)
|
# fit, all the start_pos values will be wrong.
|
||||||
node.old_children = node.parser_children # TODO potential memory leak?
|
m = node.parser.module
|
||||||
node.parser_children = []
|
m.line_offset += line_offset + 1 - m.start_pos[0]
|
||||||
|
|
||||||
|
self.node_children.append(node)
|
||||||
|
self._set_items(node.parser, set_parent=node.parent == self)
|
||||||
|
node.node_children = []
|
||||||
|
|
||||||
"""
|
"""
|
||||||
scope = self.content_scope
|
scope = self.content_scope
|
||||||
@@ -222,9 +229,20 @@ class ParserNode(object):
|
|||||||
return node
|
return node
|
||||||
|
|
||||||
def add_parser(self, parser, code):
|
def add_parser(self, parser, code):
|
||||||
|
# TODO REMOVE
|
||||||
|
raise NotImplementedError
|
||||||
print('add parser')
|
print('add parser')
|
||||||
return self.add_node(ParserNode(self._fast_module, parser, code, self), True)
|
return self.add_node(ParserNode(self._fast_module, parser, code, self), True)
|
||||||
|
|
||||||
|
def all_nodes(self):
|
||||||
|
"""
|
||||||
|
Returns all nodes including nested ones.
|
||||||
|
"""
|
||||||
|
yield self
|
||||||
|
for n in self.node_children:
|
||||||
|
for y in n.all_nodes():
|
||||||
|
yield y
|
||||||
|
|
||||||
|
|
||||||
class FastParser(use_metaclass(CachedFastParser)):
|
class FastParser(use_metaclass(CachedFastParser)):
|
||||||
|
|
||||||
@@ -234,20 +252,20 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
# set values like `pr.Module`.
|
# set values like `pr.Module`.
|
||||||
self._grammar = grammar
|
self._grammar = grammar
|
||||||
self.module_path = module_path
|
self.module_path = module_path
|
||||||
print(module_path)
|
|
||||||
|
|
||||||
self.current_node = None
|
self._reset_caches()
|
||||||
self.parsers = []
|
|
||||||
self.module = FastModule(self.parsers)
|
|
||||||
self.reset_caches()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._parse(code)
|
self._parse(code)
|
||||||
except:
|
except:
|
||||||
# FastParser is cached, be careful with exceptions
|
# FastParser is cached, be careful with exceptions
|
||||||
del self.parsers[:]
|
self._reset_caches()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def _reset_caches(self):
|
||||||
|
self.module = FastModule()
|
||||||
|
self.current_node = ParserNode(self.module)
|
||||||
|
|
||||||
def update(self, code):
|
def update(self, code):
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
|
|
||||||
@@ -255,7 +273,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
self._parse(code)
|
self._parse(code)
|
||||||
except:
|
except:
|
||||||
# FastParser is cached, be careful with exceptions
|
# FastParser is cached, be careful with exceptions
|
||||||
del self.parsers[:]
|
self._reset_caches()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _split_parts(self, code):
|
def _split_parts(self, code):
|
||||||
@@ -320,57 +338,45 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
|
|
||||||
def _parse(self, code):
|
def _parse(self, code):
|
||||||
""" :type code: str """
|
""" :type code: str """
|
||||||
def empty_parser():
|
def empty_parser_node():
|
||||||
new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
|
return self._get_node(unicode(''), unicode(''), 0, [], False)
|
||||||
return new
|
|
||||||
|
|
||||||
del self.parsers[:]
|
|
||||||
|
|
||||||
line_offset = 0
|
line_offset = 0
|
||||||
start = 0
|
start = 0
|
||||||
p = None
|
p = None
|
||||||
is_first = True
|
is_first = True
|
||||||
|
nodes = self.current_node.all_nodes()
|
||||||
|
|
||||||
for code_part in self._split_parts(code):
|
for code_part in self._split_parts(code):
|
||||||
if is_first or line_offset + 1 == p.module.end_pos[0]:
|
if is_first or line_offset + 1 == p.module.end_pos[0]:
|
||||||
print(repr(code_part))
|
print(repr(code_part))
|
||||||
|
|
||||||
indent = len(code_part) - len(code_part.lstrip('\t '))
|
indent = len(code_part) - len(code_part.lstrip('\t '))
|
||||||
if is_first and self.current_node is not None:
|
self.current_node = self.current_node.parent_until_indent(indent)
|
||||||
nodes = [self.current_node]
|
|
||||||
else:
|
|
||||||
nodes = []
|
|
||||||
if self.current_node is not None:
|
|
||||||
self.current_node = self.current_node.parent_until_indent(indent)
|
|
||||||
nodes += self.current_node.old_children
|
|
||||||
|
|
||||||
# check if code_part has already been parsed
|
# check if code_part has already been parsed
|
||||||
# print '#'*45,line_offset, p and p.module.end_pos, '\n', code_part
|
# print '#'*45,line_offset, p and p.module.end_pos, '\n', code_part
|
||||||
p, node = self._get_parser(code_part, code[start:],
|
self.current_node = self._get_node(code_part, code[start:],
|
||||||
line_offset, nodes, not is_first)
|
line_offset, nodes, not is_first)
|
||||||
print('HmmmmA', p.module.names_dict)
|
print('HmmmmA', self.current_node.parser.module.names_dict)
|
||||||
|
|
||||||
# The actual used code_part is different from the given code
|
if is_first and self.current_node.parser.module.subscopes:
|
||||||
# part, because of docstrings for example there's a chance that
|
|
||||||
# splits are wrong.
|
|
||||||
used_lines = self._lines[line_offset:p.module.end_pos[0]]
|
|
||||||
code_part_actually_used = '\n'.join(used_lines)
|
|
||||||
|
|
||||||
if is_first and p.module.subscopes:
|
|
||||||
print('NOXXXX')
|
print('NOXXXX')
|
||||||
# special case, we cannot use a function subscope as a
|
raise NotImplementedError
|
||||||
|
# Special case, we cannot use a function subscope as a
|
||||||
# base scope, subscopes would save all the other contents
|
# base scope, subscopes would save all the other contents
|
||||||
new = empty_parser()
|
new = empty_parser_node() # TODO should be node =
|
||||||
if self.current_node is None:
|
self.current_node.set_parser(new, '')
|
||||||
self.current_node = ParserNode(self.module, new, '')
|
|
||||||
else:
|
|
||||||
self.current_node.save_contents(new, '')
|
|
||||||
self.parsers.append(new)
|
self.parsers.append(new)
|
||||||
is_first = False
|
is_first = False
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
if is_first:
|
if is_first:
|
||||||
if self.current_node is None:
|
if self.current_node is None:
|
||||||
self.current_node = ParserNode(self.module, p, code_part_actually_used)
|
self.current_node = ParserNode(self.module, p, code_part_actually_used)
|
||||||
else:
|
else:
|
||||||
self.current_node.save_contents(p, code_part_actually_used)
|
pass
|
||||||
else:
|
else:
|
||||||
if node is None:
|
if node is None:
|
||||||
self.current_node = \
|
self.current_node = \
|
||||||
@@ -379,6 +385,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
self.current_node = self.current_node.add_node(node)
|
self.current_node = self.current_node.add_node(node)
|
||||||
|
|
||||||
self.parsers.append(p)
|
self.parsers.append(p)
|
||||||
|
"""
|
||||||
|
|
||||||
is_first = False
|
is_first = False
|
||||||
#else:
|
#else:
|
||||||
@@ -387,11 +394,13 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
line_offset += code_part.count('\n') + 1
|
line_offset += code_part.count('\n') + 1
|
||||||
start += len(code_part) + 1 # +1 for newline
|
start += len(code_part) + 1 # +1 for newline
|
||||||
|
|
||||||
|
# Now that the for loop is finished, we still want to close all nodes.
|
||||||
if self.parsers:
|
if self.parsers:
|
||||||
self.current_node = self.current_node.parent_until_indent()
|
self.current_node = self.current_node.parent_until_indent()
|
||||||
self.current_node.close()
|
self.current_node.close()
|
||||||
else:
|
else:
|
||||||
self.parsers.append(empty_parser())
|
raise NotImplementedError
|
||||||
|
self.parsers.append(empty_parser_node())
|
||||||
|
|
||||||
""" TODO used?
|
""" TODO used?
|
||||||
self.module.end_pos = self.parsers[-1].module.end_pos
|
self.module.end_pos = self.parsers[-1].module.end_pos
|
||||||
@@ -399,30 +408,33 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
|
|
||||||
# print(self.parsers[0].module.get_code())
|
# print(self.parsers[0].module.get_code())
|
||||||
|
|
||||||
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
|
def _get_node(self, code, parser_code, line_offset, nodes, no_docstr):
|
||||||
|
"""
|
||||||
|
Side effect: Alters the list of nodes.
|
||||||
|
"""
|
||||||
h = hash(code)
|
h = hash(code)
|
||||||
for index, node in enumerate(nodes):
|
for index, node in enumerate(list(nodes)):
|
||||||
|
print('EQ', node, repr(node.code), repr(code))
|
||||||
if node.hash == h and node.code == code:
|
if node.hash == h and node.code == code:
|
||||||
if node != self.current_node:
|
nodes.remove(node)
|
||||||
offset = int(nodes[0] == self.current_node)
|
|
||||||
self.current_node.old_children.pop(index - offset)
|
|
||||||
p = node.parser
|
|
||||||
m = p.module
|
|
||||||
m.line_offset += line_offset + 1 - m.start_pos[0]
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
print('ACTUALLY PARSING')
|
||||||
tokenizer = FastTokenizer(parser_code, line_offset)
|
tokenizer = FastTokenizer(parser_code, line_offset)
|
||||||
p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
|
p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
|
||||||
#p.module.parent = self.module # With the new parser this is not
|
#p.module.parent = self.module # With the new parser this is not
|
||||||
# necessary anymore?
|
# necessary anymore?
|
||||||
node = None
|
node = ParserNode(self.module, self.current_node)
|
||||||
|
|
||||||
return p, node
|
# The actual used code_part is different from the given code
|
||||||
|
# part, because of docstrings for example there's a chance that
|
||||||
|
# splits are wrong.
|
||||||
|
used_lines = self._lines[line_offset:p.module.end_pos[0] - 1]
|
||||||
|
code_part_actually_used = '\n'.join(used_lines)
|
||||||
|
node.set_parser(p, code_part_actually_used)
|
||||||
|
|
||||||
def reset_caches(self):
|
self.current_node.add_node(node, line_offset)
|
||||||
self.module.reset_caches()
|
return node
|
||||||
if self.current_node is not None:
|
|
||||||
self.current_node.reset_contents()
|
|
||||||
|
|
||||||
|
|
||||||
class FastTokenizer(object):
|
class FastTokenizer(object):
|
||||||
|
|||||||
@@ -609,9 +609,6 @@ class SubModule(Scope, Module):
|
|||||||
# this may be changed depending on fast_parser
|
# this may be changed depending on fast_parser
|
||||||
self.line_offset = 0
|
self.line_offset = 0
|
||||||
|
|
||||||
if 0:
|
|
||||||
self.use_as_parent = top_module or self
|
|
||||||
|
|
||||||
def set_global_names(self, names):
|
def set_global_names(self, names):
|
||||||
"""
|
"""
|
||||||
Global means in these context a function (subscope) which has a global
|
Global means in these context a function (subscope) which has a global
|
||||||
|
|||||||
Reference in New Issue
Block a user