mirror of
https://github.com/davidhalter/jedi.git
synced 2026-02-25 06:27:13 +08:00
Playing with the fast parser implementation.
This commit is contained in:
@@ -4,6 +4,7 @@ anything changes, it only reparses the changed parts. But because it's not
|
|||||||
finished (and still not working as I want), I won't document it any further.
|
finished (and still not working as I want), I won't document it any further.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
from jedi._compatibility import use_metaclass, unicode
|
from jedi._compatibility import use_metaclass, unicode
|
||||||
from jedi import settings
|
from jedi import settings
|
||||||
@@ -15,15 +16,14 @@ from jedi import cache
|
|||||||
from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER
|
from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER
|
||||||
|
|
||||||
|
|
||||||
class Module(pr.Module, pr.Simple):
|
class FastModule(pr.Module, pr.Simple):
|
||||||
|
type = 'file_input'
|
||||||
|
|
||||||
def __init__(self, parsers):
|
def __init__(self, parsers):
|
||||||
super(Module, self).__init__(self, (1, 0))
|
super(FastModule, self).__init__([])
|
||||||
self.parsers = parsers
|
self.parsers = parsers
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
|
|
||||||
self.start_pos = 1, 0
|
|
||||||
self.end_pos = None, None
|
|
||||||
|
|
||||||
def reset_caches(self):
|
def reset_caches(self):
|
||||||
""" This module does a whole lot of caching, because it uses different
|
""" This module does a whole lot of caching, because it uses different
|
||||||
parsers. """
|
parsers. """
|
||||||
@@ -69,45 +69,53 @@ class CachedFastParser(type):
|
|||||||
|
|
||||||
|
|
||||||
class ParserNode(object):
|
class ParserNode(object):
|
||||||
def __init__(self, parser, code, parent=None):
|
def __init__(self, fast_module, parser, code, parent=None):
|
||||||
|
self._fast_module = fast_module
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
self.children = []
|
self.parser_children = []
|
||||||
# must be created before new things are added to it.
|
# must be created before new things are added to it.
|
||||||
self.save_contents(parser, code)
|
self.save_contents(parser, code)
|
||||||
|
|
||||||
def save_contents(self, parser, code):
|
def save_contents(self, parser, code):
|
||||||
|
print('SAVE')
|
||||||
self.code = code
|
self.code = code
|
||||||
self.hash = hash(code)
|
self.hash = hash(code)
|
||||||
self.parser = parser
|
self.parser = parser
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# with fast_parser we have either 1 subscope or only statements.
|
# With fast_parser we have either 1 subscope or only statements.
|
||||||
self.content_scope = parser.module.subscopes[0]
|
self.content_scope = parser.module.subscopes[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.content_scope = parser.module
|
self.content_scope = self._fast_module
|
||||||
|
|
||||||
|
"""
|
||||||
scope = self.content_scope
|
scope = self.content_scope
|
||||||
self._contents = {}
|
self._contents = {}
|
||||||
for c in pr.SCOPE_CONTENTS:
|
for c in pr.SCOPE_CONTENTS:
|
||||||
self._contents[c] = list(getattr(scope, c))
|
self._contents[c] = list(getattr(scope, c))
|
||||||
self._is_generator = scope.is_generator
|
self._is_generator = scope.is_generator
|
||||||
|
"""
|
||||||
|
|
||||||
self.old_children = self.children
|
self.old_children = self.parser_children
|
||||||
self.children = []
|
self.parser_children = []
|
||||||
|
|
||||||
def reset_contents(self):
|
def reset_contents(self):
|
||||||
|
"""
|
||||||
scope = self.content_scope
|
scope = self.content_scope
|
||||||
for key, c in self._contents.items():
|
for key, c in self._contents.items():
|
||||||
setattr(scope, key, list(c))
|
setattr(scope, key, list(c))
|
||||||
scope.is_generator = self._is_generator
|
scope.is_generator = self._is_generator
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
if self.parent is None:
|
if self.parent is None:
|
||||||
# Global vars of the first one can be deleted, in the global scope
|
# Global vars of the first one can be deleted, in the global scope
|
||||||
# they make no sense.
|
# they make no sense.
|
||||||
self.parser.module.global_vars = []
|
self.parser.module.global_vars = []
|
||||||
|
"""
|
||||||
|
|
||||||
for c in self.children:
|
for c in self.parser_children:
|
||||||
c.reset_contents()
|
c.reset_contents()
|
||||||
|
|
||||||
def parent_until_indent(self, indent=None):
|
def parent_until_indent(self, indent=None):
|
||||||
@@ -140,34 +148,35 @@ class ParserNode(object):
|
|||||||
def _set_items(self, parser, set_parent=False):
|
def _set_items(self, parser, set_parent=False):
|
||||||
# insert parser objects into current structure
|
# insert parser objects into current structure
|
||||||
scope = self.content_scope
|
scope = self.content_scope
|
||||||
for c in pr.SCOPE_CONTENTS:
|
if set_parent:
|
||||||
content = getattr(scope, c)
|
for child in parser.module.children:
|
||||||
items = getattr(parser.module, c)
|
child.parent = scope
|
||||||
if set_parent:
|
scope.children.append(child)
|
||||||
for i in items:
|
print('\t\t', scope, child)
|
||||||
if i is None:
|
"""
|
||||||
continue # happens with empty returns
|
if isinstance(i, (pr.Function, pr.Class)):
|
||||||
i.parent = scope.use_as_parent
|
for d in i.decorators:
|
||||||
if isinstance(i, (pr.Function, pr.Class)):
|
d.parent = scope
|
||||||
for d in i.decorators:
|
"""
|
||||||
d.parent = scope.use_as_parent
|
# TODO global_vars ? is_generator ?
|
||||||
content += items
|
"""
|
||||||
|
|
||||||
# global_vars
|
|
||||||
cur = self
|
cur = self
|
||||||
while cur.parent is not None:
|
while cur.parent is not None:
|
||||||
cur = cur.parent
|
cur = cur.parent
|
||||||
cur.parser.module.global_vars += parser.module.global_vars
|
cur.parser.module.global_vars += parser.module.global_vars
|
||||||
|
|
||||||
scope.is_generator |= parser.module.is_generator
|
scope.is_generator |= parser.module.is_generator
|
||||||
|
"""
|
||||||
|
|
||||||
def add_node(self, node, set_parent=False):
|
def add_node(self, node, set_parent=False):
|
||||||
"""Adding a node means adding a node that was already added earlier"""
|
"""Adding a node means adding a node that was already added earlier"""
|
||||||
self.children.append(node)
|
print('ADD')
|
||||||
|
self.parser_children.append(node)
|
||||||
self._set_items(node.parser, set_parent=set_parent)
|
self._set_items(node.parser, set_parent=set_parent)
|
||||||
node.old_children = node.children # TODO potential memory leak?
|
node.old_children = node.parser_children # TODO potential memory leak?
|
||||||
node.children = []
|
node.parser_children = []
|
||||||
|
|
||||||
|
"""
|
||||||
scope = self.content_scope
|
scope = self.content_scope
|
||||||
while scope is not None:
|
while scope is not None:
|
||||||
#print('x',scope)
|
#print('x',scope)
|
||||||
@@ -175,10 +184,12 @@ class ParserNode(object):
|
|||||||
# TODO This seems like a strange thing. Check again.
|
# TODO This seems like a strange thing. Check again.
|
||||||
scope.end_pos = node.content_scope.end_pos
|
scope.end_pos = node.content_scope.end_pos
|
||||||
scope = scope.parent
|
scope = scope.parent
|
||||||
|
"""
|
||||||
return node
|
return node
|
||||||
|
|
||||||
def add_parser(self, parser, code):
|
def add_parser(self, parser, code):
|
||||||
return self.add_node(ParserNode(parser, code, self), True)
|
print('add parser')
|
||||||
|
return self.add_node(ParserNode(self._fast_module, parser, code, self), True)
|
||||||
|
|
||||||
|
|
||||||
class FastParser(use_metaclass(CachedFastParser)):
|
class FastParser(use_metaclass(CachedFastParser)):
|
||||||
@@ -189,10 +200,11 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
# set values like `pr.Module`.
|
# set values like `pr.Module`.
|
||||||
self._grammar = grammar
|
self._grammar = grammar
|
||||||
self.module_path = module_path
|
self.module_path = module_path
|
||||||
|
print(module_path)
|
||||||
|
|
||||||
self.current_node = None
|
self.current_node = None
|
||||||
self.parsers = []
|
self.parsers = []
|
||||||
self.module = Module(self.parsers)
|
self.module = FastModule(self.parsers)
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -285,6 +297,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
p = None
|
p = None
|
||||||
is_first = True
|
is_first = True
|
||||||
for code_part in self._split_parts(code):
|
for code_part in self._split_parts(code):
|
||||||
|
print(repr(code_part))
|
||||||
if is_first or line_offset >= p.module.end_pos[0]:
|
if is_first or line_offset >= p.module.end_pos[0]:
|
||||||
indent = len(code_part) - len(code_part.lstrip('\t '))
|
indent = len(code_part) - len(code_part.lstrip('\t '))
|
||||||
if is_first and self.current_node is not None:
|
if is_first and self.current_node is not None:
|
||||||
@@ -308,11 +321,12 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
code_part_actually_used = '\n'.join(used_lines)
|
code_part_actually_used = '\n'.join(used_lines)
|
||||||
|
|
||||||
if is_first and p.module.subscopes:
|
if is_first and p.module.subscopes:
|
||||||
|
print('NOXXXX')
|
||||||
# special case, we cannot use a function subscope as a
|
# special case, we cannot use a function subscope as a
|
||||||
# base scope, subscopes would save all the other contents
|
# base scope, subscopes would save all the other contents
|
||||||
new = empty_parser()
|
new = empty_parser()
|
||||||
if self.current_node is None:
|
if self.current_node is None:
|
||||||
self.current_node = ParserNode(new, '')
|
self.current_node = ParserNode(self.module, new, '')
|
||||||
else:
|
else:
|
||||||
self.current_node.save_contents(new, '')
|
self.current_node.save_contents(new, '')
|
||||||
self.parsers.append(new)
|
self.parsers.append(new)
|
||||||
@@ -320,7 +334,7 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
|
|
||||||
if is_first:
|
if is_first:
|
||||||
if self.current_node is None:
|
if self.current_node is None:
|
||||||
self.current_node = ParserNode(p, code_part_actually_used)
|
self.current_node = ParserNode(self.module, p, code_part_actually_used)
|
||||||
else:
|
else:
|
||||||
self.current_node.save_contents(p, code_part_actually_used)
|
self.current_node.save_contents(p, code_part_actually_used)
|
||||||
else:
|
else:
|
||||||
@@ -344,28 +358,28 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
else:
|
else:
|
||||||
self.parsers.append(empty_parser())
|
self.parsers.append(empty_parser())
|
||||||
|
|
||||||
|
""" TODO used?
|
||||||
self.module.end_pos = self.parsers[-1].module.end_pos
|
self.module.end_pos = self.parsers[-1].module.end_pos
|
||||||
|
"""
|
||||||
|
|
||||||
# print(self.parsers[0].module.get_code())
|
# print(self.parsers[0].module.get_code())
|
||||||
|
|
||||||
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
|
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
|
||||||
h = hash(code)
|
h = hash(code)
|
||||||
for index, node in enumerate(nodes):
|
for index, node in enumerate(nodes):
|
||||||
if node.hash != h or node.code != code:
|
if node.hash == h and node.code == code:
|
||||||
continue
|
if node != self.current_node:
|
||||||
|
offset = int(nodes[0] == self.current_node)
|
||||||
if node != self.current_node:
|
self.current_node.old_children.pop(index - offset)
|
||||||
offset = int(nodes[0] == self.current_node)
|
p = node.parser
|
||||||
self.current_node.old_children.pop(index - offset)
|
m = p.module
|
||||||
p = node.parser
|
m.line_offset += line_offset + 1 - m.start_pos[0]
|
||||||
m = p.module
|
break
|
||||||
m.line_offset += line_offset + 1 - m.start_pos[0]
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
tokenizer = FastTokenizer(parser_code, line_offset)
|
tokenizer = FastTokenizer(parser_code, line_offset)
|
||||||
p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
|
p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
|
||||||
top_module=self.module, no_docstr=no_docstr)
|
#p.module.parent = self.module # With the new parser this is not
|
||||||
p.module.parent = self.module
|
# necessary anymore?
|
||||||
node = None
|
node = None
|
||||||
|
|
||||||
return p, node
|
return p, node
|
||||||
@@ -392,21 +406,27 @@ class FastTokenizer(object):
|
|||||||
self.parser_indent = self.old_parser_indent = 0
|
self.parser_indent = self.old_parser_indent = 0
|
||||||
self.is_decorator = False
|
self.is_decorator = False
|
||||||
self.first_stmt = True
|
self.first_stmt = True
|
||||||
|
self._add_end_marker = False
|
||||||
self.parentheses_level = 0
|
self.parentheses_level = 0
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
def next(self):
|
def next(self):
|
||||||
""" Python 2 Compatibility """
|
""" Python 2 Compatibility """
|
||||||
return self.__next__()
|
return self.__next__()
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
if self.closed:
|
if self.closed:
|
||||||
raise common.MultiLevelStopIteration()
|
if self._add_end_marker:
|
||||||
|
self._add_end_marker = False
|
||||||
|
start_pos = self.current[2]
|
||||||
|
return tokenize.ENDMARKER, '', start_pos, ''
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
current = next(self.gen)
|
typ, value, start_pos, prefix = current = next(self.gen)
|
||||||
tok_type = current.type
|
if typ == ENDMARKER:
|
||||||
tok_str = current.string
|
self.closed = True
|
||||||
if tok_type == ENDMARKER:
|
|
||||||
raise common.MultiLevelStopIteration()
|
|
||||||
|
|
||||||
self.previous = self.current
|
self.previous = self.current
|
||||||
self.current = current
|
self.current = current
|
||||||
@@ -417,14 +437,14 @@ class FastTokenizer(object):
|
|||||||
|
|
||||||
def close():
|
def close():
|
||||||
if not self.first_stmt:
|
if not self.first_stmt:
|
||||||
|
self._add_end_marker = True
|
||||||
self.closed = True
|
self.closed = True
|
||||||
raise common.MultiLevelStopIteration()
|
|
||||||
|
|
||||||
# Ignore comments/newlines, irrelevant for indentation.
|
# Ignore comments/newlines, irrelevant for indentation.
|
||||||
if self.previous.type in (None, NEWLINE) \
|
if self.previous[0] in (None, NEWLINE) \
|
||||||
and tok_type not in (COMMENT, NEWLINE):
|
and typ not in (COMMENT, NEWLINE):
|
||||||
# print c, tok_name[c[0]]
|
# print c, tok_name[c[0]]
|
||||||
indent = current.start_pos[1]
|
indent = start_pos[1]
|
||||||
if self.parentheses_level:
|
if self.parentheses_level:
|
||||||
# parentheses ignore the indentation rules.
|
# parentheses ignore the indentation rules.
|
||||||
pass
|
pass
|
||||||
@@ -440,18 +460,18 @@ class FastTokenizer(object):
|
|||||||
self.new_indent = False
|
self.new_indent = False
|
||||||
|
|
||||||
if not self.in_flow:
|
if not self.in_flow:
|
||||||
if tok_str in FLOWS or tok_str in breaks:
|
if value in FLOWS or value in breaks:
|
||||||
self.in_flow = tok_str in FLOWS
|
self.in_flow = value in FLOWS
|
||||||
if not self.is_decorator and not self.in_flow:
|
if not self.is_decorator and not self.in_flow:
|
||||||
close()
|
close()
|
||||||
|
|
||||||
self.is_decorator = '@' == tok_str
|
self.is_decorator = '@' == value
|
||||||
if not self.is_decorator:
|
if not self.is_decorator:
|
||||||
self.old_parser_indent = self.parser_indent
|
self.old_parser_indent = self.parser_indent
|
||||||
self.parser_indent += 1 # new scope: must be higher
|
self.parser_indent += 1 # new scope: must be higher
|
||||||
self.new_indent = True
|
self.new_indent = True
|
||||||
|
|
||||||
if tok_str != '@':
|
if value != '@':
|
||||||
if self.first_stmt and not self.new_indent:
|
if self.first_stmt and not self.new_indent:
|
||||||
self.parser_indent = indent
|
self.parser_indent = indent
|
||||||
self.first_stmt = False
|
self.first_stmt = False
|
||||||
@@ -459,8 +479,8 @@ class FastTokenizer(object):
|
|||||||
# Ignore closing parentheses, because they are all
|
# Ignore closing parentheses, because they are all
|
||||||
# irrelevant for the indentation.
|
# irrelevant for the indentation.
|
||||||
|
|
||||||
if tok_str in '([{':
|
if value in '([{':
|
||||||
self.parentheses_level += 1
|
self.parentheses_level += 1
|
||||||
elif tok_str in ')]}':
|
elif value in ')]}':
|
||||||
self.parentheses_level = max(self.parentheses_level - 1, 0)
|
self.parentheses_level = max(self.parentheses_level - 1, 0)
|
||||||
return current
|
return current
|
||||||
|
|||||||
@@ -39,9 +39,6 @@ from jedi._compatibility import (next, Python3Method, encoding, is_py3,
|
|||||||
from jedi import cache
|
from jedi import cache
|
||||||
|
|
||||||
|
|
||||||
SCOPE_CONTENTS = 'asserts', 'subscopes', 'imports', 'statements', 'returns'
|
|
||||||
|
|
||||||
|
|
||||||
def is_node(node, *symbol_names):
|
def is_node(node, *symbol_names):
|
||||||
try:
|
try:
|
||||||
type = node.type
|
type = node.type
|
||||||
|
|||||||
@@ -136,7 +136,7 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
|
|||||||
# parser
|
# parser
|
||||||
# ----------------
|
# ----------------
|
||||||
|
|
||||||
fast_parser = False
|
fast_parser = True
|
||||||
"""
|
"""
|
||||||
Use the fast parser. This means that reparsing is only being done if
|
Use the fast parser. This means that reparsing is only being done if
|
||||||
something has been changed e.g. to a function. If this happens, only the
|
something has been changed e.g. to a function. If this happens, only the
|
||||||
|
|||||||
Reference in New Issue
Block a user