forked from VimPlug/jedi
a basic approach to the new fast parser
This commit is contained in:
@@ -126,7 +126,6 @@ class NoErrorTokenizer(object):
|
|||||||
else:
|
else:
|
||||||
c[2] = self.offset[0] + c[2][0], c[2][1]
|
c[2] = self.offset[0] + c[2][0], c[2][1]
|
||||||
c[3] = self.offset[0] + c[3][0], c[3][1]
|
c[3] = self.offset[0] + c[3][0], c[3][1]
|
||||||
print 'h', c, tokenize.tok_name[c[0]], self.current[2:4]
|
|
||||||
self.current = c
|
self.current = c
|
||||||
|
|
||||||
def close():
|
def close():
|
||||||
@@ -139,7 +138,7 @@ class NoErrorTokenizer(object):
|
|||||||
tokenize.NEWLINE, tokenize.DEDENT) \
|
tokenize.NEWLINE, tokenize.DEDENT) \
|
||||||
and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
|
and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
|
||||||
tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
|
tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
|
||||||
print c, tokenize.tok_name[c[0]]
|
#print c, tokenize.tok_name[c[0]]
|
||||||
|
|
||||||
tok = c[1]
|
tok = c[1]
|
||||||
indent = c[2][1]
|
indent = c[2][1]
|
||||||
|
|||||||
@@ -14,13 +14,15 @@ import cache
|
|||||||
import common
|
import common
|
||||||
|
|
||||||
|
|
||||||
|
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
|
||||||
|
|
||||||
|
|
||||||
class Module(pr.Simple, pr.Module):
|
class Module(pr.Simple, pr.Module):
|
||||||
def __init__(self, parsers):
|
def __init__(self, parsers):
|
||||||
self._end_pos = None, None
|
self._end_pos = None, None
|
||||||
super(Module, self).__init__(self, (1, 0))
|
super(Module, self).__init__(self, (1, 0))
|
||||||
self.parsers = parsers
|
self.parsers = parsers
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
self.line_offset = 0
|
|
||||||
|
|
||||||
def reset_caches(self):
|
def reset_caches(self):
|
||||||
""" This module does a whole lot of caching, because it uses different
|
""" This module does a whole lot of caching, because it uses different
|
||||||
@@ -63,18 +65,6 @@ class Module(pr.Simple, pr.Module):
|
|||||||
else:
|
else:
|
||||||
raise AttributeError("__getattr__ doesn't offer %s" % name)
|
raise AttributeError("__getattr__ doesn't offer %s" % name)
|
||||||
|
|
||||||
def get_statement_for_position(self, pos):
|
|
||||||
key = 'get_statement_for_position', pos
|
|
||||||
if key not in self.cache:
|
|
||||||
for p in self.parsers:
|
|
||||||
s = p.module.get_statement_for_position(pos)
|
|
||||||
if s:
|
|
||||||
self.cache[key] = s
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
self.cache[key] = None
|
|
||||||
return self.cache[key]
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def used_names(self):
|
def used_names(self):
|
||||||
if not self.parsers:
|
if not self.parsers:
|
||||||
@@ -92,30 +82,6 @@ class Module(pr.Simple, pr.Module):
|
|||||||
self.cache[key] = dct
|
self.cache[key] = dct
|
||||||
return self.cache[key]
|
return self.cache[key]
|
||||||
|
|
||||||
@property
|
|
||||||
def docstr(self):
|
|
||||||
if not self.parsers:
|
|
||||||
raise NotImplementedError("Parser doesn't exist.")
|
|
||||||
return self.parsers[0].module.docstr
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
if not self.parsers:
|
|
||||||
raise NotImplementedError("Parser doesn't exist.")
|
|
||||||
return self.parsers[0].module.name
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self):
|
|
||||||
if not self.parsers:
|
|
||||||
raise NotImplementedError("Parser doesn't exist.")
|
|
||||||
return self.parsers[0].module.path
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_builtin(self):
|
|
||||||
if not self.parsers:
|
|
||||||
raise NotImplementedError("Parser doesn't exist.")
|
|
||||||
return self.parsers[0].module.is_builtin
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def start_pos(self):
|
def start_pos(self):
|
||||||
""" overwrite start_pos of Simple """
|
""" overwrite start_pos of Simple """
|
||||||
@@ -157,12 +123,93 @@ class CachedFastParser(type):
|
|||||||
return p
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
class ParserNode(object):
    """One node in the tree of sub-parsers kept by the fast parser.

    A node stores a parser together with the code it was created for, the
    hash of that code, a link to its parent node and a list of child nodes.
    It also keeps a snapshot of the lists named in ``SCOPE_CONTENTS`` (and
    of ``is_generator``) of its content scope, so that whatever
    ``add_node`` merged into that scope can be undone by
    ``reset_contents``.
    """

    def __init__(self, parser, code, parent=None):
        """Store `parser`, `code` and `parent` and snapshot the contents.

        :param parser: object with a ``module`` attribute (the parsed part).
        :param code: the source the parser was created for; its hash is
            stored in ``self.hash``.
        :param parent: the parent ``ParserNode`` or None for the root.
        """
        self.parent = parent
        self.parser = parser
        self.code = code
        self.hash = hash(code)

        self.children = []
        self._checked = True
        self.save_contents()

    def save_contents(self):
        """Snapshot the content scope's lists and its generator flag."""
        scope = self._get_content_scope()
        # Copies, not references: `add_node` extends the live lists in
        # place and must not alter the snapshot.
        self._contents = dict(
            (name, list(getattr(scope, name))) for name in SCOPE_CONTENTS
        )
        self._is_generator = scope.is_generator

    def _get_content_scope(self):
        """Return the module's first subscope, or the module without one."""
        # NOTE(review): this is re-evaluated on every call. If the module
        # had no subscopes and `add_node` later merged some in, a different
        # object is returned afterwards - confirm that this is intended.
        try:
            # with fast_parser we have either 1 subscope or only statements.
            return self.parser.module.subscopes[0]
        except IndexError:
            return self.parser.module

    def reset_contents(self):
        """Mark this subtree as unchecked and restore the saved contents.

        Every node of the subtree gets ``_checked = False`` and its content
        scope gets back the lists and the ``is_generator`` value recorded
        by the last ``save_contents`` call.
        """
        self._checked = False

        scope = self._get_content_scope()
        for key, saved in self._contents.items():
            # Hand out a fresh copy: the scope's lists are extended in
            # place by `add_node`, the snapshot has to survive that.
            setattr(scope, key, list(saved))
        scope.is_generator = self._is_generator

        for child in self.children:
            child.reset_contents()

    def parent_until_indent(self, indent):
        """Walk up the tree to the first node indented less than `indent`.

        On every node that is left behind, the children starting at the
        first unchecked one are dropped.

        :param indent: column to compare the nodes' ``indent`` against.
        :return: the first node (starting with this one) whose ``indent``
            is smaller than `indent`.
        """
        if self.indent >= indent:
            for i, child in enumerate(self.children):
                if not child._checked:
                    # The first unchecked child and all of the following
                    # ones are removed.
                    del self.children[i:]
                    break

            return self.parent.parent_until_indent(indent)
        return self

    @property
    def indent(self):
        """Column of the first element of the module, -1 without parent.

        The first subscope is used, then the first statement, then the
        first import. An ``IndexError`` is raised if the module has none
        of them.
        """
        if not self.parent:
            return -1
        module = self.parser.module
        try:
            el = module.subscopes[0]
        except IndexError:
            try:
                el = module.statements[0]
            except IndexError:
                el = module.imports[0]
        return el.start_pos[1]

    def add_node(self, parser, code):
        """Create a child node for `parser` and merge its module contents.

        The child is inserted in front of the first unchecked child, or
        appended if all children are checked. The lists named in
        ``SCOPE_CONTENTS`` of ``parser.module`` are appended to the lists
        of this node's content scope, and ``is_generator`` is or-ed in.

        :return: the newly created child node.
        """
        for insert, child in enumerate(self.children):
            if not child._checked:
                break
        else:
            # No unchecked child (or no children at all): append.
            insert = len(self.children)
        node = ParserNode(parser, code, self)
        self.children.insert(insert, node)

        # insert parser objects into current structure
        scope = self._get_content_scope()
        for name in SCOPE_CONTENTS:
            content = getattr(scope, name)
            content += getattr(parser.module, name)
        scope.is_generator |= parser.module.is_generator
        return node
|
||||||
|
|
||||||
|
|
||||||
class FastParser(use_metaclass(CachedFastParser)):
|
class FastParser(use_metaclass(CachedFastParser)):
|
||||||
def __init__(self, code, module_path=None, user_position=None):
|
def __init__(self, code, module_path=None, user_position=None):
|
||||||
# set values like `pr.Module`.
|
# set values like `pr.Module`.
|
||||||
self.module_path = module_path
|
self.module_path = module_path
|
||||||
self.user_position = user_position
|
self.user_position = user_position
|
||||||
|
|
||||||
|
self.current_node = None
|
||||||
self.parsers = []
|
self.parsers = []
|
||||||
self.module = Module(self.parsers)
|
self.module = Module(self.parsers)
|
||||||
self.reset_caches()
|
self.reset_caches()
|
||||||
@@ -274,83 +321,84 @@ class FastParser(use_metaclass(CachedFastParser)):
|
|||||||
|
|
||||||
def _parse(self, code):
|
def _parse(self, code):
|
||||||
""" :type code: str """
|
""" :type code: str """
|
||||||
def set_parent(module):
|
|
||||||
def get_indent(module):
|
|
||||||
try:
|
|
||||||
el = module.subscopes[0]
|
|
||||||
except IndexError:
|
|
||||||
try:
|
|
||||||
el = module.statements[0]
|
|
||||||
except IndexError:
|
|
||||||
el = module.imports[0]
|
|
||||||
return el.start_pos[1]
|
|
||||||
|
|
||||||
if self.parsers and False:
|
|
||||||
new_indent = get_indent(module)
|
|
||||||
old_indent = get_indent(self.parsers[-1].module)
|
|
||||||
if old_indent < new_indent:
|
|
||||||
#module.parent = self.parsers[-1].module.subscopes[0]
|
|
||||||
# TODO set parents + add to subscopes
|
|
||||||
return
|
|
||||||
p.module.parent = self.module
|
|
||||||
|
|
||||||
parts = self._split_parts(code)
|
parts = self._split_parts(code)
|
||||||
|
self.parsers[:] = []
|
||||||
|
|
||||||
if settings.fast_parser_always_reparse:
|
self._code = code
|
||||||
self.parsers[:] = []
|
self._line_offset = 0
|
||||||
|
self._start = 0
|
||||||
# dict comprehensions are not available in py2.5/2.6 :-(
|
|
||||||
hashes = dict((p.hash, p) for p in self.parsers)
|
|
||||||
|
|
||||||
line_offset = 0
|
|
||||||
start = 0
|
|
||||||
p = None
|
p = None
|
||||||
parser_order = 0
|
is_first = True
|
||||||
for code_part in parts:
|
for code_part in parts:
|
||||||
lines = code_part.count('\n') + 1
|
lines = code_part.count('\n') + 1
|
||||||
# the parser is using additional newlines, therefore substract
|
if is_first or self._line_offset >= p.end_pos[0] - 1:
|
||||||
if p is None or line_offset >= p.end_pos[0] - 2:
|
indent = len(re.match(r'[ \t]*', code).groups(0))
|
||||||
# check if code_part has already been parsed
|
if is_first and self.current_node is not None:
|
||||||
h = hash(code_part)
|
nodes = [self]
|
||||||
|
|
||||||
if h in hashes and hashes[h].code == code_part:
|
|
||||||
p = hashes[h]
|
|
||||||
del hashes[h]
|
|
||||||
m = p.module
|
|
||||||
m.line_offset += line_offset + 1 - m.start_pos[0]
|
|
||||||
if self.user_position is not None and \
|
|
||||||
m.start_pos <= self.user_position <= m.end_pos:
|
|
||||||
# It's important to take care of the whole user
|
|
||||||
# positioning stuff, if no reparsing is being done.
|
|
||||||
p.user_stmt = m.get_statement_for_position(
|
|
||||||
self.user_position, include_imports=True)
|
|
||||||
if p.user_stmt:
|
|
||||||
p.user_scope = p.user_stmt.parent
|
|
||||||
else:
|
|
||||||
p.user_scope = self.scan_user_scope(m) \
|
|
||||||
or self.module
|
|
||||||
else:
|
else:
|
||||||
p = parsing.Parser(code[start:],
|
nodes = []
|
||||||
self.module_path, self.user_position,
|
if self.current_node is not None:
|
||||||
offset=(line_offset, 0), is_fast_parser=True,
|
|
||||||
top_module=self.module)
|
|
||||||
|
|
||||||
p.hash = h
|
self.current_node = \
|
||||||
p.code = code_part
|
self.current_node.parent_until_indent(indent)
|
||||||
set_parent(p.module)
|
nodes += self.current_node.children
|
||||||
self.parsers.insert(parser_order, p)
|
|
||||||
|
|
||||||
parser_order += 1
|
# check if code_part has already been parsed
|
||||||
line_offset += lines
|
p = self._get_parser(code, nodes)
|
||||||
print line_offset
|
|
||||||
start += len(code_part) + 1 # +1 for newline
|
if is_first:
|
||||||
self.parsers[parser_order + 1:] = []
|
if self.current_node is None:
|
||||||
|
self.current_node = ParserNode(p, code)
|
||||||
|
else:
|
||||||
|
self.current_node.parser = p
|
||||||
|
self.current_node.save_contents()
|
||||||
|
else:
|
||||||
|
self.current_node = self.current_node.add_node(p, code)
|
||||||
|
self.parsers.append(p)
|
||||||
|
|
||||||
|
is_first = False
|
||||||
|
|
||||||
|
self._line_offset += lines
|
||||||
|
self._start += len(code_part) + 1 # +1 for newline
|
||||||
|
print 'hmm'
|
||||||
for p in self.parsers:
|
for p in self.parsers:
|
||||||
print(p.module.get_code())
|
print(p.module.get_code())
|
||||||
print(p.module.start_pos, p.module.end_pos)
|
print(p.module.start_pos, p.module.end_pos)
|
||||||
exit()
|
exit()
|
||||||
|
del self._code
|
||||||
|
|
||||||
|
def _get_parser(self, code, nodes):
    """Return a parser for `code`, reusing one from `nodes` if possible.

    A node is reused if both its stored hash and its stored code are equal
    to those of `code`. The matching node is removed from `nodes`, the
    line offset of its module is adjusted to ``self._line_offset`` and, if
    ``self.user_position`` lies within the module, ``user_stmt`` and
    ``user_scope`` of the parser are set anew. Without a match a new
    ``parsing.Parser`` is created from ``self._code[self._start:]``.

    :param code: the source to look up.
    :param nodes: list of nodes to search; modified in place on a match.
    :return: the reused or newly created parser.
    """
    h = hash(code)
    # Check every node, not only the first one with an equal hash: a node
    # with a colliding hash but different code must not hide a real match
    # further down the list.
    for index, node in enumerate(nodes):
        if node.hash == h and node.code == code:
            break
    else:
        return parsing.Parser(self._code[self._start:],
                              self.module_path, self.user_position,
                              offset=(self._line_offset, 0),
                              is_fast_parser=True, top_module=self.module)

    del nodes[index]
    p = node.parser
    m = p.module
    m.line_offset += self._line_offset + 1 - m.start_pos[0]
    if self.user_position is not None and \
            m.start_pos <= self.user_position <= m.end_pos:
        # It's important to take care of the whole user
        # positioning stuff, if no reparsing is being done.
        p.user_stmt = m.get_statement_for_position(
            self.user_position, include_imports=True)
        if p.user_stmt:
            p.user_scope = p.user_stmt.parent
        else:
            p.user_scope = self.scan_user_scope(m) or self.module
    return p
|
||||||
|
|
||||||
def reset_caches(self):
|
def reset_caches(self):
|
||||||
self._user_scope = None
|
self._user_scope = None
|
||||||
self._user_stmt = None
|
self._user_stmt = None
|
||||||
self.module.reset_caches()
|
self.module.reset_caches()
|
||||||
|
if self.current_node is not None:
|
||||||
|
self.current_node.reset_contents()
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ class Scope(Simple, IsScope):
|
|||||||
self.statements = []
|
self.statements = []
|
||||||
self.docstr = ''
|
self.docstr = ''
|
||||||
self.asserts = []
|
self.asserts = []
|
||||||
|
# Needed here for fast_parser, because the fast_parser splits and
|
||||||
|
# returns will be in "normal" modules.
|
||||||
|
self.returns = []
|
||||||
|
self.is_generator = False
|
||||||
|
|
||||||
def add_scope(self, sub, decorators):
|
def add_scope(self, sub, decorators):
|
||||||
sub.parent = self.use_as_parent
|
sub.parent = self.use_as_parent
|
||||||
@@ -194,13 +198,12 @@ class Scope(Simple, IsScope):
|
|||||||
if len(self.docstr) > 0:
|
if len(self.docstr) > 0:
|
||||||
string += '"""' + self.docstr + '"""\n'
|
string += '"""' + self.docstr + '"""\n'
|
||||||
|
|
||||||
returns = self.returns if hasattr(self, 'returns') else []
|
objs = self.subscopes + self.imports + self.statements + self.returns
|
||||||
objs = self.subscopes + self.imports + self.statements + returns
|
|
||||||
for obj in sorted(objs, key=lambda x: x.start_pos):
|
for obj in sorted(objs, key=lambda x: x.start_pos):
|
||||||
if isinstance(obj, Scope):
|
if isinstance(obj, Scope):
|
||||||
string += obj.get_code(first_indent=True, indention=indention)
|
string += obj.get_code(first_indent=True, indention=indention)
|
||||||
else:
|
else:
|
||||||
if obj in returns and not isinstance(self, Lambda):
|
if obj in self.returns and not isinstance(self, Lambda):
|
||||||
string += 'yield ' if self.is_generator else 'return '
|
string += 'yield ' if self.is_generator else 'return '
|
||||||
string += obj.get_code()
|
string += obj.get_code()
|
||||||
|
|
||||||
@@ -439,8 +442,6 @@ class Function(Scope):
|
|||||||
p.parent = self.use_as_parent
|
p.parent = self.use_as_parent
|
||||||
p.parent_function = self.use_as_parent
|
p.parent_function = self.use_as_parent
|
||||||
self.decorators = []
|
self.decorators = []
|
||||||
self.returns = []
|
|
||||||
self.is_generator = False
|
|
||||||
self.listeners = set() # not used here, but in evaluation.
|
self.listeners = set() # not used here, but in evaluation.
|
||||||
|
|
||||||
if annotation is not None:
|
if annotation is not None:
|
||||||
@@ -456,6 +457,9 @@ class Function(Scope):
|
|||||||
string += "pass\n"
|
string += "pass\n"
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
def is_empty(self):
    """Return a true value only if the base check passes and there are
    no entries in ``self.returns``."""
    base_empty = super(Function, self).is_empty()
    return base_empty and not self.returns
|
||||||
|
|
||||||
def get_set_vars(self):
|
def get_set_vars(self):
|
||||||
n = super(Function, self).get_set_vars()
|
n = super(Function, self).get_set_vars()
|
||||||
for p in self.params:
|
for p in self.params:
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ Parser
|
|||||||
~~~~~~
|
~~~~~~
|
||||||
|
|
||||||
.. autodata:: fast_parser
|
.. autodata:: fast_parser
|
||||||
.. autodata:: fast_parser_always_reparse
|
|
||||||
.. autodata:: use_function_definition_cache
|
.. autodata:: use_function_definition_cache
|
||||||
|
|
||||||
|
|
||||||
@@ -150,12 +149,6 @@ something has been changed e.g. to a function. If this happens, only the
|
|||||||
function is being reparsed.
|
function is being reparsed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fast_parser_always_reparse = False
|
|
||||||
"""
|
|
||||||
This is just a debugging option. Always reparsing means that the fast parser
|
|
||||||
is basically useless. So don't use it.
|
|
||||||
"""
|
|
||||||
|
|
||||||
use_function_definition_cache = True
|
use_function_definition_cache = True
|
||||||
"""
|
"""
|
||||||
Use the cache (full cache) to generate function_definition's. This may fail
|
Use the cache (full cache) to generate function_definition's. This may fail
|
||||||
|
|||||||
Reference in New Issue
Block a user