Files
jedi/jedi/fast_parser.py
2013-04-27 23:45:48 +04:30

419 lines
14 KiB
Python

"""
Basically a parser that is faster, because it tries to parse only parts and if
anything changes, it only reparses the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further.
"""
import re
import operator
from jedi._compatibility import use_metaclass, reduce, property
from jedi import settings
from jedi import parsing
from jedi import parsing_representation as pr
from jedi import cache
import common
# Names of the scope attributes that `ParserNode` snapshots in
# `save_contents`/`reset_contents` and that `ParserNode._set_items` extends
# with the corresponding attributes of another parser's module.
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class Module(pr.Simple, pr.Module):
    """
    Module object that sits in front of a list of parsers.

    Attributes that are not found on this object are looked up on the module
    of the first parser (see `__getattr__`); `used_names` is merged from the
    modules of all parsers.
    """
    def __init__(self, parsers):
        """
        :param parsers: List of parser objects, each providing a ``module``
            attribute. The list is stored as given (not copied).
        """
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()

        self.start_pos = 1, 0
        self.end_pos = None, None

    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
        parsers. Drops the merged `used_names` and clears `user_scope` and
        `user_stmt` on every parser. """
        self._used_names = None
        for p in self.parsers:
            p.user_scope = None
            p.user_stmt = None

    def __getattr__(self, name):
        """
        Delegate unknown attributes to the module of the first parser.

        :raises AttributeError: for double-underscore names, and when there
            is no first parser to delegate to.
        """
        if name.startswith('__'):
            raise AttributeError('Not available!')
        try:
            # Read `parsers` without going through `__getattr__` again:
            # if the attribute is not set yet, `self.parsers` would recurse
            # endlessly. An empty list must also end up as AttributeError,
            # otherwise `hasattr`/`getattr(obj, name, default)` see an
            # IndexError.
            first_parser = object.__getattribute__(self, 'parsers')[0]
        except (AttributeError, IndexError):
            raise AttributeError(name)
        return getattr(first_parser.module, name)

    @property
    def used_names(self):
        """
        Dictionary merged from `module.used_names` of all parsers; the
        values of equal keys are united into one set. Computed on first
        access and kept until `reset_caches` is called.
        """
        if self._used_names is None:
            dct = {}
            for p in self.parsers:
                for k, statement_set in p.module.used_names.items():
                    if k in dct:
                        dct[k] |= statement_set
                    else:
                        # copy, so the parser's own set is not modified by
                        # the `|=` above.
                        dct[k] = set(statement_set)

            self._used_names = dct
        return self._used_names

    def __repr__(self):
        return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
                                    self.start_pos[0], self.end_pos[0])
class CachedFastParser(type):
    """
    Metaclass for caching `FastParser`: calling the class either builds a
    new parser or updates the one found in `cache.parser_cache`.
    """
    def __call__(self, source, module_path=None, user_position=None):
        """
        Return a parser for `source`.

        - With `settings.fast_parser` switched off a plain `parsing.Parser`
          is returned.
        - If `cache.parser_cache` has an entry for `module_path` whose
          parser is not a plain `parsing.Parser`, that parser is updated
          with the new source and returned.
        - Otherwise a new instance of the class is created.
        """
        if not settings.fast_parser:
            return parsing.Parser(source, module_path, user_position)

        # cached_item is a `cache.ParserCacheItem` (or None)
        cached_item = cache.parser_cache.get(module_path, None)
        reusable = cached_item is not None \
            and not isinstance(cached_item.parser, parsing.Parser)
        if reusable:
            fast_parser = cached_item.parser
            fast_parser.update(source, user_position)
            return fast_parser

        return super(CachedFastParser, self).__call__(source, module_path,
                                                      user_position)
class ParserNode(object):
    """
    Node in a tree of parsed code parts.

    A node keeps the parser of its part, the code string it was created
    with (plus its hash, used for lookups), its child nodes and a snapshot
    of the contents of its scope, so that items added from other parsers
    can be removed again with `reset_contents`.
    """
    def __init__(self, parser, code, parent=None):
        """
        :param parser: Parser object with a ``module`` attribute.
        :param code: Code string stored on the node; its hash is stored as
            ``self.hash``.
        :param parent: Parent `ParserNode` or None for the topmost node.
        """
        self.parent = parent
        self.code = code
        self.hash = hash(code)

        self.children = []
        self._old_children = []
        # must be created before new things are added to it.
        self.save_contents(parser)

    def save_contents(self, parser):
        """
        Make `parser` the parser of this node and take a snapshot of the
        current contents of its scope (the attributes named in
        `SCOPE_CONTENTS` and `is_generator`).
        """
        self.parser = parser

        try:
            # with fast_parser we have either 1 subscope or only statements.
            self._content_scope = parser.module.subscopes[0]
        except IndexError:
            self._content_scope = parser.module

        scope = self._content_scope
        # Store copies: `_set_items` extends the lists of the scope in
        # place, a stored reference would silently grow with them and
        # `reset_contents` could not restore anything.
        self._contents = dict((name, list(getattr(scope, name)))
                              for name in SCOPE_CONTENTS)
        self._is_generator = scope.is_generator

    def reset_contents(self):
        """
        Restore the scope of this node (and of all child nodes) to the
        snapshot taken by `save_contents` and clear the parser's
        `user_scope`.
        """
        scope = self._content_scope
        for key, saved in self._contents.items():
            # Hand out a copy, the snapshot itself must stay untouched when
            # the scope is extended again.
            setattr(scope, key, list(saved))
        scope.is_generator = self._is_generator
        self.parser.user_scope = None

        if self.parent is None:
            # Global vars of the first one can be deleted, in the global scope
            # they make no sense.
            self.parser.module.global_vars = []

        for child in self.children:
            child.reset_contents()

    def parent_until_indent(self, indent):
        """
        Walk up the tree and return the first node whose indentation is
        lower than `indent`, or the topmost node. `_old_children` of every
        node that is left behind on the way up is emptied.
        """
        if self.indent >= indent and self.parent:
            self._old_children = []
            return self.parent.parent_until_indent(indent)
        return self

    @property
    def indent(self):
        """
        Column of the first subscope, statement, import or return (checked
        in that order) of the parser's module. A node without parent has
        indent 0; a node whose module has none of these elements is treated
        as one deeper than its parent.
        """
        if not self.parent:
            return 0
        module = self.parser.module
        for name in ('subscopes', 'statements', 'imports', 'returns'):
            try:
                el = getattr(module, name)[0]
            except IndexError:
                continue
            return el.start_pos[1]
        return self.parent.indent + 1

    def _set_items(self, parser, set_parent=False):
        """
        Insert the contents of `parser.module` into the scope of this node.

        :param set_parent: If true, the inserted items (and the decorators
            of inserted functions/classes) get the scope of this node as
            parent.
        """
        # insert parser objects into current structure
        scope = self._content_scope
        for name in SCOPE_CONTENTS:
            content = getattr(scope, name)
            items = getattr(parser.module, name)
            if set_parent:
                for item in items:
                    item.parent = scope.use_as_parent
                    if isinstance(item, (pr.Function, pr.Class)):
                        for decorator in item.decorators:
                            decorator.parent = scope.use_as_parent
            # in-place extension of the scope's attribute
            content += items

        if isinstance(parser.user_scope, pr.SubModule) \
                and parser.start_pos <= parser.user_position < parser.end_pos:
            parser.user_scope = scope

        # global_vars are collected in the module of the topmost node.
        top = self
        while top.parent is not None:
            top = top.parent
        top.parser.module.global_vars += parser.module.global_vars

        scope.is_generator |= parser.module.is_generator

    def add_node(self, node):
        """Adding a node means adding a node that was already added earlier"""
        self.children.append(node)
        self._set_items(node.parser)
        # The previous children of the node are kept as candidates for
        # later lookups, the node itself starts without children.
        node._old_children = node.children
        node.children = []
        return node

    def add_parser(self, parser, code):
        """
        Create a new child node for `parser`/`code`, insert the parser's
        contents into the scope of this node and return the new node.
        """
        node = ParserNode(parser, code, self)
        self._set_items(parser, set_parent=True)
        self.children.append(node)
        return node
class FastParser(use_metaclass(CachedFastParser)):
    """
    Parser that splits the source into parts (see `_split_parts`), uses one
    `parsing.Parser` per part and makes the results available through one
    combined `Module`.

    The tree of `ParserNode` objects (`current_node`) is kept between calls
    of `update`, so that the parser of a part that is found again can be
    reused instead of parsing the part once more.
    """
    def __init__(self, code, module_path=None, user_position=None):
        """
        :type code: str
        :param module_path: Passed on to every `parsing.Parser`.
        :param user_position: Position that is compared with the
            `start_pos`/`end_pos` of scopes to find the user scope and the
            user statement, or None.
        """
        # set values like `pr.Module`.
        self.module_path = module_path
        self.user_position = user_position
        self._user_scope = None

        self.current_node = None
        self.parsers = []
        self.module = Module(self.parsers)
        self.reset_caches()

        self._parse(code)

    @property
    def user_scope(self):
        """
        The `user_scope` of the last parser that has one which is not a
        `pr.SubModule`; `self.module` if there is no such parser. The result
        is cached until `reset_caches`.
        """
        if self._user_scope is None:
            for p in self.parsers:
                if p.user_scope:
                    if isinstance(p.user_scope, pr.SubModule):
                        continue
                    self._user_scope = p.user_scope

        if isinstance(self._user_scope, pr.SubModule) \
                or self._user_scope is None:
            self._user_scope = self.module
        return self._user_scope

    @property
    def user_stmt(self):
        """
        The `user_stmt` of the first parser that has one, otherwise None.
        """
        if self._user_stmt is None:
            for p in self.parsers:
                if p.user_stmt:
                    self._user_stmt = p.user_stmt
                    break
        return self._user_stmt

    def update(self, code, user_position=None):
        """ Parse `code` again, with a new user position. """
        self.user_position = user_position
        self.reset_caches()
        self._parse(code)

    def scan_user_scope(self, sub_module):
        """ Scan with self.user_position.

        Returns the innermost `pr.Scope` below `sub_module` whose range
        contains `self.user_position`, or None.

        :type sub_module: pr.SubModule
        """
        for scope in sub_module.statements + sub_module.subscopes:
            if isinstance(scope, pr.Scope):
                if scope.start_pos <= self.user_position <= scope.end_pos:
                    return self.scan_user_scope(scope) or scope
        return None

    def _split_parts(self, code):
        """
        Split the code into different parts. This makes it possible to parse
        each part separately and therefore cache parts of the file and not
        everything.

        A new part is started at `def`/`class` lines (a decorator starts the
        part of the definition it belongs to) and at dedents. Lines that
        start with one of `common.FLOWS` do not split; everything within
        such a flow stays in the current part.

        :type code: str
        :return: List of code strings.
        """
        def add_part():
            txt = '\n'.join(current_lines)
            if txt:
                if add_to_last and parts:
                    parts[-1] += '\n' + txt
                else:
                    parts.append(txt)
                current_lines[:] = []

        # NOTE(review): the keywords are matched as prefixes, without a word
        # boundary - a line starting with e.g. `default` presumably matches
        # `def` as well. Confirm before relying on it.
        keyword_re = re.compile(
            '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS))
        indent_re = re.compile('^([\t ]*)(.?)')

        current_lines = []
        parts = []
        is_decorator = False
        current_indent = 0
        new_indent = False
        in_flow = False
        add_to_last = False
        # All things within flows are simply being ignored.
        for line in code.splitlines():
            # check for dedents
            m = indent_re.match(line)
            indent = len(m.group(1))
            if m.group(2) in ('', '#'):
                # just ignore comments and blank lines
                current_lines.append(line)
                continue

            if indent < current_indent:  # -> dedent
                current_indent = indent
                new_indent = False
                if not in_flow:
                    add_part()
                    add_to_last = False
                in_flow = False
            elif new_indent:
                # first line after a keyword line, it defines the
                # indentation of the new block.
                current_indent = indent
                new_indent = False

            # Check lines for functions/classes and split the code there.
            if not in_flow:
                m = keyword_re.match(line)
                if m:
                    in_flow = m.group(1) in common.FLOWS
                    if not is_decorator and not in_flow:
                        add_part()
                        add_to_last = False
                    is_decorator = '@' == m.group(1)
                    if not is_decorator:
                        current_indent += 1  # it must be higher
                        new_indent = True
                elif is_decorator:
                    is_decorator = False
                    add_to_last = True

            current_lines.append(line)
        add_part()
        return parts

    def _parse(self, code):
        """
        Split `code` into parts, get a parser for each part that is not
        already covered by the previous parser and rebuild `self.parsers`
        and the node tree.

        :type code: str
        """
        def empty_parser():
            return self._get_parser('', '', 0, [])[0]

        def set_base_node(parser):
            """ Use `parser` for the current node, create it if needed. """
            if self.current_node is None:
                self.current_node = ParserNode(parser, code)
            else:
                self.current_node.save_contents(parser)
                # `save_contents` only replaces the parser. Refresh the
                # lookup key as well, otherwise a later update could find
                # this node by the hash of an older source and reuse a
                # parser that was created for different code.
                self.current_node.code = code
                self.current_node.hash = hash(code)

        parts = self._split_parts(code)
        self.parsers[:] = []

        line_offset = 0
        start = 0
        p = None
        is_first = True

        for code_part in parts:
            lines = code_part.count('\n') + 1
            # Parts that start before the end of the previous parser are
            # skipped, only the offsets below are advanced for them.
            if is_first or line_offset >= p.end_pos[0]:
                indent = len(re.match(r'[ \t]*', code_part).group(0))
                if is_first and self.current_node is not None:
                    nodes = [self.current_node]
                else:
                    nodes = []
                if self.current_node is not None:
                    self.current_node = \
                        self.current_node.parent_until_indent(indent)
                    nodes += self.current_node._old_children

                # check if code_part has already been parsed
                # NOTE(review): nodes are created with the complete `code`
                # while the lookup in `_get_parser` uses `code_part`, so a
                # node can only be found if a part equals the complete
                # source the node was created with - confirm whether
                # `code_part` was intended as the key.
                p, node = self._get_parser(code_part, code[start:],
                                           line_offset, nodes)

                if is_first and p.module.subscopes:
                    # special case, we cannot use a function subscope as a
                    # base scope, subscopes would save all the other contents
                    new = empty_parser()
                    set_base_node(new)
                    self.parsers.append(new)
                    is_first = False

                if is_first:
                    set_base_node(p)
                elif node is None:
                    self.current_node = \
                        self.current_node.add_parser(p, code)
                else:
                    self.current_node = self.current_node.add_node(node)
                self.parsers.append(p)

                is_first = False

            line_offset += lines
            # NOTE(review): one character per line break is counted here,
            # presumably assuming '\n' line endings in `code`.
            start += len(code_part) + 1  # +1 for newline

        if not self.parsers:
            self.parsers.append(empty_parser())

        self.module.end_pos = self.parsers[-1].end_pos

    def _get_parser(self, code, parser_code, line_offset, nodes):
        """
        Return a parser for `code`: the parser of a matching node of
        `nodes` or a new `parsing.Parser`.

        :param code: The code to look for in `nodes` (compared by hash and
            then by equality).
        :param parser_code: The source a new parser is created with, if no
            node matches.
        :param line_offset: Line offset of the code within the whole source.
        :param nodes: Candidate `ParserNode` objects; a matching node is
            removed from this list.
        :return: Tuple ``(parser, node)``; `node` is None if a new parser
            was created.
        """
        h = hash(code)
        hashes = [n.hash for n in nodes]
        node = None
        try:
            index = hashes.index(h)
            if nodes[index].code != code:
                # same hash, but different code
                raise ValueError()
        except ValueError:
            p = parsing.Parser(parser_code, self.module_path,
                               self.user_position, offset=(line_offset, 0),
                               is_fast_parser=True, top_module=self.module)
            p.module.parent = self.module
        else:
            node = nodes.pop(index)
            p = node.parser
            m = p.module
            # shift the reused module, so that it starts at the new offset.
            m.line_offset += line_offset + 1 - m.start_pos[0]
            if self.user_position is not None and \
                    m.start_pos <= self.user_position <= m.end_pos:
                # It's important to take care of the whole user
                # positioning stuff, if no reparsing is being done.
                p.user_stmt = m.get_statement_for_position(
                    self.user_position, include_imports=True)
                if p.user_stmt:
                    p.user_scope = p.user_stmt.parent
                else:
                    p.user_scope = self.scan_user_scope(m) or self.module

        return p, node

    def reset_caches(self):
        """
        Forget the cached user scope/statement, reset the caches of the
        module and the contents of the current node.
        """
        self._user_scope = None
        self._user_stmt = None
        self.module.reset_caches()
        if self.current_node is not None:
            self.current_node.reset_contents()