# jedi-fork/jedi/fast_parser.py (forked from VimPlug/jedi, 2013-04-15)
"""
Basically a parser that is faster, because it tries to parse only parts and if
anything changes, it only reparses the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further.
"""
import re
import operator
from _compatibility import use_metaclass, reduce, property
import settings
import parsing
import parsing_representation as pr
import cache
import common
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class Module(pr.Simple, pr.Module):
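    """
    Combines the modules of all part parsers into one module-like object.
    Attributes such as ``global_vars`` and ``used_names`` are merged over all
    sub-parsers; everything else is delegated to the first parser's module.
    """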
def __init__(self, parsers):
self._end_pos = None, None
super(Module, self).__init__(self, (1, 0))
self.parsers = parsers
self.reset_caches()
def reset_caches(self):
""" This module does a whole lot of caching, because it uses different
parsers. """
self.cache = {}
for p in self.parsers:
p.user_scope = None
p.user_stmt = None
def _get(self, name, operation, execute=False, *args, **kwargs):
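        """
        Combine the attribute ``name`` (or, with ``execute=True``, the result
        of calling it) over all sub-parsers' modules using ``operation``,
        caching the result per argument combination.
        """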
key = (name, args, frozenset(kwargs.items()))
if key not in self.cache:
if execute:
objs = (getattr(p.module, name)(*args, **kwargs)
for p in self.parsers)
else:
objs = (getattr(p.module, name) for p in self.parsers)
self.cache[key] = reduce(operation, objs)
return self.cache[key]
def __getattr__(self, name):
if name == 'global_vars':
return self._get(name, operator.add)
elif name.startswith('__'):
raise AttributeError('Not available!')
else:
return getattr(self.parsers[0].module, name)
@property
def used_names(self):
if not self.parsers:
raise NotImplementedError("Parser doesn't exist.")
key = 'used_names'
if key not in self.cache:
dct = {}
for p in self.parsers:
for k, statement_set in p.module.used_names.items():
if k in dct:
dct[k] |= statement_set
else:
dct[k] = set(statement_set)
self.cache[key] = dct
return self.cache[key]
@property
def start_pos(self):
""" overwrite start_pos of Simple """
return 1, 0
@start_pos.setter
    def start_pos(self, value):
""" ignore """
raise AttributeError('TODO remove - just a check if everything works fine.')
@property
def end_pos(self):
return self._end_pos
@end_pos.setter
def end_pos(self, value):
if None in self._end_pos \
or None not in value and self._end_pos < value:
self._end_pos = value
def __repr__(self):
return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
self.start_pos[0], self.end_pos[0])
class CachedFastParser(type):
""" This is a metaclass for caching `FastParser`. """
def __call__(self, source, module_path=None, user_position=None):
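        # If the fast parser is disabled, fall back to the plain parser.
        # Otherwise, reuse the FastParser that was previously created for this
        # module_path (cache.parser_cache is filled elsewhere in jedi) and
        # update it in place instead of building a new one.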
if not settings.fast_parser:
return parsing.Parser(source, module_path, user_position)
pi = cache.parser_cache.get(module_path, None)
if pi is None or isinstance(pi.parser, parsing.Parser):
p = super(CachedFastParser, self).__call__(source, module_path,
user_position)
else:
p = pi.parser # pi is a `cache.ParserCacheItem`
p.update(source, user_position)
return p
class ParserNode(object):
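    """
    A node in the tree that mirrors how the source was split into parts.
    Each node keeps its parser, the code it was built from (hashed so it can
    be reused on later updates) and the child nodes parsed beneath it.
    """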
def __init__(self, parser, code, parent=None):
self.parent = parent
self.parser = parser
self.code = code
self.hash = hash(code)
self.children = []
self._old_children = []
# must be created before new things are added to it.
try:
# with fast_parser we have either 1 subscope or only statements.
self._content_scope = self.parser.module.subscopes[0]
except IndexError:
self._content_scope = self.parser.module
self.save_contents()
def save_contents(self):
scope = self._content_scope
self._contents = {}
for c in SCOPE_CONTENTS:
self._contents[c] = getattr(scope, c)
self._is_generator = scope.is_generator
def reset_contents(self):
scope = self._content_scope
for key, c in self._contents.items():
setattr(scope, key, c)
scope.is_generator = self._is_generator
for c in self.children:
c.reset_contents()
def parent_until_indent(self, indent):
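        """
        Walk towards the root until a node is reached whose content is less
        indented than ``indent`` (or the root itself); that node is where a
        newly dedented part should be attached.
        """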
if self.indent >= indent and self.parent:
self._old_children = []
return self.parent.parent_until_indent(indent)
return self
@property
def indent(self):
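        """Column at which this node's content starts; the root reports 0."""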
if not self.parent:
return 0
module = self.parser.module
try:
el = module.subscopes[0]
except IndexError:
try:
el = module.statements[0]
except IndexError:
el = module.imports[0]
return el.start_pos[1]
def _set_items(self, parser, set_parent=False):
# insert parser objects into current structure
scope = self._content_scope
for c in SCOPE_CONTENTS:
content = getattr(scope, c)
items = getattr(parser.module, c)
if set_parent:
for i in items:
i.parent = scope
content += items
scope.is_generator |= parser.module.is_generator
def add_node(self, node):
"""Adding a node means adding a node that was already added earlier"""
self.children.append(node)
self._set_items(node.parser)
node._old_children = node.children
node.children = []
return node
def add_parser(self, parser, code):
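        """Wrap a freshly parsed code part in a new child node and merge its
        contents into this node's scope."""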
node = ParserNode(parser, code, self)
self._set_items(parser, set_parent=True)
self.children.append(node)
return node
class FastParser(use_metaclass(CachedFastParser)):
def __init__(self, code, module_path=None, user_position=None):
# set values like `pr.Module`.
self.module_path = module_path
self.user_position = user_position
self.current_node = None
self.parsers = []
self.module = Module(self.parsers)
self.reset_caches()
self._parse(code)
@property
def user_scope(self):
if self._user_scope is None:
for p in self.parsers:
if p.user_scope:
if self._user_scope is not None and not \
isinstance(self._user_scope, pr.SubModule):
continue
self._user_scope = p.user_scope
if isinstance(self._user_scope, pr.SubModule):
self._user_scope = self.module
return self._user_scope
@property
def user_stmt(self):
if self._user_stmt is None:
for p in self.parsers:
if p.user_stmt:
self._user_stmt = p.user_stmt
break
return self._user_stmt
def update(self, code, user_position=None):
self.user_position = user_position
self.reset_caches()
self._parse(code)
def scan_user_scope(self, sub_module):
""" Scan with self.user_position.
:type sub_module: pr.SubModule
"""
for scope in sub_module.statements + sub_module.subscopes:
if isinstance(scope, pr.Scope):
if scope.start_pos <= self.user_position <= scope.end_pos:
return self.scan_user_scope(scope) or scope
return None
def _split_parts(self, code):
"""
        Split the code into different parts. This makes it possible to parse
        each part separately and therefore cache parts of the file and not
        everything.
"""
def add_part():
txt = '\n'.join(current_lines)
if txt:
parts.append(txt)
current_lines[:] = []
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
lines = code.splitlines()
current_lines = []
parts = []
is_decorator = False
current_indent = 0
new_indent = False
in_flow = False
# All things within flows are simply being ignored.
for i, l in enumerate(lines):
# check for dedents
m = re.match('^([\t ]*)(.?)', l)
indent = len(m.group(1))
if m.group(2) in ['', '#']:
current_lines.append(l) # just ignore comments and blank lines
continue
if indent < current_indent: # -> dedent
current_indent = indent
new_indent = False
if not in_flow:
add_part()
in_flow = False
elif new_indent:
current_indent = indent
new_indent = False
# Check lines for functions/classes and split the code there.
if not in_flow:
m = re.match(r_keyword, l)
if m:
in_flow = m.group(1) in common.FLOWS
if not is_decorator and not in_flow:
add_part()
current_lines = []
is_decorator = '@' == m.group(1)
if not is_decorator:
current_indent += 1 # it must be higher
new_indent = True
current_lines.append(l)
add_part()
        #for p in parts:
        #    print '#####################################'
        #    print p
        #    print len(p.splitlines())
return parts
def _parse(self, code):
""" :type code: str """
parts = self._split_parts(code)
self.parsers[:] = []
self._code = code
self._line_offset = 0
self._start = 0
p = None
is_first = True
for code_part in parts:
lines = code_part.count('\n') + 1
if is_first or self._line_offset >= p.end_pos[0]:
indent = len(re.match(r'[ \t]*', code_part).group(0))
if is_first and self.current_node is not None:
nodes = [self.current_node]
else:
nodes = []
if self.current_node is not None:
self.current_node = \
self.current_node.parent_until_indent(indent)
nodes += self.current_node._old_children
# check if code_part has already been parsed
                #print('#' * 45, self._line_offset, p and p.end_pos, '\n', code_part)
p, node = self._get_parser(code_part, nodes)
if is_first:
if self.current_node is None:
self.current_node = ParserNode(p, code)
else:
self.current_node.parser = p
self.current_node.save_contents()
else:
if node is None:
                        # pass the part's own code so that its hash can be
                        # matched against future code parts in _get_parser()
                        self.current_node = \
                            self.current_node.add_parser(p, code_part)
else:
self.current_node = self.current_node.add_node(node)
self.parsers.append(p)
is_first = False
            else:
                # The part lies inside the range the previous parser already
                # covered (a parser may swallow following parts), so it is
                # skipped here.
                pass
self._line_offset += lines
self._start += len(code_part) + 1 # +1 for newline
        #print(self.parsers[0].module.get_code())
#for p in self.parsers:
# print(p.module.get_code())
# print(p.module.start_pos, p.module.end_pos)
#exit()
del self._code
def _get_parser(self, code, nodes):
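        """
        Return a parser for ``code`` (a single part). If one of ``nodes`` was
        built from exactly the same code, reuse its parser and only shift its
        line offset and user position; otherwise parse the remaining source
        starting at the current offset.
        """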
h = hash(code)
hashes = [n.hash for n in nodes]
node = None
try:
index = hashes.index(h)
if nodes[index].code != code:
raise ValueError()
except ValueError:
p = parsing.Parser(self._code[self._start:],
self.module_path, self.user_position,
offset=(self._line_offset, 0),
is_fast_parser=True, top_module=self.module)
else:
node = nodes.pop(index)
p = node.parser
m = p.module
m.line_offset += self._line_offset + 1 - m.start_pos[0]
if self.user_position is not None and \
m.start_pos <= self.user_position <= m.end_pos:
# It's important to take care of the whole user
# positioning stuff, if no reparsing is being done.
p.user_stmt = m.get_statement_for_position(
self.user_position, include_imports=True)
if p.user_stmt:
p.user_scope = p.user_stmt.parent
else:
p.user_scope = self.scan_user_scope(m) or self.module
return p, node
def reset_caches(self):
self._user_scope = None
self._user_stmt = None
self.module.reset_caches()
if self.current_node is not None:
self.current_node.reset_contents()