# forked from VimPlug/jedi — jedi-fork/jedi/parser/fast.py
# (snapshot 2016-08-14 00:23:40 +02:00; 209 lines, 7.4 KiB, Python)
"""
Basically a parser that is faster, because it tries to parse only parts and if
anything changes, it only reparses the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further.
"""
import re
from itertools import chain
import difflib
from jedi._compatibility import use_metaclass
from jedi import settings
from jedi.common import splitlines
from jedi.parser import ParserWithRecovery
from jedi.parser import tree
from jedi.parser.utils import underscore_memoization, parser_cache
from jedi.parser import tokenize
from jedi import debug
from jedi.parser.tokenize import (generate_tokens, NEWLINE,
ENDMARKER, INDENT, DEDENT)
class CachedFastParser(type):
    """Metaclass that serves ``FastParser`` calls from the parser cache."""

    def __call__(self, grammar, source, module_path=None):
        cached = parser_cache.get(module_path, None)
        if cached is None or not settings.fast_parser:
            # Cache miss (or fast parsing disabled): fall back to a full parse.
            return ParserWithRecovery(grammar, source, module_path)

        # Reuse the cached parser and let the diff parser update it in place
        # from the new source text.
        parser = cached.parser
        differ = DiffParser(parser)
        differ.update(splitlines(source, keepends=True))
        return parser
class FastParser(use_metaclass(CachedFastParser)):
    # Facade class: instantiation is intercepted by the ``CachedFastParser``
    # metaclass, which may return a cached/updated parser instead.
    pass
class DiffParser():
    """Updates a cached parse tree by re-parsing only changed lines (WIP)."""

    def __init__(self, parser):
        # The previously cached parser whose tree we try to update in place.
        self._parser = parser
        self._module = parser.get_root_node()

    def _reset(self):
        # Per-``update()`` statistics counters.
        self._delete_count = 0
        self._insert_count = 0
        # Highest (1-based) line of the new source already covered by parsing.
        self._parsed_until_line = 0
def update(self, lines_new):
'''
The algorithm works as follows:
Equal:
- Assure that the start is a newline, otherwise parse until we get
one.
- Copy from parsed_until_line + 1 to max(i2 + 1)
- Make sure that the indentation is correct (e.g. add DEDENT)
- Add old and change positions
Insert:
- Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
much more.
Always:
- Set parsed_until_line
'''
self._lines_new = lines_new
self._reset()
self._old_children = self._module.children
self._new_children = []
self._prefix = ''
lines_old = splitlines(self._parser.source, keepends=True)
sm = difflib.SequenceMatcher(None, lines_old, lines_new)
for operation, i1, i2, j1, j2 in sm.get_opcodes():
print(operation)
if operation == 'equal':
line_offset = j1 - i1
self._copy_from_old_parser(line_offset, i2 + 1, j2 + 1)
elif operation == 'replace':
self._delete_count += 1
self._insert(j2 + 1)
elif operation == 'insert':
self._insert(j2 + 1)
else:
assert operation == 'delete'
self._delete_count += 1 # For statistics
    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        # NOTE(review): work in progress — ``matches`` and ``_copy_p`` are not
        # defined anywhere in this file, so the copy branch cannot run as-is.
        # NOTE(review): the loop condition looks inverted; presumably it should
        # loop while ``self._parsed_until_line < until_line_new`` — confirm.
        while until_line_new < self._parsed_until_line:
            parsed_until_line_old = self._parsed_until_line + line_offset
            if matches:
                # TODO check missing indent/dedent
                _copy_p()
                # NOTE(review): ``_update_positions`` is defined with four
                # parameters but called here with only ``line_offset``.
                self._update_positions(line_offset)
                # We have copied as much as possible (but definitely not too
                # much). Therefore we escape, even if we're not at the end. The
                # rest will be parsed.
                # Might not reach until the end, because there's a statement
                # that is not finished.
                break
            else:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has starting
                # statements again (not e.g. lines within parentheses).
                self._parse(self._parsed_until_line + 1)
    def _update_positions(self, line_offset, line_start, line_end):
        # Shift node positions after a copy; a zero offset means nothing moved.
        # NOTE(review): ``line_start``/``line_end`` are never supplied by the
        # caller above — confirm the intended signature.
        if line_offset == 0:
            return

        # Find start node:
        # NOTE(review): ``get_pared_node`` looks like a typo for
        # ``get_parsed_node`` — TODO confirm against the parser API.
        node = self._parser.get_pared_node()
        while True:
            # NOTE(review): placeholder — the loop returns on its first
            # iteration, so no positions are actually updated yet.
            return node
    def _insert(self, until_line_new):
        # Count insertions for statistics, then parse up to the target line.
        self._insert_count += 1
        self._parse(until_line_new)
    def _get_before_insertion_node(self):
        """Return the node after which newly parsed nodes get inserted."""
        if not self._new_children:
            # Nothing parsed into the new tree yet — insert at file start.
            return None

        # NOTE(review): ``line`` is undefined in this scope — presumably it
        # should be derived from ``self._parsed_until_line``; confirm.
        leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
        # Walk backwards to a newline leaf, i.e. a statement boundary.
        while leaf.type != 'newline':
            try:
                leaf = leaf.get_previous_leaf()
            except IndexError:
                # TODO
                raise NotImplementedError

        # Climb to the closest ancestor that is a suite or the file root.
        node = leaf
        while True:
            parent = node.parent
            # NOTE(review): debug prints left in; ``i`` below is undefined, so
            # the second print would raise NameError if this branch runs.
            print(parent)
            if parent.type in ('suite', 'file_input'):
                print(node)
                print(i, line, node.end_pos)
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0
                return node
            node = parent
def _parse(self, until_line):
"""
Parses at least until the given line, but might just parse more until a
valid state is reached.
"""
while until_line > self._parsed_until_line:
node = self._parse_scope_part(before_node, until_line)
first_leaf = node.first_leaf()
before_node = self._get_before_insertion_node()
if before_node is None:
# The start of the file.
self.new_children += node.children
else:
before_node.parent.children += node.children
    def _parse_scope_node(self, before_node, until_line, line_offset=0):
        """Parse the not-yet-parsed tail of the new source up to *until_line*."""
        # TODO speed up, shouldn't copy the whole thing all the time.
        # memoryview?
        # NOTE(review): ``_parsed_until_line`` is 1-based while the list index
        # is 0-based, so ``+ 1`` may skip one line too many — confirm.
        lines_after = self._lines_new[self._parsed_until_line + 1:]
        tokenizer = self._diff_tokenize(lines_after, until_line, line_offset)
        # NOTE(review): this rebinds ``self._parser``, discarding the cached
        # parser (and its ``source``) that ``update()`` still diffs against.
        self._parser = ParserWithRecovery(
            self._parser._grammar,
            source=None,
            tokenizer=tokenizer,
            start_parsing=False
        )
        return self._parser.parse()
def _diff_tokenize(lines, until_line, line_offset=0):
is_first_token = True
omited_first_indent = False
indent_count = 0
tokens = generate_tokens(lambda: next(l, ''))
for token_info in tokens:
typ = token_info.type
if typ == 'indent':
indent_count += 1
if is_first_token:
omited_first_indent = True
# We want to get rid of indents that are only here because
# we only parse part of the file. These indents would only
# get parsed as error leafs, which doesn't make any sense.
continue
elif typ == 'dedent':
indent_count -= 1
if omited_first_indent and indent_count == 0:
# We are done here, only thing that can come now is an
# endmarker or another dedented code block.
break
elif typ == 'newline' and token_info.start_pos[0] >= until_line:
yield token_info
x = self.
import pdb; pdb.set_trace()
break
is_first_token = False
if line_offset != 0:
raise NotImplementedError
yield tokenize.TokenInfo(*token_info.string[1:])
else:
yield token_info
yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info.string[1:])