"""
|
|
Basically a parser that is faster, because it tries to parse only parts and if
|
|
anything changes, it only reparses the changed parts. But because it's not
|
|
finished (and still not working as I want), I won't document it any further.
|
|
"""
|
|
import re
from itertools import chain
import difflib

from jedi._compatibility import use_metaclass
from jedi import settings
from jedi.common import splitlines
from jedi.parser import ParserWithRecovery
from jedi.parser import tree
from jedi.parser.utils import underscore_memoization, parser_cache
from jedi.parser import tokenize
from jedi import debug
from jedi.parser.tokenize import (generate_tokens, NEWLINE,
                                  ENDMARKER, INDENT, DEDENT)


class CachedFastParser(type):
    """ This is a metaclass for caching `FastParser`. """
    def __call__(self, grammar, source, module_path=None):
        pi = parser_cache.get(module_path, None)
        if pi is None or not settings.fast_parser:
            return ParserWithRecovery(grammar, source, module_path)

        parser = pi.parser
        d = DiffParser(parser)
        d.update(splitlines(source, keepends=True))
        return parser


class FastParser(use_metaclass(CachedFastParser)):
    pass


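# A minimal usage sketch (hedged: `load_grammar` and the exact call are
# assumptions based on the jedi API of this era, not defined in this file):
#
#     from jedi.parser import load_grammar
#     parser = FastParser(load_grammar(), source, module_path='example.py')
#
# Because of the metaclass, "instantiating" FastParser either runs a full
# ParserWithRecovery pass (on a cache miss or when settings.fast_parser is
# off) or updates the cached tree incrementally through DiffParser.update().

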
class DiffParser():
    def __init__(self, parser):
        self._parser = parser
        self._module = parser.get_root_node()

    def _reset(self):
        self._delete_count = 0
        self._insert_count = 0

        self._parsed_until_line = 0

    def update(self, lines_new):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we
              get one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.
        Always:
            - Set parsed_until_line
        '''
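        # For reference: difflib.SequenceMatcher.get_opcodes() yields 5-tuples
        # (operation, i1, i2, j1, j2), meaning old[i1:i2] relates to
        # new[j1:j2]. E.g. for old=['a\n', 'b\n'] and new=['a\n', 'x\n', 'b\n']
        # it yields ('equal', 0, 1, 0, 1), ('insert', 1, 1, 1, 2) and
        # ('equal', 1, 2, 2, 3).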
        self._lines_new = lines_new
        self._reset()

        self._old_children = self._module.children
        self._new_children = []
        self._prefix = ''

        lines_old = splitlines(self._parser.source, keepends=True)
        sm = difflib.SequenceMatcher(None, lines_old, lines_new)
        for operation, i1, i2, j1, j2 in sm.get_opcodes():
            debug.dbg('diff %s old[%s:%s] new[%s:%s]',
                      operation, i1, i2, j1, j2)
            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i2 + 1, j2 + 1)
            elif operation == 'replace':
                self._delete_count += 1
                self._insert(j2 + 1)
            elif operation == 'insert':
                self._insert(j2 + 1)
            else:
                assert operation == 'delete'
                self._delete_count += 1  # For statistics

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        while until_line_new > self._parsed_until_line:
            # line_offset maps old to new lines, so subtract it to get back
            # into old-file coordinates.
            parsed_until_line_old = self._parsed_until_line - line_offset
            # FIXME `matches` and `_copy_p` are unimplemented stubs of the
            # actual copying logic.
            if matches:
                # TODO check missing indent/dedent
                _copy_p()
                self._update_positions(line_offset)
                # We have copied as much as possible (but definitely not too
                # much). Therefore we escape, even if we're not at the end.
                # The rest will be parsed.
                # Might not reach until the end, because there's a statement
                # that is not finished.
                break
            else:
                # Parse at least one line. We don't need more, because we
                # just want to get into a state where the old parser has
                # starting statements again (not e.g. lines within
                # parentheses; see the example after this method).
                self._parse(self._parsed_until_line + 1)

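    # Example for the parenthesised-statement caveat above (illustrative
    # only): in
    #
    #     x = (1 +
    #          2)
    #
    # the tokenizer emits NEWLINE only after the closing parenthesis, so
    # "parse at least one line" may actually consume both physical lines
    # before the old parser is back at a statement start.
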
    def _update_positions(self, line_offset, line_start, line_end):
        if line_offset == 0:
            return

        # Find start node (unfinished; the loop below is a placeholder):
        node = self._parser.get_parsed_node()
        while True:
            return node

    def _insert(self, until_line_new):
        self._insert_count += 1
        self._parse(until_line_new)

    def _get_before_insertion_node(self):
        if not self._new_children:
            return None

        # Assumption: new nodes get inserted right after the parsed lines.
        line = self._parsed_until_line + 1
        leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
        while leaf.type != 'newline':
            try:
                leaf = leaf.get_previous_leaf()
            except IndexError:
                # TODO
                raise NotImplementedError

        node = leaf
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0
                return node
            node = parent

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until
        a valid state is reached.
        """
        while until_line > self._parsed_until_line:
            before_node = self._get_before_insertion_node()
            node = self._parse_scope_node(before_node, until_line)
            first_leaf = node.first_leaf()

            if before_node is None:
                # The start of the file.
                self._new_children += node.children
            else:
                before_node.parent.children += node.children

            # "Always: Set parsed_until_line" (see update()). Assumption: the
            # end position of the freshly parsed node marks how far we got.
            self._parsed_until_line = node.end_pos[0]

    def _parse_scope_node(self, before_node, until_line, line_offset=0):
        # TODO speed up, shouldn't copy the whole thing all the time.
        # memoryview?
        # Note: the lines list is 0-indexed, line numbers are 1-indexed.
        lines_after = self._lines_new[self._parsed_until_line:]
        tokenizer = self._diff_tokenize(lines_after, until_line, line_offset)
        self._parser = ParserWithRecovery(
            self._parser._grammar,
            source=None,
            tokenizer=tokenizer,
            start_parsing=False
        )
        return self._parser.parse()

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indent_count = 0
        l = iter(lines)
        tokens = generate_tokens(lambda: next(l, ''))
        for token_info in tokens:
            typ = token_info.type
            if typ == INDENT:
                indent_count += 1
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here
                    # because we only parse part of the file. These indents
                    # would only get parsed as error leafs, which doesn't
                    # make any sense.
                    continue
            elif typ == DEDENT:
                indent_count -= 1
                if omitted_first_indent and indent_count == 0:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    break
            elif typ == NEWLINE and token_info.start_pos[0] >= until_line:
                yield token_info
                break

            is_first_token = False
            if line_offset != 0:
                # TODO yield a copy of the token with its positions shifted
                # by line_offset.
                raise NotImplementedError
            else:
                yield token_info

        # Reuse the last token's string/position/prefix for the endmarker.
        yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info[1:])
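
# A rough sketch of what _diff_tokenize is meant to achieve (illustrative
# only, with made-up input): tokenizing just an indented block such as
#
#     lines = ['    x = 1\n', '    y = 2\n']
#
# would normally start with an INDENT and end with unmatched DEDENTs that the
# grammar cannot place, so they would only turn into error leafs. Dropping
# the first INDENT and stopping at the matching DEDENT hands the parser a
# flat statement list that it can parse cleanly.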