1
0
forked from VimPlug/jedi

Most of the new diff parser's functionality should be working now. There are a few TODOs to solve, though.

This commit is contained in:
Dave Halter
2016-08-16 18:58:28 +02:00
parent b9040870c0
commit 54297cc4a5
2 changed files with 144 additions and 37 deletions

View File

@@ -149,7 +149,7 @@ def get_stack_at_position(grammar, code_lines, module, pos):
try: try:
p.parse(tokenizer=tokenize_without_endmarker(code)) p.parse(tokenizer=tokenize_without_endmarker(code))
except EndMarkerReached: except EndMarkerReached:
return Stack(p.stack) return Stack(p.pgen_parser.stack)
raise SystemError("This really shouldn't happen. There's a bug in Jedi.") raise SystemError("This really shouldn't happen. There's a bug in Jedi.")

View File

@@ -5,6 +5,7 @@ finished (and still not working as I want), I won't document it any further.
""" """
import re import re
from itertools import chain from itertools import chain
import copy
import difflib import difflib
from jedi._compatibility import use_metaclass from jedi._compatibility import use_metaclass
@@ -72,6 +73,7 @@ class DiffParser():
lines_old = splitlines(self._parser.source, keepends=True) lines_old = splitlines(self._parser.source, keepends=True)
sm = difflib.SequenceMatcher(None, lines_old, lines_new) sm = difflib.SequenceMatcher(None, lines_old, lines_new)
print(lines_old, lines_new)
for operation, i1, i2, j1, j2 in sm.get_opcodes(): for operation, i1, i2, j1, j2 in sm.get_opcodes():
print(operation) print(operation)
if operation == 'equal': if operation == 'equal':
@@ -87,25 +89,47 @@ class DiffParser():
self._delete_count += 1 # For statistics self._delete_count += 1 # For statistics
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
while until_line_new < self._parsed_until_line: while until_line_new > self._parsed_until_line:
parsed_until_line_old = self._parsed_until_line + line_offset parsed_until_line_old = self._parsed_until_line - line_offset
if matches: line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
# TODO check missing indent/dedent if line_stmt is None:
_copy_p() # Parse 1 line at least. We don't need more, because we just
self._update_positions(line_offset) # want to get into a state where the old parser has starting
# statements again (not e.g. lines within parentheses).
self._parse(self._parsed_until_line + 1)
else:
p_children = line_stmt.parent.children
index = p_children.index(line_stmt)
nodes = []
for node in p_children[index:]:
if until_line_old < node.end_pos[0]:
divided_node = self._divide_node(node)
if divided_node is not None:
nodes.append(divided_node)
break
else:
nodes.append(node)
if nodes:
self._insert_nodes(nodes)
# TODO remove dedent at end
self._update_positions(nodes, line_offset)
# We have copied as much as possible (but definitely not too # We have copied as much as possible (but definitely not too
# much). Therefore we escape, even if we're not at the end. The # much). Therefore we escape, even if we're not at the end. The
# rest will be parsed. # rest will be parsed.
# Might not reach until the end, because there's a statement # Might not reach until the end, because there's a statement
# that is not finished. # that is not finished.
break break
else:
# Parse 1 line at least. We don't need more, because we just
# want to get into a state where the old parser has starting
# statements again (not e.g. lines within parentheses).
self._parse(self._parsed_until_line + 1)
def _update_positions(self, line_offset, line_start, line_end): def _update_positions(self, nodes, line_offset):
for node in nodes:
try:
children = node.children
except AttributeError:
# Is a leaf
node.start_pos = node.start_pos[0] + line_offset, node.start_pos[1]
else:
self._update_positions(children)
if line_offset == 0: if line_offset == 0:
return return
@@ -118,10 +142,60 @@ class DiffParser():
self._insert_count += 1 self._insert_count += 1
self._parse(until_line_new) self._parse(until_line_new)
def _insert_nodes(self, nodes):
before_node = self._get_before_insertion_node()
line_indentation = nodes[0].start_pos[1]
while True:
p_children = before_node.parent.children
indentation = p_children[0].start_pos[1]
if line_indentation < indentation: # Dedent
# We might be at the most outer layer: modules. We
# don't want to depend on the first statement
# having the right indentation.
if before_node.parent is not None:
# TODO add dedent
before_node = before_node.parent
continue
# TODO check if the indentation is lower than the last statement
# and add a dedent error leaf.
# TODO do the same for indent error leafs.
p_children += nodes
break
def _divide_node(self, node, until_line):
"""
Breaks up scopes and returns only the part until the given line.
Tries to get the parts it can safely get and ignores the rest.
"""
if node.type not in ('classdef', 'funcdef'):
return None
suite = node.children[-1]
if suite.type != 'suite':
return None
new_node = copy.copy(node)
new_node.children[-1] = new_suite = copy.copy(suite)
for i, child_node in enumerate(new_suite.children):
if child_node.end_pos[1] > until_line:
divided_node = self._divide_node(child_node, until_line)
if divided_node is not None:
new_suite.children[i] = divided_node
new_suite.children[i + 1:] = []
else:
new_suite.children[i:] = []
break
return new_node
def _get_before_insertion_node(self): def _get_before_insertion_node(self):
if not self._new_children: if not self._new_children:
return None return None
line = self._parsed_until_line + 1
print(line)
leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False) leaf = self._module.get_leaf_for_position((line, 0), include_prefixes=False)
while leaf.type != 'newline': while leaf.type != 'newline':
try: try:
@@ -136,48 +210,78 @@ class DiffParser():
print(parent) print(parent)
if parent.type in ('suite', 'file_input'): if parent.type in ('suite', 'file_input'):
print(node) print(node)
print(i, line, node.end_pos) print(line, node.end_pos)
assert node.end_pos[0] <= line assert node.end_pos[0] <= line
assert node.end_pos[1] == 0 assert node.end_pos[1] == 0
return node return node
node = parent node = parent
def _get_old_line_stmt(self, old_line):
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
if leaf.get_start_pos_with_prefix()[0] == old_line:
return leaf.get_definition()
# Must be on the same line. Otherwise we need to parse that bit.
return None
def _parse(self, until_line): def _parse(self, until_line):
""" """
Parses at least until the given line, but might just parse more until a Parses at least until the given line, but might just parse more until a
valid state is reached. valid state is reached.
""" """
while until_line > self._parsed_until_line: while until_line > self._parsed_until_line:
node = self._parse_scope_part(before_node, until_line) node = self._parse_scope_node(until_line)
first_leaf = node.first_leaf() nodes = self._get_children_nodes(node)
if nodes:
self._insert_nodes(nodes)
first_leaf = nodes[0].first_leaf()
first_leaf.prefix = self._prefix + first_leaf.prefix
self._prefix = ''
self._prefix += node.children[-1].prefix
before_node = self._get_before_insertion_node() before_node = self._get_before_insertion_node()
if before_node is None: if before_node is None:
# The start of the file. # The start of the file.
self.new_children += node.children self._new_children += node.children
else: else:
before_node.parent.children += node.children before_node.parent.children += node.children
def _parse_scope_node(self, before_node, until_line, line_offset=0): def _get_children_nodes(self, node):
# TODO speed up, shouldn't copy the whole thing all the time. nodes = node.children[:-1]
if nodes: # More than an error leaf
first_element = nodes[0]
if first_element.type == 'error_leaf' and \
first_element.original_type == 'indent':
assert nodes[-1].type == 'dedent'
# This means that the start and end leaf
nodes = nodes[1:-1]
return nodes
def _parse_scope_node(self, until_line):
# TODO speed up, shouldn't copy the whole list all the time.
# memoryview? # memoryview?
lines_after = self._lines_new[self._parsed_until_line + 1:] lines_after = self._lines_new[self._parsed_until_line + 1:]
tokenizer = self._diff_tokenize(lines_after, until_line, line_offset) tokenizer = self._diff_tokenize(
lines_after,
until_line,
line_offset=self._parsed_until_line
)
self._parser = ParserWithRecovery( self._parser = ParserWithRecovery(
self._parser._grammar, self._parser._grammar,
source=None, source='\n',
tokenizer=tokenizer,
start_parsing=False start_parsing=False
) )
return self._parser.parse() return self._parser.parse(tokenizer=tokenizer)
def _diff_tokenize(lines, until_line, line_offset=0): def _diff_tokenize(self, lines, until_line, line_offset=0):
is_first_token = True is_first_token = True
omited_first_indent = False omited_first_indent = False
indent_count = 0 indent_count = 0
l = iter(lines)
tokens = generate_tokens(lambda: next(l, '')) tokens = generate_tokens(lambda: next(l, ''))
for token_info in tokens: for typ, string, start_pos, prefix in tokens:
typ = token_info.type start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == 'indent': if typ == 'indent':
indent_count += 1 indent_count += 1
if is_first_token: if is_first_token:
@@ -192,17 +296,20 @@ class DiffParser():
# We are done here, only thing that can come now is an # We are done here, only thing that can come now is an
# endmarker or another dedented code block. # endmarker or another dedented code block.
break break
elif typ == 'newline' and token_info.start_pos[0] >= until_line: elif typ == 'newline' and start_pos[0] >= until_line:
yield token_info yield tokenize.TokenInfo(typ, string, start_pos, prefix)
x = self. x = self._parser.pgen_parser.stack
# Check if the parser is actually in a valid suite state.
if 1:
# TODO check if the parser is in a flow, and let it pass if
# so.
import pdb; pdb.set_trace() import pdb; pdb.set_trace()
break break
is_first_token = False is_first_token = False
if line_offset != 0:
raise NotImplementedError
yield tokenize.TokenInfo(*token_info.string[1:])
else:
yield token_info
yield tokenize.TokenInfo(tokenize.ENDMARKER, *token_info.string[1:]) yield tokenize.TokenInfo(typ, string, start_pos, prefix)
typ, string, start_pos, prefix = next(tokens)
start_pos = start_pos[0] + line_offset, start_pos[1]
yield tokenize.TokenInfo(tokenize.ENDMARKER, string, start_pos, prefix)