diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index 568c04f6..4cad62d9 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -52,7 +52,7 @@ def suite_or_file_input_is_valid(parser): return True -class DiffParser(): +class DiffParser(object): endmarker_type = 'endmarker' def __init__(self, parser): @@ -60,8 +60,8 @@ class DiffParser(): self._old_module = parser.get_root_node() def _reset(self): - self._delete_count = 0 - self._insert_count = 0 + self._copy_count = 0 + self._parser_count = 0 self._parsed_until_line = 0 self._copied_ranges = [] @@ -96,20 +96,21 @@ class DiffParser(): ''' self._lines_new = lines_new self._added_newline = False - # The Python grammar needs a newline at the end of a file. - if lines_new[-1] != '': - lines_new[-1] += '\n' - lines_new.append('') + if self._lines_new[-1] != '': + # The Python grammar needs a newline at the end of a file, but for + # everything else we keep working with lines_new here. + self._lines_new[-1] += '\n' self._added_newline = True self._reset() + line_length = len(lines_new) lines_old = splitlines(self._parser.source, keepends=True) sm = difflib.SequenceMatcher(None, lines_old, lines_new) - print(len(lines_old), len(lines_new), lines_old, lines_new) + print(len(lines_old), line_length, lines_old, lines_new) for operation, i1, i2, j1, j2 in sm.get_opcodes(): print('\t\t', operation, i1, i2, j1, j2) - if j2 == len(lines_new): + if j2 == line_length + int(self._added_newline): # The empty part after the last newline is not relevant. j2 -= 1 @@ -117,27 +118,24 @@ class DiffParser(): line_offset = j1 - i1 self._copy_from_old_parser(line_offset, i2, j2) elif operation == 'replace': - self._delete_count += 1 - self._insert(j2) + self._parse(until_line=j2) elif operation == 'insert': - self._insert(j2) + self._parse(until_line=j2) else: assert operation == 'delete' - self._delete_count += 1 # For statistics + # Cleanup (setting endmarker, used_names) self._post_parse() - # TODO insert endmarker if self._added_newline: - print("ADDED") self._parser.module = self._parser._parsed = self._new_module self._parser.remove_last_newline() + self._parsed_until_line -= 1 + self._parser.source = ''.join(lines_new) self._old_module = self._new_module - return self._new_module - def _insert(self, until_line_new): - self._insert_count += 1 - self._parse(until_line_new) + assert self._new_module.end_pos[0] == line_length + return self._new_module def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): while until_line_new > self._parsed_until_line: @@ -149,14 +147,21 @@ class DiffParser(): # statements again (not e.g. lines within parentheses). self._parse(self._parsed_until_line + 1) else: - print('copy', line_stmt.end_pos, parsed_until_line_old, - until_line_old, line_stmt) + print('copy', line_stmt.end_pos, parsed_until_line_old, until_line_old, line_stmt) p_children = line_stmt.parent.children index = p_children.index(line_stmt) nodes = [] for node in p_children[index:]: - if node.end_pos[0] > until_line_old: + last_leaf = node.last_leaf() + if last_leaf.type == 'newline': + last_line = last_leaf.start_pos[0] + else: + last_line = last_leaf.end_pos[0] + + print('test', last_line, until_line_old, node) + if last_line > until_line_old: divided_node = self._divide_node(node, until_line_new) + print('divided', divided_node) if divided_node is not None: nodes.append(divided_node) break @@ -165,6 +170,7 @@ class DiffParser(): if nodes: print('COPY', until_line_new) + self._copy_count += 1 parent = self._insert_nodes(nodes) self._update_names_dict(parent, nodes) # TODO remove dedent at end @@ -216,8 +222,7 @@ class DiffParser(): if last_non_endmarker.type == 'newline': # Newlines end on the next line, which means that they would cover # the next line. That line is not fully parsed at this point. - print('menno', last_leaf.end_pos, last_non_endmarker.end_pos) - self._parsed_until_line = last_leaf.end_pos[0] - 1 + self._parsed_until_line = last_leaf.start_pos[0] else: self._parsed_until_line = last_leaf.end_pos[0] print('parsed_until', last_leaf.end_pos, self._parsed_until_line) @@ -274,6 +279,7 @@ class DiffParser(): node.parent = new_parent if new_parent.type == 'suite': return new_parent.get_parent_scope() + return new_parent def _get_before_insertion_node(self): @@ -351,6 +357,10 @@ class DiffParser(): def _get_old_line_stmt(self, old_line): leaf = self._old_module.get_leaf_for_position((old_line, 0), include_prefixes=True) + if leaf.type == 'newline': + leaf = leaf.get_next_leaf() + while leaf.type == 'dedent': + leaf = leaf.get_next_leaf() if leaf.get_start_pos_of_prefix()[0] == old_line: node = leaf # TODO use leaf.get_definition one day when that one is working @@ -387,6 +397,7 @@ class DiffParser(): return nodes def _parse_scope_node(self, until_line): + self._parser_count += 1 print('PARSE', self._parsed_until_line, until_line) # TODO speed up, shouldn't copy the whole list all the time. # memoryview? @@ -432,7 +443,7 @@ class DiffParser(): tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True) for typ, string, start_pos, prefix in tokens: start_pos = start_pos[0] + line_offset, start_pos[1] - if typ == tokenize.INDENT: + if typ == INDENT: indents.append(start_pos[1]) if is_first_token: omitted_first_indent = True @@ -450,16 +461,16 @@ class DiffParser(): # endmarker or another dedented code block. yield tokenize.TokenInfo(tokenize.ENDMARKER, '', start_pos, '') break - elif typ == tokenize.NEWLINE and start_pos[0] >= until_line: + elif typ == NEWLINE and start_pos[0] >= until_line: yield tokenize.TokenInfo(typ, string, start_pos, prefix) # Check if the parser is actually in a valid suite state. if suite_or_file_input_is_valid(self._active_parser): start_pos = start_pos[0] + 1, 0 while len(indents) > int(omitted_first_indent): indents.pop() - yield tokenize.TokenInfo(tokenize.DEDENT, '', start_pos, '') + yield tokenize.TokenInfo(DEDENT, '', start_pos, '') - yield tokenize.TokenInfo(tokenize.ENDMARKER, '', start_pos, '') + yield tokenize.TokenInfo(ENDMARKER, '', start_pos, '') break else: continue diff --git a/jedi/parser/tree.py b/jedi/parser/tree.py index aab744eb..fde3e704 100644 --- a/jedi/parser/tree.py +++ b/jedi/parser/tree.py @@ -328,6 +328,8 @@ class Leaf(Base): return self def last_leaf(self): + if self.type == 'dedent': + return self.get_previous_leaf() return self def get_code(self, normalized=False, include_prefix=True): @@ -607,10 +609,7 @@ class BaseNode(Base): return c[index + 1] def last_leaf(self): - try: - return self.children[-1].last_leaf() - except AttributeError: - return self.children[-1] + return self.children[-1].last_leaf() def get_following_comment_same_line(self): """ diff --git a/test/test_parser/test_diff_parser.py b/test/test_parser/test_diff_parser.py index 97981f7f..95162533 100644 --- a/test/test_parser/test_diff_parser.py +++ b/test/test_parser/test_diff_parser.py @@ -1,10 +1,14 @@ from textwrap import dedent +import pytest + import jedi from jedi._compatibility import u +from jedi.common import splitlines from jedi import cache from jedi.parser import load_grammar -from jedi.parser.fast import FastParser +from jedi.parser.fast import FastParser, DiffParser +from jedi.parser import ParserWithRecovery from jedi.parser.utils import save_parser @@ -34,59 +38,49 @@ def test_add_to_end(): assert jedi.Script(a + b, path='example.py').completions() -def test_split_parts(): - cache.parser_cache.pop(None, None) +class Differ(object): + def __init__(self): + self._first_use = True - def splits(source): - class Mock(FastParser): - def __init__(self, *args): - self.number_of_splits = 0 + def initialize(self, source): + grammar = load_grammar() + self.parser = ParserWithRecovery(grammar, source) - return tuple(FastParser._split_parts(Mock(None, None), source)) - - def test(*parts): - assert splits(''.join(parts)) == parts - - test('a\n\n', 'def b(): pass\n', 'c\n') - test('a\n', 'def b():\n pass\n', 'c\n') - - test('from x\\\n') - test('a\n\\\n') + def parse(self, source, copies=0, parsers=0): + lines = splitlines(source, keepends=True) + diff_parser = DiffParser(self.parser) + new_module = diff_parser.update(lines) + assert source == new_module.get_code() + assert diff_parser._copy_count == copies + assert diff_parser._parser_count == parsers + self.parser.module = new_module + return new_module -def check_fp(src, number_parsers_used, number_of_splits=None, number_of_misses=0): - if number_of_splits is None: - number_of_splits = number_parsers_used - - p = FastParser(load_grammar(), u(src)) - save_parser(None, p, pickling=False) - - assert src == p.module.get_code() - assert p.number_of_splits == number_of_splits - assert p.number_parsers_used == number_parsers_used - assert p.number_of_misses == number_of_misses - return p.module +@pytest.fixture() +def differ(): + return Differ() -def test_change_and_undo(): +def test_change_and_undo(differ): # Empty the parser cache for the path None. cache.parser_cache.pop(None, None) func_before = 'def func():\n pass\n' # Parse the function and a. - check_fp(func_before + 'a', 2) + differ.initialize(func_before + 'a') # Parse just b. - check_fp(func_before + 'b', 1, 2) + differ.parse(func_before + 'b', copies=1, parsers=1) # b has changed to a again, so parse that. - check_fp(func_before + 'a', 1, 2) + differ.parse(func_before + 'a', copies=1, parsers=1) # Same as before no parsers should be used. - check_fp(func_before + 'a', 0, 2) + differ.parse(func_before + 'a', copies=1) # Getting rid of an old parser: Still no parsers used. - check_fp('a', 0, 1) - # Now the file has completely change and we need to parse. - check_fp('b', 1, 1) + differ.parse('a', copies=1) + # Now the file has completely changed and we need to parse. + differ.parse('b', parsers=1) # And again. - check_fp('a', 1, 1) + differ.parse('a', parsers=1) def test_positions():