From 9b85d5517f684abda2ca6631977ced366e27bfb4 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 27 Sep 2016 00:29:11 +0200 Subject: [PATCH] Fix more issues in the diff parser. --- jedi/parser/fast.py | 22 ++++++++++++++-------- jedi/parser/tokenize.py | 9 +++++++++ test/test_parser/test_diff_parser.py | 17 ++++++++--------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index 6cb02419..77f21b29 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -4,6 +4,7 @@ anything changes, it only reparses the changed parts. But because it's not finished (and still not working as I want), I won't document it any further. """ import copy +import re import difflib from jedi._compatibility import use_metaclass @@ -14,7 +15,7 @@ from jedi.parser.tree import Module, search_ancestor, EndMarker from jedi.parser.utils import parser_cache from jedi.parser import tokenize from jedi import debug -from jedi.parser.tokenize import (generate_tokens, NEWLINE, +from jedi.parser.tokenize import (generate_tokens, NEWLINE, TokenInfo, ENDMARKER, INDENT, DEDENT, tok_name) @@ -44,7 +45,7 @@ def _merge_names_dicts(base_dict, other_dict): def _get_last_line(node_or_leaf): last_leaf = node_or_leaf.last_leaf() if last_leaf.type == 'error_leaf': - typ = tokenize.tok_name[last_leaf.original_type].lower() + typ = tok_name[last_leaf.original_type].lower() else: typ = last_leaf.type if typ == 'newline': @@ -503,25 +504,30 @@ class DiffParser(object): continue is_first_token = False - if typ == tokenize.DEDENT: + if typ == DEDENT: indents.pop() if omitted_first_indent and not indents: # We are done here, only thing that can come now is an # endmarker or another dedented code block. 
- yield tokenize.TokenInfo(tokenize.ENDMARKER, '', start_pos, '') + typ, string, start_pos, prefix = next(tokens) + if '\n' in prefix: + prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix) + else: + prefix = '' + yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) break elif typ == NEWLINE and start_pos[0] >= until_line: - yield tokenize.TokenInfo(typ, string, start_pos, prefix) + yield TokenInfo(typ, string, start_pos, prefix) # Check if the parser is actually in a valid suite state. if suite_or_file_input_is_valid(self._grammar, stack): start_pos = start_pos[0] + 1, 0 while len(indents) > int(omitted_first_indent): indents.pop() - yield tokenize.TokenInfo(DEDENT, '', start_pos, '') + yield TokenInfo(DEDENT, '', start_pos, '') - yield tokenize.TokenInfo(ENDMARKER, '', start_pos, '') + yield TokenInfo(ENDMARKER, '', start_pos, '') break else: continue - yield tokenize.TokenInfo(typ, string, start_pos, prefix) + yield TokenInfo(typ, string, start_pos, prefix) diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 233c6d87..82e5d612 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -19,6 +19,7 @@ from io import StringIO from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap, NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) from jedi._compatibility import is_py3 +from jedi.common import splitlines cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -166,6 +167,14 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): else: return self.type + @property + def end_pos(self): + lines = splitlines(self.string) + if len(lines) > 1: + return self.start_pos[0] + len(lines) - 1, 0 + else: + return self.start_pos[0], self.start_pos[1] + len(self.string) + def source_tokens(source, use_exact_op_types=False): """Generate tokens from a the source code (string).""" diff --git a/test/test_parser/test_diff_parser.py b/test/test_parser/test_diff_parser.py index 07d93c36..be678e24 
100644 --- a/test/test_parser/test_diff_parser.py +++ b/test/test_parser/test_diff_parser.py @@ -81,7 +81,7 @@ class Differ(object): self.parser = ParserWithRecovery(grammar, source) return self.parser.module - def parse(self, source, copies=0, parsers=0, expect_error_leafs=False): + def parse(self, source, copies=0, parsers=0, expect_error_leaves=False): debug.dbg('differ: parse copies=%s parsers=%s', copies, parsers, color='YELLOW') lines = splitlines(source, keepends=True) diff_parser = DiffParser(self.parser) @@ -92,7 +92,7 @@ class Differ(object): self.parser.module = new_module self.parser._parsed = new_module - assert expect_error_leafs == _check_error_leaves_nodes(new_module) + assert expect_error_leaves == _check_error_leaves_nodes(new_module) _assert_valid_graph(new_module) return new_module @@ -160,7 +160,7 @@ def test_if_simple(differ): differ.initialize(src + 'a') differ.parse(src + else_ + "a", copies=0, parsers=1) - differ.parse(else_, parsers=1, expect_error_leafs=True) + differ.parse(else_, parsers=1, expect_error_leaves=True) differ.parse(src + else_, parsers=1) @@ -227,10 +227,10 @@ def test_open_parentheses(differ): new_code = 'isinstance(\n' + func differ.initialize(code) - differ.parse(new_code, parsers=1, expect_error_leafs=True) + differ.parse(new_code, parsers=1, expect_error_leaves=True) new_code = 'a = 1\n' + new_code - differ.parse(new_code, copies=1, parsers=1, expect_error_leafs=True) + differ.parse(new_code, copies=1, parsers=1, expect_error_leaves=True) func += 'def other_func():\n pass\n' differ.initialize('isinstance(\n' + func) @@ -280,8 +280,7 @@ def test_wrong_whitespace(differ): hello ''' differ.initialize(code) - # Need to parsers, because the code is not dedented. 
- differ.parse(code + 'bar\n ', parsers=2, copies=1) + differ.parse(code + 'bar\n ', parsers=1, copies=1) - code += """abc(\nif 1:\npass\n """ - differ.parse(code, parsers=2, copies=1) + code += """abc(\npass\n """ + differ.parse(code, parsers=1, copies=1, expect_error_leaves=True)