diff --git a/jedi/parser/python/__init__.py b/jedi/parser/python/__init__.py index 6f8211bf..c435e65e 100644 --- a/jedi/parser/python/__init__.py +++ b/jedi/parser/python/__init__.py @@ -112,7 +112,7 @@ def parse(code=None, path=None, grammar=None, error_recovery=True, code += '\n' tokenize_lines = list(tokenize_lines) tokenize_lines[-1] += '\n' - tokenize_lines.append([]) + tokenize_lines.append('') tokens = generate_tokens(tokenize_lines, use_exact_op_types=True) diff --git a/jedi/parser/python/parser.py b/jedi/parser/python/parser.py index 0c8da560..545daaf1 100644 --- a/jedi/parser/python/parser.py +++ b/jedi/parser/python/parser.py @@ -1,10 +1,9 @@ -import re - from jedi.parser.python import tree from jedi.parser import tokenize from jedi.parser.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER, STRING, tok_name) from jedi.parser.parser import BaseParser +from jedi.common import splitlines class Parser(BaseParser): @@ -214,23 +213,33 @@ def _remove_last_newline(node): # The newline is either in the endmarker as a prefix or the previous # leaf as a newline token. prefix = endmarker.prefix - if prefix.endswith('\n'): - endmarker.prefix = prefix = prefix[:-1] - last_end = 0 - if '\n' not in prefix: - # Basically if the last line doesn't end with a newline. we - # have to add the previous line's end_position. - previous_leaf = endmarker.get_previous_leaf() - if previous_leaf is not None: - last_end = previous_leaf.end_pos[1] - last_line = re.sub('.*\n', '', prefix) - endmarker.start_pos = endmarker.line - 1, last_end + len(last_line) + leaf = endmarker.get_previous_leaf() + if prefix: + text = prefix else: - newline = endmarker.get_previous_leaf() - if newline is None: - return # This means that the parser is empty. 
+ if leaf is None: + raise ValueError("You're trying to remove a newline from an empty module.") - assert newline.value.endswith('\n') - newline.value = newline.value[:-1] - endmarker.start_pos = \ - newline.start_pos[0], newline.start_pos[1] + len(newline.value) + text = leaf.value + + if not text.endswith('\n'): + raise ValueError("There's no newline at the end, cannot remove it.") + + text = text[:-1] + if prefix: + endmarker.prefix = text + + if leaf is None: + end_pos = (1, 0) + else: + end_pos = leaf.end_pos + + lines = splitlines(text, keepends=True) + if len(lines) == 1: + end_pos = end_pos[0], end_pos[1] + len(lines[0]) + else: + end_pos = end_pos[0] + len(lines) - 1, len(lines[-1]) + endmarker.start_pos = end_pos + else: + leaf.value = text + endmarker.start_pos = leaf.end_pos diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index e7fbee7b..ddbb15fc 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -249,11 +249,7 @@ def generate_tokens(lines, use_exact_op_types=False): while pos < max: pseudomatch = pseudo_token_compiled.match(line, pos) if not pseudomatch: # scan for tokens - txt = line[pos] - if line[pos] in '"\'': - # If a literal starts but doesn't end the whole rest of the - # line is an error token. 
- txt = line[pos:] + txt = line[pos:] if txt.endswith('\n'): new_line = True yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix) @@ -263,7 +259,8 @@ def generate_tokens(lines, use_exact_op_types=False): additional_prefix = '' start, pos = pseudomatch.span(2) spos = (lnum, start) - token, initial = line[start:pos], line[start] + token = pseudomatch.group(2) + initial = token[0] if new_line and initial not in '\r\n#': new_line = False diff --git a/test/test_parser/test_parser.py b/test/test_parser/test_parser.py index 71d78699..b7d4208f 100644 --- a/test/test_parser/test_parser.py +++ b/test/test_parser/test_parser.py @@ -2,10 +2,13 @@ import sys from textwrap import dedent +import pytest + import jedi from jedi._compatibility import u, is_py3 from jedi.parser.python import parse, load_grammar from jedi.parser.python import tree +from jedi.common import splitlines def test_user_statement_on_import(): @@ -226,3 +229,15 @@ def test_load_newer_grammar(): # The same is true for very old grammars (even though this is probably not # going to be an issue. load_grammar('1.5') + + +@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar']) +def test_open_string_literal(code): + """ + Testing mostly if removing the last newline works. + """ + lines = splitlines(code, keepends=True) + end_pos = (len(lines), len(lines[-1])) + module = parse(code) + assert module.get_code() == code + assert module.end_pos == end_pos == module.children[1].end_pos