From 09779c88aa59308a7ac4a468bfd1ca52ae8cbef2 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Tue, 24 Jan 2017 00:50:37 +0100
Subject: [PATCH] Fix a nasty issue in the tokenizer. Fixes #836.

At the same time there was a related issue of not cleaning up newlines
properly.
---
 jedi/parser/__init__.py              |  9 +++------
 jedi/parser/tokenize.py              |  3 +--
 test/test_parser/test_diff_parser.py |  5 +++++
 test/test_parser/test_tokenize.py    | 25 ++++++++++++++++++++-----
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index c411e878..a02fbbcd 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -232,14 +232,11 @@ class Parser(object):
                 # If there's a statement that fails to be parsed, there
                 # will be no previous leaf. So just ignore it.
                 break
-            elif newline.value != '\n':
-                # TODO REMOVE, error recovery was simplified.
-                # This may happen if error correction strikes and removes
-                # a whole statement including '\n'.
-                break
             else:
-                newline.value = ''
+                assert newline.value.endswith('\n')
+                newline.value = newline.value[:-1]
                 endmarker.start_pos = newline.start_pos
+                break
 
 
 class ParserWithRecovery(Parser):
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index 81d4f9dd..794bfb9b 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -259,8 +259,7 @@ def generate_tokens(readline, use_exact_op_types=False):
                 # line is an error token.
                 txt = line[pos:]
                 yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
-                pos += 1
-                continue
+                break
 
             prefix = additional_prefix + pseudomatch.group(1)
             additional_prefix = ''
diff --git a/test/test_parser/test_diff_parser.py b/test/test_parser/test_diff_parser.py
index 819a8916..0adc4ea0 100644
--- a/test/test_parser/test_diff_parser.py
+++ b/test/test_parser/test_diff_parser.py
@@ -206,6 +206,11 @@ def test_open_parentheses(differ):
     differ.parse('isinstance()\n' + func, parsers=2, copies=1)
 
 
+def test_open_parentheses_at_end(differ):
+    code = "a['"
+    differ.initialize(code)
+    differ.parse(code, parsers=1, expect_error_leaves=True)
+
 def test_backslash(differ):
     src = dedent(r"""
     a = 1\
diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py
index 1cbc4f3b..1ee880ea 100644
--- a/test/test_parser/test_tokenize.py
+++ b/test/test_parser/test_tokenize.py
@@ -3,8 +3,6 @@
 from io import StringIO
 from textwrap import dedent
 
-import pytest
-
 from jedi._compatibility import u, is_py3, py_version
 from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
 from jedi.parser import ParserWithRecovery, load_grammar, tokenize
@@ -12,6 +10,9 @@ from jedi.parser import ParserWithRecovery, load_grammar, tokenize
 
 from ..helpers import unittest
 
+def _get_token_list(string):
+    io = StringIO(u(string))
+    return list(tokenize.generate_tokens(io.readline))
 
 class TokenTest(unittest.TestCase):
     def test_end_pos_one_line(self):
@@ -135,9 +136,7 @@ def test_ur_literals():
     - All the other Python versions work very well with it.
""" def check(literal, is_literal=True): - io = StringIO(u(literal)) - tokens = tokenize.generate_tokens(io.readline) - token_list = list(tokens) + token_list = _get_token_list(literal) typ, result_literal, _, _ = token_list[0] if is_literal: assert typ == STRING @@ -158,3 +157,19 @@ def test_ur_literals(): check('rF""', is_literal=py_version >= 36) check('f""', is_literal=py_version >= 36) check('F""', is_literal=py_version >= 36) + + +def test_error_literal(): + error_token, endmarker = _get_token_list('"\n') + assert error_token.type == tokenize.ERRORTOKEN + assert endmarker.prefix == '' + assert error_token.string == '"\n' + assert endmarker.type == tokenize.ENDMARKER + assert endmarker.prefix == '' + + bracket, error_token, endmarker = _get_token_list('( """') + assert error_token.type == tokenize.ERRORTOKEN + assert error_token.prefix == ' ' + assert error_token.string == '"""' + assert endmarker.type == tokenize.ENDMARKER + assert endmarker.prefix == ''