From b367058af6ce2f5f28d67293622fc549a4efd9be Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 31 May 2017 08:59:49 +0200 Subject: [PATCH] Temporary work on carriage returns. --- parso/python/parser.py | 36 ++++++++++++++++++++++++++---------- parso/python/prefix.py | 10 ++++++---- test/test_get_code.py | 14 ++++++++++++++ test/test_prefix.py | 2 ++ 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index 44871b3..4300a8a 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -196,6 +196,18 @@ class Parser(BaseParser): def remove_last_newline(node): + def calculate_end_pos(leaf, text): + if leaf is None: + end_pos = (1, 0) + else: + end_pos = leaf.end_pos + + lines = splitlines(text, keepends=True) + if len(lines) == 1: + return end_pos[0], end_pos[1] + len(lines[0]) + else: + return end_pos[0] + len(lines) - 1, len(lines[-1]) + endmarker = node.children[-1] # The newline is either in the endmarker as a prefix or the previous # leaf as a newline token. @@ -213,20 +225,24 @@ def remove_last_newline(node): raise ValueError("There's no newline at the end, cannot remove it.") text = text[:-1] + if text and text[-1] == '\r': + # By adding an artificial newline this creates weird side effects for + # \r at the end of files that would normally be error leafs. Try to + # correct that here. + text = text[:-1] + start_pos = calculate_end_pos(leaf, text) + error_token = tree.PythonErrorLeaf('errortoken', '\r', start_pos, prefix=text) + node.children.insert(-2, error_token) + + # Cleanup + leaf = error_token + text = '' + if prefix: endmarker.prefix = text - if leaf is None: - end_pos = (1, 0) - else: - end_pos = leaf.end_pos - lines = splitlines(text, keepends=True) - if len(lines) == 1: - end_pos = end_pos[0], end_pos[1] + len(lines[0]) - else: - end_pos = end_pos[0] + len(lines) - 1, len(lines[-1]) - endmarker.start_pos = end_pos + endmarker.start_pos = calculate_end_pos(leaf, text) else: leaf.value = text endmarker.start_pos = leaf.end_pos diff --git a/parso/python/prefix.py b/parso/python/prefix.py index 6ba3eb6..06bbf53 100644 --- a/parso/python/prefix.py +++ b/parso/python/prefix.py @@ -3,7 +3,7 @@ import re from parso.tokenize import group -class PrefixToken(object): +class PrefixPart(object): def __init__(self, typ, value, start_pos): self.type = typ self.value = value @@ -19,10 +19,11 @@ class PrefixToken(object): _comment = r'#[^\n\r\f]*' _backslash = r'\\\r?\n?' _whitespace = r' +' +_tabs = r'\t+' _newline = r'\r?\n' _form_feed = r'\f' -_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed) +_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed, _tabs) _regex = re.compile(_regex) @@ -32,7 +33,8 @@ _types = { '\\': 'backslash', '\f': 'formfeed', '\n': 'newline', - '\r': 'newline' + '\r': 'newline', + '\t': 'tabs', } @@ -43,7 +45,7 @@ def split_prefix(prefix, start_pos): match =_regex.match(prefix, start) value = match.group(0) typ = _types[value[0]] - yield PrefixToken(typ, value, (line, column + start)) + yield PrefixPart(typ, value, (line, column + start)) start = match.end(0) if value.endswith('\n'): diff --git a/test/test_get_code.py b/test/test_get_code.py index 6a54385..5bb789f 100644 --- a/test/test_get_code.py +++ b/test/test_get_code.py @@ -104,3 +104,17 @@ def test_end_newlines(): test('def a():\n pass', (2, 5)) test('def a(', (1, 6)) + + +@pytest.mark.parametrize(('code', 'types'), [ + ('\r', ['error_leaf', 'endmarker']), + ('\n\r', ['error_leaf', 'endmarker']) +]) +def test_carriage_return_at_end(code, types): + """ + By adding an artificial newline this creates weird side effects for + \r at the end of files that would normally be error leafs. + """ + tree = parse(code) + assert tree.get_code() == code + assert [c.type for c in tree.children] == types diff --git a/test/test_prefix.py b/test/test_prefix.py index 95eea4b..ab0d1d4 100644 --- a/test/test_prefix.py +++ b/test/test_prefix.py @@ -15,6 +15,7 @@ import parso ('\\', ['\\']), ('\\\n', ['\\\n']), ('\\\r\n', ['\\\r\n']), + ('\t\t\n\t', ['\t\t', '\n', '\t']), ]) def test_simple_prefix_splitting(string, tokens): tree = parso.parse(string) @@ -42,6 +43,7 @@ def test_simple_prefix_splitting(string, tokens): ('\r\n', ['newline']), ('\f', ['formfeed']), ('\\\n', ['backslash']), + ('\r', ['newline']), ]) def test_prefix_splitting_types(string, types): tree = parso.parse(string)