Temporary work on carriage returns.

This commit is contained in:
Dave Halter
2017-05-31 08:59:49 +02:00
parent 8356bcab10
commit b367058af6
4 changed files with 48 additions and 14 deletions

View File

@@ -196,6 +196,18 @@ class Parser(BaseParser):
def remove_last_newline(node): def remove_last_newline(node):
def calculate_end_pos(leaf, text):
    """Return the (line, column) position reached after appending *text*.

    Starts from ``leaf.end_pos`` — or ``(1, 0)`` when there is no leaf —
    and advances by the lines of *text*, split with parso's ``splitlines``.
    """
    line, column = (1, 0) if leaf is None else leaf.end_pos
    text_lines = splitlines(text, keepends=True)
    added_lines = len(text_lines) - 1
    if added_lines == 0:
        # Single-line text only moves the column on the current line.
        return line, column + len(text_lines[0])
    # Multi-line text: the column restarts on the last appended line.
    return line + added_lines, len(text_lines[-1])
endmarker = node.children[-1] endmarker = node.children[-1]
# The newline is either in the endmarker as a prefix or the previous # The newline is either in the endmarker as a prefix or the previous
# leaf as a newline token. # leaf as a newline token.
@@ -213,20 +225,24 @@ def remove_last_newline(node):
raise ValueError("There's no newline at the end, cannot remove it.") raise ValueError("There's no newline at the end, cannot remove it.")
text = text[:-1] text = text[:-1]
if text and text[-1] == '\r':
# By adding an artificial newline this creates weird side effects for
# \r at the end of files that would normally be error leafs. Try to
# correct that here.
text = text[:-1]
start_pos = calculate_end_pos(leaf, text)
error_token = tree.PythonErrorLeaf('errortoken', '\r', start_pos, prefix=text)
node.children.insert(-2, error_token)
# Cleanup
leaf = error_token
text = ''
if prefix: if prefix:
endmarker.prefix = text endmarker.prefix = text
if leaf is None:
end_pos = (1, 0)
else:
end_pos = leaf.end_pos
lines = splitlines(text, keepends=True) endmarker.start_pos = calculate_end_pos(leaf, text)
if len(lines) == 1:
end_pos = end_pos[0], end_pos[1] + len(lines[0])
else:
end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
endmarker.start_pos = end_pos
else: else:
leaf.value = text leaf.value = text
endmarker.start_pos = leaf.end_pos endmarker.start_pos = leaf.end_pos

View File

@@ -3,7 +3,7 @@ import re
from parso.tokenize import group from parso.tokenize import group
class PrefixToken(object): class PrefixPart(object):
def __init__(self, typ, value, start_pos): def __init__(self, typ, value, start_pos):
self.type = typ self.type = typ
self.value = value self.value = value
@@ -19,10 +19,11 @@ class PrefixToken(object):
_comment = r'#[^\n\r\f]*' _comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n?' _backslash = r'\\\r?\n?'
_whitespace = r' +' _whitespace = r' +'
_tabs = r'\t+'
_newline = r'\r?\n' _newline = r'\r?\n'
_form_feed = r'\f' _form_feed = r'\f'
_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed) _regex = group(_comment, _backslash, _whitespace, _newline, _form_feed, _tabs)
_regex = re.compile(_regex) _regex = re.compile(_regex)
@@ -32,7 +33,8 @@ _types = {
'\\': 'backslash', '\\': 'backslash',
'\f': 'formfeed', '\f': 'formfeed',
'\n': 'newline', '\n': 'newline',
'\r': 'newline' '\r': 'newline',
'\t': 'tabs',
} }
@@ -43,7 +45,7 @@ def split_prefix(prefix, start_pos):
match =_regex.match(prefix, start) match =_regex.match(prefix, start)
value = match.group(0) value = match.group(0)
typ = _types[value[0]] typ = _types[value[0]]
yield PrefixToken(typ, value, (line, column + start)) yield PrefixPart(typ, value, (line, column + start))
start = match.end(0) start = match.end(0)
if value.endswith('\n'): if value.endswith('\n'):

View File

@@ -104,3 +104,17 @@ def test_end_newlines():
test('def a():\n pass', (2, 5)) test('def a():\n pass', (2, 5))
test('def a(', (1, 6)) test('def a(', (1, 6))
@pytest.mark.parametrize(('code', 'types'), [
    ('\r', ['error_leaf', 'endmarker']),
    ('\n\r', ['error_leaf', 'endmarker'])
])
def test_carriage_return_at_end(code, types):
    r"""
    A trailing ``\r`` would normally become an error leaf; the artificial
    newline added during parsing creates odd side effects there.  Check that
    the code round-trips and the children have the expected node types.
    """
    module = parse(code)
    assert module.get_code() == code
    child_types = [child.type for child in module.children]
    assert child_types == types

View File

@@ -15,6 +15,7 @@ import parso
('\\', ['\\']), ('\\', ['\\']),
('\\\n', ['\\\n']), ('\\\n', ['\\\n']),
('\\\r\n', ['\\\r\n']), ('\\\r\n', ['\\\r\n']),
('\t\t\n\t', ['\t\t', '\n', '\t']),
]) ])
def test_simple_prefix_splitting(string, tokens): def test_simple_prefix_splitting(string, tokens):
tree = parso.parse(string) tree = parso.parse(string)
@@ -42,6 +43,7 @@ def test_simple_prefix_splitting(string, tokens):
('\r\n', ['newline']), ('\r\n', ['newline']),
('\f', ['formfeed']), ('\f', ['formfeed']),
('\\\n', ['backslash']), ('\\\n', ['backslash']),
('\r', ['newline']),
]) ])
def test_prefix_splitting_types(string, types): def test_prefix_splitting_types(string, types):
tree = parso.parse(string) tree = parso.parse(string)