Actually make \r usable

commit fbaad7883f
parent b1f613fe16
Author: Dave Halter
Date:   2019-01-08 20:01:57 +01:00

4 changed files with 29 additions and 30 deletions


@@ -53,7 +53,7 @@ def _assert_valid_graph(node):
             content = previous_leaf.value + node.prefix
             previous_start_pos = previous_leaf.start_pos
 
-        if '\n' in content:
+        if '\n' in content or '\r' in content:
             splitted = split_lines(content)
             line = previous_start_pos[0] + len(splitted) - 1
             actual = line, len(splitted[-1])
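
Why the extra '\r' check matters: parso's own split_lines (from parso.utils) treats '\n', '\r\n' and a lone '\r' as line boundaries, while the old substring test for '\n' never fired on classic-Mac line endings. A minimal illustration:

    from parso.utils import split_lines

    # split_lines recognises all three line-ending styles...
    assert split_lines('a\rb') == ['a', 'b']
    assert split_lines('a\r\nb') == ['a', 'b']
    # ...but a bare '\n' substring test misses the lone '\r' case.
    assert '\n' not in 'a\rb'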
@@ -96,7 +96,7 @@ def _ends_with_newline(leaf, suffix=''):
     else:
         typ = leaf.type
 
-    return typ == 'newline' or suffix.endswith('\n')
+    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 
 
 def _flows_finished(pgen_grammar, stack):
@@ -387,8 +387,8 @@ class DiffParser(object):
# We are done here, only thing that can come now is an # We are done here, only thing that can come now is an
# endmarker or another dedented code block. # endmarker or another dedented code block.
typ, string, start_pos, prefix = next(tokens) typ, string, start_pos, prefix = next(tokens)
if '\n' in prefix: if '\n' in prefix or '\r' in prefix:
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) prefix = re.sub(r'(<=\n|\r)[^\n\r]+$', '', prefix)
else: else:
prefix = '' prefix = ''
yield PythonToken( yield PythonToken(
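
The substitution is meant to drop the trailing partial line of the prefix while keeping its last line break. Note that the committed pattern spells the lookbehind as (<=\n|\r); a regex lookbehind is normally written (?<=...). A hedged sketch of the apparent intent (illustration only, not the committed pattern):

    import re

    # Strip the trailing partial line of a prefix, keeping the line break.
    prefix = '# comment\r    '
    trimmed = re.sub(r'(?<=[\n\r])[^\n\r]+$', '', prefix)
    assert trimmed == '# comment\r'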
@@ -463,9 +463,9 @@ class _NodesTreeNode(object):
# the next line. That line is not fully parsed at this point. # the next line. That line is not fully parsed at this point.
if _ends_with_newline(last_leaf, suffix): if _ends_with_newline(last_leaf, suffix):
line -= 1 line -= 1
line += suffix.count('\n') line += len(split_lines(suffix)) - 1
if suffix and not suffix.endswith('\n'): if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
# This is the end of a file (that doesn't end with a newline). # This is the end of a file (that doesn't end with a newline).
line += 1 line += 1
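
Counting '\n' undercounts lines whenever the suffix uses other endings; len(split_lines(s)) - 1 counts line breaks of every style. For instance:

    from parso.utils import split_lines

    suffix = 'foo\rbar'
    assert suffix.count('\n') == 0              # misses the '\r' break
    assert len(split_lines(suffix)) - 1 == 1    # counts it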
@@ -545,16 +545,13 @@ class _NodesTree(object):
         is_endmarker = last_leaf.type == self.endmarker_type
         self._prefix_remainder = ''
         if is_endmarker:
-            try:
-                separation = last_leaf.prefix.rindex('\n') + 1
-            except ValueError:
-                pass
-            else:
+            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
+            if separation > -1:
                 # Remove the whitespace part of the prefix after a newline.
                 # That is not relevant if parentheses were opened. Always parse
                 # until the end of a line.
                 last_leaf.prefix, self._prefix_remainder = \
-                    last_leaf.prefix[:separation], last_leaf.prefix[separation:]
+                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
 
         self.prefix = ''
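
str.rindex raises ValueError on a miss, which is why the old code needed try/except. max() over the two rfind results is -1 only when neither '\n' nor '\r' occurs, and otherwise the index of the last line break of either kind. A small check of the slicing:

    prefix = '  # comment\r    '
    separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
    assert separation == 11
    assert prefix[:separation + 1] == '  # comment\r'
    assert prefix[separation + 1:] == '    '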
@@ -597,10 +594,9 @@ class _NodesTree(object):
# We basically removed the endmarker, but we are not allowed to # We basically removed the endmarker, but we are not allowed to
# remove the newline at the end of the line, otherwise it's # remove the newline at the end of the line, otherwise it's
# going to be missing. # going to be missing.
try: newline_index = max(node.prefix.rfind('\n'), node.prefix.rfind('\r'))
new_prefix = node.prefix[:node.prefix.rindex('\n') + 1] if newline_index > -1:
except ValueError: new_prefix = node.prefix[:newline_index + 1]
pass
# Endmarkers just distort all the checks below. Remove them. # Endmarkers just distort all the checks below. Remove them.
break break


@@ -186,7 +186,7 @@ def _create_token_collection(version_info):
     Bracket = '[][(){}]'
 
-    special_args = [r'\r?\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
     if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
     Special = group(*special_args)
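
The old alternative r'\r?\n' only matches '\n' or '\r\n', so a lone carriage return was never matched as an explicit newline; r'\r\n?' plus r'\n' covers all three endings. For example:

    import re

    old = re.compile(r'\r?\n')
    new = re.compile(r'\r\n?|\n')
    assert old.match('\r') is None           # lone \r: no match
    assert new.match('\r').group() == '\r'
    assert new.match('\r\n').group() == '\r\n'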
@@ -194,16 +194,16 @@ def _create_token_collection(version_info):
Funny = group(Operator, Bracket, Special) Funny = group(Operator, Bracket, Special)
# First (or only) line of ' or " string. # First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
group("'", r'\\\r?\n'), group("'", r'\\(?:\r\n?|\n)'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
group('"', r'\\\r?\n')) group('"', r'\\(?:\r\n?|\n)'))
pseudo_extra_pool = [Comment, Triple] pseudo_extra_pool = [Comment, Triple]
all_quotes = '"', "'", '"""', "'''" all_quotes = '"', "'", '"""', "'''"
if fstring_prefixes: if fstring_prefixes:
pseudo_extra_pool.append(FStringStart + group(*all_quotes)) pseudo_extra_pool.append(FStringStart + group(*all_quotes))
PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool) PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
PseudoToken = group(Whitespace, capture=True) + \ PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
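
The same three endings apply to explicit line continuations inside single-quoted strings: r'\\(?:\r\n?|\n)' accepts a backslash followed by any of them, where the old r'\\\r?\n' rejected backslash plus a lone '\r'. Roughly:

    import re

    cont = re.compile(r'\\(?:\r\n?|\n)')
    for ending in ('\\\n', '\\\r\n', '\\\r'):
        assert cont.fullmatch(ending)
    assert re.fullmatch(r'\\\r?\n', '\\\r') is None  # old pattern misses it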
@@ -328,7 +328,7 @@ def _find_fstring_string(fstring_stack, line, lnum, pos):
             pass  # The string was not found.
 
     new_pos += len(string)
-    if allow_multiline and string.endswith('\n'):
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
         tos.previous_lines += string
         string = ''
     else:
@@ -545,7 +545,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
elif initial in single_quoted or \ elif initial in single_quoted or \
token[:2] in single_quoted or \ token[:2] in single_quoted or \
token[:3] in single_quoted: token[:3] in single_quoted:
if token[-1] == '\n': # continued string if token[-1] in '\r\n': # continued string
contstr_start = lnum, start contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1]) endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2])) or endpats.get(token[2]))
@@ -571,7 +571,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                         indents.append(indent)
                         break
                 yield PythonToken(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
             else:
@@ -593,7 +593,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
     if contstr:
         yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
+        if contstr.endswith('\n') or contstr.endswith('\r'):
             new_line = True
 
     end_pos = lnum, max


@@ -958,4 +958,6 @@ def test_wrong_backslash(differ):
 def test_random_unicode_characters(differ):
     differ.initialize('')
     differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse('\r\r', parsers=1)
+    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
     differ.parse('')
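
Both inputs come straight from this regression test and should now round-trip through the public API as well (a quick sanity check, not part of the commit):

    import parso

    for code in ('\r\r', '˟Ę\x05À\r rúƣ@\x8a\x15r()\n'):
        assert parso.parse(code).get_code() == code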


@@ -106,14 +106,15 @@ def test_end_newlines():
 @pytest.mark.parametrize(('code', 'types'), [
-    ('\r', ['error_leaf', 'endmarker']),
-    ('\n\r', ['error_leaf', 'endmarker'])
+    ('\r', ['endmarker']),
+    ('\n\r', ['endmarker'])
 ])
 def test_carriage_return_at_end(code, types):
     """
-    By adding an artificial newline this creates weird side effects for
-    \r at the end of files that would normally be error leafs.
+    By adding an artificial newline this created weird side effects for
+    \r at the end of files.
     """
     tree = parse(code)
     assert tree.get_code() == code
     assert [c.type for c in tree.children] == types
+    assert tree.end_pos == (len(code) + 1, 0)
 
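
The new end_pos assertion pins down the behaviour the commit fixes: a lone '\r' now counts as a real line break, so the endmarker starts on the following line instead of becoming an error leaf. Mirroring the test data through the public API:

    from parso import parse

    tree = parse('\r')
    assert [c.type for c in tree.children] == ['endmarker']
    assert tree.end_pos == (2, 0)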