mirror of https://github.com/davidhalter/parso.git
synced 2025-12-07 13:24:39 +08:00
Actually make \r usable
@@ -53,7 +53,7 @@ def _assert_valid_graph(node):
         content = previous_leaf.value + node.prefix
         previous_start_pos = previous_leaf.start_pos
 
-        if '\n' in content:
+        if '\n' in content or '\r' in content:
             splitted = split_lines(content)
             line = previous_start_pos[0] + len(splitted) - 1
             actual = line, len(splitted[-1])
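The position check above leans on split_lines, which (in parso.utils) treats \r, \r\n, and \n all as line boundaries, so testing the content only for '\n' skipped the recalculation for \r-separated text. A minimal sketch of the difference:

    from parso.utils import split_lines

    # split_lines recognizes all three line-ending styles.
    assert split_lines('a\rb') == ['a', 'b']
    assert split_lines('a\r\nb') == ['a', 'b']
    assert split_lines('a\nb') == ['a', 'b']
    # A trailing separator yields an empty final line.
    assert split_lines('a\r') == ['a', '']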
@@ -96,7 +96,7 @@ def _ends_with_newline(leaf, suffix=''):
     else:
         typ = leaf.type
 
-    return typ == 'newline' or suffix.endswith('\n')
+    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 
 
 def _flows_finished(pgen_grammar, stack):
@@ -387,8 +387,8 @@ class DiffParser(object):
                 # We are done here, only thing that can come now is an
                 # endmarker or another dedented code block.
                 typ, string, start_pos, prefix = next(tokens)
-                if '\n' in prefix:
-                    prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
+                if '\n' in prefix or '\r' in prefix:
+                    prefix = re.sub(r'(?<=\n|\r)[^\n\r]+$', '', prefix)
                 else:
                     prefix = ''
                 yield PythonToken(
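The substitution keeps everything up to and including the last line break and drops the trailing, not-yet-complete line. A rough check, assuming the (?<=...) lookbehind form shown above:

    import re

    def trim(prefix):
        # Drop the trailing partial line, keeping the final \n or \r.
        return re.sub(r'(?<=\n|\r)[^\n\r]+$', '', prefix)

    assert trim('# comment\n    ') == '# comment\n'
    assert trim('# comment\r    ') == '# comment\r'
    assert trim('no line break') == 'no line break'  # nothing to anchor on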
@@ -463,9 +463,9 @@ class _NodesTreeNode(object):
         # the next line. That line is not fully parsed at this point.
         if _ends_with_newline(last_leaf, suffix):
             line -= 1
-        line += suffix.count('\n')
+        line += len(split_lines(suffix)) - 1
 
-        if suffix and not suffix.endswith('\n'):
+        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
             # This is the end of a file (that doesn't end with a newline).
             line += 1
 
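suffix.count('\n') undercounts lines when the suffix uses \r or \r\n separators, while len(split_lines(suffix)) - 1 counts line breaks of every style. For example:

    from parso.utils import split_lines

    suffix = 'pass\rpass\r'
    assert suffix.count('\n') == 0            # old computation: no breaks seen
    assert len(split_lines(suffix)) - 1 == 2  # new computation: two breaks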
@@ -545,16 +545,13 @@ class _NodesTree(object):
         is_endmarker = last_leaf.type == self.endmarker_type
         self._prefix_remainder = ''
         if is_endmarker:
-            try:
-                separation = last_leaf.prefix.rindex('\n') + 1
-            except ValueError:
-                pass
-            else:
+            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
+            if separation > -1:
                 # Remove the whitespace part of the prefix after a newline.
                 # That is not relevant if parentheses were opened. Always parse
                 # until the end of a line.
                 last_leaf.prefix, self._prefix_remainder = \
-                    last_leaf.prefix[:separation], last_leaf.prefix[separation:]
+                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
 
         self.prefix = ''
 
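str.rindex('\n') raised ValueError whenever the prefix ended its lines with a lone \r; taking the max of both rfind results locates the last line-break character of either kind, and the + 1 in the slices keeps that character on the parsed side. In isolation:

    prefix = '  # comment\r' + ' ' * 8
    separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
    assert separation == 11
    kept, remainder = prefix[:separation + 1], prefix[separation + 1:]
    assert kept == '  # comment\r' and remainder == ' ' * 8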
@@ -597,10 +594,9 @@ class _NodesTree(object):
                 # We basically removed the endmarker, but we are not allowed to
                 # remove the newline at the end of the line, otherwise it's
                 # going to be missing.
-                try:
-                    new_prefix = node.prefix[:node.prefix.rindex('\n') + 1]
-                except ValueError:
-                    pass
+                newline_index = max(node.prefix.rfind('\n'), node.prefix.rfind('\r'))
+                if newline_index > -1:
+                    new_prefix = node.prefix[:newline_index + 1]
                 # Endmarkers just distort all the checks below. Remove them.
                 break
 
@@ -186,7 +186,7 @@ def _create_token_collection(version_info):
 
     Bracket = '[][(){}]'
 
-    special_args = [r'\r?\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
     if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
     Special = group(*special_args)
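The old alternative r'\r?\n' matched \n and \r\n but never a lone \r, so a bare carriage return could not become a newline token; r'\r\n?' plus r'\n' covers all three forms. A quick comparison:

    import re

    newline = re.compile(r'\r\n?|\n')
    for candidate in ('\n', '\r\n', '\r'):
        assert newline.match(candidate).group() == candidate

    assert re.match(r'\r?\n', '\r') is None  # the old pattern misses bare \r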
@@ -194,16 +194,16 @@ def _create_token_collection(version_info):
     Funny = group(Operator, Bracket, Special)
 
     # First (or only) line of ' or " string.
-    ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                    group("'", r'\\\r?\n'),
-                    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                    group('"', r'\\\r?\n'))
+    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
+                    group("'", r'\\(?:\r\n?|\n)'),
+                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
+                    group('"', r'\\(?:\r\n?|\n)'))
     pseudo_extra_pool = [Comment, Triple]
     all_quotes = '"', "'", '"""', "'''"
     if fstring_prefixes:
         pseudo_extra_pool.append(FStringStart + group(*all_quotes))
 
-    PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
+    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
     PseudoToken = group(Whitespace, capture=True) + \
         group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
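Two things change in ContStr: the body of a single-line string now also stops at \r (previously a bare carriage return was swallowed into the string), and the explicit backslash continuation accepts \r\n, \r, or \n. A simplified sketch of the single-quoted branch, with string prefixes omitted:

    import re

    cont_str = re.compile(r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*(?:'|\\(?:\r\n?|\n))")
    assert cont_str.match("'abc'").group() == "'abc'"
    assert cont_str.match("'abc\\\n").group() == "'abc\\\n"  # continuation kept
    assert cont_str.match("'abc\rdef'") is None  # \r now terminates the line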
@@ -328,7 +328,7 @@ def _find_fstring_string(fstring_stack, line, lnum, pos):
         pass  # The string was not found.
 
     new_pos += len(string)
-    if allow_multiline and string.endswith('\n'):
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
         tos.previous_lines += string
         string = ''
     else:
@@ -545,7 +545,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             elif initial in single_quoted or \
                     token[:2] in single_quoted or \
                     token[:3] in single_quoted:
-                if token[-1] == '\n':  # continued string
+                if token[-1] in '\r\n':  # continued string
                     contstr_start = lnum, start
                     endprog = (endpats.get(initial) or endpats.get(token[1])
                                or endpats.get(token[2]))
@@ -571,7 +571,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                         indents.append(indent)
                         break
                 yield PythonToken(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
             else:
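Accepting '\\\r' means a backslash followed by a lone carriage return is treated as a continued statement, matching the \n and \r\n forms. An end-to-end sketch, assuming a parso build with this patch applied:

    import parso

    # Backslash continuation that uses a bare \r as its line break.
    code = 'x = 1 + \\\r2\n'
    module = parso.parse(code)
    assert module.get_code() == code  # parso trees round-trip exactly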
@@ -593,7 +593,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
     if contstr:
         yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
+        if contstr.endswith('\n') or contstr.endswith('\r'):
             new_line = True
 
     end_pos = lnum, max
@@ -958,4 +958,6 @@ def test_wrong_backslash(differ):
 def test_random_unicode_characters(differ):
     differ.initialize('')
     differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse('\r\r', parsers=1)
+    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
     differ.parse('')
@@ -106,14 +106,15 @@ def test_end_newlines():
 
 
 @pytest.mark.parametrize(('code', 'types'), [
-    ('\r', ['error_leaf', 'endmarker']),
-    ('\n\r', ['error_leaf', 'endmarker'])
+    ('\r', ['endmarker']),
+    ('\n\r', ['endmarker'])
 ])
 def test_carriage_return_at_end(code, types):
     """
-    By adding an artificial newline this creates weird side effects for
-    \r at the end of files that would normally be error leafs.
+    By adding an artificial newline this created weird side effects for
+    \r at the end of files.
     """
     tree = parse(code)
     assert tree.get_code() == code
     assert [c.type for c in tree.children] == types
+    assert tree.end_pos == (len(code) + 1, 0)
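End to end, the updated expectations mean a file consisting only of a carriage return parses cleanly, and the \r still advances the line counter:

    from parso import parse

    tree = parse('\r')
    assert [c.type for c in tree.children] == ['endmarker']
    assert tree.end_pos == (2, 0)  # \r ends line 1, so the file ends on line 2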