Mirror of https://github.com/davidhalter/parso.git
Actually make \r usable
@@ -53,7 +53,7 @@ def _assert_valid_graph(node):
             content = previous_leaf.value + node.prefix
             previous_start_pos = previous_leaf.start_pos
 
-        if '\n' in content:
+        if '\n' in content or '\r' in content:
             splitted = split_lines(content)
             line = previous_start_pos[0] + len(splitted) - 1
             actual = line, len(splitted[-1])
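
A quick sanity check of the condition change above, as a sketch using parso.utils.split_lines (which this code already relies on): a lone '\r' is a line boundary for split_lines(), but the old "'\n' in content" test never sees it.

    from parso.utils import split_lines

    content = 'pass\r'
    print('\n' in content)                      # False -> old check takes the wrong branch
    print('\n' in content or '\r' in content)   # True
    print(split_lines(content))                 # ['pass', '']
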
@@ -96,7 +96,7 @@ def _ends_with_newline(leaf, suffix=''):
     else:
         typ = leaf.type
 
-    return typ == 'newline' or suffix.endswith('\n')
+    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 
 
 def _flows_finished(pgen_grammar, stack):
@@ -387,8 +387,8 @@ class DiffParser(object):
                     # We are done here, only thing that can come now is an
                     # endmarker or another dedented code block.
                     typ, string, start_pos, prefix = next(tokens)
-                    if '\n' in prefix:
-                        prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
+                    if '\n' in prefix or '\r' in prefix:
+                        prefix = re.sub(r'(<=\n|\r)[^\n\r]+$', '', prefix)
                     else:
                         prefix = ''
                     yield PythonToken(
@@ -463,9 +463,9 @@ class _NodesTreeNode(object):
             # the next line. That line is not fully parsed at this point.
             if _ends_with_newline(last_leaf, suffix):
                 line -= 1
-        line += suffix.count('\n')
+        line += len(split_lines(suffix)) - 1
 
-        if suffix and not suffix.endswith('\n'):
+        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
             # This is the end of a file (that doesn't end with a newline).
             line += 1
 
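
The count()-to-split_lines() switch matters for the same reason, shown here as a small sketch: str.count('\n') undercounts lines whose endings are '\r' or '\r\n', while split_lines() treats every line ending uniformly.

    from parso.utils import split_lines

    suffix = 'foo\rbar\r\nbaz'
    print(suffix.count('\n'))            # 1 -> old line arithmetic is off by one
    print(len(split_lines(suffix)) - 1)  # 2
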
@@ -545,16 +545,13 @@ class _NodesTree(object):
         is_endmarker = last_leaf.type == self.endmarker_type
         self._prefix_remainder = ''
         if is_endmarker:
-            try:
-                separation = last_leaf.prefix.rindex('\n') + 1
-            except ValueError:
-                pass
-            else:
+            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
+            if separation > -1:
                 # Remove the whitespace part of the prefix after a newline.
                 # That is not relevant if parentheses were opened. Always parse
                 # until the end of a line.
                 last_leaf.prefix, self._prefix_remainder = \
-                    last_leaf.prefix[:separation], last_leaf.prefix[separation:]
+                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
 
         self.prefix = ''
 
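
A minimal sketch of the rindex()-to-rfind() rewrite with illustrative values (the real prefix comes from the endmarker leaf): rfind() returns -1 instead of raising ValueError, max() picks whichever of '\n'/'\r' occurs last, and the "+ 1" moves from the index computation into the slices, so the result is unchanged for '\n'-only prefixes.

    prefix = '# comment\r    '
    separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
    if separation > -1:
        kept, remainder = prefix[:separation + 1], prefix[separation + 1:]
        print(repr(kept), repr(remainder))   # '# comment\r' '    '
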
@@ -597,10 +594,9 @@ class _NodesTree(object):
                 # We basically removed the endmarker, but we are not allowed to
                 # remove the newline at the end of the line, otherwise it's
                 # going to be missing.
-                try:
-                    new_prefix = node.prefix[:node.prefix.rindex('\n') + 1]
-                except ValueError:
-                    pass
+                newline_index = max(node.prefix.rfind('\n'), node.prefix.rfind('\r'))
+                if newline_index > -1:
+                    new_prefix = node.prefix[:newline_index + 1]
                 # Endmarkers just distort all the checks below. Remove them.
                 break
 
@@ -186,7 +186,7 @@ def _create_token_collection(version_info):
 
     Bracket = '[][(){}]'
 
-    special_args = [r'\r?\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
     if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
     Special = group(*special_args)
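
A rough illustration of why the Special pattern changes shape: r'\r?\n' can never match a bare '\r', while the new alternation (r'\r\n?' plus r'\n') accepts '\r\n', '\r' and '\n'.

    import re

    old = re.compile(r'\r?\n')
    new = re.compile(r'\r\n?|\n')
    for text in ('\r\n', '\n', '\r'):
        print(repr(text), bool(old.fullmatch(text)), bool(new.fullmatch(text)))
    # '\r\n' True True
    # '\n'   True True
    # '\r'   False True
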
@@ -194,16 +194,16 @@ def _create_token_collection(version_info):
     Funny = group(Operator, Bracket, Special)
 
     # First (or only) line of ' or " string.
-    ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                    group("'", r'\\\r?\n'),
-                    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                    group('"', r'\\\r?\n'))
+    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
+                    group("'", r'\\(?:\r\n?|\n)'),
+                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
+                    group('"', r'\\(?:\r\n?|\n)'))
     pseudo_extra_pool = [Comment, Triple]
     all_quotes = '"', "'", '"""', "'''"
     if fstring_prefixes:
         pseudo_extra_pool.append(FStringStart + group(*all_quotes))
 
-    PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
+    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
     PseudoToken = group(Whitespace, capture=True) + \
         group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
 
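
The same idea applies to the explicit line continuations inside ContStr and PseudoExtras; a rough check: the old r'\\\r?\n' still requires a '\n', so a backslash followed by a bare '\r' was not recognized as a continuation, while r'\\(?:\r\n?|\n)' accepts all three endings.

    import re

    old = re.compile(r'\\\r?\n')
    new = re.compile(r'\\(?:\r\n?|\n)')
    for ending in ('\\\n', '\\\r\n', '\\\r'):
        print(repr(ending), bool(old.fullmatch(ending)), bool(new.fullmatch(ending)))
    # '\\\n'   True  True
    # '\\\r\n' True  True
    # '\\\r'   False True
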
@@ -328,7 +328,7 @@ def _find_fstring_string(fstring_stack, line, lnum, pos):
         pass  # The string was not found.
 
     new_pos += len(string)
-    if allow_multiline and string.endswith('\n'):
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
         tos.previous_lines += string
         string = ''
     else:
@@ -545,7 +545,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             elif initial in single_quoted or \
                     token[:2] in single_quoted or \
                     token[:3] in single_quoted:
-                if token[-1] == '\n':  # continued string
+                if token[-1] in '\r\n':  # continued string
                     contstr_start = lnum, start
                     endprog = (endpats.get(initial) or endpats.get(token[1])
                                or endpats.get(token[2]))
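
The membership test covers both characters that can now end a continued single-quoted string token; a tiny sketch with a made-up token value:

    token = "'abc\\\r"          # hypothetical token: string continued by backslash + CR
    print(token[-1] == '\n')    # False -> old check misses it
    print(token[-1] in '\r\n')  # True
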
@@ -571,7 +571,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                         indents.append(indent)
                         break
                 yield PythonToken(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
             else:
@@ -593,7 +593,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
     if contstr:
         yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
+        if contstr.endswith('\n') or contstr.endswith('\r'):
             new_line = True
 
     end_pos = lnum, max
@@ -958,4 +958,6 @@ def test_wrong_backslash(differ):
 def test_random_unicode_characters(differ):
     differ.initialize('')
     differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse('\r\r', parsers=1)
+    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
     differ.parse('')
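
These tests drive the internal differ fixture; a hedged public-API analogue (assuming a parso build that includes the changes above) is that source containing bare '\r' line endings now parses, with error recovery where needed, and still round-trips exactly:

    import parso

    for code in ('\r\r', 'x = 1\rif x:\r    pass\r'):
        module = parso.parse(code)
        assert module.get_code() == code
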
@@ -106,14 +106,15 @@ def test_end_newlines():
 
 
 @pytest.mark.parametrize(('code', 'types'), [
-    ('\r', ['error_leaf', 'endmarker']),
-    ('\n\r', ['error_leaf', 'endmarker'])
+    ('\r', ['endmarker']),
+    ('\n\r', ['endmarker'])
 ])
 def test_carriage_return_at_end(code, types):
     """
-    By adding an artificial newline this creates weird side effects for
-    \r at the end of files that would normally be error leafs.
+    By adding an artificial newline this created weird side effects for
+    \r at the end of files.
     """
     tree = parse(code)
     assert tree.get_code() == code
     assert [c.type for c in tree.children] == types
+    assert tree.end_pos == (len(code) + 1, 0)
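
The new end_pos assertion can be restated outside the test, again assuming a build with these changes: a file consisting only of '\r' (or '\n\r') now ends on a following empty line, exactly as '\n' would, and the module contains only an endmarker.

    import parso

    for code in ('\r', '\n\r'):
        tree = parso.parse(code)
        print(repr(code), tree.end_pos, [c.type for c in tree.children])
    # '\r'   (2, 0) ['endmarker']
    # '\n\r' (3, 0) ['endmarker']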