mirror of https://github.com/davidhalter/parso.git
synced 2025-12-07 13:24:39 +08:00
Actually make \r usable
@@ -53,7 +53,7 @@ def _assert_valid_graph(node):
         content = previous_leaf.value + node.prefix
         previous_start_pos = previous_leaf.start_pos
 
-        if '\n' in content:
+        if '\n' in content or '\r' in content:
             splitted = split_lines(content)
             line = previous_start_pos[0] + len(splitted) - 1
             actual = line, len(splitted[-1])
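The position check above leans on split_lines, which (in parso.utils) treats \r, \r\n, and \n all as line boundaries, so testing the content only for '\n' skipped the recalculation for \r-separated text. A minimal sketch of the difference:

    from parso.utils import split_lines

    # split_lines recognizes all three line-ending styles.
    assert split_lines('a\rb') == ['a', 'b']
    assert split_lines('a\r\nb') == ['a', 'b']
    assert split_lines('a\nb') == ['a', 'b']
    # A trailing separator yields an empty final line.
    assert split_lines('a\r') == ['a', '']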
@@ -96,7 +96,7 @@ def _ends_with_newline(leaf, suffix=''):
     else:
         typ = leaf.type
 
-    return typ == 'newline' or suffix.endswith('\n')
+    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 
 
 def _flows_finished(pgen_grammar, stack):
@@ -387,8 +387,8 @@ class DiffParser(object):
                 # We are done here, only thing that can come now is an
                 # endmarker or another dedented code block.
                 typ, string, start_pos, prefix = next(tokens)
-                if '\n' in prefix:
-                    prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
+                if '\n' in prefix or '\r' in prefix:
+                    prefix = re.sub(r'(?<=\n|\r)[^\n\r]+$', '', prefix)
                 else:
                     prefix = ''
                 yield PythonToken(
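The substitution keeps everything up to and including the last line break and drops the trailing, not-yet-complete line. A rough check, assuming the (?<=...) lookbehind form shown above:

    import re

    def trim(prefix):
        # Drop the trailing partial line, keeping the final \n or \r.
        return re.sub(r'(?<=\n|\r)[^\n\r]+$', '', prefix)

    assert trim('# comment\n    ') == '# comment\n'
    assert trim('# comment\r    ') == '# comment\r'
    assert trim('no line break') == 'no line break'  # nothing to anchor on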
@@ -463,9 +463,9 @@ class _NodesTreeNode(object):
         # the next line. That line is not fully parsed at this point.
         if _ends_with_newline(last_leaf, suffix):
             line -= 1
-        line += suffix.count('\n')
+        line += len(split_lines(suffix)) - 1
 
-        if suffix and not suffix.endswith('\n'):
+        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
             # This is the end of a file (that doesn't end with a newline).
             line += 1
 
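suffix.count('\n') undercounts lines when the suffix uses \r or \r\n separators, while len(split_lines(suffix)) - 1 counts line breaks of every style. For example:

    from parso.utils import split_lines

    suffix = 'pass\rpass\r'
    assert suffix.count('\n') == 0            # old computation: no breaks seen
    assert len(split_lines(suffix)) - 1 == 2  # new computation: two breaks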
@@ -545,16 +545,13 @@ class _NodesTree(object):
         is_endmarker = last_leaf.type == self.endmarker_type
         self._prefix_remainder = ''
         if is_endmarker:
-            try:
-                separation = last_leaf.prefix.rindex('\n') + 1
-            except ValueError:
-                pass
-            else:
+            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
+            if separation > -1:
                 # Remove the whitespace part of the prefix after a newline.
                 # That is not relevant if parentheses were opened. Always parse
                 # until the end of a line.
                 last_leaf.prefix, self._prefix_remainder = \
-                    last_leaf.prefix[:separation], last_leaf.prefix[separation:]
+                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
 
         self.prefix = ''
 
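str.rindex('\n') raised ValueError whenever the prefix ended its lines with a lone \r; taking the max of both rfind results locates the last line-break character of either kind, and the + 1 in the slices keeps that character on the parsed side. In isolation:

    prefix = '  # comment\r' + ' ' * 8
    separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
    assert separation == 11
    kept, remainder = prefix[:separation + 1], prefix[separation + 1:]
    assert kept == '  # comment\r' and remainder == ' ' * 8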
@@ -597,10 +594,9 @@ class _NodesTree(object):
                 # We basically removed the endmarker, but we are not allowed to
                 # remove the newline at the end of the line, otherwise it's
                 # going to be missing.
-                try:
-                    new_prefix = node.prefix[:node.prefix.rindex('\n') + 1]
-                except ValueError:
-                    pass
+                newline_index = max(node.prefix.rfind('\n'), node.prefix.rfind('\r'))
+                if newline_index > -1:
+                    new_prefix = node.prefix[:newline_index + 1]
                 # Endmarkers just distort all the checks below. Remove them.
                 break
 
@@ -186,7 +186,7 @@ def _create_token_collection(version_info):
 
     Bracket = '[][(){}]'
 
-    special_args = [r'\r?\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
     if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
     Special = group(*special_args)
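The old alternative r'\r?\n' matched \n and \r\n but never a lone \r, so a bare carriage return could not become a newline token; r'\r\n?' plus r'\n' covers all three forms. A quick comparison:

    import re

    newline = re.compile(r'\r\n?|\n')
    for candidate in ('\n', '\r\n', '\r'):
        assert newline.match(candidate).group() == candidate

    assert re.match(r'\r?\n', '\r') is None  # the old pattern misses bare \r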
@@ -194,16 +194,16 @@ def _create_token_collection(version_info):
     Funny = group(Operator, Bracket, Special)
 
     # First (or only) line of ' or " string.
-    ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                    group("'", r'\\\r?\n'),
-                    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                    group('"', r'\\\r?\n'))
+    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
+                    group("'", r'\\(?:\r\n?|\n)'),
+                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
+                    group('"', r'\\(?:\r\n?|\n)'))
     pseudo_extra_pool = [Comment, Triple]
     all_quotes = '"', "'", '"""', "'''"
     if fstring_prefixes:
         pseudo_extra_pool.append(FStringStart + group(*all_quotes))
 
-    PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
+    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
     PseudoToken = group(Whitespace, capture=True) + \
         group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
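Two things change in ContStr: the body of a single-line string now also stops at \r (previously a bare carriage return was swallowed into the string), and the explicit backslash continuation accepts \r\n, \r, or \n. A simplified sketch of the single-quoted branch, with string prefixes omitted:

    import re

    cont_str = re.compile(r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*(?:'|\\(?:\r\n?|\n))")
    assert cont_str.match("'abc'").group() == "'abc'"
    assert cont_str.match("'abc\\\n").group() == "'abc\\\n"  # continuation kept
    assert cont_str.match("'abc\rdef'") is None  # \r now terminates the line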
@@ -328,7 +328,7 @@ def _find_fstring_string(fstring_stack, line, lnum, pos):
         pass  # The string was not found.
 
     new_pos += len(string)
-    if allow_multiline and string.endswith('\n'):
+    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
         tos.previous_lines += string
         string = ''
     else:
@@ -545,7 +545,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             elif initial in single_quoted or \
                     token[:2] in single_quoted or \
                     token[:3] in single_quoted:
-                if token[-1] == '\n':  # continued string
+                if token[-1] in '\r\n':  # continued string
                     contstr_start = lnum, start
                     endprog = (endpats.get(initial) or endpats.get(token[1])
                                or endpats.get(token[2]))
@@ -571,7 +571,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                         indents.append(indent)
                         break
                 yield PythonToken(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
             else:
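Accepting '\\\r' means a backslash followed by a lone carriage return is treated as a continued statement, matching the \n and \r\n forms. An end-to-end sketch, assuming a parso build with this patch applied:

    import parso

    # Backslash continuation that uses a bare \r as its line break.
    code = 'x = 1 + \\\r2\n'
    module = parso.parse(code)
    assert module.get_code() == code  # parso trees round-trip exactly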
@@ -593,7 +593,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
     if contstr:
         yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
+        if contstr.endswith('\n') or contstr.endswith('\r'):
             new_line = True
 
     end_pos = lnum, max
@@ -958,4 +958,6 @@ def test_wrong_backslash(differ):
 def test_random_unicode_characters(differ):
     differ.initialize('')
     differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse('\r\r', parsers=1)
+    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
     differ.parse('')
@@ -106,14 +106,15 @@ def test_end_newlines():
 
 
 @pytest.mark.parametrize(('code', 'types'), [
-    ('\r', ['error_leaf', 'endmarker']),
-    ('\n\r', ['error_leaf', 'endmarker'])
+    ('\r', ['endmarker']),
+    ('\n\r', ['endmarker'])
 ])
 def test_carriage_return_at_end(code, types):
     """
-    By adding an artificial newline this creates weird side effects for
-    \r at the end of files that would normally be error leafs.
+    By adding an artificial newline this created weird side effects for
+    \r at the end of files.
     """
     tree = parse(code)
     assert tree.get_code() == code
     assert [c.type for c in tree.children] == types
+    assert tree.end_pos == (len(code) + 1, 0)
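End to end, the updated expectations mean a file consisting only of a carriage return parses cleanly, and the \r still advances the line counter:

    from parso import parse

    tree = parse('\r')
    assert [c.type for c in tree.children] == ['endmarker']
    assert tree.end_pos == (2, 0)  # \r ends line 1, so the file ends on line 2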