diff --git a/parso/python/diff.py b/parso/python/diff.py index c538091..25687fd 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -53,7 +53,7 @@ def _assert_valid_graph(node): content = previous_leaf.value + node.prefix previous_start_pos = previous_leaf.start_pos - if '\n' in content: + if '\n' in content or '\r' in content: splitted = split_lines(content) line = previous_start_pos[0] + len(splitted) - 1 actual = line, len(splitted[-1]) @@ -96,7 +96,7 @@ def _ends_with_newline(leaf, suffix=''): else: typ = leaf.type - return typ == 'newline' or suffix.endswith('\n') + return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r') def _flows_finished(pgen_grammar, stack): @@ -387,8 +387,8 @@ class DiffParser(object): # We are done here, only thing that can come now is an # endmarker or another dedented code block. typ, string, start_pos, prefix = next(tokens) - if '\n' in prefix: - prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) + if '\n' in prefix or '\r' in prefix: + prefix = re.sub(r'(<=\n|\r)[^\n\r]+$', '', prefix) else: prefix = '' yield PythonToken( @@ -463,9 +463,9 @@ class _NodesTreeNode(object): # the next line. That line is not fully parsed at this point. if _ends_with_newline(last_leaf, suffix): line -= 1 - line += suffix.count('\n') + line += len(split_lines(suffix)) - 1 - if suffix and not suffix.endswith('\n'): + if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'): # This is the end of a file (that doesn't end with a newline). line += 1 @@ -545,16 +545,13 @@ class _NodesTree(object): is_endmarker = last_leaf.type == self.endmarker_type self._prefix_remainder = '' if is_endmarker: - try: - separation = last_leaf.prefix.rindex('\n') + 1 - except ValueError: - pass - else: + separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r')) + if separation > -1: # Remove the whitespace part of the prefix after a newline. # That is not relevant if parentheses were opened. 
Always parse # until the end of a line. last_leaf.prefix, self._prefix_remainder = \ - last_leaf.prefix[:separation], last_leaf.prefix[separation:] + last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:] self.prefix = '' @@ -597,10 +594,9 @@ class _NodesTree(object): # We basically removed the endmarker, but we are not allowed to # remove the newline at the end of the line, otherwise it's # going to be missing. - try: - new_prefix = node.prefix[:node.prefix.rindex('\n') + 1] - except ValueError: - pass + newline_index = max(node.prefix.rfind('\n'), node.prefix.rfind('\r')) + if newline_index > -1: + new_prefix = node.prefix[:newline_index + 1] # Endmarkers just distort all the checks below. Remove them. break diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 4d6a71e..2b72cb4 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -186,7 +186,7 @@ def _create_token_collection(version_info): Bracket = '[][(){}]' - special_args = [r'\r?\n', r'[:;.,@]'] + special_args = [r'\r\n?', r'\n', r'[:;.,@]'] if version_info >= (3, 0): special_args.insert(0, r'\.\.\.') Special = group(*special_args) @@ -194,16 +194,16 @@ def _create_token_collection(version_info): Funny = group(Operator, Bracket, Special) # First (or only) line of ' or " string. 
- ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) + ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" + + group("'", r'\\(?:\r\n?|\n)'), + StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' + + group('"', r'\\(?:\r\n?|\n)')) pseudo_extra_pool = [Comment, Triple] all_quotes = '"', "'", '"""', "'''" if fstring_prefixes: pseudo_extra_pool.append(FStringStart + group(*all_quotes)) - PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool) + PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool) PseudoToken = group(Whitespace, capture=True) + \ group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) @@ -328,7 +328,7 @@ def _find_fstring_string(fstring_stack, line, lnum, pos): pass # The string was not found. new_pos += len(string) - if allow_multiline and string.endswith('\n'): + if allow_multiline and (string.endswith('\n') or string.endswith('\r')): tos.previous_lines += string string = '' else: @@ -545,7 +545,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): elif initial in single_quoted or \ token[:2] in single_quoted or \ token[:3] in single_quoted: - if token[-1] == '\n': # continued string + if token[-1] in '\r\n': # continued string contstr_start = lnum, start endprog = (endpats.get(initial) or endpats.get(token[1]) or endpats.get(token[2])) @@ -571,7 +571,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): indents.append(indent) break yield PythonToken(NAME, token, spos, prefix) - elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt + elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt additional_prefix += prefix + line[start:] break else: @@ -593,7 +593,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if contstr: yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) - if contstr.endswith('\n'): + if 
contstr.endswith('\n') or contstr.endswith('\r'): new_line = True end_pos = lnum, max diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py index 172e3df..e36f4a1 100644 --- a/test/test_diff_parser.py +++ b/test/test_diff_parser.py @@ -958,4 +958,6 @@ def test_wrong_backslash(differ): def test_random_unicode_characters(differ): differ.initialize('') differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True) + differ.parse('\r\r', parsers=1) + differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) differ.parse('') diff --git a/test/test_get_code.py b/test/test_get_code.py index 51e6401..2f2260d 100644 --- a/test/test_get_code.py +++ b/test/test_get_code.py @@ -106,14 +106,15 @@ def test_end_newlines(): @pytest.mark.parametrize(('code', 'types'), [ - ('\r', ['error_leaf', 'endmarker']), - ('\n\r', ['error_leaf', 'endmarker']) + ('\r', ['endmarker']), + ('\n\r', ['endmarker']) ]) def test_carriage_return_at_end(code, types): """ - By adding an artificial newline this creates weird side effects for - \r at the end of files that would normally be error leafs. + By adding an artificial newline this created weird side effects for + \r at the end of files. """ tree = parse(code) assert tree.get_code() == code assert [c.type for c in tree.children] == types + assert tree.end_pos == (len(code) + 1, 0)