From 7000dd24d7abcaf472cf324d1875744e53ab8dcf Mon Sep 17 00:00:00 2001
From: gousaiyang
Date: Sat, 29 May 2021 17:56:50 -0700
Subject: [PATCH] Fix line endings support at various locations

---
 parso/python/pep8.py     | 14 ++++++++------
 parso/python/prefix.py   |  8 ++++----
 parso/python/tokenize.py |  2 +-
 parso/utils.py           |  2 +-
 test/test_pep8.py        |  2 ++
 test/test_prefix.py      |  3 ++-
 test/test_utils.py       |  4 ++++
 7 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/parso/python/pep8.py b/parso/python/pep8.py
index b277d99..c492dae 100644
--- a/parso/python/pep8.py
+++ b/parso/python/pep8.py
@@ -74,7 +74,7 @@ class BracketNode(IndentationNode):
         parent_indentation = n.indentation
 
         next_leaf = leaf.get_next_leaf()
-        if '\n' in next_leaf.prefix:
+        if '\n' in next_leaf.prefix or '\r' in next_leaf.prefix:
             # This implies code like:
             # foobarbaz(
             #     a,
@@ -116,7 +116,7 @@ class ImplicitNode(BracketNode):
         self.type = IndentationTypes.IMPLICIT
 
         next_leaf = leaf.get_next_leaf()
-        if leaf == ':' and '\n' not in next_leaf.prefix:
+        if leaf == ':' and '\n' not in next_leaf.prefix and '\r' not in next_leaf.prefix:
             self.indentation += ' '
 
 
@@ -216,8 +216,8 @@ class PEP8Normalizer(ErrorFinder):
             endmarker = node.children[-1]
             prev = endmarker.get_previous_leaf()
             prefix = endmarker.prefix
-            if (not prefix.endswith('\n') and (
-                    prefix or prev is None or prev.value != '\n')):
+            if (not prefix.endswith('\n') and not prefix.endswith('\r') and (
+                    prefix or prev is None or prev.value not in {'\n', '\r\n', '\r'})):
                 self.add_issue(endmarker, 292, "No newline at end of file")
 
         if typ in _IMPORT_TYPES:
@@ -465,7 +465,8 @@ class PEP8Normalizer(ErrorFinder):
                            + self._config.indentation:
                        self.add_issue(part, 129, "Line with same indent as next logical block")
                elif indentation != should_be_indentation:
-                    if not self._check_tabs_spaces(spacing) and part.value != '\n':
+                    if not self._check_tabs_spaces(spacing) and part.value not in \
+                            {'\n', '\r\n', '\r'}:
                        if value in '])}':
                            if node.type == IndentationTypes.VERTICAL_BRACKET:
                                self.add_issue(
@@ -652,7 +653,8 @@ class PEP8Normalizer(ErrorFinder):
            else:
                prev_spacing = self._previous_spacing
                if prev in _ALLOW_SPACE and spaces != prev_spacing.value \
-                        and '\n' not in self._previous_leaf.prefix:
+                        and '\n' not in self._previous_leaf.prefix \
+                        and '\r' not in self._previous_leaf.prefix:
                    message = "Whitespace before operator doesn't match with whitespace after"
                    self.add_issue(spacing, 229, message)
 
diff --git a/parso/python/prefix.py b/parso/python/prefix.py
index 1e08b41..6b8d59e 100644
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -18,7 +18,7 @@ class PrefixPart:
 
     @property
     def end_pos(self) -> Tuple[int, int]:
-        if self.value.endswith('\n'):
+        if self.value.endswith('\n') or self.value.endswith('\r'):
            return self.start_pos[0] + 1, 0
        if self.value == unicode_bom:
            # The bom doesn't have a length at the start of a Python file.
@@ -50,8 +50,8 @@ class PrefixPart:
 
 
 _comment = r'#[^\n\r\f]*'
-_backslash = r'\\\r?\n'
-_newline = r'\r?\n'
+_backslash = r'\\\r?\n|\\\r'
+_newline = r'\r?\n|\r'
 _form_feed = r'\f'
 _only_spacing = '$'
 _spacing = r'[ \t]*'
@@ -94,7 +94,7 @@ def split_prefix(leaf, start_pos):
            bom = True
        start = match.end(0)
 
-        if value.endswith('\n'):
+        if value.endswith('\n') or value.endswith('\r'):
            line += 1
            column = -start
 
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index b65eaac..e3ffe44 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -548,7 +548,7 @@ def tokenize_lines(
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
-                assert not token.endswith("\n")
+                assert not token.endswith("\n") and not token.endswith("\r")
                if fstring_stack and fstring_stack[-1].is_in_expr():
                    # `#` is not allowed in f-string expressions
                    yield PythonToken(ERRORTOKEN, initial, spos, prefix)
diff --git a/parso/utils.py b/parso/utils.py
index e3d9038..6ad1ea2 100644
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -92,7 +92,7 @@ def python_bytes_to_unicode(
            # UTF-8 byte-order mark
            return 'utf-8'
 
-        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
+        first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
diff --git a/test/test_pep8.py b/test/test_pep8.py
index 43ae767..06cffb4 100644
--- a/test/test_pep8.py
+++ b/test/test_pep8.py
@@ -15,6 +15,8 @@ def test_eof_newline():
        assert issue.code == 292
 
    assert not issues('asdf = 1\n')
+    assert not issues('asdf = 1\r\n')
+    assert not issues('asdf = 1\r')
    assert_issue('asdf = 1')
    assert_issue('asdf = 1\n# foo')
    assert_issue('# foobar')
diff --git a/test/test_prefix.py b/test/test_prefix.py
index a6e254b..58c1dcf 100644
--- a/test/test_prefix.py
+++ b/test/test_prefix.py
@@ -19,6 +19,7 @@ unicode_bom = BOM_UTF8.decode('utf-8')
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
+    (' \r', ['\r', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
@@ -34,7 +35,7 @@ def test_simple_prefix_splitting(string, tokens):
        assert pt.value == expected
 
        # Calculate the estimated end_pos
-        if expected.endswith('\n'):
+        if expected.endswith('\n') or expected.endswith('\r'):
            end_pos = start_pos[0] + 1, 0
        else:
            end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
diff --git a/test/test_utils.py b/test/test_utils.py
index 06fbe79..300a54e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -74,6 +74,10 @@ def test_utf8_bom():
    ('code', 'errors'), [
        (b'# coding: wtf-12\nfoo', 'strict'),
        (b'# coding: wtf-12\nfoo', 'replace'),
+        (b'# coding: wtf-12\r\nfoo', 'strict'),
+        (b'# coding: wtf-12\r\nfoo', 'replace'),
+        (b'# coding: wtf-12\rfoo', 'strict'),
+        (b'# coding: wtf-12\rfoo', 'replace'),
    ]
)
def test_bytes_to_unicode_failing_encoding(code, errors):
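---

A minimal sketch of the behaviour the parso/utils.py change targets, assuming the patched
parso is importable; the byte string and the latin-1 declaration below are illustrative
only and not taken from the patch or its tests:

    from parso.utils import python_bytes_to_unicode

    # Old-Mac-style '\r' line endings; the coding declaration sits on line 1.
    # The previous pattern only recognized '\n' as a line break, so the first
    # two lines came back empty, the declaration was never inspected, and the
    # bytes were decoded with the default UTF-8 codec instead.
    source = b'# -*- coding: latin-1 -*-\rname = "\xe9"\r'

    text = python_bytes_to_unicode(source)
    print(repr(text))  # the \xe9 byte should now decode via latin-1

The added parametrized cases in test/test_utils.py exercise the same code path with
'\r\n' and lone '\r' terminators on the coding line.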