Remove support for specialized treatment of form feeds

This is a very intentional change. Previously form feeds were handled very
poorly and sometimes were not counted as indentation. This obviously makes
sense. But at the same time indentation is very tricky to deal with (both for
editors and parso).

Especially in the diff parser this led to a lot of very weird issues. The
decision probably makes sense since:

1. Almost nobody uses form feeds in the first place.
2. People that use form feeds like Barry Warsaw often put a newline after them.
   (e.g. Python's email.__init__)
3. If you write an editor you want to be able to identify a unicode character
   with a clear line/column. This would not be the case if form feeds were just
   ignored when counting.

Form feeds will still work in Jedi, will not cause parse errors and in general
you should be fine using them. It might just cause Jedi to count them as
indentation **if** you use them like '\f  foo()'. This is however confusing for
most editors anyway. It leads to a weird display e.g. in VIM, even if it's
perfectly valid code in Python.

Since parso is a code analysis parser and not the language's parser I think it's
fine to ignore this edge case.
This commit is contained in:
Dave Halter
2020-04-04 15:38:10 +02:00
parent 1047204654
commit 734a4b0e67
5 changed files with 37 additions and 14 deletions

View File

@@ -602,7 +602,8 @@ class _NodesTree(object):
is_endmarker = last_leaf.type == 'endmarker' is_endmarker = last_leaf.type == 'endmarker'
self._prefix_remainder = '' self._prefix_remainder = ''
if is_endmarker: if is_endmarker:
separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r')) prefix = last_leaf.prefix
separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
if separation > -1: if separation > -1:
# Remove the whitespace part of the prefix after a newline. # Remove the whitespace part of the prefix after a newline.
# That is not relevant if parentheses were opened. Always parse # That is not relevant if parentheses were opened. Always parse

View File

@@ -522,12 +522,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None):
if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None): if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
new_line = False new_line = False
if paren_level == 0 and not fstring_stack: if paren_level == 0 and not fstring_stack:
i = 0
indent_start = start indent_start = start
while line[i] == '\f':
i += 1
# TODO don't we need to change spos as well?
indent_start -= 1
if indent_start > indents[-1]: if indent_start > indents[-1]:
yield PythonToken(INDENT, '', spos, '') yield PythonToken(INDENT, '', spos, '')
indents.append(indent_start) indents.append(indent_start)

View File

@@ -1406,3 +1406,24 @@ def test_error_dedent_in_function(differ):
''') ''')
differ.initialize(code1) differ.initialize(code1)
differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True)
def test_x(differ):
code1 = dedent('''\
@bla
async def foo():
1
yield from []
return
return ''
''')
code2 = dedent('''\
@bla
async def foo():
1
\x0cimport
return
return ''
''')
differ.initialize(code1)
differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True)

View File

@@ -29,13 +29,17 @@ def _invalid_syntax(code, version=None, **kwargs):
print(module.children) print(module.children)
def test_formfeed(each_py2_version): def test_formfeed(each_version):
s = u"""print 1\n\x0Cprint 2\n""" s = u"foo\n\x0c\nfoo\n"
t = _parse(s, each_py2_version) t = _parse(s, each_version)
assert t.children[0].children[0].type == 'print_stmt' assert t.children[0].children[0].type == 'name'
assert t.children[1].children[0].type == 'print_stmt' assert t.children[1].children[0].type == 'name'
s = u"""1\n\x0C\x0C2\n""" s = u"1\n\x0c\x0c\n2\n"
t = _parse(s, each_py2_version) t = _parse(s, each_version)
with pytest.raises(ParserSyntaxError):
s = u"\n\x0c2\n"
_parse(s, each_version)
def test_matrix_multiplication_operator(works_ge_py35): def test_matrix_multiplication_operator(works_ge_py35):

View File

@@ -332,11 +332,13 @@ def test_brackets_no_indentation():
def test_form_feed(): def test_form_feed():
error_token, endmarker = _get_token_list(dedent('''\ indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\
\f"""''')) \f"""'''))
assert error_token.prefix == '\f' assert error_token.prefix == '\f'
assert error_token.string == '"""' assert error_token.string == '"""'
assert endmarker.prefix == '' assert endmarker.prefix == ''
assert indent.type == INDENT
assert dedent_.type == DEDENT
def test_carriage_return(): def test_carriage_return():