Remove the remove_last_newline hack and build that logic into error recovery instead.

Tests are passing except for diff parser tests.
Dave Halter
2017-05-31 21:24:24 +02:00
parent b367058af6
commit 814b16cc6c
10 changed files with 73 additions and 96 deletions
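
A sketch of the intended behavior after this change, based on the updated docstring in the first hunk below (parso.parse as used in the test suite; the names mirror the doctest):

    import parso

    # No trailing '\n' in the source: previously an artificial newline was
    # appended before tokenizing and stripped out of the tree afterwards.
    module = parso.parse('hello + 1')
    expr = module.children[0]
    assert expr.type == 'arith_expr'       # no simple_stmt/newline wrapper
    assert expr.get_code() == 'hello + 1'  # exact round-trip of the source
    assert expr.end_pos == (1, 9)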

View File

@@ -5,16 +5,18 @@ versions, file caching, round-trips and other stuff:
 >>> from parso import load_grammar
 >>> grammar = load_grammar(version='2.7')
 >>> module = grammar.parse('hello + 1')
->>> stmt = module.children[0]
->>> stmt
-PythonNode(simple_stmt, [PythonNode(arith_expr, [...]), <Newline: ''>])
->>> stmt.get_code()
+>>> expr = module.children[0]
+>>> expr
+PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
+>>> expr.get_code()
 'hello + 1'
->>> name = stmt.children[0].children[0]
+>>> name = expr.children[0]
 >>> name
 <Name: hello@1,0>
 >>> name.end_pos
 (1, 5)
+>>> expr.end_pos
+(1, 9)
 """

 from parso.parser import ParserSyntaxError

View File

@@ -6,7 +6,6 @@ import re
 from parso._compatibility import FileNotFoundError, unicode
 from parso.pgen2.pgen import generate_grammar
 from parso.utils import splitlines, source_to_unicode
-from parso.python.parser import remove_last_newline
 from parso.python.diff import DiffParser
 from parso.tokenize import tokenize_lines
 from parso.cache import parser_cache, load_module, save_module
@@ -85,7 +84,7 @@ class Grammar(object):
         with open(path, 'rb') as f:
             code = source_to_unicode(f.read())

-        lines = tokenize_lines = splitlines(code, keepends=True)
+        lines = splitlines(code, keepends=True)
         if diff_cache:
             if self._diff_parser is None:
                 raise TypeError("You have to define a diff parser to be able "
@@ -108,19 +107,10 @@ class Grammar(object):
                             cache_path=cache_path)
                 return new_node

-        added_newline = not code.endswith('\n')
-        if added_newline:
-            code += '\n'
-            tokenize_lines = list(tokenize_lines)
-            tokenize_lines[-1] += '\n'
-            tokenize_lines.append('')
-
-        tokens = self._tokenizer(tokenize_lines)
+        tokens = self._tokenizer(lines)

         p = self._parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
         root_node = p.parse(tokens=tokens)
-        if added_newline:
-            remove_last_newline(root_node)

         if cache or diff_cache:
             save_module(self._hashed, path, root_node, lines, pickling=cache,

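The tokenizer now consumes the splitlines() result unmodified. The invariant this relies on (parso.utils.splitlines, imported above): with keepends=True there is always one final entry for whatever follows the last newline, so the token stream can distinguish "ends with a newline" from "ends mid-line" without mutating its input. A quick sketch:

    from parso.utils import splitlines

    # The final entry is '' when the code ends with '\n'.
    assert splitlines('x = 1\n', keepends=True) == ['x = 1\n', '']
    assert splitlines('x = 1', keepends=True) == ['x = 1']
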
View File

@@ -6,8 +6,8 @@ class Normalizer(object):
         >>> normalizer = Normalizer()
         >>> @normalizer.register_rule
-        >>> class MyRule(Rule):
-        >>>     error_code = 42
+        ... class MyRule(Rule):
+        ...     error_code = 42
         """
         try:
             rules = cls.rules

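The docstring fix above is purely doctest syntax: a compound statement has to continue with `...`, while a second `>>>` starts a new statement, so the original example could never run. Flattened to plain code (assuming Normalizer and Rule are importable from parso.normalizer, as the docstring implies):

    from parso.normalizer import Normalizer, Rule  # assumed import path

    normalizer = Normalizer()

    @normalizer.register_rule
    class MyRule(Rule):
        error_code = 42
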
View File

@@ -11,7 +11,7 @@ from collections import namedtuple
 import logging

 from parso.utils import splitlines
-from parso.python.parser import Parser, remove_last_newline
+from parso.python.parser import Parser
 from parso.python.tree import EndMarker
 from parso.tokenize import (tokenize_lines, NEWLINE, TokenInfo,
                             ENDMARKER, INDENT, DEDENT)
@@ -120,14 +120,6 @@ class DiffParser(object):
         self._module._used_names = None
         self._parser_lines_new = new_lines
-        self._added_newline = False
-        if new_lines[-1] != '':
-            # The Python grammar needs a newline at the end of a file, but for
-            # everything else we keep working with new_lines here.
-            self._parser_lines_new = list(new_lines)
-            self._parser_lines_new[-1] += '\n'
-            self._parser_lines_new.append('')
-            self._added_newline = True
         self._reset()
@@ -141,7 +133,7 @@ class DiffParser(object):
             logging.debug('diff %s old[%s:%s] new[%s:%s]',
                           operation, i1 + 1, i2, j1 + 1, j2)

-            if j2 == line_length + int(self._added_newline):
+            if j2 == line_length:
                 # The empty part after the last newline is not relevant.
                 j2 -= 1
@@ -159,9 +151,6 @@ class DiffParser(object):
         # changed module.
         self._nodes_stack.close()

-        if self._added_newline:
-            remove_last_newline(self._module)
-
         last_pos = self._module.end_pos[0]
         if last_pos != line_length:
             current_lines = splitlines(self._module.get_code(), keepends=True)

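DiffParser compares the two keepends line lists with difflib-style opcodes, which is where the (operation, i1, i2, j1, j2) tuples in the logging call come from. Because a file ending in '\n' splits into one extra empty entry, the last opcode can reach one line past the real content, hence the `j2 -= 1`. A standalone sketch:

    import difflib

    old_lines = ['a\n', 'b\n', '']   # parso-style keepends split of 'a\nb\n'
    new_lines = ['a\n', 'c\n', '']
    matcher = difflib.SequenceMatcher(None, old_lines, new_lines)
    for operation, i1, i2, j1, j2 in matcher.get_opcodes():
        # The final '' entry takes part in the opcodes like any other line.
        print(operation, i1, i2, j1, j2)
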
View File

@@ -3,7 +3,7 @@ from parso import tokenize
 from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
                          STRING, tok_name)
 from parso.parser import BaseParser
-from parso.utils import splitlines
+from parso.pgen2.parse import token_to_ilabel


 class Parser(BaseParser):
@@ -128,6 +128,51 @@ class Parser(BaseParser):
         allows using different grammars (even non-Python). However, error
         recovery is purely written for Python.
         """
+        def get_symbol_and_nodes(stack):
+            for dfa, state, (type_, nodes) in stack:
+                symbol = pgen_grammar.number2symbol[type_]
+                yield symbol, nodes
+
+        if typ == ENDMARKER:
+            def reduce_stack(states, newstate):
+                # reduce
+                state = newstate
+                while states[state] == [(0, state)]:
+                    self.pgen_parser._pop()
+
+                    dfa, state, (type_, nodes) = stack[-1]
+                    states, first = dfa
+
+            # In Python, statements need to end with a newline. But since it's
+            # possible (and valid in Python) that there's no newline at the
+            # end of a file, we have to recover even if the user doesn't want
+            # error recovery.
+            ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+
+            dfa, state, (type_, nodes) = stack[-1]
+            symbol = pgen_grammar.number2symbol[type_]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                if ilabel == i:
+                    if symbol == 'simple_stmt':
+                        # This is basically shifting
+                        stack[-1] = (dfa, newstate, (type_, nodes))
+
+                        reduce_stack(states, newstate)
+                        add_token_callback(typ, value, start_pos, prefix)
+                        return
+                    break
+
         if not self._error_recovery:
             return super(Parser, self).error_recovery(
                 pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
@@ -136,9 +181,8 @@ class Parser(BaseParser):
         def current_suite(stack):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
-            for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
+            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
-                symbol = pgen_grammar.number2symbol[type_]
                 if symbol == 'file_input':
                     break
                 elif symbol == 'suite' and len(nodes) > 1:
@@ -191,58 +235,4 @@ class Parser(BaseParser):
                 self._indent_counter -= 1
             elif typ == INDENT:
                 self._indent_counter += 1
             yield typ, value, start_pos, prefix
-
-
-def remove_last_newline(node):
-    def calculate_end_pos(leaf, text):
-        if leaf is None:
-            end_pos = (1, 0)
-        else:
-            end_pos = leaf.end_pos
-
-        lines = splitlines(text, keepends=True)
-        if len(lines) == 1:
-            return end_pos[0], end_pos[1] + len(lines[0])
-        else:
-            return end_pos[0] + len(lines) - 1, len(lines[-1])
-
-    endmarker = node.children[-1]
-    # The newline is either in the endmarker as a prefix or the previous
-    # leaf as a newline token.
-    prefix = endmarker.prefix
-    leaf = endmarker.get_previous_leaf()
-    if prefix:
-        text = prefix
-    else:
-        if leaf is None:
-            raise ValueError("You're trying to remove a newline from an empty module.")
-
-        text = leaf.value
-
-    if not text.endswith('\n'):
-        raise ValueError("There's no newline at the end, cannot remove it.")
-
-    text = text[:-1]
-    if text and text[-1] == '\r':
-        # By adding an artificial newline this creates weird side effects for
-        # \r at the end of files that would normally be error leafs. Try to
-        # correct that here.
-        text = text[:-1]
-        start_pos = calculate_end_pos(leaf, text)
-        error_token = tree.PythonErrorLeaf('errortoken', '\r', start_pos, prefix=text)
-        node.children.insert(-2, error_token)
-
-        # Cleanup
-        leaf = error_token
-        text = ''
-
-    if prefix:
-        endmarker.prefix = text
-        endmarker.start_pos = calculate_end_pos(leaf, text)
-    else:
-        leaf.value = text
-        endmarker.start_pos = leaf.end_pos

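What the new ENDMARKER branch buys in practice: a file that stops mid-statement still parses, and no phantom newline node shows up among the children (compare the test change further below). A sketch:

    import parso

    children = parso.parse('(1 for def').children
    # The stray 'newline' entry is gone from the expected types.
    assert [c.type for c in children] == ['error_node', 'error_node', 'endmarker']
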
View File

@@ -17,7 +17,7 @@ class PrefixPart(object):
 _comment = r'#[^\n\r\f]*'
-_backslash = r'\\\r?\n?'
+_backslash = r'\\\r?\n'
 _whitespace = r' +'
 _tabs = r'\t+'
 _newline = r'\r?\n'

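The tightened _backslash pattern means a backslash only counts as a prefix part when it actually continues a line; a lone '\' at the end of input no longer matches (see the two test removals at the bottom of this diff). A stdlib-only sketch of the difference:

    import re

    old_backslash = re.compile(r'\\\r?\n?')
    new_backslash = re.compile(r'\\\r?\n')

    assert old_backslash.match('\\')           # a lone backslash used to match
    assert new_backslash.match('\\') is None   # now the newline is required
    assert new_backslash.match('\\\n') and new_backslash.match('\\\r\n')
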
View File

@@ -251,7 +251,9 @@ def tokenize_lines(lines):
                 txt = line[pos:]
                 if txt.endswith('\n'):
                     new_line = True
-                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
+                # TODO remove prefix?
+                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                additional_prefix = ''
                 break

             prefix = additional_prefix + pseudomatch.group(1)
@@ -259,6 +261,12 @@ def tokenize_lines(lines):
             start, pos = pseudomatch.span(2)
             spos = (lnum, start)
             token = pseudomatch.group(2)
+            if token == '':
+                assert prefix
+                additional_prefix = prefix
+                # This means that we have a line with whitespace/comments at
+                # the end, which just results in an endmarker.
+                break
             initial = token[0]

             if new_line and initial not in '\r\n#':

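With the new `token == ''` branch, a final line holding only whitespace or a comment emits no token of its own; its text is carried along as additional_prefix and ends up on the ENDMARKER. A sketch using this commit's imports (exact TokenInfo repr aside):

    from parso.tokenize import tokenize_lines
    from parso.utils import splitlines

    code = 'x = 1\n# only a comment on the last line'
    tokens = list(tokenize_lines(splitlines(code, keepends=True)))
    # The comment line produced no token of its own; it travels as the
    # prefix of the final ENDMARKER.
    print(tokens[-1])
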
View File

@@ -81,7 +81,7 @@ def test_incomplete_list_comprehension():
     # parser only valid statements generate one.
     children = parse('(1 for def').children
     assert [c.type for c in children] == \
-        ['error_node', 'error_node', 'newline', 'endmarker']
+        ['error_node', 'error_node', 'endmarker']


 def test_newline_positions():
@@ -153,7 +153,7 @@ def test_python2_octal():
 def test_python3_octal():
     module = parse('0o660')
     if py_version >= 30:
-        assert module.children[0].children[0].type == 'number'
+        assert module.children[0].type == 'number'
     else:
         assert module.children[0].type == 'error_node'

View File

@@ -63,7 +63,7 @@ class TestsFunctionAndLambdaParsing(object):
 def test_end_pos_line():
     # jedi issue #150
-    s = "x()\nx( )\nx( )\nx ( )"
+    s = "x()\nx( )\nx( )\nx ( )\n"

     module = parse(s)
     for i, simple_stmt in enumerate(module.children[:-1]):
         expr_stmt = simple_stmt.children[0]

View File

@@ -12,7 +12,6 @@ import parso
     (' \f ', [' ', '\f', ' ']),
     (' \f ', [' ', '\f', ' ']),
     (' \r\n', [' ', '\r\n']),
-    ('\\', ['\\']),
     ('\\\n', ['\\\n']),
     ('\\\r\n', ['\\\r\n']),
     ('\t\t\n\t', ['\t\t', '\n', '\t']),
@@ -43,7 +42,6 @@ def test_simple_prefix_splitting(string, tokens):
     ('\r\n', ['newline']),
     ('\f', ['formfeed']),
     ('\\\n', ['backslash']),
-    ('\r', ['newline']),
 ])
 def test_prefix_splitting_types(string, types):
     tree = parso.parse(string)