mirror of https://github.com/davidhalter/parso.git (synced 2025-12-07 05:14:29 +08:00)
Remove the remove_last_newline hack and build that behavior into error recovery instead.
Tests are passing except for diff parser tests.
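
The hack in question: because the Python grammar requires statements to end with a newline, parso appended an artificial '\n' before tokenizing and later called remove_last_newline() to strip it back out of the tree. A minimal sketch of the behavior this commit has to preserve, using only the public API:

    import parso

    # Source without a trailing newline is valid Python and must
    # round-trip exactly, with no artificial newline left in the tree.
    module = parso.parse('hello + 1')
    assert module.get_code() == 'hello + 1'
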
@@ -5,16 +5,18 @@ versions, file caching, round-trips and other stuff:
 >>> from parso import load_grammar
 >>> grammar = load_grammar(version='2.7')
 >>> module = grammar.parse('hello + 1')
->>> stmt = module.children[0]
->>> stmt
-PythonNode(simple_stmt, [PythonNode(arith_expr, [...]), <Newline: ''>])
->>> stmt.get_code()
+>>> expr = module.children[0]
+>>> expr
+PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
+>>> expr.get_code()
 'hello + 1'
->>> name = stmt.children[0].children[0]
+>>> name = expr.children[0]
 >>> name
 <Name: hello@1,0>
 >>> name.end_pos
 (1, 5)
+>>> expr.end_pos
+(1, 9)
 """

 from parso.parser import ParserSyntaxError
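
The updated doctest also shows parso's position convention: rows are 1-based, columns are 0-based, so 'hello + 1' ends at (1, 9). The same check, runnable directly:

    import parso

    expr = parso.parse('hello + 1').children[0]
    assert expr.end_pos == (1, 9)   # row 1, column 9 (one past the final '1')
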
@@ -6,7 +6,6 @@ import re
 from parso._compatibility import FileNotFoundError, unicode
 from parso.pgen2.pgen import generate_grammar
 from parso.utils import splitlines, source_to_unicode
-from parso.python.parser import remove_last_newline
 from parso.python.diff import DiffParser
 from parso.tokenize import tokenize_lines
 from parso.cache import parser_cache, load_module, save_module
@@ -85,7 +84,7 @@ class Grammar(object):
             with open(path, 'rb') as f:
                 code = source_to_unicode(f.read())

-        lines = tokenize_lines = splitlines(code, keepends=True)
+        lines = splitlines(code, keepends=True)
         if diff_cache:
             if self._diff_parser is None:
                 raise TypeError("You have to define a diff parser to be able "
@@ -108,19 +107,10 @@ class Grammar(object):
                             cache_path=cache_path)
                 return new_node

-        added_newline = not code.endswith('\n')
-        if added_newline:
-            code += '\n'
-            tokenize_lines = list(tokenize_lines)
-            tokenize_lines[-1] += '\n'
-            tokenize_lines.append('')
-
-        tokens = self._tokenizer(tokenize_lines)
+        tokens = self._tokenizer(lines)

         p = self._parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
         root_node = p.parse(tokens=tokens)
-        if added_newline:
-            remove_last_newline(root_node)

         if cache or diff_cache:
             save_module(self._hashed, path, root_node, lines, pickling=cache,
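
The deleted block leaned on the shape of splitlines(code, keepends=True): parso's splitlines yields a trailing empty string exactly when the code ends with a newline, which is what the hack faked by appending '\n' and ''. A quick illustration of that invariant (assuming parso.utils.splitlines keeps this behavior):

    from parso.utils import splitlines

    assert splitlines('x = 1', keepends=True) == ['x = 1']
    assert splitlines('x = 1\n', keepends=True) == ['x = 1\n', '']
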
@@ -6,8 +6,8 @@ class Normalizer(object):

        >>> normalizer = Normalizer()
        >>> @normalizer.register_rule
-       >>> class MyRule(Rule):
-       >>>     error_code = 42
+       ... class MyRule(Rule):
+       ...     error_code = 42
        """
        try:
            rules = cls.rules
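
This docstring fix matters because doctest runs every `>>>` line as its own statement; a decorator plus a class definition is one compound statement, so its continuation lines must use `...` or the example fails with a SyntaxError. Purely illustrative:

    >>> @normalizer.register_rule      # one statement spanning three lines
    ... class MyRule(Rule):
    ...     error_code = 42
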
@@ -11,7 +11,7 @@ from collections import namedtuple
 import logging

 from parso.utils import splitlines
-from parso.python.parser import Parser, remove_last_newline
+from parso.python.parser import Parser
 from parso.python.tree import EndMarker
 from parso.tokenize import (tokenize_lines, NEWLINE, TokenInfo,
                             ENDMARKER, INDENT, DEDENT)
@@ -120,14 +120,6 @@ class DiffParser(object):
         self._module._used_names = None

         self._parser_lines_new = new_lines
-        self._added_newline = False
-        if new_lines[-1] != '':
-            # The Python grammar needs a newline at the end of a file, but for
-            # everything else we keep working with new_lines here.
-            self._parser_lines_new = list(new_lines)
-            self._parser_lines_new[-1] += '\n'
-            self._parser_lines_new.append('')
-            self._added_newline = True

         self._reset()

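
With the padding gone, DiffParser always sees the caller's lines unmodified. For orientation, this code path is reached through Grammar.parse with diff_cache=True (a sketch; whether an unsaved module can be diffed without a path is an assumption here):

    from parso import load_grammar

    grammar = load_grammar()
    # The first call fills the cache; the second can reuse the old tree
    # and reparse only the lines that changed.
    grammar.parse('x = 1\ny = 2\n', diff_cache=True)
    module = grammar.parse('x = 1\ny = 3\n', diff_cache=True)
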
@@ -141,7 +133,7 @@ class DiffParser(object):
             logging.debug('diff %s old[%s:%s] new[%s:%s]',
                           operation, i1 + 1, i2, j1 + 1, j2)

-            if j2 == line_length + int(self._added_newline):
+            if j2 == line_length:
                 # The empty part after the last newline is not relevant.
                 j2 -= 1

@@ -159,9 +151,6 @@ class DiffParser(object):
         # changed module.
         self._nodes_stack.close()

-        if self._added_newline:
-            remove_last_newline(self._module)
-
         last_pos = self._module.end_pos[0]
         if last_pos != line_length:
             current_lines = splitlines(self._module.get_code(), keepends=True)
@@ -3,7 +3,7 @@ from parso import tokenize
 from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
                          STRING, tok_name)
 from parso.parser import BaseParser
-from parso.utils import splitlines
+from parso.pgen2.parse import token_to_ilabel


 class Parser(BaseParser):
@@ -128,6 +128,51 @@ class Parser(BaseParser):
         allows using different grammars (even non-Python). However, error
         recovery is purely written for Python.
         """
+        def get_symbol_and_nodes(stack):
+            for dfa, state, (type_, nodes) in stack:
+                symbol = pgen_grammar.number2symbol[type_]
+                yield symbol, nodes
+
+        if typ == ENDMARKER:
+            def reduce_stack(states, newstate):
+                # reduce
+                state = newstate
+                while states[state] == [(0, state)]:
+                    self.pgen_parser._pop()
+
+                    dfa, state, (type_, nodes) = stack[-1]
+                    states, first = dfa
+
+
+            # In Python statements need to end with a newline. But since it's
+            # possible (and valid in Python) that there's no newline at the
+            # end of a file, we have to recover even if the user doesn't want
+            # error recovery.
+            #print('x', pprint.pprint(stack))
+            ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+
+            dfa, state, (type_, nodes) = stack[-1]
+            symbol = pgen_grammar.number2symbol[type_]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                if ilabel == i:
+                    if symbol == 'simple_stmt':
+                        # This is basically shifting
+                        stack[-1] = (dfa, newstate, (type_, nodes))
+
+                        reduce_stack(states, newstate)
+                        add_token_callback(typ, value, start_pos, prefix)
+                        return
+                    # Check if we're at the right point
+                    #for symbol, nodes in get_symbol_and_nodes(stack):
+                    #    self.pgen_parser._pop()
+
+                    #break
+                    break
+            #symbol = pgen_grammar.number2symbol[type_]
+
         if not self._error_recovery:
             return super(Parser, self).error_recovery(
                 pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
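
The net effect of the added block: when the ENDMARKER arrives and the pending tokens form a complete simple_stmt, the parser shifts as if a NEWLINE had been seen, so files without a trailing newline parse cleanly even when the caller disabled error recovery. Observable through the public API (illustrative, not from the diff):

    import parso

    module = parso.parse('foo = 1')           # no trailing newline
    assert module.get_code() == 'foo = 1'     # nothing artificial inserted
    assert module.children[-1].type == 'endmarker'
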
@@ -136,9 +181,8 @@ class Parser(BaseParser):
         def current_suite(stack):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
-            for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
+            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
-                symbol = pgen_grammar.number2symbol[type_]
                 if symbol == 'file_input':
                     break
                 elif symbol == 'suite' and len(nodes) > 1:
@@ -191,58 +235,4 @@ class Parser(BaseParser):
                 self._indent_counter -= 1
             elif typ == INDENT:
                 self._indent_counter += 1

             yield typ, value, start_pos, prefix
-
-
-def remove_last_newline(node):
-    def calculate_end_pos(leaf, text):
-        if leaf is None:
-            end_pos = (1, 0)
-        else:
-            end_pos = leaf.end_pos
-
-        lines = splitlines(text, keepends=True)
-        if len(lines) == 1:
-            return end_pos[0], end_pos[1] + len(lines[0])
-        else:
-            return end_pos[0] + len(lines) - 1, len(lines[-1])
-
-    endmarker = node.children[-1]
-    # The newline is either in the endmarker as a prefix or the previous
-    # leaf as a newline token.
-    prefix = endmarker.prefix
-    leaf = endmarker.get_previous_leaf()
-    if prefix:
-        text = prefix
-    else:
-        if leaf is None:
-            raise ValueError("You're trying to remove a newline from an empty module.")
-
-        text = leaf.value
-
-    if not text.endswith('\n'):
-        raise ValueError("There's no newline at the end, cannot remove it.")
-
-    text = text[:-1]
-    if text and text[-1] == '\r':
-        # By adding an artificial newline this creates weird side effects for
-        # \r at the end of files that would normally be error leafs. Try to
-        # correct that here.
-        text = text[:-1]
-        start_pos = calculate_end_pos(leaf, text)
-        error_token = tree.PythonErrorLeaf('errortoken', '\r', start_pos, prefix=text)
-        node.children.insert(-2, error_token)
-
-        # Cleanup
-        leaf = error_token
-        text = ''
-
-    if prefix:
-        endmarker.prefix = text
-
-        endmarker.start_pos = calculate_end_pos(leaf, text)
-    else:
-        leaf.value = text
-        endmarker.start_pos = leaf.end_pos
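
For reference, the deleted calculate_end_pos helper advanced a (row, column) position over a piece of text. The same arithmetic as a standalone function (hypothetical name, logic taken from the removed code):

    from parso.utils import splitlines

    def advance_pos(pos, text):
        # One line: only the column moves. Several lines: the row advances
        # by the number of newlines and the column restarts on the last line.
        lines = splitlines(text, keepends=True)
        if len(lines) == 1:
            return pos[0], pos[1] + len(lines[0])
        return pos[0] + len(lines) - 1, len(lines[-1])

    assert advance_pos((1, 0), 'foo = 1') == (1, 7)
    assert advance_pos((1, 0), 'a\nbb') == (2, 2)
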
@@ -17,7 +17,7 @@ class PrefixPart(object):


 _comment = r'#[^\n\r\f]*'
-_backslash = r'\\\r?\n?'
+_backslash = r'\\\r?\n'
 _whitespace = r' +'
 _tabs = r'\t+'
 _newline = r'\r?\n'
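
The tightened pattern only matches a real line continuation; a lone backslash no longer counts as prefix material. The difference, checked directly with the two regexes from the hunk:

    import re

    old = re.compile(r'\\\r?\n?')
    new = re.compile(r'\\\r?\n')
    assert old.match('\\')            # old: a bare backslash matched
    assert new.match('\\') is None    # new: it no longer does
    assert new.match('\\\n')          # an actual continuation still matches
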
@@ -251,7 +251,9 @@ def tokenize_lines(lines):
                 txt = line[pos:]
                 if txt.endswith('\n'):
                     new_line = True
-                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
+                # TODO remove prefix?
+                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                additional_prefix = ''
                 break

         prefix = additional_prefix + pseudomatch.group(1)
@@ -259,6 +261,12 @@ def tokenize_lines(lines):
             start, pos = pseudomatch.span(2)
             spos = (lnum, start)
             token = pseudomatch.group(2)
+            if token == '':
+                assert prefix
+                additional_prefix = prefix
+                # This means that we have a line with whitespace/comments at
+                # the end, which just results in an endmarker.
+                break
             initial = token[0]

             if new_line and initial not in '\r\n#':
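
Together these tokenizer changes route trailing whitespace and comments into additional_prefix, so a file ending in a comment yields a clean endmarker carrying the comment as its prefix instead of an error token. A sketch of the resulting behavior:

    import parso

    module = parso.parse('x = 1\n# trailing comment')
    endmarker = module.children[-1]
    assert endmarker.type == 'endmarker'
    assert endmarker.prefix == '# trailing comment'
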
@@ -81,7 +81,7 @@ def test_incomplete_list_comprehension():
     # parser only valid statements generate one.
     children = parse('(1 for def').children
     assert [c.type for c in children] == \
-        ['error_node', 'error_node', 'newline', 'endmarker']
+        ['error_node', 'error_node', 'endmarker']


 def test_newline_positions():
@@ -153,7 +153,7 @@ def test_python2_octal():
 def test_python3_octal():
     module = parse('0o660')
     if py_version >= 30:
-        assert module.children[0].children[0].type == 'number'
+        assert module.children[0].type == 'number'
     else:
         assert module.children[0].type == 'error_node'

@@ -63,7 +63,7 @@ class TestsFunctionAndLambdaParsing(object):

 def test_end_pos_line():
     # jedi issue #150
-    s = "x()\nx( )\nx( )\nx ( )"
+    s = "x()\nx( )\nx( )\nx ( )\n"
     module = parse(s)
     for i, simple_stmt in enumerate(module.children[:-1]):
         expr_stmt = simple_stmt.children[0]
@@ -12,7 +12,6 @@ import parso
     (' \f ', [' ', '\f', ' ']),
     (' \f ', [' ', '\f', ' ']),
    (' \r\n', [' ', '\r\n']),
-    ('\\', ['\\']),
     ('\\\n', ['\\\n']),
     ('\\\r\n', ['\\\r\n']),
     ('\t\t\n\t', ['\t\t', '\n', '\t']),
@@ -43,7 +42,6 @@ def test_simple_prefix_splitting(string, tokens):
     ('\r\n', ['newline']),
     ('\f', ['formfeed']),
     ('\\\n', ['backslash']),
-    ('\r', ['newline']),
 ])
 def test_prefix_splitting_types(string, types):
     tree = parso.parse(string)