Try to correctly calculate start positions.

Dave Halter
2017-08-25 20:58:28 +02:00
parent 8bc54f5a29
commit f3db064d7d
2 changed files with 30 additions and 2 deletions

parso/python/fstring.py

@@ -1,6 +1,7 @@
 import re
 from parso.utils import PythonVersionInfo
+from parso.utils import split_lines
 from parso.python.tokenize import Token
 from parso.python import token
 from parso import parser
 
@@ -54,15 +55,28 @@ def tokenize(*args, **kwargs):
         print(t)
         yield t
 
 def _tokenize(code, start_pos=(1, 0)):
+    def add_to_pos(string):
+        lines = split_lines(string)
+        l = len(lines[-1])
+        if len(lines) > 1:
+            start_pos[0] += len(lines) - 1
+            start_pos[1] = l
+        else:
+            start_pos[1] += l
+
     def tok(value, type=None, prefix=''):
         if type is None:
             type = TokenNamespace.generate_token_id(value)
-        line = column=1
-        return Token(type, value, (line, column), prefix)
+        add_to_pos(prefix)
+        token = Token(type, value, tuple(start_pos), prefix)
+        add_to_pos(value)
+        return token
+
     start = 0
     recursion_level = 0
     added_prefix = ''
+    start_pos = list(start_pos)
     while True:
         match = _compiled_expr.match(code, start)
         prefix = added_prefix + match.group(1)
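
The core of the patch is the add_to_pos closure: it advances a mutable [line, column] cursor past a string, first for a token's prefix and then for its value, and the snapshot taken between the two becomes the token's start_pos. Below is a minimal standalone sketch of that bookkeeping, not parso's actual code: advance and the one-argument split_lines stand-in are hypothetical names, and the stand-in only splits on '\n' while parso.utils.split_lines also handles '\r' and '\r\n'.

    def split_lines(string):
        # Simplified stand-in for parso.utils.split_lines: '\n' only.
        # Note: 'a\n'.split('\n') == ['a', ''], so a trailing newline
        # correctly resets the column to 0 on the next line.
        return string.split('\n')

    def advance(pos, string):
        # Advance a mutable [line, column] cursor past `string`,
        # mirroring add_to_pos in the diff above.
        lines = split_lines(string)
        if len(lines) > 1:
            pos[0] += len(lines) - 1   # newlines move the cursor down
            pos[1] = len(lines[-1])    # column restarts on the last line
        else:
            pos[1] += len(lines[-1])   # single line: only the column grows

    pos = [1, 0]
    advance(pos, 'foo')        # -> [1, 3]
    advance(pos, '\nbar\nb')   # -> [3, 1]
    assert pos == [3, 1]

This is also why the patch adds start_pos = list(start_pos) before the loop: the default (1, 0) is a tuple, which add_to_pos could not mutate in place, and tuple(start_pos) then snapshots the cursor for each Token.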

test/test_fstring.py

@@ -1,6 +1,7 @@
 import pytest
 
 from parso import load_grammar, ParserSyntaxError
+from parso.python.fstring import tokenize
 
 
 @pytest.fixture
@@ -59,3 +60,16 @@ def test_invalid(code, grammar):
     # It should work with error recovery.
     #grammar.parse(code, error_recovery=True)
+
+
+@pytest.mark.parametrize(
+    ('code', 'start_pos', 'positions'), [
+        # 2 times 2, 5 because python expr and endmarker.
+        ('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
+        (' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
+        ('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
+    ]
+)
+def test_tokenize_start_pos(code, start_pos, positions):
+    tokens = tokenize(code, start_pos)
+    assert positions == [p.start_pos for p in tokens]
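
As a sanity check on the first parametrized case, here is a hedged trace of tokenize('}{', (2, 3)): the token sequence below ('}', '{', an empty python expr, and the endmarker) is read off the test's own comment, and since every value sits on a single line, each token starts where the previous one ended.

    pos = (2, 3)
    starts = []
    for value in ('}', '{', '', ''):   # '}', '{', empty python expr, endmarker
        starts.append(pos)
        pos = (pos[0], pos[1] + len(value))  # single-line values: column only

    assert starts == [(2, 3), (2, 4), (2, 5), (2, 5)]

The third case works the same way but crosses newlines (the prefix '\nfoo\n ' moves the cursor down two lines and resets the column), which is exactly what the multi-line branch of add_to_pos is for.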