Try to correctly calculate start positions.

2026-05-26 02:08:57 +08:00 · 2017-08-25 20:58:28 +02:00
parent 8bc54f5a29
commit f3db064d7d
2 changed files with 30 additions and 2 deletions
@@ -1,6 +1,7 @@
 import re

 from parso.utils import PythonVersionInfo
+from parso.utils import split_lines
 from parso.python.tokenize import Token
 from parso.python import token
 from parso import parser
@@ -54,15 +55,28 @@ def tokenize(*args, **kwargs):
        print(t)
        yield t
 def _tokenize(code, start_pos=(1, 0)):
+    def add_to_pos(string):
+        lines = split_lines(string)
+        l = len(lines[-1])
+        if len(lines) > 1:
+            start_pos[0] += len(lines) - 1
+            start_pos[1] = l
+        else:
+            start_pos[1] += l
+
    def tok(value, type=None, prefix=''):
        if type is None:
            type = TokenNamespace.generate_token_id(value)
-        line = column=1
-        return Token(type, value, (line, column), prefix)
+
+        add_to_pos(prefix)
+        token = Token(type, value, tuple(start_pos), prefix)
+        add_to_pos(value)
+        return token

    start = 0
    recursion_level = 0
    added_prefix = ''
+    start_pos = list(start_pos)
    while True:
        match = _compiled_expr.match(code, start)
        prefix = added_prefix + match.group(1)
@@ -1,6 +1,7 @@
 import pytest

 from parso import load_grammar, ParserSyntaxError
+from parso.python.fstring import tokenize


@pytest.fixture
@@ -59,3 +60,16 @@ def test_invalid(code, grammar):

    # It should work with error recovery.
    #grammar.parse(code, error_recovery=True)
+
+
+@pytest.mark.parametrize(
+    ('code', 'start_pos', 'positions'), [
+        # (2, 5) appears twice: once for the python expr, once for the endmarker.
+        ('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
+        (' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
+        ('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
+    ]
+)
+def test_tokenize_start_pos(code, start_pos, positions):
+    tokens = tokenize(code, start_pos)
+    assert positions == [p.start_pos for p in tokens]