Get rid of mypy issues with tokenize.py

Dave Halter
2020-07-25 15:34:29 +02:00
parent a474895764
commit 8a34245239
4 changed files with 42 additions and 50 deletions
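
In short, the commit swaps `collections.namedtuple` for `typing.NamedTuple` (so mypy sees per-field types) and makes `version_info` a keyword-only parameter. A minimal sketch of both patterns, using illustrative names rather than parso's actual definitions:

    # Minimal sketch of the two patterns this commit applies
    # (illustrative names, not parso's actual API).
    from typing import NamedTuple, Tuple


    class Token(NamedTuple):      # was: namedtuple('Token', ['type', 'string'])
        type: str                 # mypy can now check each field's type
        string: str


    def tokenize(code: str, *, version_info: Tuple[int, int]) -> Token:
        # The bare '*' makes version_info keyword-only: positional calls
        # fail at runtime and are flagged by mypy.
        return Token('name', code)


    tokenize('x', version_info=(3, 8))    # OK
    # tokenize('x', (3, 8))               # TypeError at runtime, error under mypy
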


@@ -206,11 +206,11 @@ class PythonGrammar(Grammar):
         self.version_info = version_info

     def _tokenize_lines(self, lines, **kwargs):
-        return tokenize_lines(lines, self.version_info, **kwargs)
+        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

     def _tokenize(self, code):
         # Used by Jedi.
-        return tokenize(code, self.version_info)
+        return tokenize(code, version_info=self.version_info)


 def load_grammar(*, language='python', version=None, path=None):


@@ -13,12 +13,13 @@ from __future__ import absolute_import
 import sys
 import re
-from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8
+from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
+    Pattern, Set

-from parso.python.token import PythonTokenTypes
-from parso.utils import split_lines
+from parso.python.token import PythonTokenTypes, TokenType
+from parso.utils import split_lines, PythonVersionInfo, parse_version_string

 # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
@@ -38,15 +39,20 @@ FSTRING_START = PythonTokenTypes.FSTRING_START
 FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
 FSTRING_END = PythonTokenTypes.FSTRING_END

-TokenCollection = namedtuple(
-    'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats whitespace '
-    'fstring_pattern_map always_break_tokens',
-)
+class TokenCollection(NamedTuple):
+    pseudo_token: Pattern
+    single_quoted: Set[str]
+    triple_quoted: Set[str]
+    endpats: Dict[str, Pattern]
+    whitespace: Pattern
+    fstring_pattern_map: Dict[str, str]
+    always_break_tokens: Tuple[str]

 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

-_token_collection_cache = {}
+_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}


 def group(*choices, capture=False, **kwargs):
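
The cache annotation matters because mypy cannot infer key/value types from an empty dict literal. A sketch of the same pattern with stand-in types:

    # Sketch: annotating an empty module-level cache (stand-in types).
    from typing import Dict, Tuple

    # Without this annotation, mypy reports "Need type annotation" for the
    # empty literal and cannot check later reads and writes.
    _cache: Dict[Tuple[int, int], str] = {}


    def lookup(version: Tuple[int, int]) -> str:
        return _cache.setdefault(version, 'default')


    print(lookup((3, 8)))   # -> 'default'
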
@@ -219,9 +225,13 @@ def _create_token_collection(version_info):
     )


-class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+class Token(NamedTuple):
+    type: TokenType
+    string: str
+    start_pos: Tuple[int, int]
+    prefix: str
+
     @property
-    def end_pos(self):
+    def end_pos(self) -> Tuple[int, int]:
         lines = split_lines(self.string)
         if len(lines) > 1:
             return self.start_pos[0] + len(lines) - 1, 0
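
A `typing.NamedTuple` subclass may carry methods and properties; only the annotated fields become tuple elements. A self-contained sketch mirroring the `end_pos` logic above (simplified: it splits on `'\n'` instead of using parso's `split_lines`):

    # Sketch: NamedTuple fields plus a derived property (simplified logic).
    from typing import NamedTuple, Tuple


    class Tok(NamedTuple):
        string: str
        start_pos: Tuple[int, int]

        @property
        def end_pos(self) -> Tuple[int, int]:
            lines = self.string.split('\n')  # stand-in for parso's split_lines
            if len(lines) > 1:
                # Multi-line token: reported as column 0 of its last line.
                return self.start_pos[0] + len(lines) - 1, 0
            return self.start_pos[0], self.start_pos[1] + len(self.string)


    assert Tok('foo', (1, 4)).end_pos == (1, 7)
    assert Tok('\n', (1, 7)).end_pos == (2, 0)   # newline token ends on the next line
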
@@ -322,10 +332,12 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
     return string, new_pos


-def tokenize(code, version_info, start_pos=(1, 0)):
+def tokenize(
+    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
+) -> Iterator[PythonToken]:
     """Generate tokens from the source code (string)."""
     lines = split_lines(code, keepends=True)
-    return tokenize_lines(lines, version_info, start_pos=start_pos)
+    return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)


 def _print_tokens(func):
@@ -341,7 +353,14 @@ def _print_tokens(func):
 # @_print_tokens
-def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
+def tokenize_lines(
+    lines: Iterable[str],
+    *,
+    version_info: PythonVersionInfo,
+    indents: List[int] = None,
+    start_pos: Tuple[int, int] = (1, 0),
+    is_first_token=True,
+) -> Iterator[PythonToken]:
     """
     A heavily modified Python standard library tokenizer.
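
Two things are worth noting in the new signature: every caller must now spell out `version_info=...`, and `indents: List[int] = None` leans on mypy's implicit-Optional handling of `None` defaults (strict mode would want `Optional[List[int]]`). A small sketch of that second point, under the mypy defaults of the time:

    # Sketch: a None default with a non-Optional annotation type-checks only
    # under mypy's (then-default) implicit-Optional behavior.
    from typing import List


    def count_lines(lines: List[str], *, indents: List[int] = None) -> int:
        if indents is None:       # narrow back to List[int]
            indents = [0]
        return len(lines) + len(indents)


    print(count_lines(['a\n', 'b\n'], indents=[0, 4]))  # -> 4
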
@@ -367,7 +386,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
     max_ = 0
     numchars = '0123456789'
     contstr = ''
-    contline = None
+    contline: str
+    contstr_start: Tuple[int, int]
+    endprog: Pattern
     # We start with a newline. This makes indent at the first position
     # possible. It's not valid Python, but still better than an INDENT in the
     # second line (and not in the first). This makes quite a few things in
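
Replacing `contline = None` with a bare annotation like `contline: str` declares the type for mypy without binding a value, so the variable is never inferred as `Optional`. A sketch of the idiom:

    # Sketch: a bare annotation declares a local's type without assigning it.
    from typing import Tuple

    contstr_start: Tuple[int, int]   # known to mypy, unbound at runtime
    # print(contstr_start)           # would raise NameError here
    contstr_start = (1, 0)           # first real assignment
    print(contstr_start)
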
@@ -376,7 +397,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
     prefix = ''  # Should never be required, but here for safety
     additional_prefix = ''
     lnum = start_pos[0] - 1
-    fstring_stack = []
+    fstring_stack: List[FStringNode] = []
     for line in lines:  # loop over lines in stream
         lnum += 1
         pos = 0
@@ -402,7 +423,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
                     STRING, contstr + line[:pos],
                     contstr_start, prefix)  # noqa: F821
                 contstr = ''
-                contline = None
+                contline = ''
             else:
                 contstr = contstr + line
                 contline = contline + line
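
Using `''` instead of `None` as the "no continuation line" sentinel keeps `contline` a plain `str` for its whole lifetime, so the `contline = contline + line` branch needs no `Optional[str]` narrowing. Roughly:

    # Sketch: an empty-string sentinel avoids Optional[str] entirely.
    contline = ''                     # was: contline = None
    for line in ['"""a\n', 'b"""\n']:
        contline = contline + line    # always str + str; no None check needed
    print(repr(contline))
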
@@ -655,10 +676,5 @@ if __name__ == "__main__":
     with open(path) as f:
         code = f.read()

-    from parso.utils import python_bytes_to_unicode, parse_version_string
-    if isinstance(code, bytes):
-        code = python_bytes_to_unicode(code)
-
-    for token in tokenize(code, parse_version_string()):
+    for token in tokenize(code, version_info=parse_version_string('3.10')):
         print(token)


@@ -1,24 +0,0 @@
-from typing import Generator, Iterable, NamedTuple, Tuple
-
-from parso.python.token import TokenType
-from parso.utils import PythonVersionInfo
-
-class Token(NamedTuple):
-    type: TokenType
-    string: str
-    start_pos: Tuple[int, int]
-    prefix: str
-    @property
-    def end_pos(self) -> Tuple[int, int]: ...
-
-class PythonToken(Token):
-    def __repr__(self) -> str: ...
-
-def tokenize(
-    code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
-) -> Generator[PythonToken, None, None]: ...
-
-def tokenize_lines(
-    lines: Iterable[str],
-    version_info: PythonVersionInfo,
-    start_pos: Tuple[int, int] = (1, 0),
-) -> Generator[PythonToken, None, None]: ...
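
Deleting the stub is the natural follow-up to inlining the annotations: when both `tokenize.py` and a sibling `tokenize.pyi` exist, type checkers read only the stub, so keeping a stale `.pyi` would have hidden the new inline types (and its positional `version_info` signatures had just gone out of date).
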


@@ -30,7 +30,7 @@ FSTRING_END = PythonTokenTypes.FSTRING_END
 def _get_token_list(string, version=None):
     # Load the current version.
     version_info = parse_version_string(version)
-    return list(tokenize.tokenize(string, version_info))
+    return list(tokenize.tokenize(string, version_info=version_info))


 def test_end_pos_one_line():