Get rid of mypy issues with tokenize.py

Dave Halter
2020-07-25 15:34:29 +02:00
parent a474895764
commit 8a34245239
4 changed files with 42 additions and 50 deletions

parso/grammar.py

@@ -206,11 +206,11 @@ class PythonGrammar(Grammar):
         self.version_info = version_info

     def _tokenize_lines(self, lines, **kwargs):
-        return tokenize_lines(lines, self.version_info, **kwargs)
+        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

     def _tokenize(self, code):
         # Used by Jedi.
-        return tokenize(code, self.version_info)
+        return tokenize(code, version_info=self.version_info)


 def load_grammar(*, language='python', version=None, path=None):

parso/python/tokenize.py

@@ -13,12 +13,13 @@ from __future__ import absolute_import
 import sys
 import re
-from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8
+from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
+    Pattern, Set

-from parso.python.token import PythonTokenTypes
-from parso.utils import split_lines
+from parso.python.token import PythonTokenTypes, TokenType
+from parso.utils import split_lines, PythonVersionInfo, parse_version_string

 # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
@@ -38,15 +39,20 @@ FSTRING_START = PythonTokenTypes.FSTRING_START
 FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
 FSTRING_END = PythonTokenTypes.FSTRING_END

-TokenCollection = namedtuple(
-    'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats whitespace '
-    'fstring_pattern_map always_break_tokens',
-)
+class TokenCollection(NamedTuple):
+    pseudo_token: Pattern
+    single_quoted: Set[str]
+    triple_quoted: Set[str]
+    endpats: Dict[str, Pattern]
+    whitespace: Pattern
+    fstring_pattern_map: Dict[str, str]
+    always_break_tokens: Tuple[str]

 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

-_token_collection_cache = {}
+_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}


 def group(*choices, capture=False, **kwargs):
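The last change in that hunk is a common mypy pattern: an empty module-level dict gets an explicit annotation so the checker knows its key and value types up front. A minimal, self-contained sketch of the idea (the stand-in types here are simplified, not parso's real ones):

    from typing import Dict, NamedTuple, Tuple

    class VersionInfo(NamedTuple):      # stand-in for parso.utils.PythonVersionInfo
        major: int
        minor: int

    # A bare `cache = {}` gives mypy nothing to check against; the annotation
    # lets it reject wrong key or value types at every use site.
    cache: Dict[VersionInfo, Tuple[str, ...]] = {}

    cache[VersionInfo(3, 8)] = ('import', 'class')    # ok
    # cache['3.8'] = ()                               # mypy: invalid key type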
@@ -219,9 +225,13 @@ def _create_token_collection(version_info):
     )


-class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+class Token(NamedTuple):
+    type: TokenType
+    string: str
+    start_pos: Tuple[int, int]
+    prefix: str
     @property
-    def end_pos(self):
+    def end_pos(self) -> Tuple[int, int]:
         lines = split_lines(self.string)
         if len(lines) > 1:
             return self.start_pos[0] + len(lines) - 1, 0
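Unlike `collections.namedtuple`, the class form of `typing.NamedTuple` declares a type per field, and methods and properties can be written directly in the class body. A reduced sketch of the `Token` conversion, assuming `str` in place of parso's `TokenType` and `str.splitlines()` in place of parso's `split_lines`:

    from typing import NamedTuple, Tuple

    class Token(NamedTuple):
        type: str                       # the real class uses parso's TokenType
        string: str
        start_pos: Tuple[int, int]
        prefix: str

        @property
        def end_pos(self) -> Tuple[int, int]:
            # (row, column) just past the token, like parso's property
            lines = self.string.splitlines()
            if len(lines) > 1:
                return self.start_pos[0] + len(lines) - 1, 0
            return self.start_pos[0], self.start_pos[1] + len(self.string)

    print(Token('NAME', 'foo', (1, 0), '').end_pos)   # (1, 3)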
@@ -322,10 +332,12 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
         return string, new_pos


-def tokenize(code, version_info, start_pos=(1, 0)):
+def tokenize(
+    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
+) -> Iterator[PythonToken]:
     """Generate tokens from a the source code (string)."""
     lines = split_lines(code, keepends=True)
-    return tokenize_lines(lines, version_info, start_pos=start_pos)
+    return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)


 def _print_tokens(func):
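The bare `*` in the new signature makes `version_info` and `start_pos` keyword-only, which is exactly why the call sites in grammar.py and in the test suite (below) now spell out `version_info=...`. A minimal illustration with a hypothetical function:

    def tokenize(code, *, version_info, start_pos=(1, 0)):
        # Parameters after the bare `*` must be passed by keyword.
        return code, version_info, start_pos

    tokenize('x = 1\n', version_info=(3, 8))    # ok
    # tokenize('x = 1\n', (3, 8))               # TypeError: takes 1 positional argument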
@@ -341,7 +353,14 @@ def _print_tokens(func):
 # @_print_tokens
-def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
+def tokenize_lines(
+    lines: Iterable[str],
+    *,
+    version_info: PythonVersionInfo,
+    indents: List[int] = None,
+    start_pos: Tuple[int, int] = (1, 0),
+    is_first_token=True,
+) -> Iterator[PythonToken]:
     """
     A heavily modified Python standard library tokenizer.
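Both `tokenize` and `tokenize_lines` are generator functions, and the inline annotations use `Iterator[PythonToken]` where the deleted stub file (below) used `Generator[PythonToken, None, None]`. The short form is sufficient whenever the generator's send and return types are unused, as this sketch shows:

    from typing import Iterable, Iterator

    def line_lengths(lines: Iterable[str]) -> Iterator[int]:
        # Iterator[T] is enough for a plain generator; Generator[T, SendT, ReturnT]
        # only matters when .send() or a return value is part of the contract.
        for line in lines:
            yield len(line)

    print(list(line_lengths(['a\n', 'bb\n'])))    # [2, 3]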
@@ -367,7 +386,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
     max_ = 0
     numchars = '0123456789'
     contstr = ''
-    contline = None
+    contline: str
+    contstr_start: Tuple[int, int]
+    endprog: Pattern
     # We start with a newline. This makes indent at the first position
     # possible. It's not valid Python, but still better than an INDENT in the
     # second line (and not in the first). This makes quite a few things in
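Initializing `contline` to `None` would make mypy infer `Optional[str]` and demand a `None` check at every later use. The replacement is a bare PEP 526 annotation: the variable's type is declared without binding a value. A small sketch of the pattern:

    from typing import Tuple

    def first_span(text: str) -> Tuple[int, int]:
        span: Tuple[int, int]        # declared, not yet bound: no Optional inferred
        if text:
            span = (0, len(text))
        else:
            span = (0, 0)
        return span

    print(first_span('abc'))         # (0, 3)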
@@ -376,7 +397,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
     prefix = ''  # Should never be required, but here for safety
     additional_prefix = ''
     lnum = start_pos[0] - 1
-    fstring_stack = []
+    fstring_stack: List[FStringNode] = []
     for line in lines:  # loop over lines in stream
         lnum += 1
         pos = 0
@@ -402,7 +423,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first
                     STRING, contstr + line[:pos],
                     contstr_start, prefix)  # noqa: F821
                 contstr = ''
-                contline = None
+                contline = ''
             else:
                 contstr = contstr + line
                 contline = contline + line
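With `contline` now declared as plain `str`, resetting it to `None` would itself be a type error, so the empty string becomes the "no continuation line" sentinel:

    contline: str = 'previous continuation line\n'
    contline = ''       # ok: still a plain str
    # contline = None   # mypy: Incompatible types in assignment (None vs str)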
@@ -655,10 +676,5 @@ if __name__ == "__main__":
         with open(path) as f:
             code = f.read()

-    from parso.utils import python_bytes_to_unicode, parse_version_string
-    if isinstance(code, bytes):
-        code = python_bytes_to_unicode(code)
-
-    for token in tokenize(code, parse_version_string()):
+    for token in tokenize(code, version_info=parse_version_string('3.10')):
         print(token)

parso/python/tokenize.pyi (deleted)

@@ -1,24 +0,0 @@
-from typing import Generator, Iterable, NamedTuple, Tuple
-
-from parso.python.token import TokenType
-from parso.utils import PythonVersionInfo
-
-class Token(NamedTuple):
-    type: TokenType
-    string: str
-    start_pos: Tuple[int, int]
-    prefix: str
-    @property
-    def end_pos(self) -> Tuple[int, int]: ...
-
-class PythonToken(Token):
-    def __repr__(self) -> str: ...
-
-def tokenize(
-    code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
-) -> Generator[PythonToken, None, None]: ...
-def tokenize_lines(
-    lines: Iterable[str],
-    version_info: PythonVersionInfo,
-    start_pos: Tuple[int, int] = (1, 0),
-) -> Generator[PythonToken, None, None]: ...
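Deleting the stub is the natural counterpart of the inline annotations: mypy gives a `.pyi` file precedence over the matching `.py` module, so keeping it would have silently masked the new signatures (note the stub still declared `version_info` as positional and returned `Generator[...]`). A simplified before/after sketch of the same public signature, with placeholder types rather than parso's real ones:

    # tokenize.pyi (deleted) -- shadows tokenize.py for type checkers:
    #     def tokenize(code: str, version_info: VersionInfo,
    #                  start_pos: Tuple[int, int] = (1, 0)
    #                  ) -> Generator[Token, None, None]: ...

    # tokenize.py (kept) -- one source of truth for runtime and mypy:
    from typing import Iterator, Tuple

    def tokenize(code: str, *, version_info: Tuple[int, int],
                 start_pos: Tuple[int, int] = (1, 0)) -> Iterator[str]:
        yield code[:1]               # placeholder body for the sketch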

test/test_tokenize.py

@@ -30,7 +30,7 @@ FSTRING_END = PythonTokenTypes.FSTRING_END
 def _get_token_list(string, version=None):
     # Load the current version.
     version_info = parse_version_string(version)
-    return list(tokenize.tokenize(string, version_info))
+    return list(tokenize.tokenize(string, version_info=version_info))


 def test_end_pos_one_line():