Merge pull request #235 from davidhalter/typing

Typing changes
2026-02-14 06:02:00 +08:00 · 2026-02-09 15:34:33 +00:00
parent be9f5a401f 341a60b115
commit cc2c562500
17 changed files with 98 additions and 76 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,6 +1,14 @@
 name: Build

-on: [push, pull_request]
+on:
+  push:
+  pull_request:
+  workflow_call:
+    inputs:
+      debug_ssh_session:
+        required: false
+        type: boolean
+

 env:
  PYTEST_ADDOPTS: --color=yes
@@ -17,12 +25,17 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip setuptools wheel
-        pip install .[qa]
+        pip install .[qa] .[testing]
+    - name: Setup tmate session
+      uses: mxschmitt/action-tmate@v3
+      if: ${{ inputs.debug_ssh_session }}
+      with:
+        limit-access-to-actor: true
    - name: Run Flake8
      # Ignore F401, which are unused imports. flake8 is a primitive tool and is sometimes wrong.
      run: flake8 --extend-ignore F401 parso test/*.py setup.py scripts/
-    - name: Run Mypy
-      run: mypy parso setup.py
+    - name: Run Zuban
+      run: zuban check
  test:
    runs-on: ubuntu-latest
    continue-on-error: ${{ matrix.experimental }}
--- a/.github/workflows/debug_ci.yml
+++ b/.github/workflows/debug_ci.yml
@@ -0,0 +1,12 @@
+name: Debug CI
+
+on:
+  workflow_dispatch:
+
+jobs:
+  tests:
+    uses: ./.github/workflows/tests.yml
+    with:
+      all_tests: true
+      debug_ssh_session: true
+    secrets: inherit
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -1,6 +1,6 @@
 import hashlib
 import os
-from typing import Generic, TypeVar, Union, Dict, Optional, Any
+from typing import Generic, TypeVar, Union, Dict, Optional, Any, Iterator
 from pathlib import Path

 from parso._compatibility import is_pypy
@@ -8,7 +8,7 @@ from parso.pgen2 import generate_grammar
 from parso.utils import split_lines, python_bytes_to_unicode, \
    PythonVersionInfo, parse_version_string
 from parso.python.diff import DiffParser
-from parso.python.tokenize import tokenize_lines, tokenize
+from parso.python.tokenize import tokenize_lines, tokenize, PythonToken
 from parso.python.token import PythonTokenTypes
 from parso.cache import parser_cache, load_module, try_to_save_module
 from parso.parser import BaseParser
@@ -223,7 +223,7 @@ class PythonGrammar(Grammar):
        )
        self.version_info = version_info

-    def _tokenize_lines(self, lines, **kwargs):
+    def _tokenize_lines(self, lines, **kwargs) -> Iterator[PythonToken]:
        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

    def _tokenize(self, code):
@@ -255,7 +255,6 @@ def load_grammar(*, version: str = None, path: str = None):
        'grammar%s%s.txt' % (version_info.major, version_info.minor)
    )

-    global _loaded_grammars
    path = os.path.join(os.path.dirname(__file__), file)
    try:
        return _loaded_grammars[path]
--- a/parso/normalizer.py
+++ b/parso/normalizer.py
@@ -1,8 +1,11 @@
 from contextlib import contextmanager
-from typing import Dict, List
+from typing import Dict, List, Any


 class _NormalizerMeta(type):
+    rule_value_classes: Any
+    rule_type_classes: Any
+
    def __new__(cls, name, bases, dct):
        new_cls = type.__new__(cls, name, bases, dct)
        new_cls.rule_value_classes = {}
@@ -109,9 +112,6 @@ class NormalizerConfig:
    normalizer_class = Normalizer

    def create_normalizer(self, grammar):
-        if self.normalizer_class is None:
-            return None
-
        return self.normalizer_class(grammar, self)


--- a/parso/pgen2/generator.py
+++ b/parso/pgen2/generator.py
@@ -83,14 +83,14 @@ class DFAState(Generic[_TokenTypeT]):
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # map from terminals/nonterminals to DFAState
-        self.arcs: Mapping[str, DFAState] = {}
+        self.arcs: dict[str, DFAState] = {}
        # In an intermediary step we set these nonterminal arcs (which has the
        # same structure as arcs). These don't contain terminals anymore.
-        self.nonterminal_arcs: Mapping[str, DFAState] = {}
+        self.nonterminal_arcs: dict[str, DFAState] = {}

        # Transitions are basically the only thing that  the parser is using
        # with is_final. Everyting else is purely here to create a parser.
-        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
+        self.transitions: dict[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
@@ -261,7 +261,7 @@ def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule

-    reserved_strings: Mapping[str, ReservedString] = {}
+    reserved_strings: dict[str, ReservedString] = {}
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -881,6 +881,6 @@ class _NodesTree:
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

-        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
+        endmarker = EndMarker('', (end_pos[0], end_pos[1]), self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)
--- a/parso/python/pep8.py
+++ b/parso/python/pep8.py
@@ -676,7 +676,7 @@ class PEP8Normalizer(ErrorFinder):
                elif leaf.parent.type == 'function' and leaf.parent.name == leaf:
                    self.add_issue(leaf, 743, message % 'function')
                else:
-                    self.add_issuadd_issue(741, message % 'variables', leaf)
+                    self.add_issue(leaf, 741, message % 'variables')
        elif leaf.value == ':':
            if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef':
                next_leaf = leaf.get_next_leaf()
@@ -764,4 +764,4 @@ class BlankLineAtEnd(Rule):
    message = 'Blank line at end of file'

    def is_issue(self, leaf):
-        return self._newline_count >= 2
+        return False  # TODO return self._newline_count >= 2
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -16,7 +16,7 @@ import re
 import itertools as _itertools
 from codecs import BOM_UTF8
 from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
-    Pattern, Set
+    Pattern, Set, Any

 from parso.python.token import PythonTokenTypes
 from parso.utils import split_lines, PythonVersionInfo, parse_version_string
@@ -47,12 +47,12 @@ class TokenCollection(NamedTuple):
    endpats: Dict[str, Pattern]
    whitespace: Pattern
    fstring_pattern_map: Dict[str, str]
-    always_break_tokens: Tuple[str]
+    always_break_tokens: Set[str]


 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

-_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}
+_token_collection_cache: Dict[Tuple[int, int], TokenCollection] = {}


 def group(*choices, capture=False, **kwargs):
@@ -249,7 +249,7 @@ class Token(NamedTuple):
 class PythonToken(Token):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
-                self._replace(type=self.type.name))
+                self._replace(type=self.type.name))  # type: ignore[arg-type]


 class FStringNode:
@@ -257,7 +257,7 @@ class FStringNode:
        self.quote = quote
        self.parentheses_count = 0
        self.previous_lines = ''
-        self.last_string_start_pos = None
+        self.last_string_start_pos: Any = None
        # In the syntax there can be multiple format_spec's nested:
        # {x:{y:3}}
        self.format_spec_count = 0
@@ -340,7 +340,7 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):


 def tokenize(
-    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
+    code: str, *, version_info: Tuple[int, int], start_pos: Tuple[int, int] = (1, 0)
 ) -> Iterator[PythonToken]:
    """Generate tokens from a the source code (string)."""
    lines = split_lines(code, keepends=True)
@@ -363,7 +363,7 @@ def _print_tokens(func):
 def tokenize_lines(
    lines: Iterable[str],
    *,
-    version_info: PythonVersionInfo,
+    version_info: Tuple[int, int],
    indents: List[int] = None,
    start_pos: Tuple[int, int] = (1, 0),
    is_first_token=True,
@@ -444,7 +444,7 @@ def tokenize_lines(
                    if string:
                        yield PythonToken(
                            FSTRING_STRING, string,
-                            tos.last_string_start_pos,
+                            tos.last_string_start_pos,  # type: ignore[arg-type]
                            # Never has a prefix because it can start anywhere and
                            # include whitespace.
                            prefix=''
@@ -496,8 +496,8 @@ def tokenize_lines(
                initial = token[0]
            else:
                match = whitespace.match(line, pos)
-                initial = line[match.end()]
-                start = match.end()
+                initial = line[match.end()]  # type: ignore[union-attr]
+                start = match.end()  # type: ignore[union-attr]
                spos = (lnum, start)

            if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
@@ -512,12 +512,12 @@ def tokenize_lines(
            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if new_line and paren_level == 0 and not fstring_stack:
-                    yield from dedent_if_necessary(match.end())
-                pos = match.end()
+                    yield from dedent_if_necessary(match.end())  # type: ignore[union-attr]
+                pos = match.end()  # type: ignore[union-attr]
                new_line = False
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
-                    additional_prefix + match.group(0)
+                    additional_prefix + match.group(0)  # type: ignore[union-attr]
                )
                additional_prefix = ''
                pos += 1
@@ -586,7 +586,7 @@ def tokenize_lines(
                    # backslash and is continued.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
-                               or endpats.get(token[2]))
+                               or endpats.get(token[2]))  # type: ignore[assignment]
                    contstr = line[start:]
                    contline = line
                    break
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -43,11 +43,8 @@ Parser Tree Classes
 """

 import re
-try:
-    from collections.abc import Mapping
-except ImportError:
-    from collections import Mapping
-from typing import Tuple
+from collections.abc import Mapping
+from typing import Tuple, Any

 from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, search_ancestor  # noqa
 from parso.python.prefix import split_prefix
@@ -70,6 +67,9 @@ _IMPORTS = set(['import_name', 'import_from'])

 class DocstringMixin:
    __slots__ = ()
+    type: str
+    children: "list[Any]"
+    parent: Any

    def get_doc_node(self):
        """
@@ -101,6 +101,7 @@ class PythonMixin:
    Some Python specific utilities.
    """
    __slots__ = ()
+    children: "list[Any]"

    def get_name_of_position(self, position):
        """
@@ -219,7 +220,7 @@ class Name(_LeafWithoutNewlines):
        type_ = node.type

        if type_ in ('funcdef', 'classdef'):
-            if self == node.name:
+            if self == node.name:  # type: ignore[union-attr]
                return node
            return None

@@ -232,7 +233,7 @@ class Name(_LeafWithoutNewlines):
            if node.type == 'suite':
                return None
            if node.type in _GET_DEFINITION_TYPES:
-                if self in node.get_defined_names(include_setitem):
+                if self in node.get_defined_names(include_setitem):  # type: ignore[attr-defined]
                    return node
                if import_name_always and node.type in _IMPORTS:
                    return node
@@ -296,6 +297,7 @@ class FStringEnd(PythonLeaf):

 class _StringComparisonMixin:
    __slots__ = ()
+    value: Any

    def __eq__(self, other):
        """
@@ -368,7 +370,7 @@ class Scope(PythonBaseNode, DocstringMixin):

    def __repr__(self):
        try:
-            name = self.name.value
+            name = self.name.value  # type: ignore[attr-defined]
        except AttributeError:
            name = ''

@@ -794,6 +796,8 @@ class WithStmt(Flow):

 class Import(PythonBaseNode):
    __slots__ = ()
+    get_paths: Any
+    _aliases: Any

    def get_path_for_name(self, name):
        """
@@ -818,6 +822,9 @@ class Import(PythonBaseNode):
    def is_star_import(self):
        return self.children[-1] == '*'

+    def get_defined_names(self):
+        raise NotImplementedError("Use ImportFrom or ImportName")
+

 class ImportFrom(Import):
    type = 'import_from'
--- a/parso/tree.py
+++ b/parso/tree.py
@@ -14,12 +14,7 @@ def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]
    :param node: The ancestors of this node will be checked.
    :param node_types: type names that are searched for.
    """
-    n = node.parent
-    while n is not None:
-        if n.type in node_types:
-            return n
-        n = n.parent
-    return None
+    return node.search_ancestor(*node_types)


 class NodeOrLeaf:
@@ -371,7 +366,7 @@ class BaseNode(NodeOrLeaf):
    """
    __slots__ = ('children',)

-    def __init__(self, children: List[NodeOrLeaf]) -> None:
+    def __init__(self, children) -> None:
        self.children = children
        """
        A list of :class:`NodeOrLeaf` child nodes.
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -2,7 +2,7 @@ import re
 import sys
 from ast import literal_eval
 from functools import total_ordering
-from typing import NamedTuple, Sequence, Union
+from typing import NamedTuple, Union

 # The following is a list in Python that are line breaks in str.splitlines, but
 # not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
@@ -26,7 +26,7 @@ class Version(NamedTuple):
    micro: int


-def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
+def split_lines(string: str, keepends: bool = False) -> "list[str]":
    r"""
    Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
    looks at form feeds and other special characters as normal text. Just
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,14 @@
+[tool.zuban]
+enable_error_code = ["ignore-without-code"]
+
+disallow_subclassing_any = true
+
+# Avoid creating future gotchas emerging from bad typing
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_unused_configs = true
+warn_unreachable = true
+
+strict_equality = true
+implicit_optional = true
+exclude = "^test/normalizer_issue_files"
--- a/scripts/diff_parser_profile.py
+++ b/scripts/diff_parser_profile.py
@@ -15,11 +15,11 @@ Options:
 import cProfile

 from docopt import docopt
-from jedi.parser.python import load_grammar
-from jedi.parser.diff import DiffParser
-from jedi.parser.python import ParserWithRecovery
-from jedi.common import splitlines
-import jedi
+from jedi.parser.python import load_grammar  # type: ignore[import-not-found]
+from jedi.parser.diff import DiffParser  # type: ignore[import-not-found]
+from jedi.parser.python import ParserWithRecovery  # type: ignore[import-not-found]
+from jedi.common import splitlines  # type: ignore[import-not-found]
+import jedi  # type: ignore[import-not-found]


 def run(parser, lines):
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,20 +10,3 @@ ignore =
  E226,
  # line break before binary operator
  W503,
-
-
-[mypy]
-show_error_codes = true
-enable_error_code = ignore-without-code
-
-disallow_subclassing_any = True
-
-# Avoid creating future gotchas emerging from bad typing
-warn_redundant_casts = True
-warn_unused_ignores = True
-warn_return_any = True
-warn_unused_configs = True
-warn_unreachable = True
-
-strict_equality = True
-no_implicit_optional = False
--- a/setup.py
+++ b/setup.py
@@ -58,9 +58,8 @@ setup(
        'qa': [
            # Latest version which supports Python 3.6
            'flake8==5.0.4',
-            # Latest version which supports Python 3.6
-            'mypy==0.971',
            # Arbitrary pins, latest at the time of pinning
+            'zuban==0.5.1',
            'types-setuptools==67.2.0.1',
        ],
    },
--- a/test/test_load_grammar.py
+++ b/test/test_load_grammar.py
@@ -33,4 +33,4 @@ def test_invalid_grammar_version(string):

 def test_grammar_int_version():
    with pytest.raises(TypeError):
-        load_grammar(version=3.8)
+        load_grammar(version=3.8)  # type: ignore
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -117,7 +117,7 @@ def test_param_splitting(each_version):


 def test_unicode_string():
-    s = tree.String(None, 'bö', (0, 0))
+    s = tree.String('bö', (0, 0))
    assert repr(s)  # Should not raise an Error!