codecs: add missing function, reflect runtime import * (#7918)

2026-03-13 10:20:42 +08:00 · 2022-05-22 16:06:21 -07:00
parent 18433202ba
commit 9660ee97ee
5 changed files with 73 additions and 64 deletions
--- a/stdlib/_codecs.pyi
+++ b/stdlib/_codecs.pyi
@@ -1,22 +1,71 @@
 import codecs
 import sys
 from collections.abc import Callable
-from typing import Any
-from typing_extensions import TypeAlias
+from typing import overload
+from typing_extensions import Literal, TypeAlias

 # This type is not exposed; it is defined in unicodeobject.c
 class _EncodingMap:
    def size(self) -> int: ...

 _MapT: TypeAlias = dict[int, int] | _EncodingMap
-_Handler: TypeAlias = Callable[[Exception], tuple[str, int]]
+_Handler: TypeAlias = Callable[[UnicodeError], tuple[str | bytes, int]]
+_SearchFunction: TypeAlias = Callable[[str], codecs.CodecInfo | None]
+
+def register(__search_function: _SearchFunction) -> None: ...
+
+if sys.version_info >= (3, 10):
+    def unregister(__search_function: _SearchFunction) -> None: ...

-def register(__search_function: Callable[[str], Any]) -> None: ...
 def register_error(__errors: str, __handler: _Handler) -> None: ...
-def lookup(__encoding: str) -> codecs.CodecInfo: ...
 def lookup_error(__name: str) -> _Handler: ...
-def decode(obj: Any, encoding: str = ..., errors: str | None = ...) -> Any: ...
-def encode(obj: Any, encoding: str = ..., errors: str | None = ...) -> Any: ...
+
+# The type ignores on `encode` and `decode` avoid issues with overlapping overloads; for more details, see #300
+# https://docs.python.org/3/library/codecs.html#binary-transforms
+_BytesToBytesEncoding: TypeAlias = Literal[
+    "base64",
+    "base_64",
+    "base64_codec",
+    "bz2",
+    "bz2_codec",
+    "hex",
+    "hex_codec",
+    "quopri",
+    "quotedprintable",
+    "quoted_printable",
+    "quopri_codec",
+    "uu",
+    "uu_codec",
+    "zip",
+    "zlib",
+    "zlib_codec",
+]
+# https://docs.python.org/3/library/codecs.html#text-transforms
+_StrToStrEncoding: TypeAlias = Literal["rot13", "rot_13"]
+
+@overload
+def encode(obj: bytes, encoding: _BytesToBytesEncoding, errors: str = ...) -> bytes: ...
+@overload
+def encode(obj: str, encoding: _StrToStrEncoding, errors: str = ...) -> str: ...  # type: ignore[misc]
+@overload
+def encode(obj: str, encoding: str = ..., errors: str = ...) -> bytes: ...
+@overload
+def decode(obj: bytes, encoding: _BytesToBytesEncoding, errors: str = ...) -> bytes: ...  # type: ignore[misc]
+@overload
+def decode(obj: str, encoding: _StrToStrEncoding, errors: str = ...) -> str: ...
+
+# These are documented as text encodings, but in practice they also accept str as input
+@overload
+def decode(
+    obj: str, encoding: Literal["unicode_escape", "unicode-escape", "raw_unicode_escape", "raw-unicode-escape"], errors: str = ...
+) -> str: ...
+
+# hex is officially documented as a bytes-to-bytes encoding, but it appears to also work with str
+@overload
+def decode(obj: str, encoding: Literal["hex", "hex_codec"], errors: str = ...) -> bytes: ...
+@overload
+def decode(obj: bytes, encoding: str = ..., errors: str = ...) -> str: ...
+def lookup(__encoding: str) -> codecs.CodecInfo: ...
 def charmap_build(__map: str) -> _MapT: ...
 def ascii_decode(__data: bytes, __errors: str | None = ...) -> tuple[str, int]: ...
 def ascii_encode(__str: str, __errors: str | None = ...) -> tuple[bytes, int]: ...
--- a/stdlib/codecs.pyi
+++ b/stdlib/codecs.pyi
@@ -1,10 +1,11 @@
-import sys
 import types
 from _typeshed import Self
 from abc import abstractmethod
 from collections.abc import Callable, Generator, Iterable
-from typing import Any, BinaryIO, Protocol, TextIO, overload
-from typing_extensions import Literal, TypeAlias
+from typing import Any, BinaryIO, Protocol, TextIO
+from typing_extensions import Literal
+
+from _codecs import *

 __all__ = [
    "register",
@@ -94,49 +95,6 @@ class _IncrementalEncoder(Protocol):
 class _IncrementalDecoder(Protocol):
    def __call__(self, errors: str = ...) -> IncrementalDecoder: ...

-# The type ignore on `encode` and `decode` is to avoid issues with overlapping overloads, for more details, see #300
-# https://docs.python.org/3/library/codecs.html#binary-transforms
-_BytesToBytesEncoding: TypeAlias = Literal[
-    "base64",
-    "base_64",
-    "base64_codec",
-    "bz2",
-    "bz2_codec",
-    "hex",
-    "hex_codec",
-    "quopri",
-    "quotedprintable",
-    "quoted_printable",
-    "quopri_codec",
-    "uu",
-    "uu_codec",
-    "zip",
-    "zlib",
-    "zlib_codec",
-]
-# https://docs.python.org/3/library/codecs.html#text-transforms
-_StrToStrEncoding: TypeAlias = Literal["rot13", "rot_13"]
-
-@overload
-def encode(obj: bytes, encoding: _BytesToBytesEncoding, errors: str = ...) -> bytes: ...
-@overload
-def encode(obj: str, encoding: _StrToStrEncoding, errors: str = ...) -> str: ...  # type: ignore[misc]
-@overload
-def encode(obj: str, encoding: str = ..., errors: str = ...) -> bytes: ...
-@overload
-def decode(obj: bytes, encoding: _BytesToBytesEncoding, errors: str = ...) -> bytes: ...  # type: ignore[misc]
-@overload
-def decode(obj: str, encoding: _StrToStrEncoding, errors: str = ...) -> str: ...
-
-# hex is officially documented as a bytes to bytes encoding, but it appears to also work with str
-@overload
-def decode(obj: str, encoding: Literal["hex", "hex_codec"], errors: str = ...) -> bytes: ...
-@overload
-def decode(obj: bytes, encoding: str = ..., errors: str = ...) -> str: ...
-def lookup(__encoding: str) -> CodecInfo: ...
-def utf_16_be_decode(__data: bytes, __errors: str | None = ..., __final: bool = ...) -> tuple[str, int]: ...  # undocumented
-def utf_16_be_encode(__str: str, __errors: str | None = ...) -> tuple[bytes, int]: ...  # undocumented
-
 class CodecInfo(tuple[_Encoder, _Decoder, _StreamReader, _StreamWriter]):
    @property
    def encode(self) -> _Encoder: ...
@@ -170,7 +128,6 @@ def getincrementalencoder(encoding: str) -> _IncrementalEncoder: ...
 def getincrementaldecoder(encoding: str) -> _IncrementalDecoder: ...
 def getreader(encoding: str) -> _StreamReader: ...
 def getwriter(encoding: str) -> _StreamWriter: ...
-def register(__search_function: Callable[[str], CodecInfo | None]) -> None: ...
 def open(
    filename: str, mode: str = ..., encoding: str | None = ..., errors: str = ..., buffering: int = ...
 ) -> StreamReaderWriter: ...
@@ -178,9 +135,6 @@ def EncodedFile(file: _Stream, data_encoding: str, file_encoding: str | None = .
 def iterencode(iterator: Iterable[str], encoding: str, errors: str = ...) -> Generator[bytes, None, None]: ...
 def iterdecode(iterator: Iterable[bytes], encoding: str, errors: str = ...) -> Generator[str, None, None]: ...

-if sys.version_info >= (3, 10):
-    def unregister(__search_function: Callable[[str], CodecInfo | None]) -> None: ...
-
 BOM: Literal[b"\xff\xfe", b"\xfe\xff"]  # depends on `sys.byteorder`
 BOM_BE: Literal[b"\xfe\xff"]
 BOM_LE: Literal[b"\xff\xfe"]
@@ -192,11 +146,6 @@ BOM_UTF32: Literal[b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff"]  # depends on `sys.
 BOM_UTF32_BE: Literal[b"\x00\x00\xfe\xff"]
 BOM_UTF32_LE: Literal[b"\xff\xfe\x00\x00"]

-# It is expected that different actions be taken depending on which of the
-# three subclasses of `UnicodeError` is actually ...ed. However, the Union
-# is still needed for at least one of the cases.
-def register_error(__errors: str, __handler: Callable[[UnicodeError], tuple[str | bytes, int]]) -> None: ...
-def lookup_error(__name: str) -> Callable[[UnicodeError], tuple[str | bytes, int]]: ...
 def strict_errors(exception: UnicodeError) -> tuple[str | bytes, int]: ...
 def replace_errors(exception: UnicodeError) -> tuple[str | bytes, int]: ...
 def ignore_errors(exception: UnicodeError) -> tuple[str | bytes, int]: ...
--- a/test_cases/stdlib/test_codecs.py
+++ b/test_cases/stdlib/test_codecs.py
@@ -0,0 +1,13 @@
+# pyright: reportUnnecessaryTypeIgnoreComment=true
+
+import codecs
+from typing_extensions import assert_type
+
+assert_type(codecs.decode("x", "unicode-escape"), str)
+assert_type(codecs.decode(b"x", "unicode-escape"), str)
+
+assert_type(codecs.decode(b"x", "utf-8"), str)
+codecs.decode("x", "utf-8")  # type: ignore[call-overload]
+
+assert_type(codecs.decode("ab", "hex"), bytes)
+assert_type(codecs.decode(b"ab", "hex"), bytes)
--- a/tests/stubtest_allowlists/py310.txt
+++ b/tests/stubtest_allowlists/py310.txt
@@ -67,7 +67,6 @@ tempfile.SpooledTemporaryFile.seekable
 tempfile.SpooledTemporaryFile.writable

 # Exists at runtime, but missing from stubs
-_codecs.unregister
 _collections_abc.AsyncIterable.__class_getitem__
 _collections_abc.Awaitable.__class_getitem__
 _collections_abc.Container.__class_getitem__
--- a/tests/stubtest_allowlists/py311.txt
+++ b/tests/stubtest_allowlists/py311.txt
@@ -1,4 +1,3 @@
-_codecs.unregister
 _collections_abc.AsyncGenerator.ag_await
 _collections_abc.AsyncGenerator.ag_code
 _collections_abc.AsyncGenerator.ag_frame