Update beautifulsoup4 stubs for version 4.10 (#6059)

Tighten types and add missing fields
2026-02-11 04:01:29 +08:00 · 2021-09-22 17:42:42 +02:00
parent 67e8979ac1
commit 2237daa650
5 changed files with 62 additions and 39 deletions
--- a/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt
+++ b/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt
@@ -1 +0,0 @@
-bs4.element.NavigableString.string  # created through __getattr__
--- a/stubs/beautifulsoup4/METADATA.toml
+++ b/stubs/beautifulsoup4/METADATA.toml
@@ -1 +1 @@
-version = "4.9"
+version = "4.10"
--- a/stubs/beautifulsoup4/bs4/dammit.pyi
+++ b/stubs/beautifulsoup4/bs4/dammit.pyi
@@ -1,4 +1,7 @@
-from typing import Any
+from collections.abc import Iterable, Iterator
+from logging import Logger
+from typing import Any, Tuple
+from typing_extensions import Literal

 chardet_type: Any

@@ -25,44 +28,60 @@ class EntitySubstitution:
    def substitute_html(cls, s): ...

 class EncodingDetector:
-    override_encodings: Any
-    exclude_encodings: Any
-    chardet_encoding: Any
-    is_html: Any
-    declared_encoding: Any
+    known_definite_encodings: list[str]
+    user_encodings: list[str]
+    exclude_encodings: set[str]
+    chardet_encoding: Any | None
+    is_html: bool
+    declared_encoding: str | None
+    markup: Any
+    sniffed_encoding: str | None
    def __init__(
-        self, markup, override_encodings: Any | None = ..., is_html: bool = ..., exclude_encodings: Any | None = ...
+        self,
+        markup,
+        known_definite_encodings: Iterable[str] | None = ...,
+        is_html: bool = ...,
+        exclude_encodings: list[str] | None = ...,
+        user_encodings: list[str] | None = ...,
+        override_encodings: list[str] | None = ...,
    ) -> None: ...
    @property
-    def encodings(self) -> None: ...
+    def encodings(self) -> Iterator[str]: ...
    @classmethod
    def strip_byte_order_mark(cls, data): ...
    @classmethod
-    def find_declared_encoding(cls, markup, is_html: bool = ..., search_entire_document: bool = ...): ...
+    def find_declared_encoding(cls, markup, is_html: bool = ..., search_entire_document: bool = ...) -> str | None: ...

 class UnicodeDammit:
-    CHARSET_ALIASES: Any
-    ENCODINGS_WITH_SMART_QUOTES: Any
-    smart_quotes_to: Any
-    tried_encodings: Any
+    CHARSET_ALIASES: dict[str, str]
+    ENCODINGS_WITH_SMART_QUOTES: list[str]
+    smart_quotes_to: Literal["ascii", "xml", "html"] | None
+    tried_encodings: list[tuple[str, str]]
    contains_replacement_characters: bool
-    is_html: Any
-    log: Any
-    detector: Any
+    is_html: bool
+    log: Logger
+    detector: EncodingDetector
    markup: Any
-    unicode_markup: Any
-    original_encoding: Any
+    unicode_markup: str
+    original_encoding: Any | None
    def __init__(
-        self, markup, override_encodings=..., smart_quotes_to: Any | None = ..., is_html: bool = ..., exclude_encodings=...
+        self,
+        markup,
+        known_definite_encodings: list[str] | None = ...,
+        smart_quotes_to: Literal["ascii", "xml", "html"] | None = ...,
+        is_html: bool = ...,
+        exclude_encodings: list[str] | None = ...,
+        user_encodings: list[str] | None = ...,
+        override_encodings: list[str] | None = ...,
    ) -> None: ...
    @property
-    def declared_html_encoding(self): ...
-    def find_codec(self, charset): ...
-    MS_CHARS: Any
-    MS_CHARS_TO_ASCII: Any
-    WINDOWS_1252_TO_UTF8: Any
-    MULTIBYTE_MARKERS_AND_SIZES: Any
-    FIRST_MULTIBYTE_MARKER: Any
-    LAST_MULTIBYTE_MARKER: Any
+    def declared_html_encoding(self) -> str | None: ...
+    def find_codec(self, charset: str) -> str | None: ...
+    MS_CHARS: dict[bytes, str | Tuple[str, ...]]
+    MS_CHARS_TO_ASCII: dict[bytes, str]
+    WINDOWS_1252_TO_UTF8: dict[int, bytes]
+    MULTIBYTE_MARKERS_AND_SIZES: list[tuple[int, int, int]]
+    FIRST_MULTIBYTE_MARKER: int
+    LAST_MULTIBYTE_MARKER: int
    @classmethod
-    def detwingle(cls, in_bytes, main_encoding: str = ..., embedded_encoding: str = ...): ...
+    def detwingle(cls, in_bytes: bytes, main_encoding: str = ..., embedded_encoding: str = ...) -> bytes: ...
--- a/stubs/beautifulsoup4/bs4/element.pyi
+++ b/stubs/beautifulsoup4/bs4/element.pyi
@@ -1,4 +1,5 @@
 from _typeshed import Self
+from collections.abc import Iterator
 from typing import Any, Callable, Generic, Iterable, List, Mapping, Pattern, Set, Tuple, Type, TypeVar, Union, overload

 from . import BeautifulSoup
@@ -50,7 +51,13 @@ class PageElement:
    def formatter_for_name(self, formatter: Formatter | str | _EntitySubstitution): ...
    nextSibling: PageElement | None
    previousSibling: PageElement | None
-    def replace_with(self: Self, replace_with: PageElement | str) -> Self: ...
+    @property
+    def stripped_strings(self) -> Iterator[str]: ...
+    def get_text(self, separator: str = ..., strip: bool = ..., types: Tuple[Type[NavigableString], ...] = ...) -> str: ...
+    getText = get_text
+    @property
+    def text(self) -> str: ...
+    def replace_with(self: Self, *args: PageElement | str) -> Self: ...
    replaceWith = replace_with
    def unwrap(self: Self) -> Self: ...
    replace_with_children = unwrap
@@ -178,11 +185,11 @@ class NavigableString(str, PageElement):
    def __new__(cls: Type[Self], value: str | bytes) -> Self: ...
    def __copy__(self: Self) -> Self: ...
    def __getnewargs__(self) -> tuple[str]: ...
-    @property
-    def string(self) -> str: ...
    def output_ready(self, formatter: Formatter | str | None = ...) -> str: ...
    @property
    def name(self) -> None: ...
+    @property
+    def strings(self) -> Iterable[str]: ...

 class PreformattedString(NavigableString):
    PREFIX: str
@@ -249,6 +256,7 @@ class Tag(PageElement):
        can_be_empty_element: bool | None = ...,
        cdata_list_attributes: list[str] | None = ...,
        preserve_whitespace_tags: list[str] | None = ...,
+        interesting_string_types: Type[NavigableString] | Tuple[Type[NavigableString], ...] | None = ...,
    ) -> None: ...
    parserClass: Type[BeautifulSoup] | None
    def __copy__(self: Self) -> Self: ...
@@ -259,14 +267,9 @@ class Tag(PageElement):
    def string(self) -> str | None: ...
    @string.setter
    def string(self, string: str) -> None: ...
+    DEFAULT_INTERESTING_STRING_TYPES: Tuple[Type[NavigableString], ...]
    @property
    def strings(self) -> Iterable[str]: ...
-    @property
-    def stripped_strings(self) -> Iterable[str]: ...
-    def get_text(self, separator: str = ..., strip: bool = ..., types: Tuple[Type[NavigableString], ...] = ...) -> str: ...
-    getText = get_text
-    @property
-    def text(self) -> str: ...
    def decompose(self) -> None: ...
    def clear(self, decompose: bool = ...) -> None: ...
    def smooth(self) -> None: ...
--- a/stubs/beautifulsoup4/bs4/formatter.pyi
+++ b/stubs/beautifulsoup4/bs4/formatter.pyi
@@ -12,12 +12,14 @@ class Formatter(EntitySubstitution):
    entity_substitution: _EntitySubstitution
    void_element_close_prefix: str
    cdata_containing_tags: list[str]
+    empty_attributes_are_booleans: bool
    def __init__(
        self,
        language: str | None = ...,
        entity_substitution: _EntitySubstitution | None = ...,
        void_element_close_prefix: str = ...,
        cdata_containing_tags: list[str] | None = ...,
+        empty_attributes_are_booleans: bool = ...,
    ) -> None: ...
    def substitute(self, ns: str) -> str: ...
    def attribute_value(self, value: str) -> str: ...
				`@@ -1 +0,0 @@`
				`bs4.element.NavigableString.string # created through __getattr__`