From 7f320c6b9e148fb0ffc8e4e98ff207bb2fb378b7 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 11 Jul 2021 07:02:51 -0700 Subject: [PATCH] Stubs for beautifulsoup4 (#5758) Stubgen and manual fixes, don't bother with Python 2 --- pyrightconfig.stricter.json | 3 +- .../@tests/requirements-stubtest.txt | 2 + .../@tests/stubtest_allowlist.txt | 1 + stubs/beautifulsoup4/METADATA.toml | 1 + stubs/beautifulsoup4/bs4/__init__.pyi | 67 ++++ stubs/beautifulsoup4/bs4/builder/__init__.pyi | 66 ++++ .../beautifulsoup4/bs4/builder/_html5lib.pyi | 67 ++++ .../bs4/builder/_htmlparser.pyi | 41 ++ stubs/beautifulsoup4/bs4/builder/_lxml.pyi | 49 +++ stubs/beautifulsoup4/bs4/dammit.pyi | 68 ++++ stubs/beautifulsoup4/bs4/diagnose.pyi | 22 ++ stubs/beautifulsoup4/bs4/element.pyi | 350 ++++++++++++++++++ stubs/beautifulsoup4/bs4/formatter.pyi | 42 +++ 13 files changed, 778 insertions(+), 1 deletion(-) create mode 100644 stubs/beautifulsoup4/@tests/requirements-stubtest.txt create mode 100644 stubs/beautifulsoup4/@tests/stubtest_allowlist.txt create mode 100644 stubs/beautifulsoup4/METADATA.toml create mode 100644 stubs/beautifulsoup4/bs4/__init__.pyi create mode 100644 stubs/beautifulsoup4/bs4/builder/__init__.pyi create mode 100644 stubs/beautifulsoup4/bs4/builder/_html5lib.pyi create mode 100644 stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi create mode 100644 stubs/beautifulsoup4/bs4/builder/_lxml.pyi create mode 100644 stubs/beautifulsoup4/bs4/dammit.pyi create mode 100644 stubs/beautifulsoup4/bs4/diagnose.pyi create mode 100644 stubs/beautifulsoup4/bs4/element.pyi create mode 100644 stubs/beautifulsoup4/bs4/formatter.pyi diff --git a/pyrightconfig.stricter.json b/pyrightconfig.stricter.json index 3950e38cb..ff204d696 100644 --- a/pyrightconfig.stricter.json +++ b/pyrightconfig.stricter.json @@ -49,7 +49,8 @@ "stubs/simplejson", "stubs/vobject", "stubs/waitress", - "stubs/Werkzeug" + "stubs/Werkzeug", + "stubs/beautifulsoup4", ], "typeCheckingMode": "basic", "strictListInference": true, diff --git a/stubs/beautifulsoup4/@tests/requirements-stubtest.txt b/stubs/beautifulsoup4/@tests/requirements-stubtest.txt new file mode 100644 index 000000000..5ed6cb710 --- /dev/null +++ b/stubs/beautifulsoup4/@tests/requirements-stubtest.txt @@ -0,0 +1,2 @@ +html5lib +lxml diff --git a/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt b/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt new file mode 100644 index 000000000..59bba66fd --- /dev/null +++ b/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt @@ -0,0 +1 @@ +bs4.element.NavigableString.string # created through __getattr__ diff --git a/stubs/beautifulsoup4/METADATA.toml b/stubs/beautifulsoup4/METADATA.toml new file mode 100644 index 000000000..ff50f6cbe --- /dev/null +++ b/stubs/beautifulsoup4/METADATA.toml @@ -0,0 +1 @@ +version = "4.9" diff --git a/stubs/beautifulsoup4/bs4/__init__.pyi b/stubs/beautifulsoup4/bs4/__init__.pyi new file mode 100644 index 000000000..d920674a9 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/__init__.pyi @@ -0,0 +1,67 @@ +from _typeshed import Self, SupportsRead +from typing import Any, List, Sequence, Type + +from .builder import TreeBuilder +from .element import PageElement, SoupStrainer, Tag +from .formatter import Formatter + +class GuessedAtParserWarning(UserWarning): ... +class MarkupResemblesLocatorWarning(UserWarning): ... + +class BeautifulSoup(Tag): + ROOT_TAG_NAME: str + DEFAULT_BUILDER_FEATURES: List[str] + ASCII_SPACES: str + NO_PARSER_SPECIFIED_WARNING: str + element_classes: Any + builder: TreeBuilder + is_xml: bool + known_xml: bool + parse_only: SoupStrainer | None + markup: str + def __init__( + self, + markup: str | bytes | SupportsRead[str] | SupportsRead[bytes] = ..., + features: str | Sequence[str] | None = ..., + builder: TreeBuilder | Type[TreeBuilder] | None = ..., + parse_only: SoupStrainer | None = ..., + from_encoding: str | None = ..., + exclude_encodings: Sequence[str] | None = ..., + element_classes: dict[Type[PageElement], Type[Any]] | None = ..., + **kwargs, + ) -> None: ... + def __copy__(self: Self) -> Self: ... + hidden: bool + current_data: Any + currentTag: Any + tagStack: Any + open_tag_counter: Any + preserve_whitespace_tag_stack: Any + string_container_stack: Any + def reset(self) -> None: ... + def new_tag( + self, + name, + namespace: Any | None = ..., + nsprefix: Any | None = ..., + attrs=..., + sourceline: Any | None = ..., + sourcepos: Any | None = ..., + **kwattrs, + ) -> Tag: ... + def string_container(self, base_class: Any | None = ...): ... + def new_string(self, s, subclass: Any | None = ...): ... + def insert_before(self, *args) -> None: ... + def insert_after(self, *args) -> None: ... + def popTag(self): ... + def pushTag(self, tag) -> None: ... + def endData(self, containerClass: Any | None = ...) -> None: ... + def object_was_parsed(self, o, parent: Any | None = ..., most_recent_element: Any | None = ...) -> None: ... + def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline: Any | None = ..., sourcepos: Any | None = ...): ... + def handle_endtag(self, name, nsprefix: Any | None = ...) -> None: ... + def handle_data(self, data) -> None: ... + def decode(self, pretty_print: bool = ..., eventual_encoding: str = ..., formatter: str | Formatter = ...): ... # type: ignore # missing some arguments + +class BeautifulStoneSoup(BeautifulSoup): ... +class StopParsing(Exception): ... +class FeatureNotFound(ValueError): ... diff --git a/stubs/beautifulsoup4/bs4/builder/__init__.pyi b/stubs/beautifulsoup4/bs4/builder/__init__.pyi new file mode 100644 index 000000000..daa6a9722 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/builder/__init__.pyi @@ -0,0 +1,66 @@ +from typing import Any + +class TreeBuilderRegistry: + builders_for_feature: Any + builders: Any + def __init__(self) -> None: ... + def register(self, treebuilder_class) -> None: ... + def lookup(self, *features): ... + +class TreeBuilder: + NAME: str + ALTERNATE_NAMES: Any + features: Any + is_xml: bool + picklable: bool + empty_element_tags: Any + DEFAULT_CDATA_LIST_ATTRIBUTES: Any + DEFAULT_PRESERVE_WHITESPACE_TAGS: Any + DEFAULT_STRING_CONTAINERS: Any + USE_DEFAULT: Any + TRACKS_LINE_NUMBERS: bool + soup: Any + cdata_list_attributes: Any + preserve_whitespace_tags: Any + store_line_numbers: Any + string_containers: Any + def __init__( + self, multi_valued_attributes=..., preserve_whitespace_tags=..., store_line_numbers=..., string_containers=... + ) -> None: ... + def initialize_soup(self, soup) -> None: ... + def reset(self) -> None: ... + def can_be_empty_element(self, tag_name): ... + def feed(self, markup) -> None: ... + def prepare_markup( + self, + markup, + user_specified_encoding: Any | None = ..., + document_declared_encoding: Any | None = ..., + exclude_encodings: Any | None = ..., + ) -> None: ... + def test_fragment_to_document(self, fragment): ... + def set_up_substitutions(self, tag): ... + +class SAXTreeBuilder(TreeBuilder): + def feed(self, markup) -> None: ... + def close(self) -> None: ... + def startElement(self, name, attrs) -> None: ... + def endElement(self, name) -> None: ... + def startElementNS(self, nsTuple, nodeName, attrs) -> None: ... + def endElementNS(self, nsTuple, nodeName) -> None: ... + def startPrefixMapping(self, prefix, nodeValue) -> None: ... + def endPrefixMapping(self, prefix) -> None: ... + def characters(self, content) -> None: ... + def startDocument(self) -> None: ... + def endDocument(self) -> None: ... + +class HTMLTreeBuilder(TreeBuilder): + empty_element_tags: Any + block_elements: Any + DEFAULT_STRING_CONTAINERS: Any + DEFAULT_CDATA_LIST_ATTRIBUTES: Any + DEFAULT_PRESERVE_WHITESPACE_TAGS: Any + def set_up_substitutions(self, tag): ... + +class ParserRejectedMarkup(Exception): + def __init__(self, message_or_exception) -> None: ... diff --git a/stubs/beautifulsoup4/bs4/builder/_html5lib.pyi b/stubs/beautifulsoup4/bs4/builder/_html5lib.pyi new file mode 100644 index 000000000..990d45060 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/builder/_html5lib.pyi @@ -0,0 +1,67 @@ +from typing import Any + +from bs4.builder import HTMLTreeBuilder + +class HTML5TreeBuilder(HTMLTreeBuilder): + NAME: str + features: Any + TRACKS_LINE_NUMBERS: bool + user_specified_encoding: Any + def prepare_markup( # type: ignore # user_specified_encoding doesn't have a default + self, markup, user_specified_encoding, document_declared_encoding: Any | None = ..., exclude_encodings: Any | None = ... + ) -> None: ... + def feed(self, markup) -> None: ... + underlying_builder: Any + def create_treebuilder(self, namespaceHTMLElements): ... + def test_fragment_to_document(self, fragment): ... + +class TreeBuilderForHtml5lib(Any): # type: ignore # html5lib.treebuilders.base.TreeBuilder + soup: Any + parser: Any + store_line_numbers: Any + def __init__(self, namespaceHTMLElements, soup: Any | None = ..., store_line_numbers: bool = ..., **kwargs) -> None: ... + def documentClass(self): ... + def insertDoctype(self, token) -> None: ... + def elementClass(self, name, namespace): ... + def commentClass(self, data): ... + def fragmentClass(self): ... + def appendChild(self, node) -> None: ... + def getDocument(self): ... + def getFragment(self): ... + def testSerializer(self, element): ... + +class AttrList: + element: Any + attrs: Any + def __init__(self, element) -> None: ... + def __iter__(self): ... + def __setitem__(self, name, value) -> None: ... + def items(self): ... + def keys(self): ... + def __len__(self): ... + def __getitem__(self, name): ... + def __contains__(self, name): ... + +class Element(Any): # type: ignore # html5lib.treebuilders.base.Node + element: Any + soup: Any + namespace: Any + def __init__(self, element, soup, namespace) -> None: ... + def appendChild(self, node) -> None: ... + def getAttributes(self): ... + def setAttributes(self, attributes) -> None: ... + attributes: Any + def insertText(self, data, insertBefore: Any | None = ...) -> None: ... + def insertBefore(self, node, refNode) -> None: ... + def removeChild(self, node) -> None: ... + def reparentChildren(self, new_parent) -> None: ... + def cloneNode(self): ... + def hasContent(self): ... + def getNameTuple(self): ... + nameTuple: Any + +class TextNode(Element): + element: Any + soup: Any + def __init__(self, element, soup) -> None: ... + def cloneNode(self) -> None: ... diff --git a/stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi b/stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi new file mode 100644 index 000000000..c990e7890 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi @@ -0,0 +1,41 @@ +from html.parser import HTMLParser +from typing import Any + +from bs4.builder import HTMLTreeBuilder + +class HTMLParseError(Exception): ... + +class BeautifulSoupHTMLParser(HTMLParser): + IGNORE: str + REPLACE: str + on_duplicate_attribute: Any + already_closed_empty_element: Any + def __init__(self, *args, **kwargs) -> None: ... + def error(self, msg) -> None: ... + def handle_startendtag(self, name, attrs) -> None: ... + def handle_starttag(self, name, attrs, handle_empty_element: bool = ...) -> None: ... + def handle_endtag(self, name, check_already_closed: bool = ...) -> None: ... + def handle_data(self, data) -> None: ... + def handle_charref(self, name) -> None: ... + def handle_entityref(self, name) -> None: ... + def handle_comment(self, data) -> None: ... + def handle_decl(self, data) -> None: ... + def unknown_decl(self, data) -> None: ... + def handle_pi(self, data) -> None: ... + +class HTMLParserTreeBuilder(HTMLTreeBuilder): + is_xml: bool + picklable: bool + NAME: Any + features: Any + TRACKS_LINE_NUMBERS: bool + parser_args: Any + def __init__(self, parser_args: Any | None = ..., parser_kwargs: Any | None = ..., **kwargs) -> None: ... + def prepare_markup( + self, + markup, + user_specified_encoding: Any | None = ..., + document_declared_encoding: Any | None = ..., + exclude_encodings: Any | None = ..., + ) -> None: ... + def feed(self, markup) -> None: ... diff --git a/stubs/beautifulsoup4/bs4/builder/_lxml.pyi b/stubs/beautifulsoup4/bs4/builder/_lxml.pyi new file mode 100644 index 000000000..492848911 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/builder/_lxml.pyi @@ -0,0 +1,49 @@ +from typing import Any + +from bs4.builder import HTMLTreeBuilder, TreeBuilder + +class LXMLTreeBuilderForXML(TreeBuilder): + DEFAULT_PARSER_CLASS: Any + is_xml: bool + processing_instruction_class: Any + NAME: str + ALTERNATE_NAMES: Any + features: Any + CHUNK_SIZE: int + DEFAULT_NSMAPS: Any + DEFAULT_NSMAPS_INVERTED: Any + def initialize_soup(self, soup) -> None: ... + def default_parser(self, encoding): ... + def parser_for(self, encoding): ... + empty_element_tags: Any + soup: Any + nsmaps: Any + def __init__(self, parser: Any | None = ..., empty_element_tags: Any | None = ..., **kwargs) -> None: ... + def prepare_markup( # type: ignore # the order of the parameters is different + self, + markup, + user_specified_encoding: Any | None = ..., + exclude_encodings: Any | None = ..., + document_declared_encoding: Any | None = ..., + ) -> None: ... + parser: Any + def feed(self, markup) -> None: ... + def close(self) -> None: ... + def start(self, name, attrs, nsmap=...) -> None: ... + def end(self, name) -> None: ... + def pi(self, target, data) -> None: ... + def data(self, content) -> None: ... + def doctype(self, name, pubid, system) -> None: ... + def comment(self, content) -> None: ... + def test_fragment_to_document(self, fragment): ... + +class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): + NAME: Any + ALTERNATE_NAMES: Any + features: Any + is_xml: bool + processing_instruction_class: Any + def default_parser(self, encoding): ... + parser: Any + def feed(self, markup) -> None: ... + def test_fragment_to_document(self, fragment): ... diff --git a/stubs/beautifulsoup4/bs4/dammit.pyi b/stubs/beautifulsoup4/bs4/dammit.pyi new file mode 100644 index 000000000..eaa7bc47d --- /dev/null +++ b/stubs/beautifulsoup4/bs4/dammit.pyi @@ -0,0 +1,68 @@ +from typing import Any + +chardet_type: Any + +def chardet_dammit(s): ... + +xml_encoding: str +html_meta: str +encoding_res: Any + +class EntitySubstitution: + CHARACTER_TO_HTML_ENTITY: Any + HTML_ENTITY_TO_CHARACTER: Any + CHARACTER_TO_HTML_ENTITY_RE: Any + CHARACTER_TO_XML_ENTITY: Any + BARE_AMPERSAND_OR_BRACKET: Any + AMPERSAND_OR_BRACKET: Any + @classmethod + def quoted_attribute_value(cls, value): ... + @classmethod + def substitute_xml(cls, value, make_quoted_attribute: bool = ...): ... + @classmethod + def substitute_xml_containing_entities(cls, value, make_quoted_attribute: bool = ...): ... + @classmethod + def substitute_html(cls, s): ... + +class EncodingDetector: + override_encodings: Any + exclude_encodings: Any + chardet_encoding: Any + is_html: Any + declared_encoding: Any + def __init__( + self, markup, override_encodings: Any | None = ..., is_html: bool = ..., exclude_encodings: Any | None = ... + ) -> None: ... + @property + def encodings(self) -> None: ... + @classmethod + def strip_byte_order_mark(cls, data): ... + @classmethod + def find_declared_encoding(cls, markup, is_html: bool = ..., search_entire_document: bool = ...): ... + +class UnicodeDammit: + CHARSET_ALIASES: Any + ENCODINGS_WITH_SMART_QUOTES: Any + smart_quotes_to: Any + tried_encodings: Any + contains_replacement_characters: bool + is_html: Any + log: Any + detector: Any + markup: Any + unicode_markup: Any + original_encoding: Any + def __init__( + self, markup, override_encodings=..., smart_quotes_to: Any | None = ..., is_html: bool = ..., exclude_encodings=... + ) -> None: ... + @property + def declared_html_encoding(self): ... + def find_codec(self, charset): ... + MS_CHARS: Any + MS_CHARS_TO_ASCII: Any + WINDOWS_1252_TO_UTF8: Any + MULTIBYTE_MARKERS_AND_SIZES: Any + FIRST_MULTIBYTE_MARKER: Any + LAST_MULTIBYTE_MARKER: Any + @classmethod + def detwingle(cls, in_bytes, main_encoding: str = ..., embedded_encoding: str = ...): ... diff --git a/stubs/beautifulsoup4/bs4/diagnose.pyi b/stubs/beautifulsoup4/bs4/diagnose.pyi new file mode 100644 index 000000000..f498f996d --- /dev/null +++ b/stubs/beautifulsoup4/bs4/diagnose.pyi @@ -0,0 +1,22 @@ +from html.parser import HTMLParser + +def diagnose(data) -> None: ... +def lxml_trace(data, html: bool = ..., **kwargs) -> None: ... + +class AnnouncingParser(HTMLParser): + def handle_starttag(self, name, attrs) -> None: ... + def handle_endtag(self, name) -> None: ... + def handle_data(self, data) -> None: ... + def handle_charref(self, name) -> None: ... + def handle_entityref(self, name) -> None: ... + def handle_comment(self, data) -> None: ... + def handle_decl(self, data) -> None: ... + def unknown_decl(self, data) -> None: ... + def handle_pi(self, data) -> None: ... + +def htmlparser_trace(data) -> None: ... +def rword(length: int = ...): ... +def rsentence(length: int = ...): ... +def rdoc(num_elements: int = ...): ... +def benchmark_parsers(num_elements: int = ...) -> None: ... +def profile(num_elements: int = ..., parser: str = ...) -> None: ... diff --git a/stubs/beautifulsoup4/bs4/element.pyi b/stubs/beautifulsoup4/bs4/element.pyi new file mode 100644 index 000000000..3df3ae3f0 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/element.pyi @@ -0,0 +1,350 @@ +from _typeshed import Self +from typing import Any, Callable, Iterable, List, Mapping, Pattern, Set, Tuple, Type, TypeVar, Union, overload + +from . import BeautifulSoup +from .builder import TreeBuilder +from .formatter import Formatter, _EntitySubstitution + +DEFAULT_OUTPUT_ENCODING: str +PY3K: bool +nonwhitespace_re: Pattern[str] +whitespace_re: Pattern[str] +PYTHON_SPECIFIC_ENCODINGS: Set[str] + +class NamespacedAttribute(str): + def __new__(cls: Type[Self], prefix: str, name: str | None = ..., namespace: str | None = ...) -> Self: ... + +class AttributeValueWithCharsetSubstitution(str): ... + +class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): + def __new__(cls, original_value): ... + def encode(self, encoding: str) -> str: ... # type: ignore # incompatible with str + +class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): + CHARSET_RE: Pattern[str] + def __new__(cls, original_value): ... + def encode(self, encoding: str) -> str: ... # type: ignore # incompatible with str + +_PageElementT = TypeVar("_PageElementT", bound=PageElement) +# The wrapping Union[] can be removed once mypy fully supports | in type aliases. +_SimpleStrainable = Union[str, bool, None, bytes, Pattern[str], Callable[[str], bool]] +_Strainable = Union[_SimpleStrainable, Iterable[_SimpleStrainable]] +_SimpleNormalizedStrainable = Union[str, bool, None, Pattern[str], Callable[[str], bool]] +_NormalizedStrainable = Union[_SimpleNormalizedStrainable, Iterable[_SimpleNormalizedStrainable]] + +class PageElement: + parent: PageElement | None + previous_element: PageElement | None + next_element: PageElement | None + next_sibling: PageElement | None + previous_sibling: PageElement | None + def setup( + self, + parent: PageElement | None = ..., + previous_element: PageElement | None = ..., + next_element: PageElement | None = ..., + previous_sibling: PageElement | None = ..., + next_sibling: PageElement | None = ..., + ) -> None: ... + def format_string(self, s: str, formatter: Formatter | str | None) -> str: ... + def formatter_for_name(self, formatter: Formatter | str | _EntitySubstitution): ... + nextSibling: PageElement | None + previousSibling: PageElement | None + def replace_with(self: Self, replace_with: PageElement) -> Self: ... + def replaceWith(self: Self, replace_with: PageElement) -> Self: ... + def unwrap(self: Self) -> Self: ... + replace_with_children = unwrap + replaceWithChildren = unwrap + def wrap(self, wrap_inside: _PageElementT) -> _PageElementT: ... + def extract(self: Self, _self_index: int | None = ...) -> Self: ... + def insert(self, position: int, new_child: PageElement) -> None: ... + def append(self, tag: PageElement) -> None: ... + def extend(self, tags: Tag | Iterable[PageElement]) -> None: ... + def insert_before(self, *args: PageElement) -> None: ... + def insert_after(self, *args: PageElement) -> None: ... + def find_next( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findNext = find_next + def find_all_next( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + findAllNext = find_all_next + def find_next_sibling( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findNextSibling = find_next_sibling + def find_next_siblings( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + findNextSiblings = find_next_siblings + fetchNextSiblings = find_next_siblings + def find_previous( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findPrevious = find_previous + def find_all_previous( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + findAllPrevious = find_all_previous + fetchPrevious = find_all_previous + def find_previous_sibling( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findPreviousSibling = find_previous_sibling + def find_previous_siblings( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + findPreviousSiblings = find_previous_siblings + fetchPreviousSiblings = find_previous_siblings + def find_parent( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findParent = find_parent + def find_parents( + self, + name: _Strainable | SoupStrainer | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + findParents = find_parents + fetchParents = find_parents + @property + def next(self) -> Tag | NavigableString | None: ... + @property + def previous(self) -> Tag | NavigableString | None: ... + @property + def next_elements(self) -> Iterable[PageElement]: ... + @property + def next_siblings(self) -> Iterable[PageElement]: ... + @property + def previous_elements(self) -> Iterable[PageElement]: ... + @property + def previous_siblings(self) -> Iterable[PageElement]: ... + @property + def parents(self) -> Iterable[PageElement]: ... + @property + def decomposed(self) -> bool: ... + def nextGenerator(self) -> Iterable[PageElement]: ... + def nextSiblingGenerator(self) -> Iterable[PageElement]: ... + def previousGenerator(self) -> Iterable[PageElement]: ... + def previousSiblingGenerator(self) -> Iterable[PageElement]: ... + def parentGenerator(self) -> Iterable[PageElement]: ... + +class NavigableString(str, PageElement): + PREFIX: str + SUFFIX: str + known_xml: bool | None + def __new__(cls: Type[Self], value: str | bytes) -> Self: ... + def __copy__(self: Self) -> Self: ... + def __getnewargs__(self) -> tuple[str]: ... + @property + def string(self) -> str: ... + def output_ready(self, formatter: Formatter | str | None = ...) -> str: ... + @property + def name(self) -> None: ... + +class PreformattedString(NavigableString): + PREFIX: str + SUFFIX: str + def output_ready(self, formatter: Formatter | str | None = ...) -> str: ... + +class CData(PreformattedString): + PREFIX: str + SUFFIX: str + +class ProcessingInstruction(PreformattedString): + PREFIX: str + SUFFIX: str + +class XMLProcessingInstruction(ProcessingInstruction): + PREFIX: str + SUFFIX: str + +class Comment(PreformattedString): + PREFIX: str + SUFFIX: str + +class Declaration(PreformattedString): + PREFIX: str + SUFFIX: str + +class Doctype(PreformattedString): + @classmethod + def for_name_and_ids(cls, name: str | None, pub_id: str, system_id: str) -> Doctype: ... + PREFIX: str + SUFFIX: str + +class Stylesheet(NavigableString): ... +class Script(NavigableString): ... +class TemplateString(NavigableString): ... + +class Tag(PageElement): + parser_class: Type[BeautifulSoup] | None + name: str + namespace: str | None + prefix: str | None + sourceline: int | None + sourcepos: int | None + known_xml: bool | None + attrs: Mapping[str, Any] + contents: Any + hidden: bool + can_be_empty_element: bool | None + cdata_list_attributes: list[str] | None + preserve_whitespace_tags: list[str] | None + def __init__( + self, + parser: BeautifulSoup | None = ..., + builder: TreeBuilder | None = ..., + name: str | None = ..., + namespace: str | None = ..., + prefix: str | None = ..., + attrs: Mapping[str, Any] | None = ..., + parent: PageElement | None = ..., + previous: PageElement | None = ..., + is_xml: bool | None = ..., + sourceline: int | None = ..., + sourcepos: int | None = ..., + can_be_empty_element: bool | None = ..., + cdata_list_attributes: list[str] | None = ..., + preserve_whitespace_tags: list[str] | None = ..., + ) -> None: ... + parserClass: Type[BeautifulSoup] | None + def __copy__(self: Self) -> Self: ... + @property + def is_empty_element(self) -> bool: ... + isSelfClosing = is_empty_element + @property + def string(self) -> str | None: ... + @string.setter + def string(self, string: str) -> None: ... + @property + def strings(self) -> Iterable[str]: ... + @property + def stripped_strings(self) -> Iterable[str]: ... + def get_text(self, separator: str = ..., strip: bool = ..., types: Tuple[Type[NavigableString], ...] = ...) -> str: ... + getText = get_text + @property + def text(self) -> str: ... + def decompose(self) -> None: ... + def clear(self, decompose: bool = ...) -> None: ... + def smooth(self) -> None: ... + def index(self, element: PageElement) -> int: ... + def get(self, key: str, default: str | list[str] | None = ...) -> str | list[str] | None: ... + def get_attribute_list(self, key: str, default: str | list[str] | None = ...) -> list[str]: ... + def has_attr(self, key: str) -> bool: ... + def __hash__(self) -> int: ... + def __getitem__(self, key: str) -> str | list[str]: ... + def __iter__(self) -> Iterable[PageElement]: ... + def __len__(self) -> int: ... + def __contains__(self, x: object) -> bool: ... + def __bool__(self) -> bool: ... + def __setitem__(self, key: str, value: str | list[str]) -> None: ... + def __delitem__(self, key: str) -> None: ... + def __getattr__(self, tag: str) -> Tag | NavigableString | None: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __unicode__(self) -> str: ... + def encode( + self, encoding: str = ..., indent_level: int | None = ..., formatter: str | Formatter = ..., errors: str = ... + ) -> bytes: ... + def decode(self, indent_level: int | None = ..., eventual_encoding: str = ..., formatter: str | Formatter = ...) -> str: ... + @overload + def prettify(self, encoding: str, formatter: str | Formatter = ...) -> bytes: ... + @overload + def prettify(self, encoding: None = ..., formatter: str | Formatter = ...) -> str: ... + def decode_contents( + self, indent_level: int | None = ..., eventual_encoding: str = ..., formatter: str | Formatter = ... + ) -> str: ... + def encode_contents(self, indent_level: int | None = ..., encoding: str = ..., formatter: str | Formatter = ...) -> bytes: ... + def renderContents(self, encoding: str = ..., prettyPrint: bool = ..., indentLevel: int = ...) -> bytes: ... + def find( + self, + name: _Strainable | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + recursive: bool = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> Tag | NavigableString | None: ... + findChild = find + def find_all( + self, + name: _Strainable | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + recursive: bool = ..., + text: _Strainable | None = ..., + limit: int | None = ..., + **kwargs: _Strainable, + ) -> ResultSet: ... + __call__ = find_all + findAll = find_all + findChildren = find_all + @property + def children(self) -> Iterable[PageElement]: ... + @property + def descendants(self) -> Iterable[PageElement]: ... + def select_one(self, selector: str, namespaces: Any | None = ..., **kwargs) -> Tag | None: ... + def select(self, selector: str, namespaces: Any | None = ..., limit: int | None = ..., **kwargs) -> ResultSet: ... + def childGenerator(self) -> Iterable[PageElement]: ... + def recursiveChildGenerator(self) -> Iterable[PageElement]: ... + def has_key(self, key: str) -> bool: ... + +class SoupStrainer: + name: _NormalizedStrainable + attrs: dict[str, _NormalizedStrainable] + text: _NormalizedStrainable + def __init__( + self, + name: _Strainable | None = ..., + attrs: dict[str, _Strainable] | _Strainable = ..., + text: _Strainable | None = ..., + **kwargs: _Strainable, + ) -> None: ... + def search_tag(self, markup_name: Tag | str | None = ..., markup_attrs=...): ... + searchTag = search_tag + def search(self, markup: PageElement | Iterable[PageElement]): ... + +class ResultSet(List[PageElement]): + source: SoupStrainer + def __init__(self, source: SoupStrainer, result: Iterable[PageElement] = ...) -> None: ... diff --git a/stubs/beautifulsoup4/bs4/formatter.pyi b/stubs/beautifulsoup4/bs4/formatter.pyi new file mode 100644 index 000000000..e297f88a9 --- /dev/null +++ b/stubs/beautifulsoup4/bs4/formatter.pyi @@ -0,0 +1,42 @@ +from typing import Callable + +from .dammit import EntitySubstitution as EntitySubstitution + +_EntitySubstitution = Callable[[str], str] + +class Formatter(EntitySubstitution): + HTML: str + XML: str + HTML_DEFAULTS: dict[str, set[str]] + language: str | None + entity_substitution: _EntitySubstitution + void_element_close_prefix: str + cdata_containing_tags: list[str] + def __init__( + self, + language: str | None = ..., + entity_substitution: _EntitySubstitution | None = ..., + void_element_close_prefix: str = ..., + cdata_containing_tags: list[str] | None = ..., + ) -> None: ... + def substitute(self, ns: str) -> str: ... + def attribute_value(self, value: str) -> str: ... + def attributes(self, tag): ... + +class HTMLFormatter(Formatter): + REGISTRY: dict[str, HTMLFormatter] + def __init__( + self, + entity_substitution: _EntitySubstitution | None = ..., + void_element_close_prefix: str = ..., + cdata_containing_tags: list[str] | None = ..., + ) -> None: ... + +class XMLFormatter(Formatter): + REGISTRY: dict[str, XMLFormatter] + def __init__( + self, + entity_substitution: _EntitySubstitution | None = ..., + void_element_close_prefix: str = ..., + cdata_containing_tags: list[str] | None = ..., + ) -> None: ...