Complete stubs for bleach (#9314)

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Avasam <samuel.06@hotmail.com>
This commit is contained in:
Nikita Sobolev
2024-02-21 23:03:24 +03:00
committed by GitHub
parent 78b7dc6167
commit a2095002e4
9 changed files with 128 additions and 66 deletions

View File

@@ -27,7 +27,6 @@
"stubs/antlr4-python3-runtime",
"stubs/aws-xray-sdk",
"stubs/beautifulsoup4",
"stubs/bleach",
"stubs/boltons",
"stubs/boto",
"stubs/braintree",

View File

@@ -1,2 +1,5 @@
bleach.css_sanitizer # Requires tinycss2 to be installed
bleach.html5lib_shim.*
# Internal private stuff:
bleach._vendor.*
# Is a property returning a method, simplified:
bleach.html5lib_shim.InputStreamWithMemory.changeEncoding

View File

@@ -1,6 +1,6 @@
version = "6.1.*"
requires = ["types-html5lib"]
upstream_repository = "https://github.com/mozilla/bleach"
partial_stub = true
[tool.stubtest]
ignore_missing_stub = true
extras = ["css"]

View File

@@ -1,7 +1,8 @@
from collections.abc import Container
from typing import Final
ALLOWED_CSS_PROPERTIES: frozenset[str]
ALLOWED_SVG_PROPERTIES: frozenset[str]
ALLOWED_CSS_PROPERTIES: Final[frozenset[str]]
ALLOWED_SVG_PROPERTIES: Final[frozenset[str]]
class CSSSanitizer:
allowed_css_properties: Container[str]

View File

@@ -1,30 +1,70 @@
from _typeshed import Incomplete
import re
from codecs import CodecInfo
from collections.abc import Generator, Iterable, Iterator
from typing import Any, Final, Protocol
class HTMLParser: # actually html5lib.HTMLParser
def __getattr__(self, __name: str) -> Incomplete: ...
# We don't re-export any `html5lib` types / values here, because they are not
# really public and may change at any time. This is just a helper module,
# import things directly from `html5lib` instead!
from html5lib import HTMLParser
from html5lib._inputstream import HTMLBinaryInputStream, HTMLUnicodeInputStream
from html5lib._tokenizer import HTMLTokenizer
from html5lib._trie import Trie
from html5lib.serializer import HTMLSerializer
from html5lib.treewalkers.base import TreeWalker
class Filter:  # actually html5lib.filters.base.Filter
    # Minimal local stand-in stub for html5lib's token-stream filter base.
    # Holds a token source, iterates over it, and forwards unknown attribute
    # lookups to that source.
    source: Incomplete  # the wrapped token stream; untyped at stub level
    def __init__(self, source) -> None: ...
    def __iter__(self) -> Iterator[Incomplete]: ...
    def __getattr__(self, name: str) -> Incomplete: ...  # copy attributes from source
# Is actually webencodings.Encoding; declared structurally here so these
# stubs do not take a hard dependency on the webencodings package.
class _Encoding(Protocol):
    name: str  # encoding label (e.g. "utf-8") -- presumably; confirm against webencodings
    codec_info: CodecInfo  # the stdlib codec backing this encoding
    def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
class SanitizerFilter: # actually html5lib.filters.sanitizer.Filter
def __getattr__(self, __name: str) -> Incomplete: ...
HTML_TAGS: Final[frozenset[str]]
HTML_TAGS_BLOCK_LEVEL: Final[frozenset[str]]
AMP_SPLIT_RE: Final[re.Pattern[str]]
ENTITIES: Final[dict[str, str]]
ENTITIES_TRIE: Final[Trie]
TAG_TOKEN_TYPES: Final[set[int]]
TAG_TOKEN_TYPE_CHARACTERS: Final[int]
TAG_TOKEN_TYPE_END: Final[int]
TAG_TOKEN_TYPE_PARSEERROR: Final[int]
TAG_TOKEN_TYPE_START: Final[int]
class HTMLSerializer: # actually html5lib.serializer.HTMLSerializer
def __getattr__(self, __name: str) -> Incomplete: ...
class InputStreamWithMemory:
    # Wrapper around an html5lib HTMLUnicodeInputStream (see __init__) that
    # additionally tracks consumed characters; the get_tag/start_tag pair
    # suggests it remembers the text of the tag currently being tokenized --
    # NOTE(review): behavior inferred from method names, confirm in bleach source.
    #
    # position/reset are delegated unchanged to the underlying stream class:
    position = HTMLUnicodeInputStream.position
    reset = HTMLUnicodeInputStream.reset
    def __init__(self, inner_stream: HTMLUnicodeInputStream) -> None: ...
    @property
    def errors(self) -> list[str]: ...
    @property
    def charEncoding(self) -> tuple[_Encoding, str]: ...
    # If inner_stream wasn't a HTMLBinaryInputStream, this will error at runtime
    # Is a property returning a method, simplified:
    changeEncoding = HTMLBinaryInputStream.changeEncoding
    def char(self) -> str: ...
    def charsUntil(self, characters: Iterable[str], opposite: bool = False) -> str: ...
    def unget(self, char: str | None) -> None: ...
    def get_tag(self) -> str: ...
    def start_tag(self) -> None: ...
class BleachHTMLTokenizer(HTMLTokenizer):
    # bleach's html5lib tokenizer subclass.
    consume_entities: bool  # mirrors the constructor flag of the same name
    # stream is narrowed from the base class's input stream to bleach's wrapper:
    stream: InputStreamWithMemory
    emitted_last_token: dict[str, Any] | None  # last token emitted, or None
    def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...
class BleachHTMLParser(HTMLParser):
tags: list[str] | None
strip: bool
consume_entities: bool
def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs) -> None: ...
def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs: Any) -> None: ...
class BleachHTMLSerializer(HTMLSerializer):
escape_rcdata: bool
def escape_base_amp(self, stoken: str) -> Generator[str, None, None]: ...
def serialize(self, treewalker, encoding: str | None = None) -> Generator[str, None, None]: ...
def serialize(self, treewalker: TreeWalker, encoding: str | None = None) -> Generator[str, None, None]: ... # type: ignore[override]
def __getattr__(__name: str) -> Incomplete: ...
# Module-level helpers; names indicate HTML character-entity conversion and
# matching -- NOTE(review): semantics inferred from names, confirm at runtime.
def convert_entity(value: str) -> str | None: ...  # None presumably means "not a known entity"
def convert_entities(text: str) -> str: ...
def match_entity(stream: str) -> str | None: ...
def next_possible_entity(text: str) -> Iterator[str]: ...

View File

@@ -1,22 +1,25 @@
from _typeshed import Incomplete
from collections.abc import Container, Iterable, Iterator
from collections.abc import Container, Iterable, Iterator, Sequence
from re import Pattern
from typing import Any, Final
from typing_extensions import TypeAlias
from .callbacks import _Callback
from .html5lib_shim import Filter
from html5lib.filters.base import Filter
from html5lib.treewalkers.base import TreeWalker
DEFAULT_CALLBACKS: list[_Callback]
from .callbacks import _Callback, _HTMLAttrs
TLDS: list[str]
DEFAULT_CALLBACKS: Final[list[_Callback]]
TLDS: Final[list[str]]
def build_url_re(tlds: Iterable[str] = ..., protocols: Iterable[str] = ...) -> Pattern[str]: ...
URL_RE: Pattern[str]
PROTO_RE: Pattern[str]
URL_RE: Final[Pattern[str]]
PROTO_RE: Final[Pattern[str]]
def build_email_re(tlds: Iterable[str] = ...) -> Pattern[str]: ...
EMAIL_RE: Pattern[str]
EMAIL_RE: Final[Pattern[str]]
class Linker:
def __init__(
@@ -30,6 +33,10 @@ class Linker:
) -> None: ...
def linkify(self, text: str) -> str: ...
# TODO: `_Token` might be converted into `TypedDict`
# or `html5lib` token might be reused
_Token: TypeAlias = dict[str, Any]
class LinkifyFilter(Filter):
callbacks: Iterable[_Callback]
skip_tags: Container[str]
@@ -38,18 +45,18 @@ class LinkifyFilter(Filter):
email_re: Pattern[str]
def __init__(
self,
source,
source: TreeWalker,
callbacks: Iterable[_Callback] | None = ...,
skip_tags: Container[str] | None = None,
parse_email: bool = False,
url_re: Pattern[str] = ...,
email_re: Pattern[str] = ...,
) -> None: ...
def apply_callbacks(self, attrs, is_new): ...
def extract_character_data(self, token_list): ...
def handle_email_addresses(self, src_iter): ...
def strip_non_url_bits(self, fragment): ...
def handle_links(self, src_iter): ...
def handle_a_tag(self, token_buffer): ...
def extract_entities(self, token): ...
def apply_callbacks(self, attrs: _HTMLAttrs, is_new: bool) -> _HTMLAttrs | None: ...
def extract_character_data(self, token_list: Iterable[_Token]) -> str: ...
def handle_email_addresses(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
def strip_non_url_bits(self, fragment: str) -> tuple[str, str, str]: ...
def handle_links(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
def handle_a_tag(self, token_buffer: Sequence[_Token]) -> Iterator[_Token]: ...
def extract_entities(self, token: _Token) -> Iterator[_Token]: ...
def __iter__(self) -> Iterator[Incomplete]: ...

View File

@@ -0,0 +1 @@
from urllib import parse as parse

View File

@@ -1,20 +1,27 @@
from _typeshed import Incomplete
from collections.abc import Callable, Iterable
from collections.abc import Callable, Container, Iterable, Iterator
from re import Pattern
from typing import Protocol
from typing import Final, Protocol
from typing_extensions import TypeAlias
from html5lib.filters.base import Filter
from html5lib.filters.sanitizer import Filter as SanitizerFilter
from html5lib.treewalkers.base import TreeWalker
from . import _HTMLAttrKey
from .css_sanitizer import CSSSanitizer
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer, SanitizerFilter
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer
from .linkifier import _Token
ALLOWED_TAGS: frozenset[str]
ALLOWED_ATTRIBUTES: dict[str, list[str]]
ALLOWED_PROTOCOLS: frozenset[str]
ALLOWED_TAGS: Final[frozenset[str]]
ALLOWED_ATTRIBUTES: Final[dict[str, list[str]]]
ALLOWED_PROTOCOLS: Final[frozenset[str]]
INVISIBLE_CHARACTERS: str
INVISIBLE_CHARACTERS_RE: Pattern[str]
INVISIBLE_REPLACEMENT_CHAR: str
INVISIBLE_CHARACTERS: Final[str]
INVISIBLE_CHARACTERS_RE: Final[Pattern[str]]
INVISIBLE_REPLACEMENT_CHAR: Final = "?"
class NoCssSanitizerWarning(UserWarning): ...
# A html5lib Filter class
class _Filter(Protocol):
@@ -24,18 +31,16 @@ _AttributeFilter: TypeAlias = Callable[[str, str, str], bool]
_AttributeDict: TypeAlias = dict[str, list[str] | _AttributeFilter] | dict[str, list[str]] | dict[str, _AttributeFilter]
_Attributes: TypeAlias = _AttributeFilter | _AttributeDict | list[str]
_TreeWalker: TypeAlias = Callable[[Incomplete], Incomplete]
class Cleaner:
tags: Iterable[str]
attributes: _Attributes
protocols: Iterable[str]
strip: bool
strip_comments: bool
filters: Iterable[_Filter]
filters: Iterable[Filter]
css_sanitizer: CSSSanitizer | None
parser: BleachHTMLParser
walker: _TreeWalker
walker: TreeWalker
serializer: BleachHTMLSerializer
def __init__(
self,
@@ -63,7 +68,7 @@ class BleachSanitizerFilter(SanitizerFilter):
css_sanitizer: CSSSanitizer | None
def __init__(
self,
source,
source: TreeWalker,
allowed_tags: Iterable[str] = ...,
attributes: _Attributes = ...,
allowed_protocols: Iterable[str] = ...,
@@ -74,12 +79,11 @@ class BleachSanitizerFilter(SanitizerFilter):
strip_html_comments: bool = True,
css_sanitizer: CSSSanitizer | None = None,
) -> None: ...
def sanitize_stream(self, token_iterator): ...
def merge_characters(self, token_iterator): ...
def __iter__(self): ...
def sanitize_token(self, token): ...
def sanitize_characters(self, token): ...
def sanitize_uri_value(self, value, allowed_protocols): ...
def allow_token(self, token): ...
def disallowed_token(self, token): ...
def sanitize_css(self, style): ...
def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def __iter__(self) -> Iterator[_Token]: ...
def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
def allow_token(self, token: _Token) -> _Token: ...
def disallowed_token(self, token: _Token) -> _Token: ...

View File

@@ -1,7 +1,14 @@
from _typeshed import Incomplete, SupportsRead
from typing import Any, overload
from codecs import CodecInfo
from typing import Any, Protocol, overload
from typing_extensions import TypeAlias
# Is actually webencodings.Encoding; declared here as a structural Protocol
# so the html5lib stubs avoid depending on the webencodings package.
class _Encoding(Protocol):
    name: str  # encoding label (e.g. "utf-8") -- presumably; confirm against webencodings
    codec_info: CodecInfo  # the stdlib codec backing this encoding
    def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
_InputStream: TypeAlias = _UnicodeInputStream # noqa: Y047 # used in other files
@@ -42,13 +49,13 @@ def HTMLInputStream(
class HTMLUnicodeInputStream:
reportCharacterErrors: Any
newLines: Any
charEncoding: Any
charEncoding: tuple[_Encoding, str]
dataStream: Any
def __init__(self, source: _UnicodeInputStream) -> None: ...
chunk: str
chunkSize: int
chunkOffset: int
errors: Any
errors: list[str]
prevNumLines: int
prevNumCols: int
def reset(self) -> None: ...
@@ -70,7 +77,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
same_origin_parent_encoding: Any
likely_encoding: Any
default_encoding: Any
charEncoding: Any
charEncoding: tuple[_Encoding, str]
def __init__(
self,
source: _BinaryInputStream,
@@ -85,7 +92,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
def reset(self) -> None: ...
def openStream(self, source): ...
def determineEncoding(self, chardet: bool = True): ...
def changeEncoding(self, newEncoding) -> None: ...
def changeEncoding(self, newEncoding: str | bytes | None) -> None: ...
def detectBOM(self): ...
def detectEncodingMeta(self): ...