mirror of
https://github.com/davidhalter/typeshed.git
synced 2025-12-07 12:44:28 +08:00
Complete stubs for bleach (#9314)
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Avasam <samuel.06@hotmail.com>
This commit is contained in:
@@ -27,7 +27,6 @@
|
||||
"stubs/antlr4-python3-runtime",
|
||||
"stubs/aws-xray-sdk",
|
||||
"stubs/beautifulsoup4",
|
||||
"stubs/bleach",
|
||||
"stubs/boltons",
|
||||
"stubs/boto",
|
||||
"stubs/braintree",
|
||||
|
||||
@@ -1,2 +1,5 @@
|
||||
bleach.css_sanitizer # Requires tinycss2 to be installed
|
||||
bleach.html5lib_shim.*
|
||||
# Internal private stuff:
|
||||
bleach._vendor.*
|
||||
|
||||
# Is a property returning a method, simplified:
|
||||
bleach.html5lib_shim.InputStreamWithMemory.changeEncoding
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
version = "6.1.*"
|
||||
requires = ["types-html5lib"]
|
||||
upstream_repository = "https://github.com/mozilla/bleach"
|
||||
partial_stub = true
|
||||
|
||||
[tool.stubtest]
|
||||
ignore_missing_stub = true
|
||||
extras = ["css"]
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from collections.abc import Container
|
||||
from typing import Final
|
||||
|
||||
# Module-level constants of the CSS sanitizer stub.
# Each name is declared exactly once, as `Final`: annotating the same name a
# second time is rejected by type checkers, and `Final` names may not be
# re-declared at all.
ALLOWED_CSS_PROPERTIES: Final[frozenset[str]]
ALLOWED_SVG_PROPERTIES: Final[frozenset[str]]
|
||||
|
||||
class CSSSanitizer:
|
||||
allowed_css_properties: Container[str]
|
||||
|
||||
@@ -1,30 +1,70 @@
|
||||
from _typeshed import Incomplete
|
||||
import re
|
||||
from codecs import CodecInfo
|
||||
from collections.abc import Generator, Iterable, Iterator
|
||||
from typing import Any, Final, Protocol
|
||||
|
||||
class HTMLParser: # actually html5lib.HTMLParser
|
||||
def __getattr__(self, __name: str) -> Incomplete: ...
|
||||
# We don't re-export any `html5lib` types / values here, because they are not
|
||||
# really public and may change at any time. This is just a helper module,
|
||||
# import things directly from `html5lib` instead!
|
||||
from html5lib import HTMLParser
|
||||
from html5lib._inputstream import HTMLBinaryInputStream, HTMLUnicodeInputStream
|
||||
from html5lib._tokenizer import HTMLTokenizer
|
||||
from html5lib._trie import Trie
|
||||
from html5lib.serializer import HTMLSerializer
|
||||
from html5lib.treewalkers.base import TreeWalker
|
||||
|
||||
class Filter: # actually html5lib.filters.base.Filter
|
||||
source: Incomplete
|
||||
def __init__(self, source) -> None: ...
|
||||
def __iter__(self) -> Iterator[Incomplete]: ...
|
||||
def __getattr__(self, name: str) -> Incomplete: ... # copy attributes from source
|
||||
# Is actually webencodings.Encoding.
# Modelled here as a Protocol that exposes only `name` and `codec_info`;
# it is the first element of the `charEncoding` tuple declared on
# `InputStreamWithMemory` in this stub.
class _Encoding(Protocol):
    name: str
    codec_info: CodecInfo
    def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
|
||||
|
||||
class SanitizerFilter: # actually html5lib.filters.sanitizer.Filter
|
||||
def __getattr__(self, __name: str) -> Incomplete: ...
|
||||
HTML_TAGS: Final[frozenset[str]]
|
||||
HTML_TAGS_BLOCK_LEVEL: Final[frozenset[str]]
|
||||
AMP_SPLIT_RE: Final[re.Pattern[str]]
|
||||
ENTITIES: Final[dict[str, str]]
|
||||
ENTITIES_TRIE: Final[Trie]
|
||||
TAG_TOKEN_TYPES: Final[set[int]]
|
||||
TAG_TOKEN_TYPE_CHARACTERS: Final[int]
|
||||
TAG_TOKEN_TYPE_END: Final[int]
|
||||
TAG_TOKEN_TYPE_PARSEERROR: Final[int]
|
||||
TAG_TOKEN_TYPE_START: Final[int]
|
||||
|
||||
class HTMLSerializer: # actually html5lib.serializer.HTMLSerializer
|
||||
def __getattr__(self, __name: str) -> Incomplete: ...
|
||||
# Stub for the stream wrapper that `BleachHTMLTokenizer.stream` is typed as.
# It is constructed from an `HTMLUnicodeInputStream` (`inner_stream`).
class InputStreamWithMemory:
    # `position` and `reset` are aliased from HTMLUnicodeInputStream, so they
    # carry exactly that class's signatures.
    position = HTMLUnicodeInputStream.position
    reset = HTMLUnicodeInputStream.reset
    def __init__(self, inner_stream: HTMLUnicodeInputStream) -> None: ...
    @property
    def errors(self) -> list[str]: ...
    @property
    def charEncoding(self) -> tuple[_Encoding, str]: ...
    # If inner_stream wasn't an HTMLBinaryInputStream, this will error at runtime.
    # Is a property returning a method, simplified:
    changeEncoding = HTMLBinaryInputStream.changeEncoding
    def char(self) -> str: ...
    def charsUntil(self, characters: Iterable[str], opposite: bool = False) -> str: ...
    def unget(self, char: str | None) -> None: ...
    # NOTE(review): `get_tag` / `start_tag` presumably expose the text remembered
    # since the last tag start -- confirm against the bleach implementation.
    def get_tag(self) -> str: ...
    def start_tag(self) -> None: ...
|
||||
|
||||
# Tokenizer subclass of html5lib's HTMLTokenizer.
class BleachHTMLTokenizer(HTMLTokenizer):
    consume_entities: bool
    # The stream attribute is narrowed to the memory-keeping wrapper type.
    stream: InputStreamWithMemory
    # Either a token dict or None.
    # NOTE(review): presumably None until a token has been emitted -- confirm.
    emitted_last_token: dict[str, Any] | None
    # Remaining keyword arguments are untyped (`Any`); presumably forwarded to
    # HTMLTokenizer.__init__ -- confirm against the bleach implementation.
    def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...
|
||||
|
||||
# Parser subclass of html5lib's HTMLParser.
class BleachHTMLParser(HTMLParser):
    # Stored as a list (or None) even though the constructor accepts any iterable.
    tags: list[str] | None
    strip: bool
    consume_entities: bool
    # Single constructor declaration: a second `__init__` in the same class body
    # would silently shadow the first, so only the fully annotated signature
    # (`**kwargs: Any`) is kept.
    def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs: Any) -> None: ...
|
||||
|
||||
# Serializer subclass of html5lib's HTMLSerializer.
class BleachHTMLSerializer(HTMLSerializer):
    escape_rcdata: bool
    def escape_base_amp(self, stoken: str) -> Generator[str, None, None]: ...
    # Single `serialize` declaration (a second one would shadow the first).
    # Its signature is not accepted as a compatible override of the base-class
    # method by the type checker, hence the explicit `type: ignore[override]`.
    def serialize(self, treewalker: TreeWalker, encoding: str | None = None) -> Generator[str, None, None]: ...  # type: ignore[override]
|
||||
|
||||
def __getattr__(__name: str) -> Incomplete: ...
|
||||
def convert_entity(value: str) -> str | None: ...
|
||||
def convert_entities(text: str) -> str: ...
|
||||
def match_entity(stream: str) -> str | None: ...
|
||||
def next_possible_entity(text: str) -> Iterator[str]: ...
|
||||
|
||||
@@ -1,22 +1,25 @@
|
||||
from _typeshed import Incomplete
|
||||
from collections.abc import Container, Iterable, Iterator
|
||||
from collections.abc import Container, Iterable, Iterator, Sequence
|
||||
from re import Pattern
|
||||
from typing import Any, Final
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from .callbacks import _Callback
|
||||
from .html5lib_shim import Filter
|
||||
from html5lib.filters.base import Filter
|
||||
from html5lib.treewalkers.base import TreeWalker
|
||||
|
||||
DEFAULT_CALLBACKS: list[_Callback]
|
||||
from .callbacks import _Callback, _HTMLAttrs
|
||||
|
||||
TLDS: list[str]
|
||||
DEFAULT_CALLBACKS: Final[list[_Callback]]
|
||||
TLDS: Final[list[str]]
|
||||
|
||||
# Both builders take iterables of strings and return a compiled `str` pattern.
# The `...` defaults mean "has a default whose value is not spelled out in the stub".
def build_url_re(tlds: Iterable[str] = ..., protocols: Iterable[str] = ...) -> Pattern[str]: ...

# Module-level patterns, declared once each as `Final` (a second annotation of
# the same name is rejected by type checkers).
# NOTE(review): presumably the default patterns produced by the builders -- confirm.
URL_RE: Final[Pattern[str]]
PROTO_RE: Final[Pattern[str]]

def build_email_re(tlds: Iterable[str] = ...) -> Pattern[str]: ...

EMAIL_RE: Final[Pattern[str]]
|
||||
|
||||
class Linker:
|
||||
def __init__(
|
||||
@@ -30,6 +33,10 @@ class Linker:
|
||||
) -> None: ...
|
||||
def linkify(self, text: str) -> str: ...
|
||||
|
||||
# TODO: `_Token` might be converted into `TypedDict`
|
||||
# or `html5lib` token might be reused
|
||||
_Token: TypeAlias = dict[str, Any]
|
||||
|
||||
class LinkifyFilter(Filter):
|
||||
callbacks: Iterable[_Callback]
|
||||
skip_tags: Container[str]
|
||||
@@ -38,18 +45,18 @@ class LinkifyFilter(Filter):
|
||||
email_re: Pattern[str]
|
||||
def __init__(
|
||||
self,
|
||||
source,
|
||||
source: TreeWalker,
|
||||
callbacks: Iterable[_Callback] | None = ...,
|
||||
skip_tags: Container[str] | None = None,
|
||||
parse_email: bool = False,
|
||||
url_re: Pattern[str] = ...,
|
||||
email_re: Pattern[str] = ...,
|
||||
) -> None: ...
|
||||
def apply_callbacks(self, attrs, is_new): ...
|
||||
def extract_character_data(self, token_list): ...
|
||||
def handle_email_addresses(self, src_iter): ...
|
||||
def strip_non_url_bits(self, fragment): ...
|
||||
def handle_links(self, src_iter): ...
|
||||
def handle_a_tag(self, token_buffer): ...
|
||||
def extract_entities(self, token): ...
|
||||
def apply_callbacks(self, attrs: _HTMLAttrs, is_new: bool) -> _HTMLAttrs | None: ...
|
||||
def extract_character_data(self, token_list: Iterable[_Token]) -> str: ...
|
||||
def handle_email_addresses(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
|
||||
def strip_non_url_bits(self, fragment: str) -> tuple[str, str, str]: ...
|
||||
def handle_links(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
|
||||
def handle_a_tag(self, token_buffer: Sequence[_Token]) -> Iterator[_Token]: ...
|
||||
def extract_entities(self, token: _Token) -> Iterator[_Token]: ...
|
||||
def __iter__(self) -> Iterator[Incomplete]: ...
|
||||
|
||||
1
stubs/bleach/bleach/parse_shim.pyi
Normal file
1
stubs/bleach/bleach/parse_shim.pyi
Normal file
@@ -0,0 +1 @@
|
||||
from urllib import parse as parse
|
||||
@@ -1,20 +1,27 @@
|
||||
from _typeshed import Incomplete
|
||||
from collections.abc import Callable, Iterable
|
||||
from collections.abc import Callable, Container, Iterable, Iterator
|
||||
from re import Pattern
|
||||
from typing import Protocol
|
||||
from typing import Final, Protocol
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from html5lib.filters.base import Filter
|
||||
from html5lib.filters.sanitizer import Filter as SanitizerFilter
|
||||
from html5lib.treewalkers.base import TreeWalker
|
||||
|
||||
from . import _HTMLAttrKey
|
||||
from .css_sanitizer import CSSSanitizer
|
||||
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer, SanitizerFilter
|
||||
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer
|
||||
from .linkifier import _Token
|
||||
|
||||
# Module-level constants of the sanitizer stub.
# Each name is declared exactly once, as `Final`: annotating the same name twice
# with different types is rejected by type checkers.
ALLOWED_TAGS: Final[frozenset[str]]
ALLOWED_ATTRIBUTES: Final[dict[str, list[str]]]
ALLOWED_PROTOCOLS: Final[frozenset[str]]

INVISIBLE_CHARACTERS: Final[str]
INVISIBLE_CHARACTERS_RE: Final[Pattern[str]]
# Spelled with its literal value so type checkers infer the literal "?".
INVISIBLE_REPLACEMENT_CHAR: Final = "?"
|
||||
|
||||
class NoCssSanitizerWarning(UserWarning): ...
|
||||
|
||||
# A html5lib Filter class
|
||||
class _Filter(Protocol):
|
||||
@@ -24,18 +31,16 @@ _AttributeFilter: TypeAlias = Callable[[str, str, str], bool]
|
||||
_AttributeDict: TypeAlias = dict[str, list[str] | _AttributeFilter] | dict[str, list[str]] | dict[str, _AttributeFilter]
|
||||
_Attributes: TypeAlias = _AttributeFilter | _AttributeDict | list[str]
|
||||
|
||||
_TreeWalker: TypeAlias = Callable[[Incomplete], Incomplete]
|
||||
|
||||
class Cleaner:
|
||||
tags: Iterable[str]
|
||||
attributes: _Attributes
|
||||
protocols: Iterable[str]
|
||||
strip: bool
|
||||
strip_comments: bool
|
||||
filters: Iterable[_Filter]
|
||||
filters: Iterable[Filter]
|
||||
css_sanitizer: CSSSanitizer | None
|
||||
parser: BleachHTMLParser
|
||||
walker: _TreeWalker
|
||||
walker: TreeWalker
|
||||
serializer: BleachHTMLSerializer
|
||||
def __init__(
|
||||
self,
|
||||
@@ -63,7 +68,7 @@ class BleachSanitizerFilter(SanitizerFilter):
|
||||
css_sanitizer: CSSSanitizer | None
|
||||
def __init__(
|
||||
self,
|
||||
source,
|
||||
source: TreeWalker,
|
||||
allowed_tags: Iterable[str] = ...,
|
||||
attributes: _Attributes = ...,
|
||||
allowed_protocols: Iterable[str] = ...,
|
||||
@@ -74,12 +79,11 @@ class BleachSanitizerFilter(SanitizerFilter):
|
||||
strip_html_comments: bool = True,
|
||||
css_sanitizer: CSSSanitizer | None = None,
|
||||
) -> None: ...
|
||||
def sanitize_stream(self, token_iterator): ...
|
||||
def merge_characters(self, token_iterator): ...
|
||||
def __iter__(self): ...
|
||||
def sanitize_token(self, token): ...
|
||||
def sanitize_characters(self, token): ...
|
||||
def sanitize_uri_value(self, value, allowed_protocols): ...
|
||||
def allow_token(self, token): ...
|
||||
def disallowed_token(self, token): ...
|
||||
def sanitize_css(self, style): ...
|
||||
def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
|
||||
def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
|
||||
def __iter__(self) -> Iterator[_Token]: ...
|
||||
def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
|
||||
def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
|
||||
def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
|
||||
def allow_token(self, token: _Token) -> _Token: ...
|
||||
def disallowed_token(self, token: _Token) -> _Token: ...
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
from _typeshed import Incomplete, SupportsRead
|
||||
from typing import Any, overload
|
||||
from codecs import CodecInfo
|
||||
from typing import Any, Protocol, overload
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
# Is actually webencodings.Encoding.
# Modelled here as a Protocol that exposes only `name` and `codec_info`;
# it is the first element of the `charEncoding` tuples declared on the
# input-stream classes in this stub.
class _Encoding(Protocol):
    name: str
    codec_info: CodecInfo
    def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
|
||||
|
||||
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
|
||||
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
|
||||
_InputStream: TypeAlias = _UnicodeInputStream # noqa: Y047 # used in other files
|
||||
@@ -42,13 +49,13 @@ def HTMLInputStream(
|
||||
class HTMLUnicodeInputStream:
|
||||
reportCharacterErrors: Any
|
||||
newLines: Any
|
||||
charEncoding: Any
|
||||
charEncoding: tuple[_Encoding, str]
|
||||
dataStream: Any
|
||||
def __init__(self, source: _UnicodeInputStream) -> None: ...
|
||||
chunk: str
|
||||
chunkSize: int
|
||||
chunkOffset: int
|
||||
errors: Any
|
||||
errors: list[str]
|
||||
prevNumLines: int
|
||||
prevNumCols: int
|
||||
def reset(self) -> None: ...
|
||||
@@ -70,7 +77,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
same_origin_parent_encoding: Any
|
||||
likely_encoding: Any
|
||||
default_encoding: Any
|
||||
charEncoding: Any
|
||||
charEncoding: tuple[_Encoding, str]
|
||||
def __init__(
|
||||
self,
|
||||
source: _BinaryInputStream,
|
||||
@@ -85,7 +92,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
def reset(self) -> None: ...
|
||||
def openStream(self, source): ...
|
||||
def determineEncoding(self, chardet: bool = True): ...
|
||||
def changeEncoding(self, newEncoding) -> None: ...
|
||||
def changeEncoding(self, newEncoding: str | bytes | None) -> None: ...
|
||||
def detectBOM(self): ...
|
||||
def detectEncodingMeta(self): ...
|
||||
|
||||
|
||||
Reference in New Issue
Block a user