html5lib: Add various types (#11429)

I started out investigating comments in #11411 and ended up adding a few other
types that were reasonably obvious from the source code. For reference:
https://github.com/html5lib/html5lib-python/tree/master/html5lib
This commit is contained in:
Jelle Zijlstra
2024-02-20 00:18:17 -08:00
committed by GitHub
parent 601587e71d
commit 78b7dc6167
3 changed files with 74 additions and 54 deletions

View File

@@ -1,5 +1,10 @@
from _typeshed import Incomplete
from typing import Any
from _typeshed import Incomplete, SupportsRead
from typing import Any, overload
from typing_extensions import TypeAlias
# Text input: a str, or any object whose read() yields str.
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
# Binary input: bytes, or any object whose read() yields bytes.
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
# Any input accepted by the parser/tokenizer stubs that import this alias.
# It must cover both text and binary sources: the signatures it replaces
# accepted str | bytes | SupportsRead[str] | SupportsRead[bytes], so aliasing
# only the unicode half would wrongly reject bytes input.
_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream  # noqa: Y047  # used in other files
spaceCharactersBytes: Any
asciiLettersBytes: Any
@@ -20,14 +25,26 @@ class BufferedStream:
def seek(self, pos) -> None: ...
def read(self, bytes): ...
def HTMLInputStream(source, **kwargs): ...
# Overload for text sources (_UnicodeInputStream): takes no extra options and
# is typed as returning an HTMLUnicodeInputStream.
@overload
def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
# Overload for binary sources (_BinaryInputStream): additionally accepts the
# keyword-only options below (everything after the bare `*`) and is typed as
# returning an HTMLBinaryInputStream. The option names, types and defaults
# mirror the HTMLBinaryInputStream.__init__ stub in this file.
@overload
def HTMLInputStream(
source: _BinaryInputStream,
*,
override_encoding: str | bytes | None = None,
transport_encoding: str | bytes | None = None,
same_origin_parent_encoding: str | bytes | None = None,
likely_encoding: str | bytes | None = None,
default_encoding: str = "windows-1252",
useChardet: bool = True,
) -> HTMLBinaryInputStream: ...
class HTMLUnicodeInputStream:
reportCharacterErrors: Any
newLines: Any
charEncoding: Any
dataStream: Any
def __init__(self, source) -> None: ...
def __init__(self, source: _UnicodeInputStream) -> None: ...
chunk: str
chunkSize: int
chunkOffset: int
@@ -56,11 +73,11 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
charEncoding: Any
def __init__(
self,
source,
override_encoding: Incomplete | None = None,
transport_encoding: Incomplete | None = None,
same_origin_parent_encoding: Incomplete | None = None,
likely_encoding: Incomplete | None = None,
source: _BinaryInputStream,
override_encoding: str | bytes | None = None,
transport_encoding: str | bytes | None = None,
same_origin_parent_encoding: str | bytes | None = None,
likely_encoding: str | bytes | None = None,
default_encoding: str = "windows-1252",
useChardet: bool = True,
) -> None: ...
@@ -108,4 +125,4 @@ class ContentAttrParser:
def __init__(self, data) -> None: ...
def parse(self): ...
def lookupEncoding(encoding): ...
# Accepts an encoding label as str or bytes (or None); None is a possible result.
# NOTE(review): the upstream implementation appears to return the object from
# webencodings.lookup() rather than a plain codec-name str — confirm against
# html5lib/_inputstream.py before relying on the `str` return type.
def lookupEncoding(encoding: str | bytes | None) -> str | None: ...

View File

@@ -1,6 +1,8 @@
from _typeshed import Incomplete
from typing import Any
from ._inputstream import _InputStream
entitiesTrie: Any
attributeMap = dict
@@ -12,7 +14,7 @@ class HTMLTokenizer:
state: Any
escape: bool
currentToken: Any
def __init__(self, stream, parser: Incomplete | None = None, **kwargs) -> None: ...
def __init__(self, stream: _InputStream, parser: Incomplete | None = None, **kwargs) -> None: ...
tokenQueue: Any
def __iter__(self): ...
def consumeNumberEntity(self, isHex): ...
@@ -36,23 +38,23 @@ class HTMLTokenizer:
def rawtextLessThanSignState(self): ...
def rawtextEndTagOpenState(self): ...
def rawtextEndTagNameState(self): ...
def scriptDataLessThanSignState(self): ...
def scriptDataEndTagOpenState(self): ...
def scriptDataEndTagNameState(self): ...
def scriptDataEscapeStartState(self): ...
def scriptDataEscapeStartDashState(self): ...
def scriptDataEscapedState(self): ...
def scriptDataEscapedDashState(self): ...
def scriptDataEscapedDashDashState(self): ...
def scriptDataEscapedLessThanSignState(self): ...
def scriptDataEscapedEndTagOpenState(self): ...
def scriptDataEscapedEndTagNameState(self): ...
def scriptDataDoubleEscapeStartState(self): ...
def scriptDataDoubleEscapedState(self): ...
def scriptDataDoubleEscapedDashState(self): ...
def scriptDataDoubleEscapedDashDashState(self): ...
def scriptDataDoubleEscapedLessThanSignState(self): ...
def scriptDataDoubleEscapeEndState(self): ...
def scriptDataLessThanSignState(self) -> bool: ...
def scriptDataEndTagOpenState(self) -> bool: ...
def scriptDataEndTagNameState(self) -> bool: ...
def scriptDataEscapeStartState(self) -> bool: ...
def scriptDataEscapeStartDashState(self) -> bool: ...
def scriptDataEscapedState(self) -> bool: ...
def scriptDataEscapedDashState(self) -> bool: ...
def scriptDataEscapedDashDashState(self) -> bool: ...
def scriptDataEscapedLessThanSignState(self) -> bool: ...
def scriptDataEscapedEndTagOpenState(self) -> bool: ...
def scriptDataEscapedEndTagNameState(self) -> bool: ...
def scriptDataDoubleEscapeStartState(self) -> bool: ...
def scriptDataDoubleEscapedState(self) -> bool: ...
def scriptDataDoubleEscapedDashState(self) -> bool: ...
def scriptDataDoubleEscapedDashDashState(self) -> bool: ...
def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ...
def scriptDataDoubleEscapeEndState(self) -> bool: ...
def beforeAttributeNameState(self): ...
def attributeNameState(self): ...
def afterAttributeNameState(self): ...
@@ -64,17 +66,17 @@ class HTMLTokenizer:
def selfClosingStartTagState(self): ...
def bogusCommentState(self): ...
def markupDeclarationOpenState(self): ...
def commentStartState(self): ...
def commentStartDashState(self): ...
def commentState(self): ...
def commentEndDashState(self): ...
def commentEndState(self): ...
def commentEndBangState(self): ...
def doctypeState(self): ...
def beforeDoctypeNameState(self): ...
def doctypeNameState(self): ...
def afterDoctypeNameState(self): ...
def afterDoctypePublicKeywordState(self): ...
def commentStartState(self) -> bool: ...
def commentStartDashState(self) -> bool: ...
def commentState(self) -> bool: ...
def commentEndDashState(self) -> bool: ...
def commentEndState(self) -> bool: ...
def commentEndBangState(self) -> bool: ...
def doctypeState(self) -> bool: ...
def beforeDoctypeNameState(self) -> bool: ...
def doctypeNameState(self) -> bool: ...
def afterDoctypeNameState(self) -> bool: ...
def afterDoctypePublicKeywordState(self) -> bool: ...
def beforeDoctypePublicIdentifierState(self): ...
def doctypePublicIdentifierDoubleQuotedState(self): ...
def doctypePublicIdentifierSingleQuotedState(self): ...

View File

@@ -1,25 +1,25 @@
from _typeshed import Incomplete, SupportsRead
from _typeshed import Incomplete
from typing import Any, Literal, overload
from xml.etree.ElementTree import Element
from ._inputstream import _InputStream
from ._tokenizer import HTMLTokenizer
@overload
def parse(
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes],
treebuilder: Literal["etree"] = "etree",
namespaceHTMLElements: bool = True,
**kwargs,
doc: _InputStream, treebuilder: Literal["etree"] = "etree", namespaceHTMLElements: bool = True, **kwargs
) -> Element: ...
@overload
def parse(
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], treebuilder: str, namespaceHTMLElements: bool = True, **kwargs
def parse(doc: _InputStream, treebuilder: str, namespaceHTMLElements: bool = True, **kwargs): ...
def parseFragment(
doc: _InputStream, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs
): ...
def parseFragment(doc, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs): ...
def method_decorator_metaclass(function): ...
class HTMLParser:
strict: Any
strict: bool
tree: Any
errors: Any
errors: list[Incomplete]
phases: Any
def __init__(
self, tree: Incomplete | None = None, strict: bool = False, namespaceHTMLElements: bool = True, debug: bool = False
@@ -27,20 +27,21 @@ class HTMLParser:
firstStartTag: bool
log: Any
compatMode: str
container: str
innerHTML: Any
phase: Any
lastPhase: Any
beforeRCDataPhase: Any
framesetOK: bool
tokenizer: Any
tokenizer: HTMLTokenizer
def reset(self) -> None: ...
@property
def documentEncoding(self) -> str | None: ...
def isHTMLIntegrationPoint(self, element) -> bool: ...
def isMathMLTextIntegrationPoint(self, element) -> bool: ...
def isHTMLIntegrationPoint(self, element: Element) -> bool: ...
def isMathMLTextIntegrationPoint(self, element: Element) -> bool: ...
def mainLoop(self) -> None: ...
def parse(self, stream, scripting: bool = ..., **kwargs): ...
def parseFragment(self, stream, *args, **kwargs): ...
def parse(self, stream: _InputStream, scripting: bool = ..., **kwargs): ...
def parseFragment(self, stream: _InputStream, *args, **kwargs): ...
def parseError(self, errorcode: str = "XXX-undefined-error", datavars: Incomplete | None = None) -> None: ...
def adjustMathMLAttributes(self, token) -> None: ...
def adjustSVGAttributes(self, token) -> None: ...