add baseline types for natasha-corus library of nlp sources (#12518)

2026-03-16 11:34:56 +08:00 · 2024-08-13 11:33:23 +01:00
parent bed832da8a
commit 89bb3c148c
46 changed files with 1188 additions and 0 deletions
--- a/pyrightconfig.stricter.json
+++ b/pyrightconfig.stricter.json
@@ -40,6 +40,7 @@
        "stubs/cffi",
        "stubs/click-default-group",
        "stubs/commonmark",
+        "stubs/corus",
        "stubs/dateparser",
        "stubs/defusedxml",
        "stubs/docker",
--- a/stubs/corus/METADATA.toml
+++ b/stubs/corus/METADATA.toml
@@ -0,0 +1,2 @@
+version = "0.10.*"
+upstream_repository = "https://github.com/natasha/corus"
--- a/stubs/corus/corus/__init__.pyi
+++ b/stubs/corus/corus/__init__.pyi
@@ -0,0 +1 @@
+from .sources import *
--- a/stubs/corus/corus/io.pyi
+++ b/stubs/corus/corus/io.pyi
@@ -0,0 +1,21 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+def match_names(records, pattern) -> Generator[Incomplete]: ...
+def rstrip(text): ...
+def load_text(path): ...
+def dump_text(text, path) -> None: ...
+def load_lines(path, encoding: str = "utf-8") -> Generator[Incomplete]: ...
+def parse_xml(text): ...
+def load_z_lines(path, open, encoding: str = "utf8") -> Generator[Incomplete]: ...
+def load_gz_lines(path): ...
+def load_bz2_lines(path): ...
+def load_xz_lines(path): ...
+def list_zip(path): ...
+def load_zip_lines(path, name, encoding: str = "utf8") -> Generator[Incomplete]: ...
+def load_zip_texts(path, names, encoding: str = "utf8") -> Generator[Incomplete]: ...
+def parse_csv(lines, delimiter: str = ",", max_field: Incomplete | None = None): ...
+def parse_tsv(lines): ...
+def skip_header(rows): ...
+def dict_csv(rows) -> Generator[Incomplete]: ...
+def parse_jsonl(lines) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/path.pyi
+++ b/stubs/corus/corus/path.pyi
--- a/stubs/corus/corus/readme.pyi
+++ b/stubs/corus/corus/readme.pyi
@@ -0,0 +1,17 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+COMMANDS: Incomplete
+KB: int
+MB: Incomplete
+GB: Incomplete
+LABELS: Incomplete
+
+def is_command(step, commands=("wget", "unzip", "unrar", "rm", "mv", "tar")): ...
+def format_bytes(value): ...
+def format_count(value): ...
+def unfold_metas(items) -> Generator[Incomplete]: ...
+def format_metas_(metas, nbviewer: Incomplete | None = None) -> Generator[Incomplete]: ...
+def format_metas(metas, url: Incomplete | None = None): ...
+def show_html(html) -> None: ...
+def patch_readme(html, path) -> None: ...
--- a/stubs/corus/corus/record.pyi
+++ b/stubs/corus/corus/record.pyi
@@ -0,0 +1,8 @@
+from _typeshed import Incomplete
+
+class Record:
+    __attributes__: Incomplete
+    def __eq__(self, other: object) -> bool: ...
+    def __ne__(self, other: object) -> bool: ...
+    def __iter__(self): ...
+    def __hash__(self) -> int: ...
--- a/stubs/corus/corus/sources/__init__.pyi
+++ b/stubs/corus/corus/sources/__init__.pyi
@@ -0,0 +1,23 @@
+from .bsnlp import load_bsnlp as load_bsnlp
+from .buriy import *
+from .corpora import load_corpora as load_corpora
+from .factru import load_factru as load_factru
+from .gareev import load_gareev as load_gareev
+from .gramru import load_gramru as load_gramru
+from .lenta import load_lenta as load_lenta, load_lenta2 as load_lenta2
+from .librusec import load_librusec as load_librusec
+from .mokoron import *
+from .morphoru import *
+from .ne5 import load_ne5 as load_ne5
+from .ods import *
+from .omnia import load_omnia as load_omnia
+from .persons import load_persons as load_persons
+from .ria import *
+from .rudrec import load_rudrec as load_rudrec
+from .russe import *
+from .simlex import load_simlex as load_simlex
+from .taiga import *
+from .toloka import load_ruadrect as load_ruadrect, load_toloka_lrwc as load_toloka_lrwc
+from .ud import *
+from .wiki import load_wiki as load_wiki
+from .wikiner import load_wikiner as load_wikiner
--- a/stubs/corus/corus/sources/bsnlp.pyi
+++ b/stubs/corus/corus/sources/bsnlp.pyi
@@ -0,0 +1,75 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+RU: str
+BG: str
+CS: str
+PL: str
+LANGS: Incomplete
+ANNOTATED: str
+RAW: str
+TXT: str
+OUT: str
+
+class BsnlpId(Record):
+    __attributes__: Incomplete
+    lang: Incomplete
+    type: Incomplete
+    name: Incomplete
+    path: Incomplete
+    def __init__(self, lang, type, name, path) -> None: ...
+
+class BsnlpRaw(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    name: Incomplete
+    lang: Incomplete
+    date: Incomplete
+    url: Incomplete
+    text: Incomplete
+    def __init__(self, id, name, lang, date, url, text) -> None: ...
+
+class BsnlpAnnotated(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    name: Incomplete
+    substrings: Incomplete
+    def __init__(self, id, name, substrings) -> None: ...
+
+class BsnlpSubstring(Record):
+    __attributes__: Incomplete
+    text: Incomplete
+    normal: Incomplete
+    type: Incomplete
+    id: Incomplete
+    def __init__(self, text, normal, type, id) -> None: ...
+
+class BsnlpMarkup(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    name: Incomplete
+    lang: Incomplete
+    date: Incomplete
+    url: Incomplete
+    text: Incomplete
+    substrings: Incomplete
+    def __init__(self, id, name, lang, date, url, text, substrings) -> None: ...
+
+def walk(dir): ...
+def load_ids(dir, langs) -> Generator[Incomplete]: ...
+def select_type(ids, type) -> Generator[Incomplete]: ...
+
+RAW_PATTERN: Incomplete
+
+def parse_raw(name, text): ...
+def load_raw(records) -> Generator[Incomplete]: ...
+
+ANNOTATED_PATTERN: Incomplete
+
+def parse_substrings(lines) -> Generator[Incomplete]: ...
+def parse_annotated(name, lines): ...
+def load_annotated(records) -> Generator[Incomplete]: ...
+def merge(raw, annotated) -> Generator[Incomplete]: ...
+def load_bsnlp(dir, langs=["ru"]): ...
--- a/stubs/corus/corus/sources/buriy.pyi
+++ b/stubs/corus/corus/sources/buriy.pyi
@@ -0,0 +1,18 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = ["load_buriy_news", "load_buriy_webhose"]
+
+class BuriyRecord(Record):
+    __attributes__: Incomplete
+    timestamp: Incomplete
+    url: Incomplete
+    edition: Incomplete
+    topics: Incomplete
+    title: Incomplete
+    text: Incomplete
+    def __init__(self, timestamp, url, edition, topics, title, text) -> None: ...
+
+def load_buriy_news(path): ...
+def load_buriy_webhose(path): ...
--- a/stubs/corus/corus/sources/corpora.pyi
+++ b/stubs/corus/corus/sources/corpora.pyi
@@ -0,0 +1,50 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class CorporaText(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    parent_id: Incomplete
+    name: Incomplete
+    tags: Incomplete
+    pars: Incomplete
+    def __init__(self, id, parent_id, name, tags, pars) -> None: ...
+
+class CorporaPar(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    sents: Incomplete
+    def __init__(self, id, sents) -> None: ...
+
+class CorporaSent(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    tokens: Incomplete
+    def __init__(self, id, text, tokens) -> None: ...
+
+class CorporaToken(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    rev_id: Incomplete
+    text: Incomplete
+    forms: Incomplete
+    def __init__(self, id, rev_id, text, forms) -> None: ...
+
+class CorporaForm(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    grams: Incomplete
+    def __init__(self, id, text, grams) -> None: ...
+
+def parse_grams(xml) -> Generator[Incomplete]: ...
+def parse_forms(xml) -> Generator[Incomplete]: ...
+def parse_tokens(xml) -> Generator[Incomplete]: ...
+def parse_sents(xml) -> Generator[Incomplete]: ...
+def parse_pars(xml) -> Generator[Incomplete]: ...
+def parse_tags(xml) -> Generator[Incomplete]: ...
+def parse_text(xml): ...
+def load_corpora(path) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/factru.pyi
+++ b/stubs/corus/corus/sources/factru.pyi
@@ -0,0 +1,74 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+DEVSET: str
+TESTSET: str
+TXT: str
+SPANS: str
+OBJECTS: str
+COREF: str
+FACTS: str
+
+class FactruSpan(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    type: Incomplete
+    start: Incomplete
+    stop: Incomplete
+    def __init__(self, id, type, start, stop) -> None: ...
+
+class FactruObject(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    type: Incomplete
+    spans: Incomplete
+    def __init__(self, id, type, spans) -> None: ...
+
+class FactruCorefSlot(Record):
+    __attributes__: Incomplete
+    type: Incomplete
+    value: Incomplete
+    def __init__(self, type, value) -> None: ...
+
+class FactruCoref(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    objects: Incomplete
+    slots: Incomplete
+    def __init__(self, id, objects, slots) -> None: ...
+
+class FactruFactSlot(Record):
+    __attributes__: Incomplete
+    type: Incomplete
+    ref: Incomplete
+    value: Incomplete
+    def __init__(self, type, ref, value) -> None: ...
+
+class FactruFact(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    type: Incomplete
+    slots: Incomplete
+    def __init__(self, id, type, slots) -> None: ...
+
+class FactruMarkup(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    objects: Incomplete
+    corefs: Incomplete
+    facts: Incomplete
+    def __init__(self, id, text, objects, corefs, facts) -> None: ...
+
+def list_ids(dir, set) -> Generator[Incomplete]: ...
+def part_path(id, dir, set, part): ...
+def parse_spans(lines) -> Generator[Incomplete]: ...
+def parse_objects(lines, spans) -> Generator[Incomplete]: ...
+def parse_coref_slots(lines) -> Generator[Incomplete]: ...
+def parse_corefs(lines, objects) -> Generator[Incomplete]: ...
+def parse_facts_slots(lines, id_corefs, id_spans) -> Generator[Incomplete]: ...
+def parse_facts(lines, corefs, spans) -> Generator[Incomplete]: ...
+def load_id(id, dir, set): ...
+def load_factru(dir, sets=["devset", "testset"]) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/gareev.pyi
+++ b/stubs/corus/corus/sources/gareev.pyi
@@ -0,0 +1,21 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class GareevToken(Record):
+    __attributes__: Incomplete
+    text: Incomplete
+    tag: Incomplete
+    def __init__(self, text, tag) -> None: ...
+
+class GareevRecord(Record):
+    __attributes__: Incomplete
+    tokens: Incomplete
+    def __init__(self, tokens) -> None: ...
+
+def parse_conll(lines) -> Generator[Incomplete]: ...
+def parse_gareev(lines): ...
+def load_id(id, dir): ...
+def list_ids(dir) -> Generator[Incomplete]: ...
+def load_gareev(dir) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/gramru.pyi
+++ b/stubs/corus/corus/sources/gramru.pyi
@@ -0,0 +1 @@
+def load_gramru(path): ...
--- a/stubs/corus/corus/sources/lenta.pyi
+++ b/stubs/corus/corus/sources/lenta.pyi
@@ -0,0 +1,19 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class LentaRecord(Record):
+    __attributes__: Incomplete
+    url: Incomplete
+    title: Incomplete
+    text: Incomplete
+    topic: Incomplete
+    tags: Incomplete
+    date: Incomplete
+    def __init__(self, url, title, text, topic, tags, date: Incomplete | None = None) -> None: ...
+
+def parse_lenta(lines) -> Generator[Incomplete]: ...
+def parse_lenta2(lines) -> Generator[Incomplete]: ...
+def load_lenta(path): ...
+def load_lenta2(path): ...
--- a/stubs/corus/corus/sources/librusec.pyi
+++ b/stubs/corus/corus/sources/librusec.pyi
@@ -0,0 +1,14 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class LibrusecRecord(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    def __init__(self, id, text) -> None: ...
+
+def flush(id, buffer): ...
+def parse_librusec(lines) -> Generator[Incomplete]: ...
+def load_librusec(path): ...
--- a/stubs/corus/corus/sources/meta.pyi
+++ b/stubs/corus/corus/sources/meta.pyi
@@ -0,0 +1,54 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+class Meta(Record):
+    __attributes__: Incomplete
+    title: Incomplete
+    url: Incomplete
+    description: Incomplete
+    stats: Incomplete
+    instruction: Incomplete
+    tags: Incomplete
+    functions: Incomplete
+    def __init__(
+        self,
+        title,
+        url: Incomplete | None = None,
+        description: Incomplete | None = None,
+        stats: Incomplete | None = None,
+        instruction=(),
+        tags=(),
+        functions=(),
+    ) -> None: ...
+
+class Group(Record):
+    __attributes__: Incomplete
+    title: Incomplete
+    url: Incomplete
+    description: Incomplete
+    instruction: Incomplete
+    metas: Incomplete
+    def __init__(
+        self, title, url: Incomplete | None = None, description: Incomplete | None = None, instruction=(), metas=()
+    ) -> None: ...
+
+def is_group(item): ...
+
+class Stats(Record):
+    __attributes__: Incomplete
+    bytes: Incomplete
+    count: Incomplete
+    def __init__(self, bytes: Incomplete | None = None, count: Incomplete | None = None) -> None: ...
+
+NER: str
+NEWS: str
+FICTION: str
+SOCIAL: str
+MORPH: str
+SYNTAX: str
+EMB: str
+SIM: str
+SENTIMENT: str
+WEB: str
+METAS: Incomplete
--- a/stubs/corus/corus/sources/mokoron.pyi
+++ b/stubs/corus/corus/sources/mokoron.pyi
@@ -0,0 +1,28 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+__all__ = ["load_mokoron"]
+
+class MokoronRecord(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    timestamp: Incomplete
+    user: Incomplete
+    text: Incomplete
+    sentiment: Incomplete
+    replies: Incomplete
+    retweets: Incomplete
+    favourites: Incomplete
+    posts: Incomplete
+    followers: Incomplete
+    friends: Incomplete
+    lists: Incomplete
+    def __init__(
+        self, id, timestamp, user, text, sentiment, replies, retweets, favourites, posts, followers, friends, lists
+    ) -> None: ...
+    @classmethod
+    def from_match(cls, match): ...
+
+def load_mokoron(path) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/morphoru.pyi
+++ b/stubs/corus/corus/sources/morphoru.pyi
@@ -0,0 +1,24 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = ["load_morphoru_gicrya", "load_morphoru_rnc", "load_morphoru_corpora"]
+
+class MorphoSent(Record):
+    __attributes__: Incomplete
+    tokens: Incomplete
+    attrs: Incomplete
+    def __init__(self, tokens, attrs=()) -> None: ...
+
+class MorphoToken(Record):
+    __attributes__: Incomplete
+    text: Incomplete
+    lemma: Incomplete
+    pos: Incomplete
+    feats: Incomplete
+    feats2: Incomplete
+    def __init__(self, text, lemma, pos, feats, feats2: Incomplete | None = None) -> None: ...
+
+def load_morphoru_gicrya(path): ...
+def load_morphoru_rnc(path): ...
+def load_morphoru_corpora(path): ...
--- a/stubs/corus/corus/sources/ne5.pyi
+++ b/stubs/corus/corus/sources/ne5.pyi
@@ -0,0 +1,28 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class Ne5Span(Record):
+    __attributes__: Incomplete
+    index: Incomplete
+    type: Incomplete
+    start: Incomplete
+    stop: Incomplete
+    text: Incomplete
+    def __init__(self, index, type, start, stop, text) -> None: ...
+
+class Ne5Markup(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    spans: Incomplete
+    def __init__(self, id, text, spans) -> None: ...
+
+def list_ids(dir) -> Generator[Incomplete]: ...
+def txt_path(id, dir): ...
+def ann_path(id, dir): ...
+def parse_spans(lines) -> Generator[Incomplete]: ...
+def load_text(path): ...
+def load_id(id, dir): ...
+def load_ne5(dir) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/ods.pyi
+++ b/stubs/corus/corus/sources/ods.pyi
@@ -0,0 +1,46 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = [
+    "load_ods_interfax",
+    "load_ods_gazeta",
+    "load_ods_izvestia",
+    "load_ods_meduza",
+    "load_ods_ria",
+    "load_ods_rt",
+    "load_ods_tass",
+]
+
+class NewsRecord(Record):
+    __attributes__: Incomplete
+    timestamp: Incomplete
+    url: Incomplete
+    edition: Incomplete
+    topics: Incomplete
+    authors: Incomplete
+    title: Incomplete
+    text: Incomplete
+    stats: Incomplete
+    def __init__(self, timestamp, url, edition, topics, authors, title, text, stats) -> None: ...
+
+class Stats(Record):
+    __attributes__: Incomplete
+    fb: Incomplete
+    vk: Incomplete
+    ok: Incomplete
+    twitter: Incomplete
+    lj: Incomplete
+    tg: Incomplete
+    likes: Incomplete
+    views: Incomplete
+    comments: Incomplete
+    def __init__(self, fb, vk, ok, twitter, lj, tg, likes, views, comments) -> None: ...
+
+def load_ods_interfax(path): ...
+def load_ods_gazeta(path): ...
+def load_ods_izvestia(path): ...
+def load_ods_meduza(path): ...
+def load_ods_ria(path): ...
+def load_ods_rt(path): ...
+def load_ods_tass(path): ...
--- a/stubs/corus/corus/sources/omnia.pyi
+++ b/stubs/corus/corus/sources/omnia.pyi
@@ -0,0 +1,49 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class OmniaDoc(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    attrs: Incomplete
+    pars: Incomplete
+    def __init__(self, id, attrs, pars) -> None: ...
+
+class OmniaPar(Record):
+    __attributes__: Incomplete
+    sents: Incomplete
+    def __init__(self, sents) -> None: ...
+
+class OmniaSent(Record):
+    __attributes__: Incomplete
+    tokens: Incomplete
+    def __init__(self, tokens) -> None: ...
+
+class OmniaToken(Record):
+    __attributes__: Incomplete
+    text: Incomplete
+    lemma: Incomplete
+    atag: Incomplete
+    tag: Incomplete
+    ztag: Incomplete
+    g: Incomplete
+    def __init__(self, text, lemma, atag, tag, ztag, g) -> None: ...
+
+DID: str
+G_TAG: str
+S_END: str
+P_END: str
+DOC_END: str
+
+def take_until(stream, value) -> Generator[Incomplete]: ...
+def group_bounds(stream, end) -> Generator[Incomplete]: ...
+def group_doc_bounds(stream) -> Generator[Incomplete]: ...
+def group_pairs(stream) -> Generator[Incomplete]: ...
+def parse_tokens(lines) -> Generator[Incomplete]: ...
+def parse_sents(lines) -> Generator[Incomplete]: ...
+def parse_pars(lines) -> Generator[Incomplete]: ...
+def parse_tag_attrs(tag) -> Generator[Incomplete]: ...
+def parse_doc_header(header): ...
+def parse_docs(lines) -> Generator[Incomplete]: ...
+def load_omnia(path) -> Generator[Incomplete, Incomplete, None]: ...
--- a/stubs/corus/corus/sources/persons.pyi
+++ b/stubs/corus/corus/sources/persons.pyi
@@ -0,0 +1,27 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+TEXT: str
+ANNO: str
+
+class PersonsSpan(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    start: Incomplete
+    stop: Incomplete
+    value: Incomplete
+    def __init__(self, id, start, stop, value) -> None: ...
+
+class PersonsMarkup(Record):
+    __attributes__: Incomplete
+    text: Incomplete
+    spans: Incomplete
+    def __init__(self, text, spans) -> None: ...
+
+def list_ids(path) -> Generator[Incomplete]: ...
+def part_names(ids, part) -> Generator[Incomplete]: ...
+def parse_anno(text) -> Generator[Incomplete]: ...
+def load_ids(ids, path) -> Generator[Incomplete]: ...
+def load_persons(path): ...
--- a/stubs/corus/corus/sources/ria.pyi
+++ b/stubs/corus/corus/sources/ria.pyi
@@ -0,0 +1,21 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = ["load_ria_raw", "load_ria"]
+
+class RiaRawRecord(Record):
+    __attributes__: Incomplete
+    title: Incomplete
+    text: Incomplete
+    def __init__(self, title, text) -> None: ...
+
+class RiaRecord(Record):
+    __attributes__: Incomplete
+    title: Incomplete
+    prefix: Incomplete
+    text: Incomplete
+    def __init__(self, title, prefix, text) -> None: ...
+
+def load_ria_raw(path): ...
+def load_ria(path): ...
--- a/stubs/corus/corus/sources/rudrec.pyi
+++ b/stubs/corus/corus/sources/rudrec.pyi
@@ -0,0 +1,27 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class RuDReCRecord(Record):
+    __attributes__: Incomplete
+    file_name: Incomplete
+    text: Incomplete
+    sentence_id: Incomplete
+    entities: Incomplete
+    def __init__(self, file_name, text, sentence_id, entities) -> None: ...
+
+class RuDReCEntity(Record):
+    __attributes__: Incomplete
+    entity_id: Incomplete
+    entity_text: Incomplete
+    entity_type: Incomplete
+    start: Incomplete
+    end: Incomplete
+    concept_id: Incomplete
+    concept_name: Incomplete
+    def __init__(self, entity_id, entity_text, entity_type, start, end, concept_id, concept_name) -> None: ...
+
+def parse_entities(items) -> Generator[Incomplete]: ...
+def parse_rudrec(items) -> Generator[Incomplete]: ...
+def load_rudrec(path): ...
--- a/stubs/corus/corus/sources/russe.pyi
+++ b/stubs/corus/corus/sources/russe.pyi
@@ -0,0 +1,16 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = ["load_russe_hj", "load_russe_rt", "load_russe_ae"]
+
+class RusseSemRecord(Record):
+    __attributes__: Incomplete
+    word1: Incomplete
+    word2: Incomplete
+    sim: Incomplete
+    def __init__(self, word1, word2, sim) -> None: ...
+
+def load_russe_hj(path): ...
+def load_russe_rt(path): ...
+def load_russe_ae(path): ...
--- a/stubs/corus/corus/sources/simlex.pyi
+++ b/stubs/corus/corus/sources/simlex.pyi
@@ -0,0 +1,14 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class SimlexRecord(Record):
+    __attributes__: Incomplete
+    word1: Incomplete
+    word2: Incomplete
+    score: Incomplete
+    def __init__(self, word1, word2, score) -> None: ...
+
+def parse_simlex(lines) -> Generator[Incomplete]: ...
+def load_simlex(path): ...
--- a/stubs/corus/corus/sources/taiga/__init__.pyi
+++ b/stubs/corus/corus/sources/taiga/__init__.pyi
@@ -0,0 +1,10 @@
+from .arzamas import *
+from .fontanka import *
+from .interfax import *
+from .kp import *
+from .lenta import *
+from .magazines import *
+from .nplus1 import *
+from .proza import *
+from .social import *
+from .subtitles import *
--- a/stubs/corus/corus/sources/taiga/arzamas.pyi
+++ b/stubs/corus/corus/sources/taiga/arzamas.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_arzamas_metas", "load_taiga_arzamas"]
+
+def load_taiga_arzamas_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_arzamas(path, metas: Incomplete | None = None, offset: int = 144896, count: int = 311): ...
--- a/stubs/corus/corus/sources/taiga/common.pyi
+++ b/stubs/corus/corus/sources/taiga/common.pyi
@@ -0,0 +1,77 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class ArchiveRecord(Record):
+    __attributes__: Incomplete
+    name: Incomplete
+    offset: Incomplete
+    file: Incomplete
+    def __init__(self, name, offset, file) -> None: ...
+
+class TaigaRecord(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    meta: Incomplete
+    text: Incomplete
+    def __init__(self, id, meta, text) -> None: ...
+
+class Author(Record):
+    __attributes__: Incomplete
+    name: Incomplete
+    readers: Incomplete
+    texts: Incomplete
+    profession: Incomplete
+    about: Incomplete
+    url: Incomplete
+    def __init__(
+        self,
+        name,
+        readers: Incomplete | None = None,
+        texts: Incomplete | None = None,
+        profession: Incomplete | None = None,
+        about: Incomplete | None = None,
+        url: Incomplete | None = None,
+    ) -> None: ...
+
+class Meta(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    timestamp: Incomplete
+    tags: Incomplete
+    themes: Incomplete
+    rubric: Incomplete
+    genre: Incomplete
+    topic: Incomplete
+    author: Incomplete
+    lang: Incomplete
+    title: Incomplete
+    url: Incomplete
+    def __init__(
+        self,
+        id,
+        timestamp: Incomplete | None = None,
+        tags: Incomplete | None = None,
+        themes: Incomplete | None = None,
+        rubric: Incomplete | None = None,
+        genre: Incomplete | None = None,
+        topic: Incomplete | None = None,
+        author: Incomplete | None = None,
+        lang: Incomplete | None = None,
+        title: Incomplete | None = None,
+        url: Incomplete | None = None,
+    ) -> None: ...
+
+def load_tar(path, offset: int = 0) -> Generator[Incomplete]: ...
+def load_zip(path, offset: int = 0) -> Generator[Incomplete]: ...
+def parse_meta(file, encoding: str = "utf8") -> Generator[Incomplete]: ...
+def load_metas(path, pattern, offset, count, load) -> Generator[Incomplete]: ...
+def load_tar_metas(path, pattern, offset, count): ...
+def load_zip_metas(path, pattern, offset, count): ...
+def load_texts(path, pattern, offset, count, parse_id, load, encoding: str = "utf8") -> Generator[Incomplete]: ...
+def parse_filename_id(path): ...
+def load_tar_texts(path, pattern, offset, count, parse_id=...): ...
+def load_zip_texts(path, pattern, offset, count, parse_id=...): ...
+def merge_metas(records, metas: Incomplete | None = None) -> Generator[Incomplete]: ...
+def patch_month(date, months): ...
--- a/stubs/corus/corus/sources/taiga/fontanka.pyi
+++ b/stubs/corus/corus/sources/taiga/fontanka.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_fontanka_metas", "load_taiga_fontanka"]
+
+def load_taiga_fontanka_metas(path, offset: int = 0, count: int = 13): ...
+def load_taiga_fontanka(path, metas: Incomplete | None = None, offset: int = 306359296, count: int = 342683): ...
--- a/stubs/corus/corus/sources/taiga/interfax.pyi
+++ b/stubs/corus/corus/sources/taiga/interfax.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_interfax_metas", "load_taiga_interfax"]
+
+def load_taiga_interfax_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_interfax(path, metas: Incomplete | None = None, offset: int = 11447296, count: int = 46429): ...
--- a/stubs/corus/corus/sources/taiga/kp.pyi
+++ b/stubs/corus/corus/sources/taiga/kp.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_kp_metas", "load_taiga_kp"]
+
+def load_taiga_kp_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_kp(path, metas: Incomplete | None = None, offset: int = 13042176, count: int = 45503): ...
--- a/stubs/corus/corus/sources/taiga/lenta.pyi
+++ b/stubs/corus/corus/sources/taiga/lenta.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_lenta_metas", "load_taiga_lenta"]
+
+def load_taiga_lenta_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_lenta(path, metas: Incomplete | None = None, offset: int = 12800000, count: int = 36446): ...
--- a/stubs/corus/corus/sources/taiga/magazines.pyi
+++ b/stubs/corus/corus/sources/taiga/magazines.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_magazines_metas", "load_taiga_magazines"]
+
+def load_taiga_magazines_metas(path, offset: int = 0, count: int = 36): ...
+def load_taiga_magazines(path, metas: Incomplete | None = None, offset: int = 7292416, count: int = 39890): ...
--- a/stubs/corus/corus/sources/taiga/nplus1.pyi
+++ b/stubs/corus/corus/sources/taiga/nplus1.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_nplus1_metas", "load_taiga_nplus1"]
+
+def load_taiga_nplus1_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_nplus1(path, metas: Incomplete | None = None, offset: int = 1919488, count: int = 7696): ...
--- a/stubs/corus/corus/sources/taiga/proza.pyi
+++ b/stubs/corus/corus/sources/taiga/proza.pyi
@@ -0,0 +1,8 @@
+from _typeshed import Incomplete
+
+__all__ = ["load_taiga_proza_metas", "load_taiga_proza", "load_taiga_stihi_metas", "load_taiga_stihi"]
+
+def load_taiga_proza_metas(path, offset: int = 0, count: int = 13): ...
+def load_taiga_stihi_metas(path, offset: int = 0, count: int = 3): ...
+def load_taiga_proza(path, metas: Incomplete | None = None, offset: int = ..., count: int = ...): ...
+def load_taiga_stihi(path, metas: Incomplete | None = None, offset: int = ..., count: int = ...): ...
--- a/stubs/corus/corus/sources/taiga/social.pyi
+++ b/stubs/corus/corus/sources/taiga/social.pyi
@@ -0,0 +1,15 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+__all__ = ["load_taiga_social"]
+
+class TaigaSocialRecord(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    network: Incomplete
+    text: Incomplete
+    def __init__(self, id, network, text) -> None: ...
+
+def load_taiga_social(path, offset: int = 3985892864, count: int = 4) -> Generator[Incomplete]: ...
--- a/stubs/corus/corus/sources/taiga/subtitles.pyi
+++ b/stubs/corus/corus/sources/taiga/subtitles.pyi
@@ -0,0 +1,6 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+def parse_metas(items) -> Generator[Incomplete]: ...
+def load_taiga_subtitles_metas(path, offset: int = 0, count: int = 1): ...
+def load_taiga_subtitles(path, metas: Incomplete | None = None, offset: int = 2113024, count: int = 19011): ...
--- a/stubs/corus/corus/sources/toloka.pyi
+++ b/stubs/corus/corus/sources/toloka.pyi
@@ -0,0 +1,28 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class LRWCRecord(Record):
+    __attributes__: Incomplete
+    hyponym: Incomplete
+    hypernym: Incomplete
+    genitive: Incomplete
+    judgement: Incomplete
+    confidence: Incomplete
+    def __init__(self, hyponym, hypernym, genitive, judgement, confidence) -> None: ...
+
+def parse_judgement(value): ...
+def parse_confidence(value): ...
+def parse_toloka_lrwc(lines) -> Generator[Incomplete]: ...
+def load_toloka_lrwc(path): ...
+
+class RuADReCTRecord(Record):
+    __attributes__: Incomplete
+    tweet_id: Incomplete
+    tweet: Incomplete
+    label: Incomplete
+    def __init__(self, tweet_id, tweet, label) -> None: ...
+
+def parse_ruadrect(lines) -> Generator[Incomplete]: ...
+def load_ruadrect(path): ...
--- a/stubs/corus/corus/sources/ud.pyi
+++ b/stubs/corus/corus/sources/ud.pyi
@@ -0,0 +1,29 @@
+from _typeshed import Incomplete
+
+from corus.record import Record
+
+__all__ = ["load_ud_gsd", "load_ud_taiga", "load_ud_pud", "load_ud_syntag"]
+
+class UDSent(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    attrs: Incomplete
+    tokens: Incomplete
+    def __init__(self, id, text, attrs, tokens) -> None: ...
+
+class UDToken(Record):
+    __attributes__: Incomplete
+    id: Incomplete
+    text: Incomplete
+    lemma: Incomplete
+    pos: Incomplete
+    feats: Incomplete
+    head_id: Incomplete
+    rel: Incomplete
+    def __init__(self, id, text, lemma, pos, feats, head_id, rel) -> None: ...
+
+def load_ud_gsd(path): ...
+def load_ud_taiga(path): ...
+def load_ud_pud(path): ...
+def load_ud_syntag(path): ...
--- a/stubs/corus/corus/sources/wiki.pyi
+++ b/stubs/corus/corus/sources/wiki.pyi
@@ -0,0 +1,20 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+from corus.third.WikiExtractor import Extractor
+
+class WikiRecord(Record):  # declared attributes mirror the __init__ parameters one-to-one
+    __attributes__: Incomplete
+    id: Incomplete
+    url: Incomplete
+    title: Incomplete
+    text: Incomplete
+    def __init__(self, id, url, title, text) -> None: ...
+    @classmethod
+    def from_json(cls, data): ...  # alternate constructor; NOTE(review): presumably returns a WikiRecord — confirm
+
+class Extractor_(Extractor):  # extends the Extractor imported from corus.third.WikiExtractor
+    def extract_(self): ...  # takes no `out` argument, unlike Extractor.extract(self, out); return type not annotated
+
+def load_wiki(path) -> Generator[Incomplete]: ...  # generator; presumably yields WikiRecord — TODO confirm
--- a/stubs/corus/corus/sources/wikiner.pyi
+++ b/stubs/corus/corus/sources/wikiner.pyi
@@ -0,0 +1,19 @@
+from _typeshed import Incomplete
+from collections.abc import Generator
+
+from corus.record import Record
+
+class WikinerToken(Record):  # declared attributes mirror the __init__ parameters one-to-one
+    __attributes__: Incomplete
+    text: Incomplete
+    pos: Incomplete
+    tag: Incomplete
+    def __init__(self, text, pos, tag) -> None: ...
+
+class WikinerMarkup(Record):  # single-field record
+    __attributes__: Incomplete
+    tokens: Incomplete  # NOTE(review): presumably a collection of WikinerToken — confirm
+    def __init__(self, tokens) -> None: ...
+
+def parse_wikiner(line): ...  # takes a single line; return type not annotated yet
+def load_wikiner(path) -> Generator[Incomplete]: ...  # generator; presumably yields WikinerMarkup — TODO confirm
--- a/stubs/corus/corus/third/WikiExtractor.pyi
+++ b/stubs/corus/corus/third/WikiExtractor.pyi
@@ -0,0 +1,227 @@
+import typing
+from _typeshed import Incomplete
+from collections.abc import Generator
+from math import (
+    acos as acos,
+    asin as asin,
+    atan as atan,
+    ceil as ceil,
+    cos as cos,
+    exp as exp,
+    floor as floor,
+    pi as pi,
+    sin as sin,
+    tan as tan,
+    trunc as trunc,
+)
+
+PY2: Incomplete  # NOTE(review): name suggests a Python 2 detection flag, likely bool — confirm
+text_type = str  # plain alias of str; used as the base class of TemplateText
+version: str
+options: Incomplete
+templateKeys: Incomplete
+filter_disambig_page_pattern: Incomplete  # NOTE(review): name suggests a compiled regex — confirm
+g_page_total: int  # module-level integer counters
+g_page_articl_total: int
+g_page_articl_used_total: int
+
+def keepPage(ns, catSet, page): ...  # parameter and return types not annotated yet
+def get_url(uid): ...
+
+selfClosingTags: Incomplete
+placeholder_tags: Incomplete
+
+def normalizeTitle(title): ...
+def unescape(text): ...
+
+comment: Incomplete
+nowiki: Incomplete
+
+def ignoreTag(tag) -> None: ...  # returns None; NOTE(review): presumably registers the tag in module state — confirm
+
+selfClosing_tag_patterns: Incomplete  # NOTE(review): the *_patterns names suggest regex collections — confirm
+placeholder_tag_patterns: Incomplete
+preformatted: Incomplete
+externalLink: Incomplete
+externalLinkNoAnchor: Incomplete
+bold_italic: Incomplete
+bold: Incomplete
+italic_quote: Incomplete
+italic: Incomplete
+quote_quote: Incomplete
+spaces: Incomplete
+dots: Incomplete
+
+_T = typing.TypeVar("_T")  # element type parameter for the generic Template class below
+
+class Template(list[_T]):  # generic list subclass
+    @classmethod
+    def parse(cls, body): ...  # alternate constructor; NOTE(review): presumably returns a Template — confirm
+    def subst(self, params, extractor, depth: int = 0): ...  # the only subst() in this module where depth has a default
+
+class TemplateText(text_type):  # text_type is declared as `str` in this module, so this is a str subclass
+    def subst(self, params, extractor, depth): ...  # depth is required here (no default)
+
+class TemplateArg:  # exposes name/default; constructed from a single `parameter` argument
+    name: Incomplete
+    default: Incomplete
+    def __init__(self, parameter) -> None: ...
+    def subst(self, params, extractor, depth): ...  # depth is required here (no default)
+
+class Frame:  # NOTE(review): prev/depth/push/pop suggest a linked stack of frames — confirm
+    title: Incomplete
+    args: Incomplete
+    prev: Incomplete
+    depth: Incomplete  # declared as an attribute but not an __init__ parameter
+    def __init__(self, title: str = "", args=[], prev: Incomplete | None = None) -> None: ...  # all parameters optional
+    def push(self, title, args): ...
+    def pop(self): ...
+
+substWords: str
+
+class Extractor:  # base class of corus.sources.wiki.Extractor_
+    id: Incomplete
+    revid: Incomplete
+    title: Incomplete
+    text: Incomplete  # NOTE(review): presumably populated from the `lines` argument of __init__ — confirm
+    magicWords: Incomplete  # NOTE(review): presumably a MagicWords instance — confirm
+    frame: Incomplete  # NOTE(review): presumably a Frame instance — confirm
+    recursion_exceeded_1_errs: int  # integer error counters
+    recursion_exceeded_2_errs: int
+    recursion_exceeded_3_errs: int
+    template_title_errs: int
+    def __init__(self, id, revid, title, lines) -> None: ...
+    def write_output(self, out, text) -> None: ...
+    def extract(self, out) -> None: ...  # returns None; NOTE(review): presumably emits through `out` — confirm
+    def transform(self, wikitext): ...
+    def transform1(self, text): ...
+    def wiki2text(self, text): ...
+    def clean(self, text): ...
+    maxTemplateRecursionLevels: int  # integer limits declared between the method groups
+    maxParameterRecursionLevels: int
+    reOpen: Incomplete
+    def expand(self, wikitext): ...
+    def templateParams(self, parameters): ...
+    def expandTemplate(self, body): ...
+
+def splitParts(paramsList): ...  # parameter and return types not annotated yet
+def findMatchingBraces(text, ldelim: int = 0) -> Generator[Incomplete]: ...  # generator; yielded type not specified
+def findBalanced(text, openDelim=["[["], closeDelim=["]]"]) -> Generator[Incomplete]: ...  # list defaults, one delimiter each
+def if_empty(*rest): ...
+def functionParams(args, vars): ...
+def string_sub(args): ...  # the string_* helpers and roman_main all take a single `args` parameter
+def string_sublength(args): ...
+def string_len(args): ...
+def string_find(args): ...
+def string_pos(args): ...
+def string_replace(args): ...
+def string_rep(args): ...
+def roman_main(args): ...
+
+modules: Incomplete  # NOTE(review): presumably a lookup table of the helpers above, used by sharp_invoke — confirm
+
+class MagicWords:  # supports item get/set by name via __getitem__/__setitem__
+    names: Incomplete
+    values: Incomplete
+    def __init__(self) -> None: ...
+    def __getitem__(self, name): ...
+    def __setitem__(self, name, value) -> None: ...
+    switches: Incomplete
+
+magicWordsRE: Incomplete  # NOTE(review): name suggests a compiled regex — confirm
+
+def ucfirst(string): ...  # parameter and return types not annotated yet
+def lcfirst(string): ...
+def fullyQualifiedTemplateTitle(templateTitle): ...
+def normalizeNamespace(ns): ...
+
+class Infix:  # wraps `function`; declares |, << and >> operator hooks plus __call__
+    function: Incomplete
+    def __init__(self, function) -> None: ...
+    def __ror__(self, other): ...  # hook for `other | self`
+    def __or__(self, other): ...  # hook for `self | other`
+    def __rlshift__(self, other): ...  # hook for `other << self`
+    def __rshift__(self, other): ...  # hook for `self >> other`
+    def __call__(self, value1, value2): ...
+
+ROUND: Incomplete  # NOTE(review): presumably an Infix instance — confirm
+
+def sharp_expr(extr, expr): ...  # sharp_expr through sharp_switch take `extr` as first parameter
+def sharp_if(extr, testValue, valueIfTrue, valueIfFalse: Incomplete | None = None, *args): ...
+def sharp_ifeq(extr, lvalue, rvalue, valueIfTrue, valueIfFalse: Incomplete | None = None, *args): ...
+def sharp_iferror(extr, test, then: str = "", Else: Incomplete | None = None, *args): ...
+def sharp_switch(extr, primary, *params): ...
+def sharp_invoke(module, function, args): ...  # unlike the functions above, takes no `extr` parameter
+
+parserFunctions: Incomplete  # NOTE(review): presumably a name-to-function table used by callParserFunction — confirm
+
+def callParserFunction(functionName, args, extractor): ...
+
+reNoinclude: Incomplete  # NOTE(review): the re* names suggest compiled regexes — confirm
+reIncludeonly: Incomplete
+
+def define_template(title, page) -> None: ...  # returns None
+def dropNested(text, openDelim, closeDelim): ...
+def dropSpans(spans, text): ...
+def replaceInternalLinks(text): ...
+def makeInternalLink(title, label): ...
+
+wgUrlProtocols: Incomplete
+EXT_LINK_URL_CLASS: str  # NOTE(review): the *_CLASS names suggest regex character-class fragments — confirm
+ANCHOR_CLASS: str
+ExtLinkBracketedRegex: Incomplete
+EXT_IMAGE_REGEX: Incomplete
+
+def replaceExternalLinks(text): ...  # parameter and return types not annotated yet
+def makeExternalLink(url, anchor): ...
+def makeExternalImage(url, alt: str = ""): ...  # alt is optional and defaults to the empty string
+
+tailRE: Incomplete
+syntaxhighlight: Incomplete
+section: Incomplete
+listOpen: Incomplete
+listClose: Incomplete
+listItem: Incomplete
+
+def compact(text): ...
+def handle_unicode(entity): ...
+
+class NextFile:  # defines __next__ but no __iter__ is declared in this stub
+    filesPerDir: int
+    path_name: Incomplete
+    dir_index: int
+    file_index: int
+    def __init__(self, path_name) -> None: ...
+    def __next__(self): ...  # NOTE(review): presumably returns the next output file path — confirm
+    next = __next__  # alias of __next__; NOTE(review): looks like a Python 2 iterator-protocol shim — confirm
+
+class OutputSplitter:  # declares a file-like write()/close() pair plus reserve() and open()
+    nextFile: Incomplete  # NOTE(review): presumably a NextFile instance — confirm
+    compress: Incomplete
+    max_file_size: Incomplete
+    file: Incomplete
+    def __init__(self, nextFile, max_file_size: int = 0, compress: bool = True) -> None: ...
+    def reserve(self, size) -> None: ...
+    def write(self, data) -> None: ...
+    def close(self) -> None: ...
+    def open(self, filename): ...  # return type not annotated yet
+
+tagRE: Incomplete  # NOTE(review): the *RE names suggest compiled regexes — confirm
+keyRE: Incomplete
+catRE: Incomplete
+
+def load_templates(file, output_file: Incomplete | None = None) -> None: ...  # output_file is optional
+def pages_from(input) -> Generator[Incomplete]: ...  # generator; yielded type not specified yet
+def process_dump(input_file, template_file, out_file, file_size, file_compress, process_count) -> None: ...
+def extract_process(opts, i, jobs_queue, output_queue) -> None: ...  # NOTE(review): presumably a worker entry point — confirm
+
+report_period: int
+
+def reduce_process(
+    opts, output_queue, spool_length, out_file: Incomplete | None = None, file_size: int = 0, file_compress: bool = True
+) -> None: ...  # out_file, file_size and file_compress are optional
+
+minFileSize: Incomplete
+
+def main() -> None: ...  # takes no arguments and returns None
+def createLogger(quiet, debug, log_file) -> None: ...
--- a/stubs/corus/corus/third/__init__.pyi
+++ b/stubs/corus/corus/third/__init__.pyi
--- a/stubs/corus/corus/zip.pyi
+++ b/stubs/corus/corus/zip.pyi
@@ -0,0 +1,28 @@
+from _typeshed import Incomplete
+from typing import NamedTuple
+
+def open_zip(path): ...  # return type not annotated yet
+
+HEADER_FORMAT: str  # NOTE(review): presumably a struct format string for the fields of ZipHeader — confirm
+HEADER_SIGNATURE: bytes
+NO_COMPRESSION: int  # NOTE(review): presumably values compared against ZipHeader.compression — confirm
+DEFLATED: int
+
+class ZipHeader(NamedTuple):  # NamedTuple: the field order below is also the positional tuple order
+    signature: Incomplete
+    extract_by: Incomplete
+    flags: Incomplete
+    compression: Incomplete
+    time: Incomplete
+    date: Incomplete
+    crc: Incomplete
+    compressed: Incomplete  # NOTE(review): presumably the compressed size in bytes — confirm
+    uncompressed: Incomplete  # NOTE(review): presumably the uncompressed size in bytes — confirm
+    name: Incomplete
+    extra: Incomplete
+
+def decode_name(name): ...  # parameter and return types not annotated yet
+def read_zip_header(file): ...  # NOTE(review): presumably returns a ZipHeader — confirm
+def is_zip_header(record): ...  # NOTE(review): presumably returns bool — confirm
+def assert_zip_header(record) -> None: ...  # returns None; NOTE(review): presumably raises on a bad header — confirm
+def read_zip_data(file, header): ...