add baseline types for natasha-corus library of nlp sources (#12518)

This commit is contained in:
Oleg Valiulin
2024-08-13 11:33:23 +01:00
committed by GitHub
parent bed832da8a
commit 89bb3c148c
46 changed files with 1188 additions and 0 deletions

View File

@@ -40,6 +40,7 @@
"stubs/cffi",
"stubs/click-default-group",
"stubs/commonmark",
"stubs/corus",
"stubs/dateparser",
"stubs/defusedxml",
"stubs/docker",

View File

@@ -0,0 +1,2 @@
version = "0.10.*"
upstream_repository = "https://github.com/natasha/corus"

View File

@@ -0,0 +1 @@
from .sources import *

21
stubs/corus/corus/io.pyi Normal file
View File

@@ -0,0 +1,21 @@
from _typeshed import Incomplete
from collections.abc import Generator
# Stubs for the corus I/O helpers. Generator element types are still
# `Incomplete`; functions without a return annotation are not typed yet.
def match_names(records, pattern) -> Generator[Incomplete]: ...
def rstrip(text): ...
def load_text(path): ...
def dump_text(text, path) -> None: ...
# The default is spelled "utf-8" here and "utf8" in the loaders below; Python
# treats both spellings as the same codec.
def load_lines(path, encoding: str = "utf-8") -> Generator[Incomplete]: ...
def parse_xml(text): ...
# NOTE(review): the `open` parameter shadows the builtin inside this signature;
# presumably it is the opener used for `path` - confirm against upstream.
def load_z_lines(path, open, encoding: str = "utf8") -> Generator[Incomplete]: ...
def load_gz_lines(path): ...
def load_bz2_lines(path): ...
def load_xz_lines(path): ...
def list_zip(path): ...
def load_zip_lines(path, name, encoding: str = "utf8") -> Generator[Incomplete]: ...
def load_zip_texts(path, names, encoding: str = "utf8") -> Generator[Incomplete]: ...
# `max_field` defaults to None; its non-None type is not known yet.
def parse_csv(lines, delimiter: str = ",", max_field: Incomplete | None = None): ...
def parse_tsv(lines): ...
def skip_header(rows): ...
def dict_csv(rows) -> Generator[Incomplete]: ...
def parse_jsonl(lines) -> Generator[Incomplete]: ...

View File

View File

@@ -0,0 +1,17 @@
from _typeshed import Incomplete
from collections.abc import Generator
# Module-level names of this stub; only `KB` has a concrete type so far.
COMMANDS: Incomplete
KB: int
# NOTE(review): MB and GB are presumably ints like KB - confirm upstream
# before tightening them.
MB: Incomplete
GB: Incomplete
LABELS: Incomplete
# The default `commands` is a tuple of six command names.
def is_command(step, commands=("wget", "unzip", "unrar", "rm", "mv", "tar")): ...
def format_bytes(value): ...
def format_count(value): ...
def unfold_metas(items) -> Generator[Incomplete]: ...
# Two similarly named functions: the underscore-suffixed one is a generator
# taking `nbviewer`; the other takes `url` and has no return annotation.
def format_metas_(metas, nbviewer: Incomplete | None = None) -> Generator[Incomplete]: ...
def format_metas(metas, url: Incomplete | None = None): ...
def show_html(html) -> None: ...
def patch_readme(html, path) -> None: ...

View File

@@ -0,0 +1,8 @@
from _typeshed import Incomplete
# Base class imported by the source stubs (`from corus.record import Record`).
# The members are indented again so that they belong to the class body.
class Record:
    __attributes__: Incomplete
    # Comparison dunders are annotated as returning bool and __hash__ as
    # returning int, the usual stub signatures for these methods.
    def __eq__(self, other: object) -> bool: ...
    def __ne__(self, other: object) -> bool: ...
    def __iter__(self): ...
    def __hash__(self) -> int: ...

View File

@@ -0,0 +1,23 @@
from .bsnlp import load_bsnlp as load_bsnlp
from .buriy import *
from .corpora import load_corpora as load_corpora
from .factru import load_factru as load_factru
from .gareev import load_gareev as load_gareev
from .gramru import load_gramru as load_gramru
from .lenta import load_lenta as load_lenta, load_lenta2 as load_lenta2
from .librusec import load_librusec as load_librusec
from .mokoron import *
from .morphoru import *
from .ne5 import load_ne5 as load_ne5
from .ods import *
from .omnia import load_omnia as load_omnia
from .persons import load_persons as load_persons
from .ria import *
from .rudrec import load_rudrec as load_rudrec
from .russe import *
from .simlex import load_simlex as load_simlex
from .taiga import *
from .toloka import load_ruadrect as load_ruadrect, load_toloka_lrwc as load_toloka_lrwc
from .ud import *
from .wiki import load_wiki as load_wiki
from .wikiner import load_wikiner as load_wikiner

View File

@@ -0,0 +1,75 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Module-level constants. All are declared as plain `str` except LANGS, whose
# type is still `Incomplete`; the stub does not carry their values.
RU: str
BG: str
CS: str
PL: str
LANGS: Incomplete
ANNOTATED: str
RAW: str
TXT: str
OUT: str
# Record stub: every declared attribute has a same-named __init__ parameter.
# The parameter named `type` shadows the builtin inside the signature only.
class BsnlpId(Record):
    __attributes__: Incomplete
    lang: Incomplete
    type: Incomplete
    name: Incomplete
    path: Incomplete
    def __init__(self, lang, type, name, path) -> None: ...
# Record stub with six attributes, mirrored one-to-one by __init__.
class BsnlpRaw(Record):
    __attributes__: Incomplete
    id: Incomplete
    name: Incomplete
    lang: Incomplete
    date: Incomplete
    url: Incomplete
    text: Incomplete
    def __init__(self, id, name, lang, date, url, text) -> None: ...
# Record stub: id, name and substrings, in constructor order.
class BsnlpAnnotated(Record):
    __attributes__: Incomplete
    id: Incomplete
    name: Incomplete
    substrings: Incomplete
    def __init__(self, id, name, substrings) -> None: ...
# Record stub: text, normal, type and id, in constructor order.
class BsnlpSubstring(Record):
    __attributes__: Incomplete
    text: Incomplete
    normal: Incomplete
    type: Incomplete
    id: Incomplete
    def __init__(self, text, normal, type, id) -> None: ...
# Record stub declaring the BsnlpRaw attribute names plus `substrings`.
class BsnlpMarkup(Record):
    __attributes__: Incomplete
    id: Incomplete
    name: Incomplete
    lang: Incomplete
    date: Incomplete
    url: Incomplete
    text: Incomplete
    substrings: Incomplete
    def __init__(self, id, name, lang, date, url, text, substrings) -> None: ...
# Module-level functions and pattern constants of the bsnlp stub. Generator
# element types and the two *_PATTERN constants are still `Incomplete`.
def walk(dir): ...
def load_ids(dir, langs) -> Generator[Incomplete]: ...
def select_type(ids, type) -> Generator[Incomplete]: ...
RAW_PATTERN: Incomplete
def parse_raw(name, text): ...
def load_raw(records) -> Generator[Incomplete]: ...
ANNOTATED_PATTERN: Incomplete
def parse_substrings(lines) -> Generator[Incomplete]: ...
def parse_annotated(name, lines): ...
def load_annotated(records) -> Generator[Incomplete]: ...
def merge(raw, annotated) -> Generator[Incomplete]: ...
# This is the only bsnlp name re-exported by the sources package; `langs`
# defaults to a one-element list.
def load_bsnlp(dir, langs=["ru"]): ...

View File

@@ -0,0 +1,18 @@
from _typeshed import Incomplete
from corus.record import Record
# Names exposed by a star import of this module (the sources package does
# `from .buriy import *`).
__all__ = ["load_buriy_news", "load_buriy_webhose"]
# Record stub: six attributes, each with a same-named __init__ parameter.
class BuriyRecord(Record):
    __attributes__: Incomplete
    timestamp: Incomplete
    url: Incomplete
    edition: Incomplete
    topics: Incomplete
    title: Incomplete
    text: Incomplete
    def __init__(self, timestamp, url, edition, topics, title, text) -> None: ...
# The two loaders listed in this module's __all__; parameter and return types
# are not annotated yet.
def load_buriy_news(path): ...
def load_buriy_webhose(path): ...

View File

@@ -0,0 +1,50 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: id, parent_id, name, tags and pars, in constructor order.
class CorporaText(Record):
    __attributes__: Incomplete
    id: Incomplete
    parent_id: Incomplete
    name: Incomplete
    tags: Incomplete
    pars: Incomplete
    def __init__(self, id, parent_id, name, tags, pars) -> None: ...
# Record stub: id and sents.
class CorporaPar(Record):
    __attributes__: Incomplete
    id: Incomplete
    sents: Incomplete
    def __init__(self, id, sents) -> None: ...
# Record stub: id, text and tokens.
class CorporaSent(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    tokens: Incomplete
    def __init__(self, id, text, tokens) -> None: ...
# Record stub: id, rev_id, text and forms.
class CorporaToken(Record):
    __attributes__: Incomplete
    id: Incomplete
    rev_id: Incomplete
    text: Incomplete
    forms: Incomplete
    def __init__(self, id, rev_id, text, forms) -> None: ...
# Record stub: id, text and grams.
class CorporaForm(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    grams: Incomplete
    def __init__(self, id, text, grams) -> None: ...
# Parser stubs taking an `xml` argument. All but parse_text are annotated as
# generators (element type still `Incomplete`); parse_text has no return
# annotation.
def parse_grams(xml) -> Generator[Incomplete]: ...
def parse_forms(xml) -> Generator[Incomplete]: ...
def parse_tokens(xml) -> Generator[Incomplete]: ...
def parse_sents(xml) -> Generator[Incomplete]: ...
def parse_pars(xml) -> Generator[Incomplete]: ...
def parse_tags(xml) -> Generator[Incomplete]: ...
def parse_text(xml): ...
# Re-exported by the sources package.
def load_corpora(path) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,74 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Module-level string constants; the stub records their type only, not their
# values.
DEVSET: str
TESTSET: str
TXT: str
SPANS: str
OBJECTS: str
COREF: str
FACTS: str
# Record stub: id, type, start and stop.
class FactruSpan(Record):
    __attributes__: Incomplete
    id: Incomplete
    type: Incomplete
    start: Incomplete
    stop: Incomplete
    def __init__(self, id, type, start, stop) -> None: ...
# Record stub: id, type and spans.
class FactruObject(Record):
    __attributes__: Incomplete
    id: Incomplete
    type: Incomplete
    spans: Incomplete
    def __init__(self, id, type, spans) -> None: ...
# Record stub: type and value.
class FactruCorefSlot(Record):
    __attributes__: Incomplete
    type: Incomplete
    value: Incomplete
    def __init__(self, type, value) -> None: ...
# Record stub: id, objects and slots.
class FactruCoref(Record):
    __attributes__: Incomplete
    id: Incomplete
    objects: Incomplete
    slots: Incomplete
    def __init__(self, id, objects, slots) -> None: ...
# Record stub: type, ref and value.
class FactruFactSlot(Record):
    __attributes__: Incomplete
    type: Incomplete
    ref: Incomplete
    value: Incomplete
    def __init__(self, type, ref, value) -> None: ...
# Record stub: id, type and slots.
class FactruFact(Record):
    __attributes__: Incomplete
    id: Incomplete
    type: Incomplete
    slots: Incomplete
    def __init__(self, id, type, slots) -> None: ...
# Record stub: id, text, objects, corefs and facts.
class FactruMarkup(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    objects: Incomplete
    corefs: Incomplete
    facts: Incomplete
    def __init__(self, id, text, objects, corefs, facts) -> None: ...
# Module-level functions of the factru stub. Several parameters (`dir`,
# `set`, `id`) shadow builtins inside the signatures; the names are kept
# because parameter names are part of the callable interface.
def list_ids(dir, set) -> Generator[Incomplete]: ...
def part_path(id, dir, set, part): ...
def parse_spans(lines) -> Generator[Incomplete]: ...
def parse_objects(lines, spans) -> Generator[Incomplete]: ...
def parse_coref_slots(lines) -> Generator[Incomplete]: ...
def parse_corefs(lines, objects) -> Generator[Incomplete]: ...
def parse_facts_slots(lines, id_corefs, id_spans) -> Generator[Incomplete]: ...
def parse_facts(lines, corefs, spans) -> Generator[Incomplete]: ...
def load_id(id, dir, set): ...
# Re-exported by the sources package; `sets` defaults to a two-element list.
def load_factru(dir, sets=["devset", "testset"]) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,21 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: text and tag.
class GareevToken(Record):
    __attributes__: Incomplete
    text: Incomplete
    tag: Incomplete
    def __init__(self, text, tag) -> None: ...
# Record stub with a single `tokens` attribute.
class GareevRecord(Record):
    __attributes__: Incomplete
    tokens: Incomplete
    def __init__(self, tokens) -> None: ...
# Module-level functions of the gareev stub; generator element types are
# still `Incomplete`.
def parse_conll(lines) -> Generator[Incomplete]: ...
def parse_gareev(lines): ...
def load_id(id, dir): ...
def list_ids(dir) -> Generator[Incomplete]: ...
# Re-exported by the sources package.
def load_gareev(dir) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1 @@
# The only declaration in this stub; parameter and return types are not
# annotated yet.
def load_gramru(path): ...

View File

@@ -0,0 +1,19 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: six attributes; `date` is the only optional constructor
# argument (defaults to None).
class LentaRecord(Record):
    __attributes__: Incomplete
    url: Incomplete
    title: Incomplete
    text: Incomplete
    topic: Incomplete
    tags: Incomplete
    date: Incomplete
    def __init__(self, url, title, text, topic, tags, date: Incomplete | None = None) -> None: ...
# Two parallel parse/load pairs. The parsers are annotated as generators; the
# loaders (both re-exported by the sources package) have no return annotation.
def parse_lenta(lines) -> Generator[Incomplete]: ...
def parse_lenta2(lines) -> Generator[Incomplete]: ...
def load_lenta(path): ...
def load_lenta2(path): ...

View File

@@ -0,0 +1,14 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: id and text.
class LibrusecRecord(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    def __init__(self, id, text) -> None: ...
# Module-level functions of the librusec stub; only parse_librusec has a
# return annotation so far.
def flush(id, buffer): ...
def parse_librusec(lines) -> Generator[Incomplete]: ...
def load_librusec(path): ...

View File

@@ -0,0 +1,54 @@
from _typeshed import Incomplete
from corus.record import Record
# Record stub: only `title` is required. url/description/stats default to
# None; instruction/tags/functions default to empty tuples.
class Meta(Record):
    __attributes__: Incomplete
    title: Incomplete
    url: Incomplete
    description: Incomplete
    stats: Incomplete
    instruction: Incomplete
    tags: Incomplete
    functions: Incomplete
    def __init__(
        self,
        title,
        url: Incomplete | None = None,
        description: Incomplete | None = None,
        stats: Incomplete | None = None,
        instruction=(),
        tags=(),
        functions=(),
    ) -> None: ...
# Record stub: only `title` is required; `instruction` and `metas` default to
# empty tuples.
class Group(Record):
    __attributes__: Incomplete
    title: Incomplete
    url: Incomplete
    description: Incomplete
    instruction: Incomplete
    metas: Incomplete
    def __init__(
        self, title, url: Incomplete | None = None, description: Incomplete | None = None, instruction=(), metas=()
    ) -> None: ...
# NOTE(review): presumably a predicate over Group instances, judging by the
# name alone - confirm upstream before annotating the return type.
def is_group(item): ...
# Record stub: both constructor arguments are optional and default to None.
# The attribute named `bytes` shadows the builtin inside this class only.
class Stats(Record):
    __attributes__: Incomplete
    bytes: Incomplete
    count: Incomplete
    def __init__(self, bytes: Incomplete | None = None, count: Incomplete | None = None) -> None: ...
# Module-level string constants followed by METAS, whose type is still
# `Incomplete`. The stub does not carry the values.
NER: str
NEWS: str
FICTION: str
SOCIAL: str
MORPH: str
SYNTAX: str
EMB: str
SIM: str
SENTIMENT: str
WEB: str
METAS: Incomplete

View File

@@ -0,0 +1,28 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Only the loader is exposed by a star import; MokoronRecord is declared in
# this module but not listed here.
__all__ = ["load_mokoron"]
# Record stub with twelve attributes mirrored by __init__, plus the
# `from_match` classmethod.
class MokoronRecord(Record):
    __attributes__: Incomplete
    id: Incomplete
    timestamp: Incomplete
    user: Incomplete
    text: Incomplete
    sentiment: Incomplete
    replies: Incomplete
    retweets: Incomplete
    favourites: Incomplete
    posts: Incomplete
    followers: Incomplete
    friends: Incomplete
    lists: Incomplete
    def __init__(
        self, id, timestamp, user, text, sentiment, replies, retweets, favourites, posts, followers, friends, lists
    ) -> None: ...
    @classmethod
    def from_match(cls, match): ...
# Generator-returning loader; the element type is still `Incomplete`.
def load_mokoron(path) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,24 @@
from _typeshed import Incomplete
from corus.record import Record
# Star-import surface of this module: the three loaders only, not the record
# classes.
__all__ = ["load_morphoru_gicrya", "load_morphoru_rnc", "load_morphoru_corpora"]
# Record stub: tokens and attrs; `attrs` defaults to an empty tuple.
class MorphoSent(Record):
    __attributes__: Incomplete
    tokens: Incomplete
    attrs: Incomplete
    def __init__(self, tokens, attrs=()) -> None: ...
# Record stub: five attributes; `feats2` is optional and defaults to None.
class MorphoToken(Record):
    __attributes__: Incomplete
    text: Incomplete
    lemma: Incomplete
    pos: Incomplete
    feats: Incomplete
    feats2: Incomplete
    def __init__(self, text, lemma, pos, feats, feats2: Incomplete | None = None) -> None: ...
# The three loaders named in this module's __all__; none is annotated yet.
def load_morphoru_gicrya(path): ...
def load_morphoru_rnc(path): ...
def load_morphoru_corpora(path): ...

View File

@@ -0,0 +1,28 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: index, type, start, stop and text.
class Ne5Span(Record):
    __attributes__: Incomplete
    index: Incomplete
    type: Incomplete
    start: Incomplete
    stop: Incomplete
    text: Incomplete
    def __init__(self, index, type, start, stop, text) -> None: ...
# Record stub: id, text and spans.
class Ne5Markup(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    spans: Incomplete
    def __init__(self, id, text, spans) -> None: ...
# Module-level functions of the ne5 stub; generator element types are still
# `Incomplete`.
def list_ids(dir) -> Generator[Incomplete]: ...
def txt_path(id, dir): ...
def ann_path(id, dir): ...
def parse_spans(lines) -> Generator[Incomplete]: ...
# NOTE(review): this stub imports nothing from corus.io, so load_text is
# presumably defined in the ne5 module itself - confirm upstream.
def load_text(path): ...
def load_id(id, dir): ...
# Re-exported by the sources package.
def load_ne5(dir) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,46 @@
from _typeshed import Incomplete
from corus.record import Record
# Star-import surface of this module: the seven loaders only. The record
# classes declared here are not listed.
__all__ = [
"load_ods_interfax",
"load_ods_gazeta",
"load_ods_izvestia",
"load_ods_meduza",
"load_ods_ria",
"load_ods_rt",
"load_ods_tass",
]
# Record stub: eight attributes, all required by __init__.
class NewsRecord(Record):
    __attributes__: Incomplete
    timestamp: Incomplete
    url: Incomplete
    edition: Incomplete
    topics: Incomplete
    authors: Incomplete
    title: Incomplete
    text: Incomplete
    stats: Incomplete
    def __init__(self, timestamp, url, edition, topics, authors, title, text, stats) -> None: ...
# Record stub: nine attributes, all required by __init__.
class Stats(Record):
    __attributes__: Incomplete
    fb: Incomplete
    vk: Incomplete
    ok: Incomplete
    twitter: Incomplete
    lj: Incomplete
    tg: Incomplete
    likes: Incomplete
    views: Incomplete
    comments: Incomplete
    def __init__(self, fb, vk, ok, twitter, lj, tg, likes, views, comments) -> None: ...
# The seven loaders named in this module's __all__; each takes a single
# `path` argument and is not annotated yet.
def load_ods_interfax(path): ...
def load_ods_gazeta(path): ...
def load_ods_izvestia(path): ...
def load_ods_meduza(path): ...
def load_ods_ria(path): ...
def load_ods_rt(path): ...
def load_ods_tass(path): ...

View File

@@ -0,0 +1,49 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: id, attrs and pars.
class OmniaDoc(Record):
    __attributes__: Incomplete
    id: Incomplete
    attrs: Incomplete
    pars: Incomplete
    def __init__(self, id, attrs, pars) -> None: ...
# Record stub with a single `sents` attribute.
class OmniaPar(Record):
    __attributes__: Incomplete
    sents: Incomplete
    def __init__(self, sents) -> None: ...
# Record stub with a single `tokens` attribute.
class OmniaSent(Record):
    __attributes__: Incomplete
    tokens: Incomplete
    def __init__(self, tokens) -> None: ...
# Record stub: text, lemma, atag, tag, ztag and g.
class OmniaToken(Record):
    __attributes__: Incomplete
    text: Incomplete
    lemma: Incomplete
    atag: Incomplete
    tag: Incomplete
    ztag: Incomplete
    g: Incomplete
    def __init__(self, text, lemma, atag, tag, ztag, g) -> None: ...
# Module-level string constants; the stub records their type only.
DID: str
G_TAG: str
S_END: str
P_END: str
DOC_END: str
# Stream and parser stubs; generator element types are still `Incomplete`.
def take_until(stream, value) -> Generator[Incomplete]: ...
def group_bounds(stream, end) -> Generator[Incomplete]: ...
def group_doc_bounds(stream) -> Generator[Incomplete]: ...
def group_pairs(stream) -> Generator[Incomplete]: ...
def parse_tokens(lines) -> Generator[Incomplete]: ...
def parse_sents(lines) -> Generator[Incomplete]: ...
def parse_pars(lines) -> Generator[Incomplete]: ...
def parse_tag_attrs(tag) -> Generator[Incomplete]: ...
def parse_doc_header(header): ...
def parse_docs(lines) -> Generator[Incomplete]: ...
# Unlike its siblings, this annotation spells out all three Generator
# parameters: yield `Incomplete`, send `Incomplete`, return `None`.
def load_omnia(path) -> Generator[Incomplete, Incomplete, None]: ...

View File

@@ -0,0 +1,27 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Module-level string constants; the stub records their type only.
TEXT: str
ANNO: str
# Record stub: id, start, stop and value.
class PersonsSpan(Record):
    __attributes__: Incomplete
    id: Incomplete
    start: Incomplete
    stop: Incomplete
    value: Incomplete
    def __init__(self, id, start, stop, value) -> None: ...
# Record stub: text and spans.
class PersonsMarkup(Record):
    __attributes__: Incomplete
    text: Incomplete
    spans: Incomplete
    def __init__(self, text, spans) -> None: ...
# Module-level functions of the persons stub; generator element types are
# still `Incomplete`.
def list_ids(path) -> Generator[Incomplete]: ...
def part_names(ids, part) -> Generator[Incomplete]: ...
def parse_anno(text) -> Generator[Incomplete]: ...
def load_ids(ids, path) -> Generator[Incomplete]: ...
# Re-exported by the sources package; no return annotation yet.
def load_persons(path): ...

View File

@@ -0,0 +1,21 @@
from _typeshed import Incomplete
from corus.record import Record
# Star-import surface of this module: the two loaders only.
__all__ = ["load_ria_raw", "load_ria"]
# Record stub: title and text.
class RiaRawRecord(Record):
    __attributes__: Incomplete
    title: Incomplete
    text: Incomplete
    def __init__(self, title, text) -> None: ...
# Record stub: title, prefix and text.
class RiaRecord(Record):
    __attributes__: Incomplete
    title: Incomplete
    prefix: Incomplete
    text: Incomplete
    def __init__(self, title, prefix, text) -> None: ...
# The two loaders named in this module's __all__; not annotated yet.
def load_ria_raw(path): ...
def load_ria(path): ...

View File

@@ -0,0 +1,27 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: file_name, text, sentence_id and entities.
class RuDReCRecord(Record):
    __attributes__: Incomplete
    file_name: Incomplete
    text: Incomplete
    sentence_id: Incomplete
    entities: Incomplete
    def __init__(self, file_name, text, sentence_id, entities) -> None: ...
# Record stub: seven attributes, all required by __init__.
class RuDReCEntity(Record):
    __attributes__: Incomplete
    entity_id: Incomplete
    entity_text: Incomplete
    entity_type: Incomplete
    start: Incomplete
    end: Incomplete
    concept_id: Incomplete
    concept_name: Incomplete
    def __init__(self, entity_id, entity_text, entity_type, start, end, concept_id, concept_name) -> None: ...
# Parser stubs are annotated as generators; the loader (re-exported by the
# sources package) has no return annotation yet.
def parse_entities(items) -> Generator[Incomplete]: ...
def parse_rudrec(items) -> Generator[Incomplete]: ...
def load_rudrec(path): ...

View File

@@ -0,0 +1,16 @@
from _typeshed import Incomplete
from corus.record import Record
# Star-import surface of this module: the three loaders only.
__all__ = ["load_russe_hj", "load_russe_rt", "load_russe_ae"]
# Record stub: word1, word2 and sim.
class RusseSemRecord(Record):
    __attributes__: Incomplete
    word1: Incomplete
    word2: Incomplete
    sim: Incomplete
    def __init__(self, word1, word2, sim) -> None: ...
# The three loaders named in this module's __all__; not annotated yet.
def load_russe_hj(path): ...
def load_russe_rt(path): ...
def load_russe_ae(path): ...

View File

@@ -0,0 +1,14 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: word1, word2 and score.
class SimlexRecord(Record):
    __attributes__: Incomplete
    word1: Incomplete
    word2: Incomplete
    score: Incomplete
    def __init__(self, word1, word2, score) -> None: ...
# The parser is annotated as a generator; the loader (re-exported by the
# sources package) has no return annotation yet.
def parse_simlex(lines) -> Generator[Incomplete]: ...
def load_simlex(path): ...

View File

@@ -0,0 +1,10 @@
from .arzamas import *
from .fontanka import *
from .interfax import *
from .kp import *
from .lenta import *
from .magazines import *
from .nplus1 import *
from .proza import *
from .social import *
from .subtitles import *

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_arzamas_metas", "load_taiga_arzamas"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_arzamas_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_arzamas(path, metas: Incomplete | None = None, offset: int = 144896, count: int = 311): ...

View File

@@ -0,0 +1,77 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: name, offset and file.
class ArchiveRecord(Record):
    __attributes__: Incomplete
    name: Incomplete
    offset: Incomplete
    file: Incomplete
    def __init__(self, name, offset, file) -> None: ...
# Record stub: id, meta and text.
class TaigaRecord(Record):
    __attributes__: Incomplete
    id: Incomplete
    meta: Incomplete
    text: Incomplete
    def __init__(self, id, meta, text) -> None: ...
# Record stub: only `name` is required; every other constructor argument
# defaults to None.
class Author(Record):
    __attributes__: Incomplete
    name: Incomplete
    readers: Incomplete
    texts: Incomplete
    profession: Incomplete
    about: Incomplete
    url: Incomplete
    def __init__(
        self,
        name,
        readers: Incomplete | None = None,
        texts: Incomplete | None = None,
        profession: Incomplete | None = None,
        about: Incomplete | None = None,
        url: Incomplete | None = None,
    ) -> None: ...
# Record stub: only `id` is required; the remaining ten constructor arguments
# default to None.
class Meta(Record):
    __attributes__: Incomplete
    id: Incomplete
    timestamp: Incomplete
    tags: Incomplete
    themes: Incomplete
    rubric: Incomplete
    genre: Incomplete
    topic: Incomplete
    author: Incomplete
    lang: Incomplete
    title: Incomplete
    url: Incomplete
    def __init__(
        self,
        id,
        timestamp: Incomplete | None = None,
        tags: Incomplete | None = None,
        themes: Incomplete | None = None,
        rubric: Incomplete | None = None,
        genre: Incomplete | None = None,
        topic: Incomplete | None = None,
        author: Incomplete | None = None,
        lang: Incomplete | None = None,
        title: Incomplete | None = None,
        url: Incomplete | None = None,
    ) -> None: ...
# Shared helper stubs of the taiga package. Tar and zip variants come in
# parallel pairs with matching signatures.
def load_tar(path, offset: int = 0) -> Generator[Incomplete]: ...
def load_zip(path, offset: int = 0) -> Generator[Incomplete]: ...
def parse_meta(file, encoding: str = "utf8") -> Generator[Incomplete]: ...
def load_metas(path, pattern, offset, count, load) -> Generator[Incomplete]: ...
def load_tar_metas(path, pattern, offset, count): ...
def load_zip_metas(path, pattern, offset, count): ...
def load_texts(path, pattern, offset, count, parse_id, load, encoding: str = "utf8") -> Generator[Incomplete]: ...
def parse_filename_id(path): ...
# `parse_id=...` means the parameter has a default that the stub does not
# spell out.
def load_tar_texts(path, pattern, offset, count, parse_id=...): ...
def load_zip_texts(path, pattern, offset, count, parse_id=...): ...
def merge_metas(records, metas: Incomplete | None = None) -> Generator[Incomplete]: ...
def patch_month(date, months): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_fontanka_metas", "load_taiga_fontanka"]
# `count` is annotated as int to match the other integer-defaulted parameters
# in this stub.
def load_taiga_fontanka_metas(path, offset: int = 0, count: int = 13): ...
def load_taiga_fontanka(path, metas: Incomplete | None = None, offset: int = 306359296, count: int = 342683): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_interfax_metas", "load_taiga_interfax"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_interfax_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_interfax(path, metas: Incomplete | None = None, offset: int = 11447296, count: int = 46429): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_kp_metas", "load_taiga_kp"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_kp_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_kp(path, metas: Incomplete | None = None, offset: int = 13042176, count: int = 45503): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_lenta_metas", "load_taiga_lenta"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_lenta_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_lenta(path, metas: Incomplete | None = None, offset: int = 12800000, count: int = 36446): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_magazines_metas", "load_taiga_magazines"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_magazines_metas(path, offset: int = 0, count: int = 36): ...
def load_taiga_magazines(path, metas: Incomplete | None = None, offset: int = 7292416, count: int = 39890): ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
# Star-import surface: the metas loader and the text loader.
__all__ = ["load_taiga_nplus1_metas", "load_taiga_nplus1"]
# The integer defaults for `offset` and `count` are copied into the stub
# verbatim; their meaning is not documented here.
def load_taiga_nplus1_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_nplus1(path, metas: Incomplete | None = None, offset: int = 1919488, count: int = 7696): ...

View File

@@ -0,0 +1,8 @@
from _typeshed import Incomplete
# Star-import surface: metas and text loaders for both proza and stihi.
__all__ = ["load_taiga_proza_metas", "load_taiga_proza", "load_taiga_stihi_metas", "load_taiga_stihi"]
# `count` is annotated as int to match the other integer-defaulted parameters
# in this stub.
def load_taiga_proza_metas(path, offset: int = 0, count: int = 13): ...
def load_taiga_stihi_metas(path, offset: int = 0, count: int = 3): ...
# `= ...` means the parameter has a default that the stub does not spell out.
def load_taiga_proza(path, metas: Incomplete | None = None, offset: int = ..., count: int = ...): ...
def load_taiga_stihi(path, metas: Incomplete | None = None, offset: int = ..., count: int = ...): ...

View File

@@ -0,0 +1,15 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Only the loader is exposed by a star import; TaigaSocialRecord is declared
# in this module but not listed here.
__all__ = ["load_taiga_social"]
# Record stub: id, network and text.
class TaigaSocialRecord(Record):
    __attributes__: Incomplete
    id: Incomplete
    network: Incomplete
    text: Incomplete
    def __init__(self, id, network, text) -> None: ...
# Generator-returning loader. Unlike the sibling taiga text loaders it takes
# no `metas` argument.
def load_taiga_social(path, offset: int = 3985892864, count: int = 4) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,6 @@
from _typeshed import Incomplete
from collections.abc import Generator
# NOTE(review): the visible part of this stub declares no `__all__`, unlike
# the sibling taiga modules; if that holds upstream, `from .subtitles import *`
# exposes every public name here, parse_metas included - confirm.
def parse_metas(items) -> Generator[Incomplete]: ...
def load_taiga_subtitles_metas(path, offset: int = 0, count: int = 1): ...
def load_taiga_subtitles(path, metas: Incomplete | None = None, offset: int = 2113024, count: int = 19011): ...

View File

@@ -0,0 +1,28 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: hyponym, hypernym, genitive, judgement and confidence.
class LRWCRecord(Record):
    __attributes__: Incomplete
    hyponym: Incomplete
    hypernym: Incomplete
    genitive: Incomplete
    judgement: Incomplete
    confidence: Incomplete
    def __init__(self, hyponym, hypernym, genitive, judgement, confidence) -> None: ...
# Functions declared between the two record classes of this stub. Only
# parse_toloka_lrwc carries a return annotation.
def parse_judgement(value): ...
def parse_confidence(value): ...
def parse_toloka_lrwc(lines) -> Generator[Incomplete]: ...
# Re-exported by the sources package.
def load_toloka_lrwc(path): ...
# Record stub: tweet_id, tweet and label.
class RuADReCTRecord(Record):
    __attributes__: Incomplete
    tweet_id: Incomplete
    tweet: Incomplete
    label: Incomplete
    def __init__(self, tweet_id, tweet, label) -> None: ...
# The parser is annotated as a generator; the loader (re-exported by the
# sources package) has no return annotation yet.
def parse_ruadrect(lines) -> Generator[Incomplete]: ...
def load_ruadrect(path): ...

View File

@@ -0,0 +1,29 @@
from _typeshed import Incomplete
from corus.record import Record
# Star-import surface of this module: the four loaders only.
__all__ = ["load_ud_gsd", "load_ud_taiga", "load_ud_pud", "load_ud_syntag"]
# Record stub: id, text, attrs and tokens.
class UDSent(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    attrs: Incomplete
    tokens: Incomplete
    def __init__(self, id, text, attrs, tokens) -> None: ...
# Record stub: seven attributes, all required by __init__.
class UDToken(Record):
    __attributes__: Incomplete
    id: Incomplete
    text: Incomplete
    lemma: Incomplete
    pos: Incomplete
    feats: Incomplete
    head_id: Incomplete
    rel: Incomplete
    def __init__(self, id, text, lemma, pos, feats, head_id, rel) -> None: ...
# The four loaders named in this module's __all__; not annotated yet.
def load_ud_gsd(path): ...
def load_ud_taiga(path): ...
def load_ud_pud(path): ...
def load_ud_syntag(path): ...

View File

@@ -0,0 +1,20 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
from corus.third.WikiExtractor import Extractor
# Record stub: id, url, title and text, plus the `from_json` classmethod.
class WikiRecord(Record):
    __attributes__: Incomplete
    id: Incomplete
    url: Incomplete
    title: Incomplete
    text: Incomplete
    def __init__(self, id, url, title, text) -> None: ...
    @classmethod
    def from_json(cls, data): ...
# Subclass of the bundled WikiExtractor `Extractor`; the stub declares one
# extra method, `extract_`.
class Extractor_(Extractor):
    def extract_(self): ...
# Generator-returning loader re-exported by the sources package; the element
# type is still `Incomplete`.
def load_wiki(path) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,19 @@
from _typeshed import Incomplete
from collections.abc import Generator
from corus.record import Record
# Record stub: text, pos and tag.
class WikinerToken(Record):
    __attributes__: Incomplete
    text: Incomplete
    pos: Incomplete
    tag: Incomplete
    def __init__(self, text, pos, tag) -> None: ...
# Record stub with a single `tokens` attribute.
class WikinerMarkup(Record):
    __attributes__: Incomplete
    tokens: Incomplete
    def __init__(self, tokens) -> None: ...
# parse_wikiner takes a single `line`; the loader is annotated as a generator
# and is re-exported by the sources package.
def parse_wikiner(line): ...
def load_wikiner(path) -> Generator[Incomplete]: ...

View File

@@ -0,0 +1,227 @@
import typing
from _typeshed import Incomplete
from collections.abc import Generator
from math import (
acos as acos,
asin as asin,
atan as atan,
ceil as ceil,
cos as cos,
exp as exp,
floor as floor,
pi as pi,
sin as sin,
tan as tan,
trunc as trunc,
)
# Module-level names of the bundled WikiExtractor module, in source order.
# Most globals are still `Incomplete`; the camelCase names are the upstream
# identifiers and are kept as-is.
PY2: Incomplete
# `text_type` is a plain alias of `str` in this stub.
text_type = str
version: str
options: Incomplete
templateKeys: Incomplete
filter_disambig_page_pattern: Incomplete
# Integer counters declared at module level.
g_page_total: int
g_page_articl_total: int
g_page_articl_used_total: int
def keepPage(ns, catSet, page): ...
def get_url(uid): ...
selfClosingTags: Incomplete
placeholder_tags: Incomplete
def normalizeTitle(title): ...
def unescape(text): ...
comment: Incomplete
nowiki: Incomplete
def ignoreTag(tag) -> None: ...
selfClosing_tag_patterns: Incomplete
placeholder_tag_patterns: Incomplete
# NOTE(review): the names below read like compiled regex patterns, but the
# stub does not say so - confirm upstream before typing them.
preformatted: Incomplete
externalLink: Incomplete
externalLinkNoAnchor: Incomplete
bold_italic: Incomplete
bold: Incomplete
italic_quote: Incomplete
italic: Incomplete
quote_quote: Incomplete
spaces: Incomplete
dots: Incomplete
# `_T` is the element type variable used by Template's list base class.
_T = typing.TypeVar("_T")
# list subclass with a `parse` classmethod and a `subst` method.
class Template(list[_T]):
    @classmethod
    def parse(cls, body): ...
    def subst(self, params, extractor, depth: int = 0): ...
# Subclass of `text_type` (an alias of str in this stub). Its `subst` takes
# `depth` without a default, unlike Template.subst.
class TemplateText(text_type):
    def subst(self, params, extractor, depth): ...
# Stub with two attributes, `name` and `default`; the constructor takes a
# single `parameter` argument.
class TemplateArg:
    name: Incomplete
    default: Incomplete
    def __init__(self, parameter) -> None: ...
    def subst(self, params, extractor, depth): ...
class Frame:
title: Incomplete
args: Incomplete
prev: Incomplete
depth: Incomplete
def __init__(self, title: str = "", args=[], prev: Incomplete | None = None) -> None: ...
def push(self, title, args): ...
def pop(self): ...
# Module-level string constant declared between the Frame and Extractor
# classes; the stub does not carry its value.
substWords: str
# Stub for the Extractor class. The three attributes declared between the
# methods stay inside the class body, because further methods taking `self`
# follow them.
class Extractor:
    id: Incomplete
    revid: Incomplete
    title: Incomplete
    text: Incomplete
    magicWords: Incomplete
    frame: Incomplete
    recursion_exceeded_1_errs: int
    recursion_exceeded_2_errs: int
    recursion_exceeded_3_errs: int
    template_title_errs: int
    def __init__(self, id, revid, title, lines) -> None: ...
    def write_output(self, out, text) -> None: ...
    def extract(self, out) -> None: ...
    def transform(self, wikitext): ...
    def transform1(self, text): ...
    def wiki2text(self, text): ...
    def clean(self, text): ...
    maxTemplateRecursionLevels: int
    maxParameterRecursionLevels: int
    reOpen: Incomplete
    def expand(self, wikitext): ...
    def templateParams(self, parameters): ...
    def expandTemplate(self, body): ...
# Module-level helpers (none takes `self`). The two find* functions are
# annotated as generators; the string_* and roman_main functions each take a
# single `args` argument.
def splitParts(paramsList): ...
def findMatchingBraces(text, ldelim: int = 0) -> Generator[Incomplete]: ...
# The delimiter defaults are one-element lists.
def findBalanced(text, openDelim=["[["], closeDelim=["]]"]) -> Generator[Incomplete]: ...
def if_empty(*rest): ...
def functionParams(args, vars): ...
def string_sub(args): ...
def string_sublength(args): ...
def string_len(args): ...
def string_find(args): ...
def string_pos(args): ...
def string_replace(args): ...
def string_rep(args): ...
def roman_main(args): ...
modules: Incomplete
# Stub with `names` and `values` attributes and item get/set methods.
class MagicWords:
    names: Incomplete
    values: Incomplete
    def __init__(self) -> None: ...
    def __getitem__(self, name): ...
    def __setitem__(self, name, value) -> None: ...
    # NOTE(review): `switches` is kept as a class attribute because it directly
    # follows the methods; the flattened source cannot confirm this - verify
    # against upstream WikiExtractor.
    switches: Incomplete
# Module-level declarations between the MagicWords and Infix classes; none
# is typed beyond `Incomplete` yet.
magicWordsRE: Incomplete
def ucfirst(string): ...
def lcfirst(string): ...
def fullyQualifiedTemplateTitle(templateTitle): ...
def normalizeNamespace(ns): ...
# Stub wrapping a `function` attribute. It declares the reflected and direct
# `|`, `<<` and `>>` operator hooks plus a two-argument __call__.
class Infix:
    function: Incomplete
    def __init__(self, function) -> None: ...
    def __ror__(self, other): ...
    def __or__(self, other): ...
    def __rlshift__(self, other): ...
    def __rshift__(self, other): ...
    def __call__(self, value1, value2): ...
# Module-level declarations following the Infix class, in source order.
ROUND: Incomplete
# The sharp_* functions take `extr` as their first argument, except
# sharp_invoke; several accept trailing *args.
def sharp_expr(extr, expr): ...
def sharp_if(extr, testValue, valueIfTrue, valueIfFalse: Incomplete | None = None, *args): ...
def sharp_ifeq(extr, lvalue, rvalue, valueIfTrue, valueIfFalse: Incomplete | None = None, *args): ...
# `Else` is capitalised upstream; lower-case `else` is a Python keyword.
def sharp_iferror(extr, test, then: str = "", Else: Incomplete | None = None, *args): ...
def sharp_switch(extr, primary, *params): ...
def sharp_invoke(module, function, args): ...
parserFunctions: Incomplete
def callParserFunction(functionName, args, extractor): ...
reNoinclude: Incomplete
reIncludeonly: Incomplete
def define_template(title, page) -> None: ...
def dropNested(text, openDelim, closeDelim): ...
def dropSpans(spans, text): ...
def replaceInternalLinks(text): ...
def makeInternalLink(title, label): ...
wgUrlProtocols: Incomplete
EXT_LINK_URL_CLASS: str
ANCHOR_CLASS: str
ExtLinkBracketedRegex: Incomplete
EXT_IMAGE_REGEX: Incomplete
def replaceExternalLinks(text): ...
def makeExternalLink(url, anchor): ...
def makeExternalImage(url, alt: str = ""): ...
tailRE: Incomplete
syntaxhighlight: Incomplete
section: Incomplete
listOpen: Incomplete
listClose: Incomplete
listItem: Incomplete
def compact(text): ...
def handle_unicode(entity): ...
# Stub with `filesPerDir`, `path_name`, `dir_index` and `file_index`
# attributes and a `__next__` method.
class NextFile:
    filesPerDir: int
    path_name: Incomplete
    dir_index: int
    file_index: int
    def __init__(self, path_name) -> None: ...
    def __next__(self): ...
    # Alias of __next__, so it must live in the class body where that name
    # is defined.
    next = __next__
# Stub with nextFile/compress/max_file_size/file attributes. The constructor
# defaults are max_file_size=0 and compress=True.
class OutputSplitter:
    nextFile: Incomplete
    compress: Incomplete
    max_file_size: Incomplete
    file: Incomplete
    def __init__(self, nextFile, max_file_size: int = 0, compress: bool = True) -> None: ...
    def reserve(self, size) -> None: ...
    def write(self, data) -> None: ...
    def close(self) -> None: ...
    def open(self, filename): ...
# Module-level declarations following the OutputSplitter class, in source
# order.
tagRE: Incomplete
keyRE: Incomplete
catRE: Incomplete
def load_templates(file, output_file: Incomplete | None = None) -> None: ...
# `input` shadows the builtin inside this signature only.
def pages_from(input) -> Generator[Incomplete]: ...
def process_dump(input_file, template_file, out_file, file_size, file_compress, process_count) -> None: ...
def extract_process(opts, i, jobs_queue, output_queue) -> None: ...
report_period: int
# out_file, file_size and file_compress are optional here (None, 0 and True),
# while process_dump takes the same names as required arguments.
def reduce_process(
opts, output_queue, spool_length, out_file: Incomplete | None = None, file_size: int = 0, file_compress: bool = True
) -> None: ...
minFileSize: Incomplete
def main() -> None: ...
def createLogger(quiet, debug, log_file) -> None: ...

View File

28
stubs/corus/corus/zip.pyi Normal file
View File

@@ -0,0 +1,28 @@
from _typeshed import Incomplete
from typing import NamedTuple
# open_zip is not annotated yet. The constants below record their type only,
# not their values; HEADER_SIGNATURE is the only bytes constant.
def open_zip(path): ...
HEADER_FORMAT: str
HEADER_SIGNATURE: bytes
NO_COMPRESSION: int
DEFLATED: int
# NamedTuple with eleven fields in the order declared below. The field types
# are still `Incomplete`.
class ZipHeader(NamedTuple):
    signature: Incomplete
    extract_by: Incomplete
    flags: Incomplete
    compression: Incomplete
    time: Incomplete
    date: Incomplete
    crc: Incomplete
    compressed: Incomplete
    uncompressed: Incomplete
    name: Incomplete
    extra: Incomplete
# Module-level functions following ZipHeader. Only assert_zip_header carries
# a return annotation (None).
def decode_name(name): ...
def read_zip_header(file): ...
def is_zip_header(record): ...
def assert_zip_header(record) -> None: ...
def read_zip_data(file, header): ...