Centralize METADATA.toml parsing in the test suite (#9534)

This commit is contained in:
Alex Waygood
2023-01-28 15:13:46 +00:00
committed by GitHub
parent 7f986bdf85
commit c216b74e39
11 changed files with 294 additions and 191 deletions

View File

@@ -33,7 +33,7 @@ jobs:
cache: pip
cache-dependency-path: requirements-tests.txt
- run: pip install -r requirements-tests.txt
- run: ./tests/check_consistent.py
- run: python ./tests/check_consistent.py
new-syntax:
name: Ensure new syntax usage

View File

@@ -55,7 +55,7 @@ extra_standard_library = [
"opcode",
"pyexpat",
]
known_first_party = ["utils"]
known_first_party = ["utils", "parse_metadata"]
[tool.pycln]
all = true

78
tests/check_consistent.py Executable file → Normal file
View File

@@ -11,46 +11,14 @@ import sys
import urllib.parse
from pathlib import Path
import tomli
import yaml
from packaging.requirements import Requirement
from packaging.specifiers import SpecifierSet
from packaging.version import Version
from utils import (
METADATA_MAPPING,
VERSIONS_RE,
get_all_testcase_directories,
get_gitignore_spec,
spec_matches_path,
strip_comments,
)
from parse_metadata import read_metadata
from utils import VERSIONS_RE, get_all_testcase_directories, get_gitignore_spec, spec_matches_path, strip_comments
metadata_keys = {
"version",
"requires",
"extra_description",
"stub_distribution",
"obsolete_since",
"no_longer_updated",
"upload",
"tool",
}
tool_keys = {
"stubtest": {
"skip",
"apt_dependencies",
"brew_dependencies",
"choco_dependencies",
"extras",
"ignore_missing_stub",
"platforms",
}
}
extension_descriptions = {".pyi": "stub", ".py": ".py"}
supported_stubtest_platforms = {"win32", "darwin", "linux"}
dist_name_re = re.compile(r"^[a-z0-9]([a-z0-9._-]*[a-z0-9])?$", re.IGNORECASE)
def assert_consistent_filetypes(
@@ -163,46 +131,8 @@ def _find_stdlib_modules() -> set[str]:
def check_metadata() -> None:
for distribution in os.listdir("stubs"):
with open(os.path.join("stubs", distribution, "METADATA.toml"), encoding="UTF-8") as f:
data = tomli.loads(f.read())
assert "version" in data, f"Missing version for {distribution}"
version = data["version"]
msg = f"Unsupported version {repr(version)}"
assert isinstance(version, str), msg
# Check that the version parses
Version(version.removesuffix(".*"))
for key in data:
assert key in metadata_keys, f"Unexpected key {key} for {distribution}"
assert isinstance(data.get("requires", []), list), f"Invalid requires value for {distribution}"
for dep in data.get("requires", []):
assert isinstance(dep, str), f"Invalid requirement {repr(dep)} for {distribution}"
for space in " \t\n":
assert space not in dep, f"For consistency, requirement should not have whitespace: {dep}"
# Check that the requirement parses
Requirement(dep)
if "stub_distribution" in data:
assert dist_name_re.fullmatch(data["stub_distribution"]), f"Invalid 'stub_distribution' value for {distribution!r}"
assert isinstance(data.get("upload", True), bool), f"Invalid 'upload' value for {distribution!r}"
assert set(data.get("tool", [])).issubset(tool_keys.keys()), f"Unrecognised tool for {distribution}"
for tool, tk in tool_keys.items():
for key in data.get("tool", {}).get(tool, {}):
assert key in tk, f"Unrecognised {tool} key {key} for {distribution}"
tool_stubtest = data.get("tool", {}).get("stubtest", {})
specified_stubtest_platforms = set(tool_stubtest.get("platforms", ["linux"]))
assert (
specified_stubtest_platforms <= supported_stubtest_platforms
), f"Unrecognised platforms specified: {supported_stubtest_platforms - specified_stubtest_platforms} for {distribution}"
# Check that only specified platforms install packages:
for supported_plat in supported_stubtest_platforms:
if supported_plat not in specified_stubtest_platforms:
assert (
METADATA_MAPPING[supported_plat] not in tool_stubtest
), f"Installing system deps for unspecified platform {supported_plat} for {distribution}"
# This function does various sanity checks for METADATA.toml files
read_metadata(distribution)
def get_txt_requirements() -> dict[str, SpecifierSet]:

View File

@@ -4,7 +4,7 @@ from __future__ import annotations
import os
import sys
from utils import read_dependencies
from parse_metadata import read_dependencies
distributions = sys.argv[1:]
if not distributions:

View File

@@ -2,17 +2,14 @@
import os
import sys
import tomli
from utils import METADATA_MAPPING
from parse_metadata import read_stubtest_settings
platform = sys.platform
distributions = sys.argv[1:]
if not distributions:
distributions = os.listdir("stubs")
if platform in METADATA_MAPPING:
for distribution in distributions:
with open(f"stubs/{distribution}/METADATA.toml", "rb") as file:
for package in tomli.load(file).get("tool", {}).get("stubtest", {}).get(METADATA_MAPPING[platform], []):
print(package)
for distribution in distributions:
stubtest_settings = read_stubtest_settings(distribution)
for package in stubtest_settings.system_requirements_for_platform(platform):
print(package)

View File

@@ -26,14 +26,13 @@ from typing_extensions import Annotated, TypeAlias
import tomli
from parse_metadata import PackageDependencies, get_recursive_requirements
from utils import (
VERSIONS_RE as VERSION_LINE_RE,
PackageDependencies,
VenvInfo,
colored,
get_gitignore_spec,
get_mypy_req,
get_recursive_requirements,
make_venv,
print_error,
print_success_msg,

264
tests/parse_metadata.py Normal file
View File

@@ -0,0 +1,264 @@
"""Tools to help parse and validate information stored in METADATA.toml files."""
from __future__ import annotations
import os
import re
from collections.abc import Mapping
from dataclasses import dataclass
from pathlib import Path
from typing import NamedTuple
from typing_extensions import Annotated, Final, TypeGuard, final
import tomli
from packaging.requirements import Requirement
from packaging.version import Version
from utils import cache
# Public API of this module; everything else is an implementation detail.
__all__ = [
    "StubMetadata",
    "PackageDependencies",
    "StubtestSettings",
    "get_recursive_requirements",
    "read_dependencies",
    "read_metadata",
    "read_stubtest_settings",
]

# Maps a sys.platform value to the METADATA.toml key that lists the system
# packages stubtest needs installed on that platform.
_STUBTEST_PLATFORM_MAPPING: Final = {"linux": "apt_dependencies", "darwin": "brew_dependencies", "win32": "choco_dependencies"}
def _is_list_of_strings(obj: object) -> TypeGuard[list[str]]:
return isinstance(obj, list) and all(isinstance(item, str) for item in obj)
@final
@dataclass(frozen=True)
class StubtestSettings:
    """The stubtest settings for a single stubs distribution.

    Don't construct instances directly; use the `read_stubtest_settings` function.
    """

    # Whether stubtest is skipped entirely for this distribution.
    skipped: bool
    # System packages to install (per package manager) before running stubtest.
    apt_dependencies: list[str]
    brew_dependencies: list[str]
    choco_dependencies: list[str]
    # pip "extras" to install the upstream distribution with.
    extras: list[str]
    # Whether to pass --ignore-missing-stub to stubtest.
    ignore_missing_stub: bool
    # sys.platform values on which stubtest runs in CI for this distribution.
    platforms: list[str]

    def system_requirements_for_platform(self, platform: str) -> list[str]:
        """Return the system packages stubtest needs on *platform* (a sys.platform value)."""
        assert platform in _STUBTEST_PLATFORM_MAPPING, f"Unrecognised platform {platform!r}"
        # Look up the matching *_dependencies field on this instance.
        ret = getattr(self, _STUBTEST_PLATFORM_MAPPING[platform])
        assert _is_list_of_strings(ret)
        return ret
@cache
def read_stubtest_settings(distribution: str) -> StubtestSettings:
    """Return an object describing the stubtest settings for a single stubs distribution.

    Reads the `[tool.stubtest]` table of ``stubs/<distribution>/METADATA.toml``
    and validates the types of its values before returning.
    """
    with Path("stubs", distribution, "METADATA.toml").open("rb") as f:
        data: dict[str, object] = tomli.load(f).get("tool", {}).get("stubtest", {})

    # BUG FIX: the METADATA.toml key is "skip" (it is the only skip-related key
    # accepted by the metadata validation whitelist, and the key the stubtest
    # runner previously read); reading "skipped" here meant the setting was
    # silently always False.
    skipped = data.get("skip", False)
    apt_dependencies = data.get("apt_dependencies", [])
    brew_dependencies = data.get("brew_dependencies", [])
    choco_dependencies = data.get("choco_dependencies", [])
    extras = data.get("extras", [])
    ignore_missing_stub = data.get("ignore_missing_stub", True)
    specified_platforms = data.get("platforms", ["linux"])

    assert type(skipped) is bool
    assert type(ignore_missing_stub) is bool

    # It doesn't work for type-narrowing if we use a for loop here...
    assert _is_list_of_strings(specified_platforms)
    assert _is_list_of_strings(apt_dependencies)
    assert _is_list_of_strings(brew_dependencies)
    assert _is_list_of_strings(choco_dependencies)
    assert _is_list_of_strings(extras)

    unrecognised_platforms = set(specified_platforms) - _STUBTEST_PLATFORM_MAPPING.keys()
    assert not unrecognised_platforms, f"Unrecognised platforms specified for {distribution!r}: {unrecognised_platforms}"

    # System dependencies may only be listed for platforms stubtest runs on.
    for platform, dep_key in _STUBTEST_PLATFORM_MAPPING.items():
        if platform not in specified_platforms:
            assert dep_key not in data, (
                f"Stubtest is not run on {platform} in CI for {distribution!r}, "
                f"but {dep_key!r} are specified in METADATA.toml"
            )

    return StubtestSettings(
        skipped=skipped,
        apt_dependencies=apt_dependencies,
        brew_dependencies=brew_dependencies,
        choco_dependencies=choco_dependencies,
        extras=extras,
        ignore_missing_stub=ignore_missing_stub,
        platforms=specified_platforms,
    )
@final
@dataclass(frozen=True)
class StubMetadata:
    """The metadata for a single stubs distribution.

    Don't construct instances directly; use the `read_metadata` function.
    """

    # The "version" field verbatim from METADATA.toml (may end in ".*").
    version: str
    requires: Annotated[list[str], "The raw requirements as listed in METADATA.toml"]
    extra_description: str | None
    stub_distribution: Annotated[str, "The name under which the distribution is uploaded to PyPI"]
    obsolete_since: Annotated[str, "A string representing a specific version"] | None
    no_longer_updated: bool
    uploaded_to_pypi: Annotated[bool, "Whether or not a distribution is uploaded to PyPI"]
    # The parsed `[tool.stubtest]` table for this distribution.
    stubtest_settings: StubtestSettings
# All keys that may legally appear at the top level of a METADATA.toml file.
_KNOWN_METADATA_FIELDS: Final = frozenset(
    {"version", "requires", "extra_description", "stub_distribution", "obsolete_since", "no_longer_updated", "upload", "tool"}
)

# All keys that may legally appear in each `[tool.*]` table of a METADATA.toml file.
_KNOWN_METADATA_TOOL_FIELDS: Final = {
    "stubtest": {
        "skip",
        "apt_dependencies",
        "brew_dependencies",
        "choco_dependencies",
        "extras",
        "ignore_missing_stub",
        "platforms",
    }
}

# Valid PyPI distribution names: alphanumeric at both ends, with '.', '_' or
# '-' allowed only as internal separators (case-insensitive).
_DIST_NAME_RE: Final = re.compile(r"^[a-z0-9]([a-z0-9._-]*[a-z0-9])?$", re.IGNORECASE)
@cache
def read_metadata(distribution: str) -> StubMetadata:
    """Return an object describing the metadata of a stub as given in the METADATA.toml file.

    This function does some basic validation,
    but does no parsing, transforming or normalization of the metadata.
    Use `read_dependencies` if you need to parse the dependencies
    given in the `requires` field, for example.
    """
    with Path("stubs", distribution, "METADATA.toml").open("rb") as file:
        metadata: dict[str, object] = tomli.load(file)

    unexpected_fields = metadata.keys() - _KNOWN_METADATA_FIELDS
    assert not unexpected_fields, f"Unexpected keys in METADATA.toml for {distribution!r}: {unexpected_fields}"

    assert "version" in metadata, f"Missing 'version' field in METADATA.toml for {distribution!r}"
    version = metadata["version"]
    assert isinstance(version, str)
    # Check that the version parses (a trailing ".*" wildcard is stripped first)
    Version(version[:-2] if version.endswith(".*") else version)

    requires = metadata.get("requires", [])
    assert isinstance(requires, list)
    for requirement_string in requires:
        assert isinstance(requirement_string, str), f"Invalid requirement {requirement_string!r} for {distribution!r}"
        assert all(
            whitespace_char not in requirement_string for whitespace_char in " \t\n"
        ), f"For consistency, requirement should not have whitespace: {requirement_string!r}"
        # Check that the requirement parses
        Requirement(requirement_string)

    extra_description = metadata.get("extra_description")
    assert isinstance(extra_description, (str, type(None)))

    if "stub_distribution" not in metadata:
        # Default: typeshed stubs are uploaded as "types-<distribution>".
        stub_distribution = f"types-{distribution}"
    else:
        stub_distribution = metadata["stub_distribution"]
        assert isinstance(stub_distribution, str)
        assert _DIST_NAME_RE.fullmatch(stub_distribution), f"Invalid 'stub_distribution' value for {distribution!r}"

    obsolete_since = metadata.get("obsolete_since")
    assert isinstance(obsolete_since, (str, type(None)))
    no_longer_updated = metadata.get("no_longer_updated", False)
    assert type(no_longer_updated) is bool
    uploaded_to_pypi = metadata.get("upload", True)
    assert type(uploaded_to_pypi) is bool

    tools_settings = metadata.get("tool", {})
    assert isinstance(tools_settings, dict)
    assert tools_settings.keys() <= _KNOWN_METADATA_TOOL_FIELDS.keys(), f"Unrecognised tool for {distribution!r}"
    for tool, known_keys in _KNOWN_METADATA_TOOL_FIELDS.items():
        settings_for_tool = tools_settings.get(tool, {})
        assert isinstance(settings_for_tool, dict)
        for key in settings_for_tool:
            assert key in known_keys, f"Unrecognised {tool} key {key!r} for {distribution!r}"

    return StubMetadata(
        version=version,
        requires=requires,
        extra_description=extra_description,
        stub_distribution=stub_distribution,
        obsolete_since=obsolete_since,
        no_longer_updated=no_longer_updated,
        uploaded_to_pypi=uploaded_to_pypi,
        stubtest_settings=read_stubtest_settings(distribution),
    )
class PackageDependencies(NamedTuple):
    """A stubs package's dependencies, split into typeshed-internal and external."""

    # Names of the typeshed stubs directories this package depends on.
    typeshed_pkgs: tuple[str, ...]
    # Normalised requirement strings for external (non-types) dependencies.
    external_pkgs: tuple[str, ...]
@cache
def get_pypi_name_to_typeshed_name_mapping() -> Mapping[str, str]:
    """Map every stub distribution's PyPI name to its typeshed directory name."""
    mapping: dict[str, str] = {}
    for typeshed_name in os.listdir("stubs"):
        mapping[read_metadata(typeshed_name).stub_distribution] = typeshed_name
    return mapping
@cache
def read_dependencies(distribution: str) -> PackageDependencies:
    """Read the dependencies listed in a METADATA.toml file for a stubs package.

    Once the dependencies have been read,
    determine which dependencies are typeshed-internal dependencies,
    and which dependencies are external (non-types) dependencies.
    For typeshed dependencies, translate the "dependency name" into the "package name";
    for external dependencies, leave them as they are in the METADATA.toml file.

    Note that this function may consider things to be typeshed stubs
    even if they haven't yet been uploaded to PyPI.
    If a typeshed stub is removed, this function will consider it to be an external dependency.
    """
    pypi_to_typeshed = get_pypi_name_to_typeshed_name_mapping()
    typeshed_pkgs: list[str] = []
    external_pkgs: list[str] = []
    for raw_requirement in read_metadata(distribution).requires:
        # Parsing with Requirement also catches any malformed requirements early.
        requirement = Requirement(raw_requirement)
        if requirement.name in pypi_to_typeshed:
            typeshed_pkgs.append(pypi_to_typeshed[requirement.name])
        else:
            # Round-trip through Requirement's string form so that every
            # external requirement has a normalised string representation.
            external_pkgs.append(str(requirement))
    return PackageDependencies(tuple(typeshed_pkgs), tuple(external_pkgs))
@cache
def get_recursive_requirements(package_name: str) -> PackageDependencies:
    """Recursively gather dependencies for a single stubs package.

    For example, if the stubs for `caldav`
    declare a dependency on typeshed's stubs for `requests`,
    and the stubs for requests declare a dependency on typeshed's stubs for `urllib3`,
    `get_recursive_requirements("caldav")` will determine that the stubs for `caldav`
    have both `requests` and `urllib3` as typeshed-internal dependencies.
    """
    direct = read_dependencies(package_name)
    typeshed_pkgs = set(direct.typeshed_pkgs)
    external_pkgs = set(direct.external_pkgs)
    # Recurse into each typeshed-internal dependency; @cache memoises repeats.
    for typeshed_dependency in direct.typeshed_pkgs:
        transitive = get_recursive_requirements(typeshed_dependency)
        typeshed_pkgs.update(transitive.typeshed_pkgs)
        external_pkgs.update(transitive.external_pkgs)
    return PackageDependencies(tuple(sorted(typeshed_pkgs)), tuple(sorted(external_pkgs)))

View File

@@ -22,7 +22,7 @@ import pkg_resources
from pytype import config as pytype_config, load_pytd # type: ignore[import]
from pytype.imports import typeshed # type: ignore[import]
import utils
from parse_metadata import read_dependencies
TYPESHED_SUBDIRS = ["stdlib", "stubs"]
TYPESHED_HOME = "TYPESHED_HOME"
@@ -153,7 +153,7 @@ def get_missing_modules(files_to_test: Sequence[str]) -> Iterable[str]:
stub_distributions.add(parts[idx + 1])
missing_modules = set()
for distribution in stub_distributions:
for pkg in utils.read_dependencies(distribution).external_pkgs:
for pkg in read_dependencies(distribution).external_pkgs:
# See https://stackoverflow.com/a/54853084
top_level_file = os.path.join(pkg_resources.get_distribution(pkg).egg_info, "top_level.txt") # type: ignore[attr-defined]
with open(top_level_file) as f:

View File

@@ -15,13 +15,13 @@ from itertools import product
from pathlib import Path
from typing_extensions import TypeAlias
from parse_metadata import get_recursive_requirements
from utils import (
PackageInfo,
VenvInfo,
colored,
get_all_testcase_directories,
get_mypy_req,
get_recursive_requirements,
make_venv,
print_error,
print_success_msg,

View File

@@ -11,28 +11,25 @@ import tempfile
from pathlib import Path
from typing import NoReturn
import tomli
from utils import colored, get_mypy_req, get_recursive_requirements, make_venv, print_error, print_success_msg
from parse_metadata import get_recursive_requirements, read_metadata
from utils import colored, get_mypy_req, make_venv, print_error, print_success_msg
def run_stubtest(dist: Path, *, verbose: bool = False, specified_stubs_only: bool = False) -> bool:
with open(dist / "METADATA.toml", encoding="UTF-8") as f:
metadata = dict(tomli.loads(f.read()))
dist_name = dist.name
metadata = read_metadata(dist_name)
print(f"{dist_name}... ", end="")
print(f"{dist.name}... ", end="")
stubtest_meta = metadata.get("tool", {}).get("stubtest", {})
if stubtest_meta.get("skip", False):
stubtest_settings = metadata.stubtest_settings
if stubtest_settings.skipped:
print(colored("skipping", "yellow"))
return True
platforms_to_test = stubtest_meta.get("platforms", ["linux"])
if sys.platform not in platforms_to_test:
if sys.platform not in stubtest_settings.platforms:
if specified_stubs_only:
print(colored("skipping (platform not specified in METADATA.toml)", "yellow"))
return True
print(colored(f"Note: {dist.name} is not currently tested on {sys.platform} in typeshed's CI.", "yellow"))
print(colored(f"Note: {dist_name} is not currently tested on {sys.platform} in typeshed's CI.", "yellow"))
with tempfile.TemporaryDirectory() as tmp:
venv_dir = Path(tmp)
@@ -41,12 +38,8 @@ def run_stubtest(dist: Path, *, verbose: bool = False, specified_stubs_only: boo
except Exception:
print_error("fail")
raise
dist_version = metadata["version"]
extras = stubtest_meta.get("extras", [])
assert isinstance(dist_version, str)
assert isinstance(extras, list)
dist_extras = ", ".join(extras)
dist_req = f"{dist.name}[{dist_extras}]=={dist_version}"
dist_extras = ", ".join(stubtest_settings.extras)
dist_req = f"{dist_name}[{dist_extras}]=={metadata.version}"
# If @tests/requirements-stubtest.txt exists, run "pip install" on it.
req_path = dist / "@tests" / "requirements-stubtest.txt"
@@ -58,7 +51,7 @@ def run_stubtest(dist: Path, *, verbose: bool = False, specified_stubs_only: boo
print_command_failure("Failed to install requirements", e)
return False
requirements = get_recursive_requirements(dist.name)
requirements = get_recursive_requirements(dist_name)
# We need stubtest to be able to import the package, so install mypy into the venv
# Hopefully mypy continues to not need too many dependencies
@@ -72,7 +65,7 @@ def run_stubtest(dist: Path, *, verbose: bool = False, specified_stubs_only: boo
print_command_failure("Failed to install", e)
return False
ignore_missing_stub = ["--ignore-missing-stub"] if stubtest_meta.get("ignore_missing_stub", True) else []
ignore_missing_stub = ["--ignore-missing-stub"] if stubtest_settings.ignore_missing_stub else []
packages_to_check = [d.name for d in dist.iterdir() if d.is_dir() and d.name.isidentifier()]
modules_to_check = [d.stem for d in dist.iterdir() if d.is_file() and d.suffix == ".pyi"]
stubtest_cmd = [

View File

@@ -7,15 +7,13 @@ import re
import subprocess
import sys
import venv
from collections.abc import Iterable, Mapping
from collections.abc import Iterable
from functools import lru_cache
from pathlib import Path
from typing import NamedTuple
from typing_extensions import Annotated
import pathspec
import tomli
from packaging.requirements import Requirement
try:
from termcolor import colored as colored
@@ -29,9 +27,6 @@ except ImportError:
# This module is imported by mypy_test.py, which needs to run on 3.7 in CI
cache = lru_cache(None)
# Used to install system-wide packages for different OS types:
METADATA_MAPPING = {"linux": "apt_dependencies", "darwin": "brew_dependencies", "win32": "choco_dependencies"}
def strip_comments(text: str) -> str:
return text.split("#")[0].strip()
@@ -49,81 +44,6 @@ def print_success_msg() -> None:
print(colored("success", "green"))
# ====================================================================
# Reading dependencies from METADATA.toml files
# ====================================================================
class PackageDependencies(NamedTuple):
typeshed_pkgs: tuple[str, ...]
external_pkgs: tuple[str, ...]
@cache
def get_pypi_name_to_typeshed_name_mapping() -> Mapping[str, str]:
stub_name_map = {}
for typeshed_name in os.listdir("stubs"):
with Path("stubs", typeshed_name, "METADATA.toml").open("rb") as f:
pypi_name = tomli.load(f).get("stub_distribution", f"types-{typeshed_name}")
assert isinstance(pypi_name, str)
stub_name_map[pypi_name] = typeshed_name
return stub_name_map
@cache
def read_dependencies(distribution: str) -> PackageDependencies:
"""Read the dependencies listed in a METADATA.toml file for a stubs package.
Once the dependencies have been read,
determine which dependencies are typeshed-internal dependencies,
and which dependencies are external (non-types) dependencies.
For typeshed dependencies, translate the "dependency name" into the "package name";
for external dependencies, leave them as they are in the METADATA.toml file.
Note that this function may consider things to be typeshed stubs
even if they haven't yet been uploaded to PyPI.
If a typeshed stub is removed, this function will consider it to be an external dependency.
"""
pypi_name_to_typeshed_name_mapping = get_pypi_name_to_typeshed_name_mapping()
with Path("stubs", distribution, "METADATA.toml").open("rb") as f:
dependencies = tomli.load(f).get("requires", [])
assert isinstance(dependencies, list)
typeshed, external = [], []
for dependency in dependencies:
assert isinstance(dependency, str)
maybe_typeshed_dependency = Requirement(dependency).name
if maybe_typeshed_dependency in pypi_name_to_typeshed_name_mapping:
typeshed.append(pypi_name_to_typeshed_name_mapping[maybe_typeshed_dependency])
else:
# convert to Requirement and then back to str
# to make sure that the requirements all have a normalised string representation
# (This will also catch any malformed requirements early)
external.append(str(Requirement(dependency)))
return PackageDependencies(tuple(typeshed), tuple(external))
@cache
def get_recursive_requirements(package_name: str) -> PackageDependencies:
"""Recursively gather dependencies for a single stubs package.
For example, if the stubs for `caldav`
declare a dependency on typeshed's stubs for `requests`,
and the stubs for requests declare a dependency on typeshed's stubs for `urllib3`,
`get_recursive_requirements("caldav")` will determine that the stubs for `caldav`
have both `requests` and `urllib3` as typeshed-internal dependencies.
"""
typeshed: set[str] = set()
external: set[str] = set()
non_recursive_requirements = read_dependencies(package_name)
typeshed.update(non_recursive_requirements.typeshed_pkgs)
external.update(non_recursive_requirements.external_pkgs)
for pkg in non_recursive_requirements.typeshed_pkgs:
reqs = get_recursive_requirements(pkg)
typeshed.update(reqs.typeshed_pkgs)
external.update(reqs.external_pkgs)
return PackageDependencies(tuple(sorted(typeshed)), tuple(sorted(external)))
# ====================================================================
# Dynamic venv creation
# ====================================================================