Auto-add upstream URLs in create_baseline_stubs.py (#10491)

Co-authored-by: Akuli <akuviljanen17@gmail.com>
This commit is contained in:
Alex Waygood
2023-07-21 15:56:53 +01:00
committed by GitHub
parent 5498cabfe2
commit 49a2371d7a

View File

@@ -11,10 +11,15 @@ Run with -h for more help.
from __future__ import annotations
import argparse
import asyncio
import os
import re
import subprocess
import sys
import urllib.parse
import aiohttp
import termcolor
if sys.version_info >= (3, 8):
from importlib.metadata import distribution
@@ -70,7 +75,50 @@ def run_ruff(stub_dir: str) -> None:
subprocess.run([sys.executable, "-m", "ruff", stub_dir])
def create_metadata(stub_dir: str, version: str) -> None:
async def get_project_urls_from_pypi(project: str, session: aiohttp.ClientSession) -> dict[str, str]:
    """Look up the ``project_urls`` mapping for *project* via PyPI's JSON endpoint.

    The request is made with the caller-supplied *session*. An empty dict is
    returned when PyPI answers with anything other than HTTP 200, or when the
    ``info`` section of the response has a missing or empty ``project_urls``
    entry.
    """
    json_endpoint = f"https://pypi.org/pypi/{urllib.parse.quote(project)}/json"
    async with session.get(json_endpoint) as response:
        if response.status == 200:
            payload: dict[str, dict[str, dict[str, str]]] = await response.json()
            # `project_urls` may be absent or falsy; both cases fall through
            # to the empty-dict result below.
            found_urls = payload["info"].get("project_urls")
            if found_urls:
                return found_urls
        return {}
def _repo_url_from_project_url(url: str) -> str | None:
    """Reduce *url* to ``https://<host>/<owner>/<repo>``, or return None.

    None is returned when *url* is not on one of the recognised code-hosting
    sites, or when its path does not contain both an owner and a repository
    segment (e.g. a link to an organisation page or to the site root).
    """
    # Remove `www.`; replace `http://` with `https://`
    url = re.sub(r"^(https?://)?(www\.)?", "https://", url)
    parts = urllib.parse.urlsplit(url)
    netloc = parts.netloc.lower()  # host names are case-insensitive
    if netloc not in {"gitlab.com", "github.com", "bitbucket.org", "foss.heptapod.net"}:
        return None
    # Work from the parsed path only, so that a query string or a fragment
    # (e.g. `#readme`) can never end up glued onto the repository name.
    segments = [segment for segment in parts.path.split("/") if segment]
    if len(segments) < 2:
        # `https://site.com/` or `https://site.com/user` is not a repository,
        # even though requesting it would succeed with a 200.
        return None
    owner, repo = segments[0], segments[1]
    # Clone URLs end in `.git`; the browsable repository URL does not.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        return None
    # truncate to https://site.com/user/repo
    # NOTE(review): projects in nested GitLab groups have more than two path
    # segments; like the previous implementation, this keeps only the first two.
    return f"https://{netloc}/{owner}/{repo}"


async def get_upstream_repo_url(project: str) -> str | None:
    """Try to find the URL of the upstream source repository for *project*.

    The project URLs published on PyPI are examined, starting with the entries
    most likely to point at the source code. Each URL on a recognised
    code-hosting site is normalised to ``https://<host>/<owner>/<repo>`` and
    requested; the first one that answers with HTTP 200 is returned.

    Returns None if PyPI lists no project URLs, or if none of them leads to a
    reachable repository on a recognised site.
    """
    # aiohttp is overkill here, but it would also just be silly
    # to have both requests and aiohttp in our requirements-tests.txt file.
    async with aiohttp.ClientSession() as session:
        project_urls = await get_project_urls_from_pypi(project, session)

        if not project_urls:
            return None

        # Order the project URLs so that we put the ones
        # that are most likely to point to the source code first
        urls_to_check: list[str] = []
        url_names_probably_pointing_to_source = ("Source", "Repository", "Homepage")
        for url_name in url_names_probably_pointing_to_source:
            if url := project_urls.get(url_name):
                urls_to_check.append(url)
        urls_to_check.extend(
            url
            for url_name, url in project_urls.items()
            if url and url_name not in url_names_probably_pointing_to_source
        )

        # Several PyPI entries often point into the same repository
        # (homepage, issue tracker, changelog, ...); request each candidate once.
        already_tried: set[str] = set()
        for url in urls_to_check:
            upstream_repo_url = _repo_url_from_project_url(url)
            if upstream_repo_url is None or upstream_repo_url in already_tried:
                continue
            already_tried.add(upstream_repo_url)
            async with session.get(upstream_repo_url) as response:
                if response.status == 200:
                    return upstream_repo_url
    return None
def create_metadata(project: str, stub_dir: str, version: str) -> None:
"""Create a METADATA.toml file."""
match = re.match(r"[0-9]+.[0-9]+", version)
if match is None:
@@ -79,9 +127,19 @@ def create_metadata(stub_dir: str, version: str) -> None:
version = match.group(0)
if os.path.exists(filename):
return
metadata = f'version = "{version}.*"'
upstream_repo_url = asyncio.run(get_upstream_repo_url(project))
if upstream_repo_url is None:
warning = (
f"\nCould not find a URL pointing to the source code for {project!r}.\n"
f"Please add it as `upstream_repository` to `stubs/{project}/METADATA.toml`, if possible!\n"
)
print(termcolor.colored(warning, "red"))
else:
metadata += f'\nupstream_repository = "{upstream_repo_url}"'
print(f"Writing {filename}")
with open(filename, "w", encoding="UTF-8") as file:
file.write(f'version = "{version}.*"')
file.write(metadata)
def add_pyright_exclusion(stub_dir: str) -> None:
@@ -168,7 +226,7 @@ def main() -> None:
run_isort(stub_dir)
run_black(stub_dir)
create_metadata(stub_dir, version)
create_metadata(project, stub_dir, version)
# Since the generated stubs won't have many type annotations, we
# have to exclude them from strict pyright checks.