Add more checks in parse_metadata.py for the upstream_repository field (#10513)
@@ -255,16 +255,18 @@ async def get_github_repo_info(session: aiohttp.ClientSession, stub_info: StubIn
 
     Else, return None.
     """
     if stub_info.upstream_repository:
+        # We have various sanity checks for the upstream_repository field in tests/parse_metadata.py,
+        # so no need to repeat all of them here
         split_url = urllib.parse.urlsplit(stub_info.upstream_repository)
-        if split_url.netloc == "github.com" and not split_url.query and not split_url.fragment:
+        if split_url.netloc == "github.com":
             url_path = split_url.path.strip("/")
-            if len(Path(url_path).parts) == 2:
-                github_tags_info_url = f"https://api.github.com/repos/{url_path}/tags"
-                async with session.get(github_tags_info_url, headers=get_github_api_headers()) as response:
-                    if response.status == 200:
-                        tags: list[dict[str, Any]] = await response.json()
-                        assert isinstance(tags, list)
-                        return GithubInfo(repo_path=url_path, tags=tags)
+            assert len(Path(url_path).parts) == 2
+            github_tags_info_url = f"https://api.github.com/repos/{url_path}/tags"
+            async with session.get(github_tags_info_url, headers=get_github_api_headers()) as response:
+                if response.status == 200:
+                    tags: list[dict[str, Any]] = await response.json()
+                    assert isinstance(tags, list)
+                    return GithubInfo(repo_path=url_path, tags=tags)
     return None
 
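To see why the tightened logic in the hunk above works, here is a minimal standalone sketch (not part of the commit; the example URLs are placeholders) of how urllib.parse.urlsplit() and pathlib.Path break down an upstream_repository value. A GitHub repository URL's path always splits into exactly two parts (owner and repository name), which is what the new assert in get_github_repo_info relies on now that tests/parse_metadata.py guarantees well-formed URLs:

    import urllib.parse
    from pathlib import Path

    # urlsplit() separates a URL into scheme, netloc, path, query and fragment.
    split_url = urllib.parse.urlsplit("https://github.com/python/mypy")
    print(split_url.netloc)             # github.com
    url_path = split_url.path.strip("/")
    print(url_path)                     # python/mypy
    print(Path(url_path).parts)         # ('python', 'mypy')
    print(len(Path(url_path).parts))    # 2

    # A URL pointing below a repository root has more path parts, so under
    # the new code it fails the assert instead of being silently skipped:
    deep = urllib.parse.urlsplit("https://github.com/python/mypy/tree/master")
    print(len(Path(deep.path.strip("/")).parts))  # 4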
@@ -6,6 +6,7 @@ from __future__ import annotations
 
 import os
 import re
+import urllib.parse
 from collections.abc import Mapping
 from dataclasses import dataclass
 from pathlib import Path
@@ -199,6 +200,21 @@ def read_metadata(distribution: str) -> StubMetadata:
 
     upstream_repository: object = data.get("upstream_repository")
     assert isinstance(upstream_repository, (str, type(None)))
+    if isinstance(upstream_repository, str):
+        parsed_url = urllib.parse.urlsplit(upstream_repository)
+        assert parsed_url.scheme == "https", "URLs in the upstream_repository field should use https"
+        assert not parsed_url.netloc.startswith("www."), "`www.` should be removed from URLs in the upstream_repository field"
+        assert not parsed_url.query
+        assert not parsed_url.fragment
+        if parsed_url.netloc == "github.com":
+            cleaned_url_path = parsed_url.path.strip("/")
+            num_url_path_parts = len(Path(cleaned_url_path).parts)
+            bad_github_url_msg = (
+                f"Invalid upstream_repository for {distribution!r}: "
+                "URLs for GitHub repositories always have two parts in their paths"
+            )
+            assert num_url_path_parts == 2, bad_github_url_msg
+
     obsolete_since: object = data.get("obsolete_since")
     assert isinstance(obsolete_since, (str, type(None)))
     no_longer_updated: object = data.get("no_longer_updated", False)
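Taken together, the new block in read_metadata behaves like the following standalone sketch (the wrapper function name and example values are invented for illustration; the asserts themselves are quoted from the diff above):

    import urllib.parse
    from pathlib import Path

    def check_upstream_repository(distribution: str, upstream_repository: object) -> None:
        assert isinstance(upstream_repository, (str, type(None)))
        if isinstance(upstream_repository, str):
            parsed_url = urllib.parse.urlsplit(upstream_repository)
            assert parsed_url.scheme == "https", "URLs in the upstream_repository field should use https"
            assert not parsed_url.netloc.startswith("www."), "`www.` should be removed from URLs in the upstream_repository field"
            assert not parsed_url.query
            assert not parsed_url.fragment
            if parsed_url.netloc == "github.com":
                # A GitHub repository URL is always https://github.com/<owner>/<repo>
                assert len(Path(parsed_url.path.strip("/")).parts) == 2, (
                    f"Invalid upstream_repository for {distribution!r}: "
                    "URLs for GitHub repositories always have two parts in their paths"
                )

    check_upstream_repository("example-stubs", "https://github.com/psf/requests")  # passes
    check_upstream_repository("example-stubs", None)                               # passes: the field is optional
    try:
        check_upstream_repository("example-stubs", "https://github.com/psf/requests/tree/main")
    except AssertionError as exc:
        print(exc)  # Invalid upstream_repository for 'example-stubs': URLs for GitHub repositories ...

In short, the checks reject non-https schemes, www.-prefixed hosts, query strings, fragments, and GitHub URLs that point anywhere other than a repository root.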