[stubsabot] Support diff analysis for GitLab hosted projects (#14542)

This commit is contained in:
Brian Schubert
2025-08-08 10:50:58 -04:00
committed by GitHub
parent c3a880b428
commit 10dba69727
+99 -33
View File
@@ -22,7 +22,7 @@ from collections.abc import Callable, Iterator, Mapping, Sequence
from dataclasses import dataclass, field
from http import HTTPStatus
from pathlib import Path
from typing import Annotated, Any, ClassVar, NamedTuple, TypeVar
from typing import Annotated, Any, ClassVar, Literal, NamedTuple, TypedDict, TypeVar
from typing_extensions import Self, TypeAlias
if sys.version_info >= (3, 11):
@@ -326,57 +326,80 @@ def get_github_api_headers() -> Mapping[str, str]:
return headers
GitHost: TypeAlias = Literal["github", "gitlab"]
@dataclass
class GitHubInfo:
class GitHostInfo:
host: GitHost
repo_path: str
tags: list[dict[str, Any]] = field(repr=False)
tags: list[str] = field(repr=False)
async def get_github_repo_info(session: aiohttp.ClientSession, stub_info: StubMetadata) -> GitHubInfo | None:
async def get_host_repo_info(session: aiohttp.ClientSession, stub_info: StubMetadata) -> GitHostInfo | None:
"""
If the project represented by `stub_info` is hosted on GitHub,
return information regarding the project as it exists on GitHub.
If the project represented by `stub_info` is publicly hosted (e.g. on GitHub)
return information regarding the project as it exists on the public host.
Else, return None.
"""
if stub_info.upstream_repository:
# We have various sanity checks for the upstream_repository field in ts_utils.metadata,
# so no need to repeat all of them here
split_url = urllib.parse.urlsplit(stub_info.upstream_repository)
if split_url.netloc == "github.com":
url_path = split_url.path.strip("/")
assert len(Path(url_path).parts) == 2
github_tags_info_url = f"https://api.github.com/repos/{url_path}/tags"
async with session.get(github_tags_info_url, headers=get_github_api_headers()) as response:
if response.status == HTTPStatus.OK:
tags: list[dict[str, Any]] = await response.json()
assert isinstance(tags, list)
return GitHubInfo(repo_path=url_path, tags=tags)
if not stub_info.upstream_repository:
return None
# We have various sanity checks for the upstream_repository field in ts_utils.metadata,
# so no need to repeat all of them here
split_url = urllib.parse.urlsplit(stub_info.upstream_repository)
host = split_url.netloc.removesuffix(".com")
if host not in ("github", "gitlab"):
return None
url_path = split_url.path.strip("/")
assert len(Path(url_path).parts) == 2
if host == "github":
# https://docs.github.com/en/rest/git/tags
info_url = f"https://api.github.com/repos/{url_path}/tags"
headers = get_github_api_headers()
else:
assert host == "gitlab"
# https://docs.gitlab.com/api/tags/
project_id = urllib.parse.quote(url_path, safe="")
info_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/tags"
headers = None
async with session.get(info_url, headers=headers) as response:
if response.status == HTTPStatus.OK:
# Conveniently both GitHub and GitLab use the same key name.
tags = [tag["name"] for tag in await response.json()]
return GitHostInfo(host=host, repo_path=url_path, tags=tags) # type: ignore[arg-type]
return None
class GitHubDiffInfo(NamedTuple):
class GitHostDiffInfo(NamedTuple):
host: GitHost
repo_path: str
old_tag: str
new_tag: str
diff_url: str
@property
def diff_url(self) -> str:
if self.host == "github":
return f"https://github.com/{self.repo_path}/compare/{self.old_tag}...{self.new_tag}"
else:
assert self.host == "gitlab"
return f"https://gitlab.com/{self.repo_path}/-/compare/{self.old_tag}...{self.new_tag}"
async def get_diff_info(
session: aiohttp.ClientSession, stub_info: StubMetadata, pypi_version: packaging.version.Version
) -> GitHubDiffInfo | None:
) -> GitHostDiffInfo | None:
"""Return a tuple giving info about the diff between two releases, if possible.
Return `None` if the project isn't hosted on GitHub,
or if a link pointing to the diff couldn't be found for any other reason.
"""
github_info = await get_github_repo_info(session, stub_info)
if github_info is None:
host_info = await get_host_repo_info(session, stub_info)
if host_info is None:
return None
versions_to_tags: dict[packaging.version.Version, str] = {}
for tag in github_info.tags:
tag_name = tag["name"]
for tag_name in host_info.tags:
# Some packages in typeshed have tag names
# that are invalid to be passed to the Version() constructor,
# e.g. v.1.4.2
@@ -395,11 +418,17 @@ async def get_diff_info(
else:
old_tag = versions_to_tags[old_version]
diff_url = f"https://github.com/{github_info.repo_path}/compare/{old_tag}...{new_tag}"
return GitHubDiffInfo(repo_path=github_info.repo_path, old_tag=old_tag, new_tag=new_tag, diff_url=diff_url)
return GitHostDiffInfo(host=host_info.host, repo_path=host_info.repo_path, old_tag=old_tag, new_tag=new_tag)
FileInfo: TypeAlias = dict[str, Any]
FileStatus: TypeAlias = Literal["added", "modified", "removed", "renamed"]
class FileInfo(TypedDict):
filename: str
status: FileStatus
additions: int
deletions: int
def _plural_s(num: int, /) -> str:
@@ -494,10 +523,10 @@ class DiffAnalysis:
return "Stubsabot analysis of the diff between the two releases:\n - " + "\n - ".join(data_points)
async def analyze_diff(
github_repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
async def analyze_github_diff(
repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
) -> DiffAnalysis | None:
url = f"https://api.github.com/repos/{github_repo_path}/compare/{old_tag}...{new_tag}"
url = f"https://api.github.com/repos/{repo_path}/compare/{old_tag}...{new_tag}"
async with session.get(url, headers=get_github_api_headers()) as response:
response.raise_for_status()
json_resp: dict[str, list[FileInfo]] = await response.json()
@@ -510,6 +539,42 @@ async def analyze_diff(
return DiffAnalysis(py_files=py_files, py_files_stubbed_in_typeshed=py_files_stubbed_in_typeshed)
async def analyze_gitlab_diff(
repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
) -> DiffAnalysis | None:
# https://docs.gitlab.com/api/repositories/#compare-branches-tags-or-commits
project_id = urllib.parse.quote(repo_path, safe="")
url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/compare?from={old_tag}&to={new_tag}"
async with session.get(url) as response:
response.raise_for_status()
json_resp: dict[str, Any] = await response.json()
assert isinstance(json_resp, dict)
py_files: list[FileInfo] = []
for file_diff in json_resp["diffs"]:
filename = file_diff["new_path"]
if Path(filename).suffix != ".py":
continue
status: FileStatus
if file_diff["new_file"]:
status = "added"
elif file_diff["renamed_file"]:
status = "renamed"
elif file_diff["deleted_file"]:
status = "removed"
else:
status = "modified"
diff_lines = file_diff["diff"].splitlines()
additions = sum(1 for ln in diff_lines if ln.startswith("+"))
deletions = sum(1 for ln in diff_lines if ln.startswith("-"))
py_files.append(FileInfo(filename=filename, status=status, additions=additions, deletions=deletions))
stub_path = distribution_path(distribution)
files_in_typeshed = set(stub_path.rglob("*.pyi"))
py_files_stubbed_in_typeshed = [file for file in py_files if (stub_path / f"{file['filename']}i") in files_in_typeshed]
return DiffAnalysis(py_files=py_files, py_files_stubbed_in_typeshed=py_files_stubbed_in_typeshed)
def _add_months(date: datetime.date, months: int) -> datetime.date:
month = date.month - 1 + months
year = date.year + month // 12
@@ -627,8 +692,9 @@ async def determine_action_no_error_handling(
if diff_info is None:
diff_analysis: DiffAnalysis | None = None
else:
analyze_diff = {"github": analyze_github_diff, "gitlab": analyze_gitlab_diff}[diff_info.host]
diff_analysis = await analyze_diff(
github_repo_path=diff_info.repo_path,
repo_path=diff_info.repo_path,
distribution=distribution,
old_tag=diff_info.old_tag,
new_tag=diff_info.new_tag,