diff --git a/scripts/stubsabot.py b/scripts/stubsabot.py
index f4ff3dd20..6ceafd387 100644
--- a/scripts/stubsabot.py
+++ b/scripts/stubsabot.py
@@ -255,16 +255,18 @@ async def get_github_repo_info(session: aiohttp.ClientSession, stub_info: StubIn
     Else, return None.
     """
     if stub_info.upstream_repository:
+        # We have various sanity checks for the upstream_repository field in tests/parse_metadata.py,
+        # so no need to repeat all of them here
         split_url = urllib.parse.urlsplit(stub_info.upstream_repository)
-        if split_url.netloc == "github.com" and not split_url.query and not split_url.fragment:
+        if split_url.netloc == "github.com":
             url_path = split_url.path.strip("/")
-            if len(Path(url_path).parts) == 2:
-                github_tags_info_url = f"https://api.github.com/repos/{url_path}/tags"
-                async with session.get(github_tags_info_url, headers=get_github_api_headers()) as response:
-                    if response.status == 200:
-                        tags: list[dict[str, Any]] = await response.json()
-                        assert isinstance(tags, list)
-                        return GithubInfo(repo_path=url_path, tags=tags)
+            assert len(Path(url_path).parts) == 2
+            github_tags_info_url = f"https://api.github.com/repos/{url_path}/tags"
+            async with session.get(github_tags_info_url, headers=get_github_api_headers()) as response:
+                if response.status == 200:
+                    tags: list[dict[str, Any]] = await response.json()
+                    assert isinstance(tags, list)
+                    return GithubInfo(repo_path=url_path, tags=tags)
     return None
diff --git a/tests/parse_metadata.py b/tests/parse_metadata.py
index 6097049ea..483eb034c 100644
--- a/tests/parse_metadata.py
+++ b/tests/parse_metadata.py
@@ -6,6 +6,7 @@ from __future__ import annotations
 import os
 import re
+import urllib.parse
 from collections.abc import Mapping
 from dataclasses import dataclass
 from pathlib import Path
@@ -199,6 +200,21 @@ def read_metadata(distribution: str) -> StubMetadata:
     upstream_repository: object = data.get("upstream_repository")
     assert isinstance(upstream_repository, (str, type(None)))
+    if isinstance(upstream_repository, str):
+        parsed_url = urllib.parse.urlsplit(upstream_repository)
+        assert parsed_url.scheme == "https", "URLs in the upstream_repository field should use https"
+        assert not parsed_url.netloc.startswith("www."), "`www.` should be removed from URLs in the upstream_repository field"
+        assert not parsed_url.query
+        assert not parsed_url.fragment
+        if parsed_url.netloc == "github.com":
+            cleaned_url_path = parsed_url.path.strip("/")
+            num_url_path_parts = len(Path(cleaned_url_path).parts)
+            bad_github_url_msg = (
+                f"Invalid upstream_repository for {distribution!r}: "
+                "URLs for GitHub repositories always have two parts in their paths"
+            )
+            assert num_url_path_parts == 2, bad_github_url_msg
+
     obsolete_since: object = data.get("obsolete_since")
     assert isinstance(obsolete_since, (str, type(None)))
     no_longer_updated: object = data.get("no_longer_updated", False)
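
For reference, a minimal interactive sketch (not part of the change itself) of how urllib.parse.urlsplit and pathlib.Path decompose an upstream_repository URL that satisfies the new assertions; the repository URL below is only an illustrative example:

>>> import urllib.parse
>>> from pathlib import Path
>>> parsed = urllib.parse.urlsplit("https://github.com/python/typeshed")
>>> (parsed.scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)
('https', 'github.com', '/python/typeshed', '', '')
>>> Path(parsed.path.strip("/")).parts  # exactly two parts: owner and repository name
('python', 'typeshed')
>>> len(Path(parsed.path.strip("/")).parts) == 2
True

Because tests/parse_metadata.py now rejects GitHub URLs with a query, a fragment, or a path that is not exactly owner/repo, stubsabot.py can downgrade those conditions to a plain assert rather than silently returning None.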