Rewrote protobuf generation scripts in Python (#12527)

Avasam authored on 2024-09-19 02:11:21 -04:00, committed by GitHub
parent 0689736dce
commit c025e37bbb
11 changed files with 383 additions and 248 deletions


@@ -0,0 +1,67 @@
from __future__ import annotations

import subprocess
import sys
from http.client import HTTPResponse
from pathlib import Path
from typing import TYPE_CHECKING, Iterable
from urllib.request import urlopen
from zipfile import ZipFile

import tomlkit
from mypy_protobuf.main import (  # type: ignore[import-untyped]  # pyright: ignore[reportMissingTypeStubs]
    __version__ as mypy_protobuf__version__,
)

if TYPE_CHECKING:
    from _typeshed import StrOrBytesPath, StrPath

REPO_ROOT = Path(__file__).absolute().parent.parent.parent
MYPY_PROTOBUF_VERSION = mypy_protobuf__version__


def download_file(url: str, destination: StrPath) -> None:
    print(f"Downloading '{url}' to '{destination}'")
    resp: HTTPResponse
    with urlopen(url) as resp:
        if resp.getcode() != 200:
            raise RuntimeError(f"Error downloading {url}")
        with open(destination, "wb") as file:
            file.write(resp.read())


def extract_archive(archive_path: StrPath, destination: StrPath) -> None:
    print(f"Extracting '{archive_path}' to '{destination}'")
    with ZipFile(archive_path) as file_in:
        file_in.extractall(destination)


def update_metadata(metadata_folder: StrPath, new_extra_description: str) -> None:
    metadata_path = Path(metadata_folder) / "METADATA.toml"
    with open(metadata_path) as file:
        metadata = tomlkit.load(file)
    metadata["extra_description"] = new_extra_description
    with open(metadata_path, "w") as file:
        # tomlkit.dump has partially unknown IO type
        tomlkit.dump(metadata, file)  # pyright: ignore[reportUnknownMemberType]
    print(f"Updated {metadata_path}")


def run_protoc(
    proto_paths: Iterable[StrPath], mypy_out: StrPath, proto_globs: Iterable[str], cwd: StrOrBytesPath | None = None
) -> str:
    """TODO: Describe parameters and return"""
    protoc_version = (
        subprocess.run([sys.executable, "-m", "grpc_tools.protoc", "--version"], capture_output=True).stdout.decode().strip()
    )
    print()
    print(protoc_version)
    protoc_args = [
        *[f"--proto_path={proto_path}" for proto_path in proto_paths],
        "--mypy_out",
        f"relax_strict_optional_primitives:{mypy_out}",
        *proto_globs,
    ]
    print("Running: protoc\n    " + "\n    ".join(protoc_args) + "\n")
    subprocess.run((sys.executable, "-m", "grpc_tools.protoc", *protoc_args), cwd=cwd, check=True)
    return protoc_version
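
For readers unfamiliar with mypy-protobuf's output option, here is a minimal sketch (not part of the commit) of the protoc invocation that run_protoc assembles for a hypothetical call; the package name and paths are placeholders.

# Sketch only: mirrors run_protoc's argument assembly for hypothetical inputs.
proto_paths = ("example-1.0/src",)  # placeholder source tree
mypy_out = "stubs/example"  # placeholder output folder
proto_globs = ("example-1.0/src/google/example/*.proto",)  # placeholder globs

protoc_args = [
    *[f"--proto_path={proto_path}" for proto_path in proto_paths],
    "--mypy_out",
    f"relax_strict_optional_primitives:{mypy_out}",
    *proto_globs,
]
# Roughly equivalent to:
#   python -m grpc_tools.protoc --proto_path=example-1.0/src \
#       --mypy_out relax_strict_optional_primitives:stubs/example \
#       example-1.0/src/google/example/*.proto
print(protoc_args)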


@@ -0,0 +1,94 @@
"""
Generates the protobuf stubs for the given protobuf version using mypy-protobuf.
Generally, new minor versions are a good time to update the stubs.
"""
from __future__ import annotations
import json
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any
from _utils import MYPY_PROTOBUF_VERSION, REPO_ROOT, download_file, extract_archive, run_protoc, update_metadata
# Whenever you update PACKAGE_VERSION here, version should be updated
# in stubs/protobuf/METADATA.toml and vice-versa.
PACKAGE_VERSION = "27.1"
STUBS_FOLDER = REPO_ROOT / "stubs" / "protobuf"
ARCHIVE_FILENAME = f"protobuf-{PACKAGE_VERSION}.zip"
ARCHIVE_URL = f"https://github.com/protocolbuffers/protobuf/releases/download/v{PACKAGE_VERSION}/{ARCHIVE_FILENAME}"
EXTRACTED_PACKAGE_DIR = f"protobuf-{PACKAGE_VERSION}"
VERSION_PATTERN = re.compile(r'def game_version\(\):\n return "(.+?)"')
PROTO_FILE_PATTERN = re.compile(r'"//:(.*)_proto"')
def extract_python_version(file_path: Path) -> str:
"""Extract the Python version from https://github.com/protocolbuffers/protobuf/blob/main/version.json"""
with open(file_path) as file:
data: dict[str, Any] = json.load(file)
# The root key will be the protobuf source code version
version = next(iter(data.values()))["languages"]["python"]
assert isinstance(version, str)
return version
def extract_proto_file_paths(temp_dir: Path) -> list[str]:
"""
Roughly reproduce the subset of .proto files on the public interface
as described in py_proto_library calls in
https://github.com/protocolbuffers/protobuf/blob/main/python/dist/BUILD.bazel
"""
with open(temp_dir / EXTRACTED_PACKAGE_DIR / "python" / "dist" / "BUILD.bazel") as file:
matched_lines = filter(None, (re.search(PROTO_FILE_PATTERN, line) for line in file))
proto_files = [
EXTRACTED_PACKAGE_DIR + "/src/google/protobuf/" + match.group(1).replace("compiler_", "compiler/") + ".proto"
for match in matched_lines
]
return proto_files
def main() -> None:
temp_dir = Path(tempfile.mkdtemp())
# Fetch s2clientprotocol (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)
# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()
PROTOC_VERSION = run_protoc(
proto_paths=(f"{EXTRACTED_PACKAGE_DIR}/src",),
mypy_out=STUBS_FOLDER,
proto_globs=extract_proto_file_paths(temp_dir),
cwd=temp_dir,
)
PYTHON_PROTOBUF_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "version.json")
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)
update_metadata(
STUBS_FOLDER,
f"""Partially generated using \
[mypy-protobuf=={MYPY_PROTOBUF_VERSION}](https://github.com/nipunn1313/mypy-protobuf/tree/v{MYPY_PROTOBUF_VERSION}) \
and {PROTOC_VERSION} on \
[protobuf v{PACKAGE_VERSION}](https://github.com/protocolbuffers/protobuf/releases/tag/v{PACKAGE_VERSION}) \
(python `protobuf=={PYTHON_PROTOBUF_VERSION}`).""",
)
# Run pre-commit to cleanup the stubs
subprocess.run((sys.executable, "-m", "pre_commit", "run", "--files", *STUBS_FOLDER.rglob("*_pb2.pyi")))
if __name__ == "__main__":
main()
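
extract_python_version above assumes version.json has a single root key (the source version) whose value carries per-language versions; a small self-contained illustration with made-up data follows.

# Illustration only (not part of the commit): made-up version.json contents
# with the shape extract_python_version expects.
import json

sample = json.loads('{"27.x": {"languages": {"python": "5.27.1", "cpp": "5.27.1"}}}')
version = next(iter(sample.values()))["languages"]["python"]
assert version == "5.27.1"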


@@ -0,0 +1,72 @@
"""
Generates the protobuf stubs for the given s2clientprotocol version using mypy-protobuf.
Generally, new minor versions are a good time to update the stubs.
"""
from __future__ import annotations
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from _utils import MYPY_PROTOBUF_VERSION, REPO_ROOT, download_file, extract_archive, run_protoc, update_metadata
# Whenever you update PACKAGE_VERSION here, version should be updated
# in stubs/s2clientprotocol/METADATA.toml and vice-versa.
PACKAGE_VERSION = "c04df4adbe274858a4eb8417175ee32ad02fd609"
STUBS_FOLDER = REPO_ROOT / "stubs" / "s2clientprotocol"
ARCHIVE_FILENAME = f"{PACKAGE_VERSION}.zip"
ARCHIVE_URL = f"https://github.com/Blizzard/s2client-proto/archive/{ARCHIVE_FILENAME}"
EXTRACTED_PACKAGE_DIR = f"s2client-proto-{PACKAGE_VERSION}"
VERSION_PATTERN = re.compile(r'def game_version\(\):\n return "(.+?)"')
def extract_python_version(file_path: Path) -> str:
"""Extract Python version from s2clientprotocol's build file"""
match = re.search(VERSION_PATTERN, file_path.read_text())
assert match
return match.group(1)
def main() -> None:
temp_dir = Path(tempfile.mkdtemp())
# Fetch s2clientprotocol (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)
# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()
PROTOC_VERSION = run_protoc(
proto_paths=(EXTRACTED_PACKAGE_DIR,),
mypy_out=STUBS_FOLDER,
proto_globs=(f"{EXTRACTED_PACKAGE_DIR}/s2clientprotocol/*.proto",),
cwd=temp_dir,
)
PYTHON_S2_CLIENT_PROTO_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "s2clientprotocol" / "build.py")
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)
update_metadata(
STUBS_FOLDER,
f"""Partially generated using \
[mypy-protobuf=={MYPY_PROTOBUF_VERSION}](https://github.com/nipunn1313/mypy-protobuf/tree/v{MYPY_PROTOBUF_VERSION}) \
and {PROTOC_VERSION} on \
[s2client-proto {PYTHON_S2_CLIENT_PROTO_VERSION}](https://github.com/Blizzard/s2client-proto/tree/{PACKAGE_VERSION}).""",
)
# Run pre-commit to cleanup the stubs
subprocess.run((sys.executable, "-m", "pre_commit", "run", "--files", *STUBS_FOLDER.rglob("*_pb2.pyi")))
if __name__ == "__main__":
main()
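
For reference, a hedged illustration of the kind of build.py snippet that VERSION_PATTERN is written to match; the version string below is made up.

# Illustration only (not part of the commit): a made-up build.py snippet of the
# shape that VERSION_PATTERN matches.
import re

VERSION_PATTERN = re.compile(r'def game_version\(\):\n    return "(.+?)"')
sample = 'def game_version():\n    return "5.0.12"\n'  # hypothetical file contents
match = re.search(VERSION_PATTERN, sample)
assert match and match.group(1) == "5.0.12"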


@@ -0,0 +1,137 @@
"""
Generates the protobuf stubs for the given tensorflow version using mypy-protobuf.
Generally, new minor versions are a good time to update the stubs.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from _utils import MYPY_PROTOBUF_VERSION, REPO_ROOT, download_file, extract_archive, run_protoc, update_metadata
# Whenever you update PACKAGE_VERSION here, version should be updated
# in stubs/tensorflow/METADATA.toml and vice-versa.
PACKAGE_VERSION = "2.17.0"
STUBS_FOLDER = REPO_ROOT / "stubs" / "tensorflow"
ARCHIVE_FILENAME = f"v{PACKAGE_VERSION}.zip"
ARCHIVE_URL = f"https://github.com/tensorflow/tensorflow/archive/refs/tags/{ARCHIVE_FILENAME}"
EXTRACTED_PACKAGE_DIR = f"tensorflow-{PACKAGE_VERSION}"
PROTOS_TO_REMOVE = (
"compiler/xla/autotune_results_pb2.pyi",
"compiler/xla/autotuning_pb2.pyi",
"compiler/xla/service/buffer_assignment_pb2.pyi",
"compiler/xla/service/hlo_execution_profile_data_pb2.pyi",
"core/protobuf/autotuning_pb2.pyi",
"core/protobuf/conv_autotuning_pb2.pyi",
"core/protobuf/critical_section_pb2.pyi",
"core/protobuf/eager_service_pb2.pyi",
"core/protobuf/master_pb2.pyi",
"core/protobuf/master_service_pb2.pyi",
"core/protobuf/replay_log_pb2.pyi",
"core/protobuf/tpu/compile_metadata_pb2.pyi",
"core/protobuf/worker_pb2.pyi",
"core/protobuf/worker_service_pb2.pyi",
"core/util/example_proto_fast_parsing_test_pb2.pyi",
)
"""
These protos exist in a folder with protos used in python,
but are not included in the python wheel.
They are likely only used for other language builds.
stubtest was used to identify them by looking for ModuleNotFoundError.
(comment out ".*_pb2.*" from the allowlist)
"""
TSL_IMPORT_PATTERN = re.compile(r"(\[|\s)tsl\.")
XLA_IMPORT_PATTERN = re.compile(r"(\[|\s)xla\.")
def post_creation() -> None:
"""Move third-party and fix imports"""
# Can't use shutil.move because it can't merge existing directories.
print()
print(f"Moving '{STUBS_FOLDER}/tsl' to '{STUBS_FOLDER}/tensorflow/tsl'")
shutil.copytree(f"{STUBS_FOLDER}/tsl", f"{STUBS_FOLDER}/tensorflow/tsl", dirs_exist_ok=True)
shutil.rmtree(f"{STUBS_FOLDER}/tsl")
print(f"Moving '{STUBS_FOLDER}/xla' to '{STUBS_FOLDER}/tensorflow/compiler/xla'")
shutil.copytree(f"{STUBS_FOLDER}/xla", f"{STUBS_FOLDER}/tensorflow/compiler/xla", dirs_exist_ok=True)
shutil.rmtree(f"{STUBS_FOLDER}/xla")
for path in STUBS_FOLDER.rglob("*_pb2.pyi"):
print(f"Fixing imports in '{path}'")
with open(path) as file:
filedata = file.read()
# Replace the target string
filedata = re.sub(TSL_IMPORT_PATTERN, "\\1tensorflow.tsl.", filedata)
filedata = re.sub(XLA_IMPORT_PATTERN, "\\1tensorflow.compiler.xla.", filedata)
# Write the file out again
with open(path, "w") as file:
file.write(filedata)
print()
for to_remove in PROTOS_TO_REMOVE:
file_path = STUBS_FOLDER / "tensorflow" / to_remove
os.remove(file_path)
print(f"Removed '{file_path}'")
def main() -> None:
temp_dir = Path(tempfile.mkdtemp())
# Fetch tensorflow (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)
# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()
PROTOC_VERSION = run_protoc(
proto_paths=(
f"{EXTRACTED_PACKAGE_DIR}/third_party/xla/third_party/tsl",
f"{EXTRACTED_PACKAGE_DIR}/third_party/xla",
f"{EXTRACTED_PACKAGE_DIR}",
),
mypy_out=STUBS_FOLDER,
proto_globs=(
f"{EXTRACTED_PACKAGE_DIR}/third_party/xla/xla/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/third_party/xla/xla/service/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/core/example/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/core/framework/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/core/protobuf/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/core/protobuf/tpu/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/core/util/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/tensorflow/python/keras/protobuf/*.proto",
f"{EXTRACTED_PACKAGE_DIR}/third_party/xla/third_party/tsl/tsl/protobuf/*.proto",
),
cwd=temp_dir,
)
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)
post_creation()
update_metadata(
STUBS_FOLDER,
f"""Partially generated using \
[mypy-protobuf=={MYPY_PROTOBUF_VERSION}](https://github.com/nipunn1313/mypy-protobuf/tree/v{MYPY_PROTOBUF_VERSION}) \
and {PROTOC_VERSION} on `tensorflow=={PACKAGE_VERSION}`.""",
)
# Run pre-commit to cleanup the stubs
subprocess.run((sys.executable, "-m", "pre_commit", "run", "--files", *STUBS_FOLDER.rglob("*_pb2.pyi")))
if __name__ == "__main__":
main()
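
To make the import rewriting in post_creation concrete, a small illustration with made-up stub lines (not taken from real generated files):

# Illustration only (not part of the commit): how the TSL/XLA patterns rewrite
# imports and annotations in generated *_pb2.pyi files.
import re

TSL_IMPORT_PATTERN = re.compile(r"(\[|\s)tsl\.")
XLA_IMPORT_PATTERN = re.compile(r"(\[|\s)xla\.")

text = "import tsl.protobuf.example_pb2\nfield: list[xla.ExampleMessage]\n"  # made-up lines
text = re.sub(TSL_IMPORT_PATTERN, "\\1tensorflow.tsl.", text)
text = re.sub(XLA_IMPORT_PATTERN, "\\1tensorflow.compiler.xla.", text)
assert text == "import tensorflow.tsl.protobuf.example_pb2\nfield: list[tensorflow.compiler.xla.ExampleMessage]\n"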