Bump mypy-protobuf in sync_tensorflow script and improve generation scripts (#11740)

This commit is contained in:
Avasam
2024-04-10 09:12:07 -04:00
committed by GitHub
parent efad2fb315
commit fe02cba606
94 changed files with 2408 additions and 2116 deletions

View File

@@ -35,6 +35,7 @@ echo "Working in $TMP_DIR"
# Fetch the archive at $PROTOC_URL and unpack $PROTOC_FILENAME into ./protoc_install.
wget "$PROTOC_URL"
mkdir protoc_install
unzip "$PROTOC_FILENAME" -d protoc_install
# Print the version reported by the freshly unpacked protoc binary.
protoc_install/bin/protoc --version
# Fetch protoc-python (which contains all the .proto files)
wget "$PYTHON_PROTOBUF_URL"
@@ -67,16 +68,22 @@ PROTO_FILES=$(grep "GenProto.*google" $PYTHON_PROTOBUF_DIR/python/setup.py | \
# NOTE(review): the one-line protoc call and the backslash-continued protoc call
# below pass the same arguments. This span looks like the removed and added sides
# of a diff hunk shown back to back -- confirm against the real script before running.
# $PROTO_FILES is expanded unquoted so that it splits into one argument per file
# (hence the SC2086 suppression).
# And regenerate!
# shellcheck disable=SC2086
protoc_install/bin/protoc --proto_path="$PYTHON_PROTOBUF_DIR/src" --mypy_out="relax_strict_optional_primitives:$REPO_ROOT/stubs/protobuf" $PROTO_FILES
protoc_install/bin/protoc \
--proto_path="$PYTHON_PROTOBUF_DIR/src" \
--mypy_out="relax_strict_optional_primitives:$REPO_ROOT/stubs/protobuf" \
$PROTO_FILES
# Read the python protobuf version out of version.json with jq.
PYTHON_PROTOBUF_VERSION=$(jq -r '.[] | .languages.python' "$PYTHON_PROTOBUF_DIR/version.json")
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
rm -rf "$TMP_DIR"
# Must be in a git repository to run pre-commit
cd "$REPO_ROOT"
# Rewrite the extra_description line of the protobuf METADATA.toml so that it records
# the mypy-protobuf, protobuf and python protobuf versions used for this run.
# NOTE(review): two file operands follow the sed expression (an absolute path and a
# repo-relative one); presumably the before/after forms of the same argument -- confirm.
sed --in-place="" \
"s/extra_description = .*$/extra_description = \"Generated using [mypy-protobuf==$MYPY_PROTOBUF_VERSION](https:\/\/github.com\/nipunn1313\/mypy-protobuf\/tree\/v$MYPY_PROTOBUF_VERSION) on [protobuf v$PROTOBUF_VERSION](https:\/\/github.com\/protocolbuffers\/protobuf\/releases\/tag\/v$PROTOBUF_VERSION) (python protobuf==$PYTHON_PROTOBUF_VERSION)\"/" \
"$REPO_ROOT/stubs/protobuf/METADATA.toml"
stubs/protobuf/METADATA.toml
# Must be run in a git repository
cd "$REPO_ROOT"
# use `|| true` so the script still continues even if a pre-commit hook
# applies autofixes (which will result in a nonzero exit code)
# The $(git ls-files ...) substitution is left unquoted so each tracked file
# becomes its own --files argument.
pre-commit run --files $(git ls-files -- "$REPO_ROOT/stubs/protobuf/**_pb2.pyi") || true
pre-commit run --files $(git ls-files -- "stubs/protobuf/**_pb2.pyi") || true

View File

@@ -1,79 +1,97 @@
#!/bin/bash
# Strict mode: -e exit on error, -u error on unset variables, -x trace each command,
# pipefail makes a pipeline fail if any stage fails.
# NOTE(review): `set -euxo pipefail` appears twice in this span and the
# "based on" comment appears in two wordings; this looks like the old and new
# versions of the header shown together -- confirm which lines are current.
set -euxo pipefail
# Partly based on scripts/generate_proto_stubs.sh.
# Based on scripts/generate_proto_stubs.sh.
# Generates the protobuf stubs for the given tensorflow version using mypy-protobuf.
# Generally, new minor versions are a good time to update the stubs.
# Absolute path of the parent of the directory that contains this script.
REPO_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")"/..)"
# This version should be consistent with the version in tensorflow's METADATA.toml.
set -euxo pipefail
# Need protoc >= 3.15 for explicit optional
PROTOBUF_VERSION=25.3 # 4.25.3
# Whenever you update TENSORFLOW_VERSION here, version should be updated
# in stubs/tensorflow/METADATA.toml and vice-versa.
TENSORFLOW_VERSION=2.12.1
# Latest mypy-protobuf has dependency on protobuf >4, which is incompatible at runtime
# with tensorflow. However, the stubs produced do still work with tensorflow. So after
# installing mypy-protobuf, before running stubtest on tensorflow you should downgrade
# protobuf<4.
# NOTE(review): two consecutive assignments; if both execute, the later value (3.6.0)
# is the one in effect. Presumably a before/after pair -- confirm.
MYPY_PROTOBUF_VERSION=3.5.0
MYPY_PROTOBUF_VERSION=3.6.0
# Installs pre-commit and the pinned mypy-protobuf into whatever environment `pip` resolves to.
pip install pre-commit mypy-protobuf=="$MYPY_PROTOBUF_VERSION"
# Pick the platform tag used in the protoc archive name:
# "osx" when `uname -a` mentions Darwin, "linux" otherwise.
if uname -a | grep Darwin; then
# brew install coreutils wget
PLAT=osx
else
PLAT=linux
fi
# Absolute path of the parent of the directory that contains this script.
REPO_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")"/..)"
# Fresh temporary directory created by mktemp.
TMP_DIR="$(mktemp -d)"
# Archive file names and download URLs derived from the pinned versions.
# NOTE: the protoc archive name hard-codes the x86_64 architecture.
TENSORFLOW_FILENAME="v$TENSORFLOW_VERSION.zip"
PROTOC_FILENAME="protoc-$PROTOBUF_VERSION-$PLAT-x86_64.zip"
PROTOC_URL="https://github.com/protocolbuffers/protobuf/releases/download/v$PROTOBUF_VERSION/$PROTOC_FILENAME"
TENSORFLOW_URL="https://github.com/tensorflow/tensorflow/archive/refs/tags/$TENSORFLOW_FILENAME"
# NOTE(review): this span uses a git clone under stubs/tensorflow/repository and the
# protoc found on PATH, while other parts of this listing download archives into
# $TMP_DIR instead. It is presumably the superseded flow -- confirm before relying on it.
# Move to stubs/tensorflow (relative to the script's own directory) and work in ./repository.
cd "$(dirname "$0")" > /dev/null
cd ../stubs/tensorflow
mkdir -p repository
pushd repository &> /dev/null
# If the script fails halfway, it's nice to be able to re-run it immediately
# Shallow-clone the v$TENSORFLOW_VERSION tag only when no "tensorflow" directory exists yet.
if [ ! -d "tensorflow" ] ; then
git clone --depth 1 --branch v"$TENSORFLOW_VERSION" https://github.com/tensorflow/tensorflow.git
fi
pushd tensorflow &> /dev/null
# Folders here cover the more commonly used protobufs externally and
# their dependencies. Tensorflow has more protobufs and can be added if requested.
# Stubs are written under $REPO_ROOT/stubs/tensorflow; the .proto globs are relative
# to the cloned checkout entered by the pushd above.
protoc --mypy_out "relax_strict_optional_primitives:$REPO_ROOT/stubs/tensorflow" \
tensorflow/compiler/xla/*.proto \
tensorflow/compiler/xla/service/*.proto \
tensorflow/core/example/*.proto \
tensorflow/core/framework/*.proto \
tensorflow/core/protobuf/*.proto \
tensorflow/core/protobuf/tpu/*.proto \
tensorflow/core/util/*.proto \
tensorflow/python/keras/protobuf/*.proto \
tensorflow/tsl/protobuf/*.proto
# Undo the two pushd calls above.
popd &> /dev/null
popd &> /dev/null
# All downloads, unpacking and the virtualenv in this span happen inside $TMP_DIR.
cd "$TMP_DIR"
echo "Working in $TMP_DIR"
# Install protoc
wget "$PROTOC_URL"
mkdir protoc_install
unzip "$PROTOC_FILENAME" -d protoc_install
# Print the version reported by the unpacked protoc binary.
protoc_install/bin/protoc --version
# Fetch tensorflow (which contains all the .proto files)
wget "$TENSORFLOW_URL"
unzip "$TENSORFLOW_FILENAME"
# Directory the archive is expected to unpack to -- presumably GitHub's
# "<repo>-<version>" layout for tag archives; verify if the download source changes.
TENSORFLOW_DIR="tensorflow-$TENSORFLOW_VERSION"
# Prepare virtualenv
# NOTE(review): the venv (and the pre-commit / mypy-protobuf installed into it) is
# created inside $TMP_DIR. This listing later runs `rm -rf "$TMP_DIR"` before invoking
# pre-commit, and those invocations end in `|| true`, so a missing pre-commit binary
# would be silently ignored -- confirm pre-commit still resolves at that point.
python3 -m venv .venv
source .venv/bin/activate
python3 -m pip install pre-commit mypy-protobuf=="$MYPY_PROTOBUF_VERSION"
# Remove existing pyi
# (every *_pb2.pyi found under $REPO_ROOT/stubs/tensorflow/ is deleted before regeneration)
find "$REPO_ROOT/stubs/tensorflow/" -name "*_pb2.pyi" -delete
# Folders here cover the more commonly used protobufs externally and
# their dependencies. Tensorflow has more protobufs and can be added if requested.
# $TENSORFLOW_DIR is expanded unquoted next to the globs so the shell can expand
# the *.proto patterns; --proto_path makes imports resolve from the unpacked tree.
# NOTE(review): the last argument line ends with a backslash, so the comment line
# that follows it in this listing is joined onto the protoc command as a trailing
# comment. Harmless as written, but fragile when lines are added -- a blank line
# may have been lost here; confirm against the real script.
protoc_install/bin/protoc \
--proto_path="$TENSORFLOW_DIR" \
--mypy_out "relax_strict_optional_primitives:$REPO_ROOT/stubs/tensorflow" \
$TENSORFLOW_DIR/tensorflow/compiler/xla/*.proto \
$TENSORFLOW_DIR/tensorflow/compiler/xla/service/*.proto \
$TENSORFLOW_DIR/tensorflow/core/example/*.proto \
$TENSORFLOW_DIR/tensorflow/core/framework/*.proto \
$TENSORFLOW_DIR/tensorflow/core/protobuf/*.proto \
$TENSORFLOW_DIR/tensorflow/core/protobuf/tpu/*.proto \
$TENSORFLOW_DIR/tensorflow/core/util/*.proto \
$TENSORFLOW_DIR/tensorflow/python/keras/protobuf/*.proto \
$TENSORFLOW_DIR/tensorflow/tsl/protobuf/*.proto \
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
rm -rf "$TMP_DIR"
# Must be in a git repository to run pre-commit
cd "$REPO_ROOT"
# NOTE(review): two rm invocations follow. The first uses paths relative to
# stubs/tensorflow, the second uses paths relative to the repository root
# (stubs/tensorflow/tensorflow/...). They look like the removed and added sides of
# a diff shown together; under `set -e` a path set that does not resolve from the
# current directory would abort the script -- confirm which one is current.
# These protos exist in a folder with protos used in python, but are not
# included in the python wheel. They are likely only used for other
# language builds. stubtest was used to identify them by looking for
# ModuleNotFoundError.
rm tensorflow/compiler/xla/service/hlo_execution_profile_data_pb2.pyi \
tensorflow/compiler/xla/service/hlo_profile_printer_data_pb2.pyi \
tensorflow/compiler/xla/service/test_compilation_environment_pb2.pyi \
tensorflow/compiler/xla/xla_pb2.pyi \
tensorflow/core/protobuf/autotuning_pb2.pyi \
tensorflow/core/protobuf/conv_autotuning_pb2.pyi \
tensorflow/core/protobuf/critical_section_pb2.pyi \
tensorflow/core/protobuf/eager_service_pb2.pyi \
tensorflow/core/protobuf/master_pb2.pyi \
tensorflow/core/protobuf/master_service_pb2.pyi \
tensorflow/core/protobuf/replay_log_pb2.pyi \
tensorflow/core/protobuf/tpu/compile_metadata_pb2.pyi \
tensorflow/core/protobuf/worker_pb2.pyi \
tensorflow/core/protobuf/worker_service_pb2.pyi \
tensorflow/core/util/example_proto_fast_parsing_test_pb2.pyi
# NOTE(review): the final path of the rm below ends with a backslash and is
# immediately followed by the sed command in this listing, which would turn `sed`
# and its arguments into extra operands of rm. A separating blank line was
# presumably lost -- confirm against the real script.
# The sed expression rewrites the extra_description line of the tensorflow
# METADATA.toml to record the mypy-protobuf and tensorflow versions; it is followed
# by two file operands (absolute and repo-relative), presumably a before/after pair.
rm \
stubs/tensorflow/tensorflow/compiler/xla/service/hlo_execution_profile_data_pb2.pyi \
stubs/tensorflow/tensorflow/compiler/xla/service/hlo_profile_printer_data_pb2.pyi \
stubs/tensorflow/tensorflow/compiler/xla/service/test_compilation_environment_pb2.pyi \
stubs/tensorflow/tensorflow/compiler/xla/xla_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/autotuning_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/conv_autotuning_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/critical_section_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/eager_service_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/master_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/master_service_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/replay_log_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/tpu/compile_metadata_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/worker_pb2.pyi \
stubs/tensorflow/tensorflow/core/protobuf/worker_service_pb2.pyi \
stubs/tensorflow/tensorflow/core/util/example_proto_fast_parsing_test_pb2.pyi \
sed --in-place="" \
"s/extra_description = .*$/extra_description = \"Partially generated using [mypy-protobuf==$MYPY_PROTOBUF_VERSION](https:\/\/github.com\/nipunn1313\/mypy-protobuf\/tree\/v$MYPY_PROTOBUF_VERSION) on tensorflow==$TENSORFLOW_VERSION\"/" \
"$REPO_ROOT/stubs/tensorflow/METADATA.toml"
stubs/tensorflow/METADATA.toml
# Cleanup last. If the script fails halfway, it's nice to be able to re-run it immediately
rm -rf repository/
# Must be run in a git repository
# NOTE(review): $REPO_ROOT is expanded unquoted on the next line, unlike the quoted
# `cd "$REPO_ROOT"` used elsewhere in this listing.
cd $REPO_ROOT
# use `|| true` so the script still continues even if a pre-commit hook
# applies autofixes (which will result in a nonzero exit code)
# The $(git ls-files ...) substitutions are left unquoted so each tracked file
# becomes its own --files argument.
pre-commit run --files $(git ls-files -- "$REPO_ROOT/stubs/tensorflow/tensorflow") || true
# Ruff takes two passes to fix everything, re-running all of pre-commit is *slow*
# and we don't need --unsafe-fixes to remove imports
ruff check "$REPO_ROOT/stubs/tensorflow/tensorflow" --fix --exit-zero
pre-commit run --files $(git ls-files -- "stubs/tensorflow/**_pb2.pyi") || true