tensorflow: Add legacy optimizers (#9997)

commit 01972e0e51 (parent f7443a748e)
Author: Mehdi Drissi
Date: 2023-04-26 15:15:41 -07:00
Committed by: GitHub

10 changed files with 288 additions and 4 deletions

stubs/tensorflow/@tests/stubtest_allowlist.txt

@@ -16,12 +16,13 @@ tensorflow.Graph.__getattr__
tensorflow.Operation.__getattr__
tensorflow.Variable.__getattr__
tensorflow.keras.layers.Layer.__getattr__
tensorflow.GradientTape.__getattr__
# Internal undocumented API
tensorflow.RaggedTensor.__init__
# Has an undocumented extra argument that tf.Variable, which acts like a subclass
# (by dynamically patching tf.Tensor methods), does not preserve.
tensorflow.Tensor.__getitem__
-# stub internal utility
+# stub internal utilities
tensorflow._aliases
# Tensorflow imports are cursed.

stubs/tensorflow/tensorflow/__init__.pyi

@@ -1,7 +1,7 @@
from _typeshed import Incomplete, Unused
from abc import ABCMeta
from builtins import bool as _bool
-from collections.abc import Callable, Iterable, Iterator, Sequence
+from collections.abc import Callable, Generator, Iterable, Iterator, Mapping, Sequence
from contextlib import contextmanager
from enum import Enum
from types import TracebackType
@@ -10,6 +10,7 @@ from typing_extensions import ParamSpec, Self, TypeAlias
import numpy
from tensorflow import initializers as initializers, keras as keras, math as math
from tensorflow._aliases import ContainerGradients, ContainerTensors, ContainerTensorsLike, Gradients, TensorLike
# Explicit import of DType is covered by the wildcard, but
# is necessary to avoid a crash in pytype.
@@ -53,7 +54,8 @@ from tensorflow.math import (
    subtract as subtract,
    tanh as tanh,
)
-from tensorflow.sparse import SparseTensor
+from tensorflow.python.trackable.autotrackable import AutoTrackable
+from tensorflow.sparse import SparseTensor as SparseTensor
# Tensors ideally should be a generic type, but properly typing data type/shape
# will be a lot of work. Until we have good non-generic tensorflow stubs,
@@ -263,7 +265,7 @@ class name_scope:
_P = ParamSpec("_P")
_R = TypeVar("_R")
-class Module:
+class Module(AutoTrackable):
    def __init__(self, name: str | None = None) -> None: ...
    @property
    def name(self) -> str: ...
@@ -282,4 +284,52 @@ class Module:
    @classmethod
    def with_name_scope(cls, method: Callable[_P, _R]) -> Callable[_P, _R]: ...
class UnconnectedGradients(Enum):
    NONE = "none"
    ZERO = "zero"

class GradientTape:
    def __init__(self, persistent: _bool = False, watch_accessed_variables: _bool = True) -> None: ...
    def __enter__(self) -> Self: ...
    def __exit__(self, typ: type[BaseException] | None, value: BaseException | None, traceback: TracebackType | None) -> None: ...
    # Higher kinded types would be nice here and these overloads are a way to simulate some of them.
    @overload
    def gradient(
        self,
        target: ContainerTensors,
        sources: TensorLike,
        output_gradients: list[Tensor] | None = None,
        unconnected_gradients: UnconnectedGradients = ...,
    ) -> Gradients: ...
    @overload
    def gradient(
        self,
        target: ContainerTensors,
        sources: Sequence[Tensor],
        output_gradients: list[Tensor] | None = None,
        unconnected_gradients: UnconnectedGradients = ...,
    ) -> list[Gradients]: ...
    @overload
    def gradient(
        self,
        target: ContainerTensors,
        sources: Mapping[str, Tensor],
        output_gradients: list[Tensor] | None = None,
        unconnected_gradients: UnconnectedGradients = ...,
    ) -> dict[str, Gradients]: ...
    @overload
    def gradient(
        self,
        target: ContainerTensors,
        sources: ContainerTensors,
        output_gradients: list[Tensor] | None = None,
        unconnected_gradients: UnconnectedGradients = ...,
    ) -> ContainerGradients: ...
    @contextmanager
    def stop_recording(self) -> Generator[None, None, None]: ...
    def reset(self) -> None: ...
    def watch(self, tensor: ContainerTensorsLike) -> None: ...
    def watched_variables(self) -> tuple[Variable, ...]: ...
    def __getattr__(self, name: str) -> Incomplete: ...
def __getattr__(name: str) -> Incomplete: ...
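
For orientation, here is a sketch of how the `gradient` overloads above resolve at call sites. This is illustrative usage, not part of the commit; it assumes TensorFlow 2.x eager execution, with `persistent=True` so the tape can be queried more than once:

```python
import tensorflow as tf

x = tf.Variable(2.0)
y = tf.Variable(3.0)

with tf.GradientTape(persistent=True) as tape:
    loss = x * x + y

dx = tape.gradient(loss, x)                   # TensorLike source -> Gradients
dl = tape.gradient(loss, [x, y])              # Sequence source -> list[Gradients]
dm = tape.gradient(loss, {"x": x, "y": y})    # Mapping source -> dict[str, Gradients]
del tape  # release the resources held by a persistent tape

# UnconnectedGradients controls what a source with no path to the target
# yields: None under NONE (the default), a zero tensor under ZERO.
z = tf.Variable(5.0)
with tf.GradientTape() as tape:
    loss = x * x
dz = tape.gradient(loss, z, unconnected_gradients=tf.UnconnectedGradients.ZERO)
```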

stubs/tensorflow/tensorflow/_aliases.pyi

@@ -7,8 +7,16 @@ from typing import Any, TypeVar
from typing_extensions import TypeAlias
import numpy
import tensorflow as tf
_T1 = TypeVar("_T1")
ContainerGeneric: TypeAlias = Mapping[str, ContainerGeneric[_T1]] | Sequence[ContainerGeneric[_T1]] | _T1
TensorLike: TypeAlias = tf.Tensor | tf.RaggedTensor | tf.SparseTensor
Gradients: TypeAlias = tf.Tensor | tf.IndexedSlices
ContainerTensorsLike: TypeAlias = ContainerGeneric[TensorLike]
ContainerTensors: TypeAlias = ContainerGeneric[tf.Tensor]
ContainerGradients: TypeAlias = ContainerGeneric[Gradients]
AnyArray: TypeAlias = numpy.ndarray[Any, Any]
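
To make the recursive `ContainerGeneric` alias concrete, a small illustration (variable names are invented): a value typed as `ContainerTensors` may be a bare tensor, a sequence of tensors, or arbitrarily nested str-keyed mappings and sequences of them:

```python
from typing import TYPE_CHECKING

import tensorflow as tf

if TYPE_CHECKING:
    # _aliases is stub-only and has no runtime counterpart, hence the guard.
    from tensorflow._aliases import ContainerTensors

w = tf.constant([1.0, 2.0])
b = tf.constant(0.5)

flat: "ContainerTensors" = w
seq: "ContainerTensors" = [w, b]
nested: "ContainerTensors" = {"dense": {"kernel": w, "bias": b}, "extras": [w]}
```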

stubs/tensorflow/tensorflow/keras/__init__.pyi

@@ -5,6 +5,7 @@ from tensorflow.keras import (
    constraints as constraints,
    initializers as initializers,
    layers as layers,
    optimizers as optimizers,
    regularizers as regularizers,
)

stubs/tensorflow/tensorflow/keras/optimizers/__init__.pyi

@@ -0,0 +1,5 @@
from _typeshed import Incomplete
from tensorflow.keras.optimizers import legacy as legacy, schedules as schedules
def __getattr__(name: str) -> Incomplete: ...

stubs/tensorflow/tensorflow/keras/optimizers/legacy/__init__.pyi

@@ -0,0 +1,112 @@
from _typeshed import Incomplete
from abc import abstractmethod
from collections.abc import Callable, Iterable
from typing import Any
from typing_extensions import Self, TypeAlias
import tensorflow as tf
from tensorflow._aliases import Gradients
from tensorflow.keras.optimizers import schedules as schedules
from tensorflow.python.trackable.base import Trackable
_Initializer: TypeAlias = str | Callable[[], tf.Tensor] | dict[str, Any]
_Shape: TypeAlias = tf.TensorShape | Iterable[int | None]
_Dtype: TypeAlias = tf.DType | str | None
_LearningRate: TypeAlias = float | tf.Tensor | schedules.LearningRateSchedule | Callable[[], float | tf.Tensor]
_GradientAggregator: TypeAlias = Callable[[list[tuple[Gradients, tf.Variable]]], list[tuple[Gradients, tf.Variable]]] | None
_GradientTransformer: TypeAlias = (
    Iterable[Callable[[list[tuple[Gradients, tf.Variable]]], list[tuple[Gradients, tf.Variable]]]] | None
)
# kwargs here and in other optimizers can be given a better type once Unpack[TypedDict] (PEP 692) is
# supported; see the sketch after this class.
class Optimizer(Trackable):
    _name: str
    _iterations: tf.Variable | None
    _weights: list[tf.Variable]
    gradient_aggregator: _GradientAggregator
    gradient_transformers: _GradientTransformer
    learning_rate: _LearningRate
    def __init__(
        self,
        name: str,
        gradient_aggregator: _GradientAggregator = None,
        gradient_transformers: _GradientTransformer = None,
        **kwargs: Any,
    ) -> None: ...
    def _create_all_weights(self, var_list: Iterable[tf.Variable]) -> None: ...
    @property
    def iterations(self) -> tf.Variable: ...
    @iterations.setter
    def iterations(self, variable: tf.Variable) -> None: ...
    def add_slot(
        self, var: tf.Variable, slot_name: str, initializer: _Initializer = "zeros", shape: tf.TensorShape | None = None
    ) -> tf.Variable: ...
    def add_weight(
        self,
        name: str,
        shape: _Shape,
        dtype: _Dtype = None,
        initializer: _Initializer = "zeros",
        trainable: None | bool = None,
        synchronization: tf.VariableSynchronization = ...,
        aggregation: tf.VariableAggregation = ...,
    ) -> tf.Variable: ...
    def apply_gradients(
        self,
        grads_and_vars: Iterable[tuple[Gradients, tf.Variable]],
        name: str | None = None,
        experimental_aggregate_gradients: bool = True,
    ) -> tf.Operation | None: ...
    @classmethod
    def from_config(cls, config: dict[str, Any], custom_objects: dict[str, type] | None = None) -> Self: ...
    # The missing ABC base class is intentional: the class is not abstract at runtime.
    @abstractmethod
    def get_config(self) -> dict[str, Any]: ...
    def get_slot(self, var: tf.Variable, slot_name: str) -> tf.Variable: ...
    def get_slot_names(self) -> list[str]: ...
    def get_gradients(self, loss: tf.Tensor, params: list[tf.Variable]) -> list[Gradients]: ...
    def minimize(
        self,
        loss: tf.Tensor | Callable[[], tf.Tensor],
        var_list: list[tf.Variable] | tuple[tf.Variable, ...] | Callable[[], list[tf.Variable] | tuple[tf.Variable, ...]],
        grad_loss: tf.Tensor | None = None,
        name: str | None = None,
        tape: tf.GradientTape | None = None,
    ) -> tf.Operation: ...
    def variables(self) -> list[tf.Variable]: ...
    @property
    def weights(self) -> list[tf.Variable]: ...
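
As an aside on the `**kwargs` note above: a hypothetical sketch of what the signature could look like once PEP 692's `Unpack[TypedDict]` is usable here. The TypedDict name is invented; `clipnorm`, `clipvalue`, and `global_clipnorm` are real keyword arguments of the legacy optimizers:

```python
from typing import TypedDict
from typing_extensions import NotRequired, Unpack

class _OptimizerKwargs(TypedDict):
    # Invented name; fields mirror commonly passed legacy-optimizer kwargs.
    clipnorm: NotRequired[float]
    clipvalue: NotRequired[float]
    global_clipnorm: NotRequired[float]

class Optimizer:
    def __init__(self, name: str, **kwargs: Unpack[_OptimizerKwargs]) -> None: ...
```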
class Adam(Optimizer):
    def __init__(
        self,
        learning_rate: _LearningRate = 0.001,
        beta_1: float = 0.9,
        beta_2: float = 0.999,
        epsilon: float = 1e-07,
        amsgrad: bool = False,
        name: str = "Adam",
        **kwargs: Any,
    ) -> None: ...
    def get_config(self) -> dict[str, Any]: ...

class Adagrad(Optimizer):
    _initial_accumulator_value: float
    def __init__(
        self,
        learning_rate: _LearningRate = 0.001,
        initial_accumulator_value: float = 0.1,
        epsilon: float = 1e-7,
        name: str = "Adagrad",
        **kwargs: Any,
    ) -> None: ...
    def get_config(self) -> dict[str, Any]: ...

class SGD(Optimizer):
    def __init__(
        self, learning_rate: _LearningRate = 0.01, momentum: float = 0.0, nesterov: bool = False, name: str = "SGD", **kwargs: Any
    ) -> None: ...
    def get_config(self) -> dict[str, Any]: ...

def __getattr__(name: str) -> Incomplete: ...
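
A brief usage sketch of the API these stubs describe (assuming TensorFlow 2.11+, where the `legacy` namespace exists; in earlier 2.x releases the same classes live directly under `tf.keras.optimizers`):

```python
import tensorflow as tf

opt = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, momentum=0.9)
var = tf.Variable(1.0)

# apply_gradients consumes Iterable[tuple[Gradients, tf.Variable]].
with tf.GradientTape() as tape:
    loss = var * var
(grad,) = tape.gradient(loss, [var])
opt.apply_gradients([(grad, var)])

# minimize() bundles the tape/gradient/apply steps; var_list may be a
# list, a tuple, or a callable returning one.
opt.minimize(lambda: var * var, var_list=[var])
```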

stubs/tensorflow/tensorflow/keras/optimizers/schedules/__init__.pyi

@@ -0,0 +1,99 @@
from abc import abstractmethod
from collections.abc import Sequence
from typing import Any
from typing_extensions import Self
import tensorflow as tf
class LearningRateSchedule:
    # At runtime these methods are abstract even though the class is not an ABC.
    @abstractmethod
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    @abstractmethod
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class PiecewiseConstantDecay(LearningRateSchedule):
    def __init__(
        self,
        boundaries: Sequence[tf.Tensor] | Sequence[float],
        values: Sequence[float] | Sequence[tf.Tensor],
        name: str | None = None,
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class InverseTimeDecay(LearningRateSchedule):
    def __init__(
        self,
        initial_learning_rate: float | tf.Tensor,
        decay_steps: int,
        decay_rate: float,
        staircase: bool = False,
        name: str | None = None,
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class PolynomialDecay(LearningRateSchedule):
    def __init__(
        self,
        initial_learning_rate: float | tf.Tensor,
        decay_steps: int,
        end_learning_rate: float | tf.Tensor = 0.0001,
        power: float = 1.0,
        cycle: bool = False,
        name: str | None = None,
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class CosineDecay(LearningRateSchedule):
    def __init__(
        self, initial_learning_rate: float | tf.Tensor, decay_steps: int, alpha: float | tf.Tensor = 0.0, name: str | None = None
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class CosineDecayRestarts(LearningRateSchedule):
    def __init__(
        self,
        initial_learning_rate: float | tf.Tensor,
        first_decay_steps: int | tf.Tensor,
        t_mul: float | tf.Tensor = 2.0,
        m_mul: float | tf.Tensor = 1.0,
        alpha: float | tf.Tensor = 0.0,
        name: str | None = None,
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

class ExponentialDecay(LearningRateSchedule):
    def __init__(
        self,
        initial_learning_rate: float | tf.Tensor,
        decay_steps: int | tf.Tensor,
        decay_rate: float | tf.Tensor,
        staircase: bool = False,
        name: str | None = None,
    ) -> None: ...
    def __call__(self, step: int | tf.Tensor) -> float | tf.Tensor: ...
    def get_config(self) -> dict[str, Any]: ...
    @classmethod
    def from_config(cls, config: dict[str, Any]) -> Self: ...

def deserialize(
    config: dict[str, Any], custom_objects: dict[str, type] | None = None, use_legacy_format: bool = False
) -> LearningRateSchedule: ...
def serialize(learning_rate_schedule: LearningRateSchedule, use_legacy_format: bool = False) -> dict[str, Any]: ...
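
Finally, a short sketch of how these schedules are used: a `LearningRateSchedule` is callable on a step count and is accepted anywhere `_LearningRate` allows it (illustrative only; assumes TensorFlow 2.11+ for the `legacy` namespace):

```python
import tensorflow as tf

schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.96, staircase=True
)

lr_at_500 = schedule(500)  # __call__(step) returns the decayed rate at that step

# Accepted as an optimizer's learning_rate (_LearningRate also admits a
# float, a Tensor, or a zero-argument callable).
opt = tf.keras.optimizers.legacy.Adam(learning_rate=schedule)

# get_config()/from_config() round-trip the schedule via a plain dict.
config = schedule.get_config()
restored = tf.keras.optimizers.schedules.ExponentialDecay.from_config(config)
```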

stubs/tensorflow/tensorflow/python/trackable/autotrackable.pyi

@@ -0,0 +1,3 @@
from tensorflow.python.trackable.base import Trackable
class AutoTrackable(Trackable): ...

stubs/tensorflow/tensorflow/python/trackable/base.pyi

@@ -0,0 +1,5 @@
# Internal type that is commonly used as a base class, and that some
# public API signatures need. As the type is internal, the exact module
# it lives in is unstable across versions.
class Trackable: ...