Source code for torchopt.optim.base

# Copyright 2022-2024 MetaOPT Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The base class for optimizers."""

from __future__ import annotations

from typing import Callable, Iterable, Sequence

import torch

from torchopt import pytree
from torchopt.base import UninitializedState
from torchopt.typing import GradientTransformation, OptState, Params, TupleOfTensors
from torchopt.update import apply_updates


__all__ = ['Optimizer']


class Optimizer:
    """A base class for classic optimizers that is similar to :class:`torch.optim.Optimizer`."""

    def __init__(self, params: Iterable[torch.Tensor], impl: GradientTransformation) -> None:
        r"""Initialize the optimizer.

        Args:
            params (iterable of torch.Tensor): An iterable of :class:`torch.Tensor`\s. Specifies
                what tensors should be optimized.
            impl (GradientTransformation): A low-level optimizer function. It could be an
                optimizer function provided in :mod:`torchopt.alias` or a customized
                :func:`torchopt.chain`\ed transformation. Note that using ``Optimizer(sgd())`` or
                ``Optimizer(chain(sgd()))`` is equivalent to :class:`torchopt.SGD`.
        """
        if not isinstance(impl, GradientTransformation):
            raise TypeError(
                f'{impl} (type: {type(impl).__name__}) is not a GradientTransformation',
            )

        self.impl: GradientTransformation = impl
        self.param_groups: list[TupleOfTensors] = []
        self.param_treespecs: list[pytree.PyTreeSpec] = []
        self.state_groups: list[OptState] = []

        if not isinstance(params, (list, tuple)):
            params = tuple(params)
        self.add_param_group(params)

    def zero_grad(self, set_to_none: bool = False) -> None:
        r"""Set the gradients of all optimized :class:`torch.Tensor`\s to zero.

        The behavior is similar to :meth:`torch.optim.Optimizer.zero_grad`.

        Args:
            set_to_none (bool, optional): Instead of setting to zero, set the ``grads`` to
                :data:`None`. (default: :data:`False`)
        """
        if set_to_none:

            def f(p: torch.Tensor) -> None:
                p.grad = None

        else:

            def f(p: torch.Tensor) -> None:
                if p.grad is None:
                    return
                if p.grad.grad_fn is not None:
                    p.grad.detach_()
                else:
                    p.grad.requires_grad_(False)
                p.grad.zero_()

        pytree.tree_map_(f, self.param_groups)  # type: ignore[arg-type]

    def state_dict(self) -> tuple[OptState, ...]:
        """Return the state of the optimizer."""
        return tuple(self.state_groups)

    def load_state_dict(self, state_dict: Sequence[OptState]) -> None:
        """Load the optimizer state.

        Args:
            state_dict (sequence of tree of Tensor): Optimizer state. Should be an object
                returned from a call to :meth:`state_dict`.
        """
        self.state_groups[:] = list(state_dict)

    def step(self, closure: Callable[[], torch.Tensor] | None = None) -> torch.Tensor | None:
        """Perform a single optimization step.

        The behavior is similar to :meth:`torch.optim.Optimizer.step`.

        Args:
            closure (callable or None, optional): A closure that reevaluates the model and
                returns the loss. Optional for most optimizers. (default: :data:`None`)
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        def f(p: torch.Tensor) -> torch.Tensor | None:
            return p.grad

        for i, (params, state) in enumerate(zip(self.param_groups, self.state_groups)):
            if isinstance(state, UninitializedState):
                # Lazily initialize the optimizer state on the first call to `step()`.
                state = self.impl.init(params)
            grads = pytree.tree_map(f, params)  # type: ignore[arg-type]
            updates, new_state = self.impl.update(grads, state, params=params, inplace=True)
            self.param_groups[i] = apply_updates(params, updates, inplace=True)
            self.state_groups[i] = new_state

        return loss

    def add_param_group(self, params: Params) -> None:
        """Add a param group to the optimizer's ``param_groups``."""
        flat_params: TupleOfTensors
        flat_params, params_treespec = pytree.tree_flatten_as_tuple(params)
        self.param_groups.append(flat_params)
        self.param_treespecs.append(params_treespec)
        self.state_groups.append(UninitializedState())
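
A minimal usage sketch of this class (not part of the module): it assumes the functional alias ``sgd`` from :mod:`torchopt.alias` referenced in the ``__init__`` docstring above, and uses a toy model and random data for illustration only::

    import torch
    import torchopt
    from torchopt.optim.base import Optimizer

    model = torch.nn.Linear(4, 2)
    # ``torchopt.sgd`` is assumed to be the functional alias from ``torchopt.alias``;
    # per the docstring above, ``Optimizer(sgd())`` behaves like ``torchopt.SGD``.
    optimizer = Optimizer(model.parameters(), torchopt.sgd(lr=0.1))

    inputs, targets = torch.randn(8, 4), torch.randn(8, 2)
    for _ in range(3):
        optimizer.zero_grad()                # clear accumulated gradients
        loss = torch.nn.functional.mse_loss(model(inputs), targets)
        loss.backward()                      # populate ``p.grad`` for each parameter
        optimizer.step()                     # apply the in-place parameter update

    # The per-group optimizer state can be captured and restored later.
    saved_state = optimizer.state_dict()
    optimizer.load_state_dict(saved_state)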