# Source code for d3rlpy.preprocessing.action_scalers

from typing import Any, ClassVar, Dict, List, Optional, Type

import gym
import numpy as np
import torch

from ..dataset import Episode, MDPDataset
from ..decorators import pretty_repr


@pretty_repr
class ActionScaler:
    """Interface shared by the action preprocessing scalers.

    Every method except ``get_type`` is a stub that raises
    ``NotImplementedError`` and is meant to be overridden by a subclass.
    ``TYPE`` is the string identifier returned by ``get_type``.

    """

    TYPE: ClassVar[str] = "none"

    def fit(self, episodes: List[Episode]) -> None:
        """Derives the scaling parameters from a dataset.

        Args:
            episodes: list of episode objects to estimate from.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError

    def fit_with_env(self, env: gym.Env) -> None:
        """Derives the scaling parameters from an environment.

        Args:
            env: gym environment to read the parameters from.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError

    def transform(self, action: torch.Tensor) -> torch.Tensor:
        """Applies the scaling to an action.

        Args:
            action: action tensor to process.

        Returns:
            the processed action.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError

    def reverse_transform(self, action: torch.Tensor) -> torch.Tensor:
        """Undoes the scaling on an action tensor.

        Args:
            action: processed action tensor.

        Returns:
            the reversely transformed action.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError

    def reverse_transform_numpy(self, action: np.ndarray) -> np.ndarray:
        """Undoes the scaling on an action given as a numpy array.

        Args:
            action: processed action array.

        Returns:
            the reversely transformed action.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError

    def get_type(self) -> str:
        """Reports the identifier of this scaler.

        Returns:
            the ``TYPE`` string of the scaler class.

        """
        return self.TYPE

    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        """Exposes the parameters held by this scaler.

        Args:
            deep: flag to deepcopy parameters.

        Returns:
            dictionary of action scaler parameters.

        Raises:
            NotImplementedError: unless overridden by a subclass.

        """
        raise NotImplementedError


class MinMaxActionScaler(ActionScaler):
    r"""Min-Max normalization action preprocessing.

    Actions will be normalized in range ``[-1.0, 1.0]``.

    .. math::

        a' = (a - \min{a}) / (\max{a} - \min{a}) * 2 - 1

    An entry whose minimum equals its maximum makes the denominator zero, so
    ``transform`` yields non-finite values for that entry.

    .. code-block:: python

        from d3rlpy.dataset import MDPDataset
        from d3rlpy.algos import CQL

        dataset = MDPDataset(observations, actions, rewards, terminals)

        # initialize algorithm with MinMaxActionScaler
        cql = CQL(action_scaler='min_max')

        # scaler is initialized from the given episodes
        cql.fit(dataset.episodes)

    You can also initialize with :class:`d3rlpy.dataset.MDPDataset` object or
    manually.

    .. code-block:: python

        from d3rlpy.preprocessing import MinMaxActionScaler

        # initialize with dataset
        scaler = MinMaxActionScaler(dataset)

        # initialize manually
        minimum = actions.min(axis=0)
        maximum = actions.max(axis=0)
        action_scaler = MinMaxActionScaler(minimum=minimum, maximum=maximum)

        cql = CQL(action_scaler=action_scaler)

    Args:
        dataset (d3rlpy.dataset.MDPDataset): dataset object.
        maximum (numpy.ndarray): maximum values at each entry.
        minimum (numpy.ndarray): minimum values at each entry.

    """

    TYPE: ClassVar[str] = "min_max"
    _minimum: Optional[np.ndarray]
    _maximum: Optional[np.ndarray]

    def __init__(
        self,
        dataset: Optional[MDPDataset] = None,
        maximum: Optional[np.ndarray] = None,
        minimum: Optional[np.ndarray] = None,
    ):
        self._minimum = None
        self._maximum = None
        # A truthy dataset takes precedence over explicit bounds. Explicit
        # bounds are only used when BOTH are given; otherwise the scaler
        # stays unfitted until ``fit`` or ``fit_with_env`` is called.
        if dataset:
            self.fit(dataset.episodes)
        elif maximum is not None and minimum is not None:
            self._minimum = np.asarray(minimum)
            self._maximum = np.asarray(maximum)

    def fit(self, episodes: List[Episode]) -> None:
        """Estimates per-entry action bounds from the given episodes.

        Does nothing when both bounds are already set.

        Args:
            episodes: a list of episode objects.

        Raises:
            ValueError: if ``episodes`` contains no episode.

        """
        if self._minimum is not None and self._maximum is not None:
            return

        minimum: Optional[np.ndarray] = None
        maximum: Optional[np.ndarray] = None
        for episode in episodes:
            actions = np.asarray(episode.actions)
            episode_min = actions.min(axis=0)
            episode_max = actions.max(axis=0)
            if minimum is None or maximum is None:
                minimum, maximum = episode_min, episode_max
            else:
                minimum = np.minimum(minimum, episode_min)
                maximum = np.maximum(maximum, episode_max)

        # Without this guard an empty input would fail further down with an
        # unrelated-looking unbound-variable error.
        if minimum is None or maximum is None:
            raise ValueError(
                "MinMaxActionScaler cannot be fitted without episodes."
            )

        # prepend a leading axis of size 1 so the bounds broadcast over it
        self._minimum = minimum.reshape((1,) + minimum.shape)
        self._maximum = maximum.reshape((1,) + maximum.shape)

    def fit_with_env(self, env: gym.Env) -> None:
        """Takes the action bounds from the environment's action space.

        Does nothing when both bounds are already set.

        Args:
            env: gym environment whose action space is a ``gym.spaces.Box``.

        """
        if self._minimum is not None and self._maximum is not None:
            return

        assert isinstance(env.action_space, gym.spaces.Box)
        shape = env.action_space.shape
        low = np.asarray(env.action_space.low)
        high = np.asarray(env.action_space.high)
        self._minimum = low.reshape((1,) + shape)
        self._maximum = high.reshape((1,) + shape)

    def _get_torch_bounds(self, device: Any) -> Any:
        """Returns ``(minimum, maximum)`` as float32 tensors on ``device``."""
        assert self._minimum is not None and self._maximum is not None
        minimum = torch.tensor(
            self._minimum, dtype=torch.float32, device=device
        )
        maximum = torch.tensor(
            self._maximum, dtype=torch.float32, device=device
        )
        return minimum, maximum

    def transform(self, action: torch.Tensor) -> torch.Tensor:
        """Maps actions from ``[minimum, maximum]`` to ``[-1.0, 1.0]``.

        Args:
            action: action vector.

        Returns:
            processed action.

        """
        minimum, maximum = self._get_torch_bounds(action.device)
        # transform action into [-1.0, 1.0]
        return ((action - minimum) / (maximum - minimum)) * 2.0 - 1.0

    def reverse_transform(self, action: torch.Tensor) -> torch.Tensor:
        """Maps actions from ``[-1.0, 1.0]`` back to ``[minimum, maximum]``.

        Args:
            action: action vector.

        Returns:
            reversely transformed action.

        """
        minimum, maximum = self._get_torch_bounds(action.device)
        # transform action from [-1.0, 1.0]
        return ((maximum - minimum) * ((action + 1.0) / 2.0)) + minimum

    def reverse_transform_numpy(self, action: np.ndarray) -> np.ndarray:
        """Numpy counterpart of ``reverse_transform``.

        Args:
            action: action vector.

        Returns:
            reversely transformed action.

        """
        assert self._minimum is not None and self._maximum is not None
        minimum, maximum = self._minimum, self._maximum
        # transform action from [-1.0, 1.0]
        return ((maximum - minimum) * ((action + 1.0) / 2.0)) + minimum

    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        """Returns the bounds held by this scaler.

        Args:
            deep: flag to copy the arrays instead of returning references.

        Returns:
            dictionary with ``"minimum"`` and ``"maximum"`` entries; an entry
            is ``None`` while the corresponding bound is not set.

        """
        minimum = self._minimum
        maximum = self._maximum
        if deep:
            minimum = None if minimum is None else minimum.copy()
            maximum = None if maximum is None else maximum.copy()
        return {"minimum": minimum, "maximum": maximum}


# Registry mapping each scaler's ``TYPE`` string to its class; filled by
# ``register_action_scaler`` and read by ``create_action_scaler``.
ACTION_SCALER_LIST: Dict[str, Type[ActionScaler]] = {}


def register_action_scaler(cls: Type[ActionScaler]) -> None:
    """Adds an action scaler class to the registry under its ``TYPE``.

    Args:
        cls: action scaler class inheriting ``ActionScaler``.

    Raises:
        AssertionError: if the same ``TYPE`` is already registered.

    """
    type_name = cls.TYPE
    assert (
        type_name not in ACTION_SCALER_LIST
    ), "%s seems to be already registered" % type_name
    ACTION_SCALER_LIST[type_name] = cls


def create_action_scaler(name: str, **kwargs: Any) -> ActionScaler:
    """Instantiates the action scaler registered under ``name``.

    Args:
        name: registered scaler type name.
        kwargs: keyword arguments forwarded to the scaler constructor.

    Returns:
        scaler object.

    Raises:
        AssertionError: if ``name`` is not registered, or the created object
            is not an ``ActionScaler``.

    """
    assert name in ACTION_SCALER_LIST, "%s seems not to be registered." % name
    scaler_cls = ACTION_SCALER_LIST[name]
    scaler = scaler_cls(**kwargs)  # type: ignore
    assert isinstance(scaler, ActionScaler)
    return scaler


# Register the min-max scaler at import time so that
# ``create_action_scaler("min_max")`` can find it.
register_action_scaler(MinMaxActionScaler)