Source code for d3rlpy.preprocessing.scalers

from abc import ABCMeta, abstractmethod
from typing import Any, ClassVar, Dict, List, Optional, Type

import numpy as np
import torch

from ..dataset import Episode, MDPDataset


class Scaler(metaclass=ABCMeta):

    TYPE: ClassVar[str] = "none"

    @abstractmethod
    def fit(self, episodes: List[Episode]) -> None:
        pass

    @abstractmethod
    def transform(self, x: torch.Tensor) -> torch.Tensor:
        pass

    @abstractmethod
    def reverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        pass

    def get_type(self) -> str:
        """Returns a scaler type.

        Returns:
            scaler type.

        """
        return self.TYPE

    @abstractmethod
    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        pass


[docs]class PixelScaler(Scaler):
    """Pixel normalization preprocessing.

    .. math::

        x' = x / 255

    .. code-block:: python

        from d3rlpy.dataset import MDPDataset
        from d3rlpy.algos import CQL

        dataset = MDPDataset(observations, actions, rewards, terminals)

        # initialize algorithm with PixelScaler
        cql = CQL(scaler='pixel')

        cql.fit(dataset.episodes)

    """

    TYPE: ClassVar[str] = "pixel"

[docs]    def fit(self, episodes: List[Episode]) -> None:
        pass

[docs]    def transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns normalized pixel observations.

        Args:
            x: pixel observation tensor.

        Returns:
            normalized pixel observation tensor.

        """
        return x.float() / 255.0

[docs]    def reverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns reversely transformed observations.

        Args:
            x: normalized observation tensor.

        Returns:
            unnormalized pixel observation tensor.

        """
        return (x * 255.0).long()

[docs]    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        """Returns scaling parameters.

        PixelScaler returns empty dictiornary.

        Args:
            deep: flag to deeply copy objects.

        Returns:
            empty dictionary.

        """
        return {}


[docs]class MinMaxScaler(Scaler):
    r"""Min-Max normalization preprocessing.

    .. math::

        x' = (x - \min{x}) / (\max{x} - \min{x})

    .. code-block:: python

        from d3rlpy.dataset import MDPDataset
        from d3rlpy.algos import CQL

        dataset = MDPDataset(observations, actions, rewards, terminals)

        # initialize algorithm with MinMaxScaler
        cql = CQL(scaler='min_max')

        # scaler is initialized from the given episodes
        cql.fit(dataset.episodes)

    You can also initialize with :class:`d3rlpy.dataset.MDPDataset` object or
    manually.

    .. code-block:: python

        from d3rlpy.preprocessing import MinMaxScaler

        # initialize with dataset
        scaler = MinMaxScaler(dataset)

        # initialize manually
        minimum = observations.min(axis=0)
        maximum = observations.max(axis=0)
        scaler = MinMaxScaler(minimum=minimum, maximum=maximum)

        cql = CQL(scaler=scaler)

    Args:
        dataset (d3rlpy.dataset.MDPDataset): dataset object.
        min (numpy.ndarray): minimum values at each entry.
        max (numpy.ndarray): maximum values at each entry.

    """

    TYPE: ClassVar[str] = "min_max"
    _minimum: Optional[np.ndarray]
    _maximum: Optional[np.ndarray]

    def __init__(
        self,
        dataset: Optional[MDPDataset] = None,
        maximum: Optional[np.ndarray] = None,
        minimum: Optional[np.ndarray] = None,
    ):
        self._minimum = None
        self._maximum = None
        if dataset:
            self.fit(dataset.episodes)
        elif maximum is not None and minimum is not None:
            self._minimum = np.asarray(minimum)
            self._maximum = np.asarray(maximum)

[docs]    def fit(self, episodes: List[Episode]) -> None:
        """Fits minimum and maximum from list of episodes.

        Args:
            episodes: list of episodes.

        """
        if self._minimum is not None and self._maximum is not None:
            return

        for i, e in enumerate(episodes):
            observations = np.asarray(e.observations)
            if i == 0:
                minimum = observations.min(axis=0)
                maximum = observations.max(axis=0)
                continue
            minimum = np.minimum(minimum, observations.min(axis=0))
            maximum = np.maximum(maximum, observations.max(axis=0))

        self._minimum = minimum.reshape((1,) + minimum.shape)
        self._maximum = maximum.reshape((1,) + maximum.shape)

[docs]    def transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns normalized observation tensor.

        Args:
            x: observation tensor.

        Returns:
            normalized observation tensor.

        """
        assert self._minimum is not None and self._maximum is not None
        minimum = torch.tensor(
            self._minimum, dtype=torch.float32, device=x.device
        )
        maximum = torch.tensor(
            self._maximum, dtype=torch.float32, device=x.device
        )
        return (x - minimum) / (maximum - minimum)

[docs]    def reverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns reversely transformed observations.

        Args:
            x: normalized observation tensor.

        Returns:
            unnormalized observation tensor.

        """
        assert self._minimum is not None and self._maximum is not None
        minimum = torch.tensor(
            self._minimum, dtype=torch.float32, device=x.device
        )
        maximum = torch.tensor(
            self._maximum, dtype=torch.float32, device=x.device
        )
        return ((maximum - minimum) * x) + minimum

[docs]    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        """Returns scaling parameters.

        Args:
            deep: flag to deeply copy objects.

        Returns:
            `maximum` and `minimum`.

        """
        if self._maximum is not None:
            maximum = self._maximum.copy() if deep else self._maximum
        else:
            maximum = None

        if self._minimum is not None:
            minimum = self._minimum.copy() if deep else self._minimum
        else:
            minimum = None

        return {"maximum": maximum, "minimum": minimum}


[docs]class StandardScaler(Scaler):
    r"""Standardization preprocessing.

    .. math::

        x' = (x - \mu) / \sigma

    .. code-block:: python

        from d3rlpy.dataset import MDPDataset
        from d3rlpy.algos import CQL

        dataset = MDPDataset(observations, actions, rewards, terminals)

        # initialize algorithm with StandardScaler
        cql = CQL(scaler='standard')

        # scaler is initialized from the given episodes
        cql.fit(dataset.episodes)

    You can initialize with :class:`d3rlpy.dataset.MDPDataset` object or
    manually.

    .. code-block:: python

        from d3rlpy.preprocessing import StandardScaler

        # initialize with dataset
        scaler = StandardScaler(dataset)

        # initialize manually
        mean = observations.mean(axis=0)
        std = observations.std(axis=0)
        scaler = StandardScaler(mean=mean, std=std)

        cql = CQL(scaler=scaler)

    Args:
        dataset (d3rlpy.dataset.MDPDataset): dataset object.
        mean (numpy.ndarray): mean values at each entry.
        std (numpy.ndarray): standard deviation at each entry.

    """

    TYPE = "standard"
    _mean: Optional[np.ndarray]
    _std: Optional[np.ndarray]

    def __init__(
        self,
        dataset: Optional[MDPDataset] = None,
        mean: Optional[np.ndarray] = None,
        std: Optional[np.ndarray] = None,
    ):
        self._mean = None
        self._std = None
        if dataset:
            self.fit(dataset.episodes)
        elif mean is not None and std is not None:
            self._mean = np.asarray(mean)
            self._std = np.asarray(std)

[docs]    def fit(self, episodes: List[Episode]) -> None:
        """Fits mean and standard deviation from list of episodes.

        Args:
            episodes: list of episodes.

        """
        if self._mean is not None and self._std is not None:
            return

        # compute mean
        total_sum = np.zeros(episodes[0].get_observation_shape())
        total_count = 0
        for e in episodes:
            observations = np.asarray(e.observations)
            total_sum += observations.sum(axis=0)
            total_count += observations.shape[0]
        mean = total_sum / total_count

        # compute stdandard deviation
        total_sqsum = np.zeros(episodes[0].get_observation_shape())
        expanded_mean = mean.reshape((1,) + mean.shape)
        for e in episodes:
            observations = np.asarray(e.observations)
            total_sqsum += ((observations - expanded_mean) ** 2).sum(axis=0)
        std = np.sqrt(total_sqsum / total_count)

        self._mean = mean.reshape((1,) + mean.shape)
        self._std = std.reshape((1,) + std.shape)

[docs]    def transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns standardized observation tensor.

        Args:
            x: observation tensor.

        Returns:
            standardized observation tensor.

        """
        assert self._mean is not None and self._std is not None
        mean = torch.tensor(self._mean, dtype=torch.float32, device=x.device)
        std = torch.tensor(self._std, dtype=torch.float32, device=x.device)
        return (x - mean) / std

[docs]    def reverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        """Returns reversely transformed observation tensor.

        Args:
            x: standardized observation tensor.

        Returns:
            unstandardized observation tensor.

        """
        assert self._mean is not None and self._std is not None
        mean = torch.tensor(self._mean, dtype=torch.float32, device=x.device)
        std = torch.tensor(self._std, dtype=torch.float32, device=x.device)
        return (std * x) + mean

[docs]    def get_params(self, deep: bool = False) -> Dict[str, Any]:
        """Returns scaling parameters.

        Args:
            deep: flag to deeply copy objects.

        Returns:
            `mean` and `std`.

        """
        if self._mean is not None:
            mean = self._mean.copy() if deep else self._mean
        else:
            mean = None

        if self._std is not None:
            std = self._std.copy() if deep else self._std
        else:
            std = None

        return {"mean": mean, "std": std}


SCALER_LIST: Dict[str, Type[Scaler]] = {}


def register_scaler(cls: Type[Scaler]) -> None:
    """Registers scaler class.

    Args:
        cls: scaler class inheriting ``Scaler``.

    """
    is_registered = cls.TYPE in SCALER_LIST
    assert not is_registered, "%s seems to be already registered" % cls.TYPE
    SCALER_LIST[cls.TYPE] = cls


def create_scaler(name: str, **kwargs: Any) -> Scaler:
    """Returns registered scaler object.

    Args:
        name: regsitered scaler type name.
        kwargs: scaler arguments.

    Returns:
        scaler object.

    """
    assert name in SCALER_LIST, "%s seems not to be registered." % name
    scaler = SCALER_LIST[name](**kwargs)  # type: ignore
    assert isinstance(scaler, Scaler)
    return scaler


register_scaler(PixelScaler)
register_scaler(MinMaxScaler)
register_scaler(StandardScaler)