# Source code for d3rlpy.metrics.comparer

import numpy as np

from .scorer import _make_batches


def compare_continuous_action_diff(base_algo, window_size=1024):
    """ Returns scorer function of action difference between algorithms.

    This metric suggests how different the two algorithms are in continuous
    action-space.
    If the algorithm to compare with is near-optimal, a smaller action
    difference would be better.

    .. math::

        \\mathbb{E}_{s_t \\sim D}
            [(\\pi_{\\phi_1}(s_t) - \\pi_{\\phi_2}(s_t))^2]

    The returned scorer yields the negated value of this expectation, so a
    score closer to zero means the two algorithms choose closer actions.

    .. code-block:: python

        from d3rlpy.algos import CQL
        from d3rlpy.metrics.comparer import compare_continuous_action_diff

        cql1 = CQL()
        cql2 = CQL()

        scorer = compare_continuous_action_diff(cql1)

        negative_squared_action_diff = scorer(cql2, ...)

    Args:
        base_algo (d3rlpy.algos.base.AlgoBase): algorithm to compare with.
        window_size (int): mini-batch size to compute.

    Returns:
        callable: scorer function taking ``(algo, episodes)`` and returning
            the negated mean of the per-sample squared action differences.

    """
    def scorer(algo, episodes):
        total_diffs = []
        for episode in episodes:
            # TODO: handle different n_frames
            # NOTE: batches are built with ``algo.n_frames`` only, and the
            # same observations are fed to both algorithms.
            for batch in _make_batches(episode, window_size, algo.n_frames):
                base_actions = base_algo.predict(batch.observations)
                actions = algo.predict(batch.observations)
                # squared difference summed over axis 1, one value per row
                diff = ((actions - base_actions)**2).sum(axis=1)
                total_diffs.extend(diff.tolist())
        # Negated so that a larger score means a smaller difference.
        # With no collected samples, np.mean of an empty list yields NaN.
        return -np.mean(total_diffs)

    return scorer


def compare_discrete_action_match(base_algo, window_size=1024):
    """ Returns scorer function of action matches between algorithms.

    This metric suggests how different the two algorithms are in discrete
    action-space.
    If the algorithm to compare with is near-optimal, a higher ratio of
    matching actions would be better.

    .. math::

        \\mathbb{E}_{s_t \\sim D} [\\mathbb{1}
            \\{\\text{argmax}_a Q_{\\theta_1}(s_t, a)
            = \\text{argmax}_a Q_{\\theta_2}(s_t, a)\\}]

    .. code-block:: python

        from d3rlpy.algos import DQN
        from d3rlpy.metrics.comparer import compare_discrete_action_match

        dqn1 = DQN()
        dqn2 = DQN()

        scorer = compare_discrete_action_match(dqn1)

        ratio_of_identical_actions = scorer(dqn2, ...)

    Args:
        base_algo (d3rlpy.algos.base.AlgoBase): algorithm to compare with.
        window_size (int): mini-batch size to compute.

    Returns:
        callable: scorer function taking ``(algo, episodes)`` and returning
            the mean of the element-wise action equality flags.

    """
    def scorer(algo, episodes):
        total_matches = []
        for episode in episodes:
            # TODO: handle different n_frames
            # NOTE: batches are built with ``algo.n_frames`` only, and the
            # same observations are fed to both algorithms.
            for batch in _make_batches(episode, window_size, algo.n_frames):
                base_actions = base_algo.predict(batch.observations)
                actions = algo.predict(batch.observations)
                # element-wise equality of the two predictions
                match = (base_actions == actions).tolist()
                total_matches.extend(match)
        # Mean of booleans, i.e. the fraction of matching actions.
        # With no collected samples, np.mean of an empty list yields NaN.
        return np.mean(total_matches)

    return scorer