Source code for d3rlpy.metrics.comparer

import numpy as np

from .scorer import _make_batches


def compare_continuous_action_diff(base_algo, window_size=1024):
    """ Returns scorer function of action difference between algorithms.

    This metrics suggests how different the two algorithms are in continuous
    action-space.
    If the algorithm to compare with is near-optimal, the small action
    difference would be better.

    .. math::

        \\mathbb{E}_{s_t \\sim D}
            [(\\pi_{\\phi_1}(s_t) - \\pi_{\\phi_2}(s_t))^2]

    .. code-block:: python

        from d3rlpy.algos import CQL
        from d3rlpy.metrics.comparer import compare_continuous_action_diff

        cql1 = CQL()
        cql2 = CQL()

        scorer = compare_continuous_action_diff(cql1)

        squared_action_diff = scorer(cql2, ...)

    Args:
        base_algo (d3rlpy.algos.base.AlgoBase): algorithm to compare with.
        window_size (int): mini-batch size to compute.

    Returns:
        callable: scorer function taking ``(algo, episodes)`` and returning
            the negated mean of the per-sample squared action difference.

    """
    def scorer(algo, episodes):
        # One entry per evaluated sample: squared difference summed over
        # axis 1 of the predicted actions.
        squared_diffs = []
        for episode in episodes:
            # TODO: handle different n_frames
            # Batches are built with ``algo.n_frames`` only; the very same
            # observations are then fed to ``base_algo`` as well.
            batches = _make_batches(episode, window_size, algo.n_frames)
            for batch in batches:
                reference = base_algo.predict(batch.observations)
                predicted = algo.predict(batch.observations)
                delta = predicted - reference
                squared_diffs.extend((delta ** 2).sum(axis=1).tolist())

        # smaller is better, sometimes?
        # The sign is flipped so that a smaller difference gives a larger
        # score.
        return -np.mean(squared_diffs)

    return scorer
def compare_discrete_action_match(base_algo, window_size=1024):
    """ Returns scorer function of action matches between algorithms.

    This metrics suggests how different the two algorithms are in discrete
    action-space.
    If the algorithm to compare with is near-optimal, a higher ratio of
    matching actions would be better.

    .. math::

        \\mathbb{E}_{s_t \\sim D} [\\parallel
            \\{\\text{argmax}_a Q_{\\theta_1}(s_t, a)
            = \\text{argmax}_a Q_{\\theta_2}(s_t, a)\\}]

    .. code-block:: python

        from d3rlpy.algos import DQN
        from d3rlpy.metrics.comparer import compare_discrete_action_match

        dqn1 = DQN()
        dqn2 = DQN()

        scorer = compare_discrete_action_match(dqn1)

        ratio_of_identical_actions = scorer(dqn2, ...)

    Args:
        base_algo (d3rlpy.algos.base.AlgoBase): algorithm to compare with.
        window_size (int): mini-batch size to compute.

    Returns:
        callable: scorer function taking ``(algo, episodes)`` and returning
            the mean of the element-wise action equality between the two
            algorithms.

    """
    def scorer(algo, episodes):
        # One boolean per evaluated sample: True where both algorithms
        # predicted the same action.
        total_matches = []
        for episode in episodes:
            # TODO: handle different n_frames
            # Batches are built with ``algo.n_frames`` only; the very same
            # observations are then fed to ``base_algo`` as well.
            for batch in _make_batches(episode, window_size, algo.n_frames):
                base_actions = base_algo.predict(batch.observations)
                actions = algo.predict(batch.observations)
                total_matches.extend((base_actions == actions).tolist())

        # Mean of booleans == fraction of samples with identical actions.
        return np.mean(total_matches)

    return scorer