policy

This module contains the CategoricalPolicy implementation and the multi_layer_perceptron helper.

CategoricalPolicy

A categorical policy parameterized by a neural network.

Source code in src/behavior_generation_lecture_python/mdp/policy.py
from typing import Any, List, Optional

import torch
from torch.distributions import Categorical

# multi_layer_perceptron is defined in this module, see below.


class CategoricalPolicy:
    """A categorical policy parameterized by a neural network."""

    def __init__(
        self, sizes: List[int], actions: List[Any], seed: Optional[int] = None
    ) -> None:
        """Initialize the categorical policy.

        Args:
            sizes: List of layer sizes for the MLP; the last entry must equal the number of actions.
            actions: List of available actions.
            seed: Random seed for reproducibility (default: None).
        """
        assert sizes[-1] == len(actions)
        if seed is not None:
            torch.manual_seed(seed)
        self.net = multi_layer_perceptron(sizes=sizes)
        self.actions = actions
        self._actions_tensor = torch.tensor(actions, dtype=torch.long).view(
            len(actions), -1
        )

    def _get_distribution(self, state: torch.Tensor) -> Categorical:
        """Calls the model and returns a categorical distribution over the actions.

        Args:
            state: The current state tensor.

        Returns:
            A categorical distribution over actions.
        """
        logits = self.net(state)
        return Categorical(logits=logits)

    def get_action(self, state: torch.Tensor, deterministic: bool = False) -> Any:
        """Returns an action sample for the given state.

        Args:
            state: The current state tensor.
            deterministic: If True, return the most likely action.

        Returns:
            The selected action.
        """
        policy = self._get_distribution(state)
        if deterministic:
            return self.actions[policy.mode.item()]
        return self.actions[policy.sample().item()]

    def get_log_prob(self, states: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
        """Returns the log-probability for taking the action, when being in the given state.

        Args:
            states: Batch of state tensors.
            actions: Batch of action tensors.

        Returns:
            Log-probabilities of the actions.
        """
        return self._get_distribution(states).log_prob(
            self._get_action_id_from_action(actions)
        )

    def _get_action_id_from_action(self, actions: torch.Tensor) -> torch.Tensor:
        """Returns the indices of the passed actions in self.actions.

        Args:
            actions: Batch of action tensors.

        Returns:
            Tensor of action indices.
        """
        reshaped_actions = actions.unsqueeze(1).expand(
            -1, self._actions_tensor.size(0), -1
        )
        reshaped_actions_tensor = self._actions_tensor.unsqueeze(0).expand(
            actions.size(0), -1, -1
        )
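        # torch.all(..., dim=-1) flags exact row matches; torch.where returns
        # (batch_index, action_index) pairs, of which [1] keeps the action indices.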
        return torch.where(
            torch.all(reshaped_actions == reshaped_actions_tensor, dim=-1)
        )[1]
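
A minimal usage sketch (the layer sizes and action values are illustrative, not taken from the module):

policy = CategoricalPolicy(
    sizes=[4, 32, 3],    # the last layer size must equal len(actions)
    actions=[-1, 0, 1],  # e.g. three discrete steering commands
    seed=42,
)
state = torch.zeros(4)
action = policy.get_action(state)  # sampled from the categorical distribution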

__init__(sizes, actions, seed=None)

Initialize the categorical policy.

Parameters:

    sizes (List[int]): List of layer sizes for the MLP; the last entry must equal the number of actions. Required.
    actions (List[Any]): List of available actions. Required.
    seed (Optional[int]): Random seed for reproducibility. Default: None.

get_action(state, deterministic=False)

Returns an action sample for the given state.

Parameters:

    state (Tensor): The current state tensor. Required.
    deterministic (bool): If True, return the most likely action. Default: False.

Returns:

    Any: The selected action.

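A short sketch contrasting the two modes, reusing the illustrative policy from above; with deterministic=True the mode of the distribution (the arg-max of the logits) is returned:

state = torch.zeros(4)
sampled = {policy.get_action(state) for _ in range(100)}  # typically several distinct actions
greedy = policy.get_action(state, deterministic=True)     # always the same action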

get_log_prob(states, actions)

Returns the log-probabilities of taking the given actions in the given states.

Parameters:

    states (Tensor): Batch of state tensors. Required.
    actions (Tensor): Batch of action tensors. Required.

Returns:

    Tensor: Log-probabilities of the actions.

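A sketch of a typical policy-gradient use, again with illustrative shapes and placeholder returns; each row of actions must exactly match one of the stored action values:

states = torch.zeros(5, 4)                          # batch of 5 four-dimensional states
actions = torch.tensor([[1], [0], [-1], [1], [0]])  # taken actions, shape (batch, action_dim)
returns = torch.ones(5)                             # placeholder returns
log_probs = policy.get_log_prob(states, actions)    # shape (5,)
loss = -(log_probs * returns).mean()                # REINFORCE-style surrogate loss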

multi_layer_perceptron(sizes, activation=nn.ReLU, output_activation=nn.Identity)

Returns a multi-layer perceptron built from the given layer sizes.

Source code in src/behavior_generation_lecture_python/mdp/policy.py
from typing import List, Type

from torch import nn


def multi_layer_perceptron(
    sizes: List[int],
    activation: Type[nn.Module] = nn.ReLU,
    output_activation: Type[nn.Module] = nn.Identity,
) -> nn.Sequential:
    """Returns a multi-layer perceptron with the given layer sizes."""
    mlp = nn.Sequential()
    for i in range(len(sizes) - 1):
        mlp.append(nn.Linear(sizes[i], sizes[i + 1]))
        # Hidden layers are followed by `activation`; the output layer by
        # `output_activation` (identity by default, i.e. raw logits).
        if i < len(sizes) - 2:
            mlp.append(activation())
        else:
            mlp.append(output_activation())
    return mlp
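
A quick sketch of the resulting structure (output shown for illustration):

mlp = multi_layer_perceptron([4, 32, 3])
print(mlp)
# Sequential(
#   (0): Linear(in_features=4, out_features=32, bias=True)
#   (1): ReLU()
#   (2): Linear(in_features=32, out_features=3, bias=True)
#   (3): Identity()
# )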