Q-Learning
In [1]:
Copied!
from behavior_generation_lecture_python.mdp.mdp import (
MDP,
GridMDP,
expected_utility_of_action,
derive_policy,
q_learning,
GRID_MDP_DICT,
HIGHWAY_MDP_DICT,
LC_RIGHT_ACTION,
STAY_IN_LANE_ACTION,
)
from behavior_generation_lecture_python.utils.grid_plotting import (
make_plot_grid_step_function,
make_plot_policy_step_function,
)
from behavior_generation_lecture_python.mdp.mdp import (
MDP,
GridMDP,
expected_utility_of_action,
derive_policy,
q_learning,
GRID_MDP_DICT,
HIGHWAY_MDP_DICT,
LC_RIGHT_ACTION,
STAY_IN_LANE_ACTION,
)
from behavior_generation_lecture_python.utils.grid_plotting import (
make_plot_grid_step_function,
make_plot_policy_step_function,
)
TOY EXAMPLE
In [2]:
Copied!
# Build the toy grid MDP from the parameter dictionary imported from the
# lecture package (constructed once; the exported page repeated this line).
grid_mdp = GridMDP(**GRID_MDP_DICT)
In [3]:
Copied!
# Run Q-learning on the toy grid once and keep the full history
# (return_history=True) so later cells can plot individual iterations.
# NOTE(review): with epsilon=0.1 the run presumably involves random
# exploration, so results may differ between executions -- consider seeding
# once it is confirmed which RNG q_learning uses.
computed_utility_history = q_learning(
    mdp=grid_mdp,
    alpha=0.1,
    epsilon=0.1,
    iterations=10_000,
    return_history=True,
)
In [4]:
Copied!
%matplotlib inline
plot_grid_step = make_plot_grid_step_function(
columns=4, rows=3, U_over_time=computed_utility_history
)
%matplotlib inline
plot_grid_step = make_plot_grid_step_function(
columns=4, rows=3, U_over_time=computed_utility_history
)
In [5]:
Copied!
# Switch for the interactive slider widget: set it to True if you are running
# the notebook locally.
# NOTE(review): the name suggests "True while building with mkdocs", which is
# the opposite of how the flag is used here -- confirm before renaming, since
# later cells read the same flag.
mkdocs_flag = False
if mkdocs_flag:
    # Imported only when the flag is on, so the widget packages are not
    # needed for the default (flag off) run.
    import ipywidgets
    from IPython.display import display

    # One slider position per entry of the utility history.
    iteration_slider = ipywidgets.IntSlider(
        min=0, max=len(computed_utility_history) - 1, step=1, value=0
    )
    w = ipywidgets.interactive(plot_grid_step, iteration=iteration_slider)
    display(w)
In [6]:
Copied!
# Static snapshot: plot history entry 1000 of the toy example (the same
# argument the slider would pass as `iteration`).
plot_grid_step(1000)
HIGHWAY EXAMPLE
In [7]:
Copied!
# Switch for the experiment below: we will change this to True later on, to
# see the effect.
modify_lc_right_transitions = False
if modify_lc_right_transitions:
    # Overwrites the LC_RIGHT_ACTION entry in place.  HIGHWAY_MDP_DICT is the
    # object imported from the package, so the change stays in effect for the
    # rest of the session; switching the flag back off does not undo it.
    # Presumably each tuple is (probability, action actually executed) --
    # TODO confirm against GridMDP.
    HIGHWAY_MDP_DICT["transition_probabilities_per_action"][LC_RIGHT_ACTION] = [
        (0.4, LC_RIGHT_ACTION),
        (0.6, STAY_IN_LANE_ACTION),
    ]
In [8]:
Copied!
# Build the highway MDP from HIGHWAY_MDP_DICT in its current state, i.e.
# including any modification made to the dictionary before this line runs.
highway_mdp = GridMDP(**HIGHWAY_MDP_DICT)
In [9]:
Copied!
# Run Q-learning on the highway MDP once, with the same hyperparameters as the
# toy example, keeping the full history (return_history=True) for plotting.
# NOTE(review): presumably stochastic because of epsilon=0.1 -- consider
# seeding once it is confirmed which RNG q_learning uses.
utility_history_highway = q_learning(
    mdp=highway_mdp,
    alpha=0.1,
    epsilon=0.1,
    iterations=10_000,
    return_history=True,
)
In [10]:
Copied!
# Callable that plots the 10x4 highway grid for one entry of the highway
# utility history.
plot_grid_step_highway = make_plot_grid_step_function(
    columns=10, rows=4, U_over_time=utility_history_highway
)
In [11]:
Copied!
# Interactive slider over the highway utility history.  It only runs when
# mkdocs_flag is on; ipywidgets and display are imported under the same flag
# in the toy-example widget cell.
if mkdocs_flag:
    # One slider position per entry of the highway utility history.
    iteration_slider = ipywidgets.IntSlider(
        min=0, max=len(utility_history_highway) - 1, step=1, value=0
    )
    w = ipywidgets.interactive(plot_grid_step_highway, iteration=iteration_slider)
    display(w)
In [12]:
Copied!
# Static snapshot: plot history entry 1000 of the highway example.
plot_grid_step_highway(1000)
In [13]:
Copied!
# Derive one policy per entry of the highway utility history, so policy_array
# has the same length and ordering as utility_history_highway.
policy_array = [
    derive_policy(highway_mdp, utility) for utility in utility_history_highway
]
In [14]:
Copied!
# Callable that plots the 10x4 highway grid for one entry of the derived
# policies.
plot_policy_step_highway = make_plot_policy_step_function(
    columns=10, rows=4, policy_over_time=policy_array
)
In [15]:
Copied!
# Interactive slider over the derived policies.  It only runs when mkdocs_flag
# is on; ipywidgets and display are imported under the same flag in the
# toy-example widget cell.
if mkdocs_flag:
    # Bound the slider by the sequence that is actually plotted.  policy_array
    # holds one policy per utility-history entry, so the range is unchanged.
    iteration_slider = ipywidgets.IntSlider(
        min=0, max=len(policy_array) - 1, step=1, value=0
    )
    w = ipywidgets.interactive(plot_policy_step_highway, iteration=iteration_slider)
    display(w)
In [16]:
Copied!
# Static snapshot: plot the policy derived from history entry 1000.
plot_policy_step_highway(1000)