Examples¶
Q-Learning¶
Train a tabular Q-Learning agent on the easy grid world with live rendering, then plot the results.
import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.environments.gridworld import GridWorld
from smartstart.environments.gridworldvisualizer import GridWorldVisualizer
from smartstart.utilities.plot import plot_summary, show_plot, \
    mean_reward_episode, steps_episode

# Reset the seed for random number generation
random.seed()
np.random.seed()

# Create the environment and visualizer
grid_world = GridWorld.generate(GridWorld.EASY)
visualizer = GridWorldVisualizer(grid_world)
visualizer.add_visualizer(GridWorldVisualizer.LIVE_AGENT,
                          GridWorldVisualizer.CONSOLE,
                          GridWorldVisualizer.VALUE_FUNCTION,
                          GridWorldVisualizer.DENSITY)

# Initialize the agent; see the QLearning class for the available parameters
agent = QLearning(grid_world,
                  alpha=0.1,
                  epsilon=0.05,
                  num_episodes=500,
                  max_steps=1000,
                  exploration=QLearning.E_GREEDY)

# Train the agent; the returned summary contains the training data
summary = agent.train(render=True,
                      render_episode=False,
                      print_results=True)

# Plot the results
plot_summary(summary, mean_reward_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Average Reward per Episode")
plot_summary(summary, steps_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Steps per Episode")
show_plot()
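The summary returned by train() can also be written to disk for later analysis. A minimal sketch, using the Summary.save method and get_data_directory helper that appear in the Experimenter and Plotting Results examples below; the post_fix value here is only an illustrative label:

from smartstart.utilities.utilities import get_data_directory

# Save the summary next to this script; the Experimenter example below
# uses the same pattern with the run index as post_fix
summary_dir = get_data_directory(__file__)
summary.save(directory=summary_dir, post_fix="0")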
SmartStart¶
The same experiment, but with the agent wrapped by generate_smartstart_object to enable SmartStart exploration.
import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.smartexploration.smartexploration import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.environments.gridworldvisualizer import GridWorldVisualizer
from smartstart.utilities.plot import plot_summary, show_plot, \
    mean_reward_episode, steps_episode

# Reset the seed for random number generation
random.seed()
np.random.seed()

# Create the environment and visualizer
grid_world = GridWorld.generate(GridWorld.EASY)
visualizer = GridWorldVisualizer(grid_world)
visualizer.add_visualizer(GridWorldVisualizer.LIVE_AGENT,
                          GridWorldVisualizer.CONSOLE,
                          GridWorldVisualizer.VALUE_FUNCTION,
                          GridWorldVisualizer.DENSITY)

# Initialize the agent; see the QLearning class for the available parameters
agent = generate_smartstart_object(QLearning,
                                   env=grid_world,
                                   alpha=0.1,
                                   epsilon=0.05,
                                   num_episodes=500,
                                   max_steps=1000,
                                   exploration=QLearning.E_GREEDY)

# Train the agent; the returned summary contains the training data
summary = agent.train(render=True,
                      render_episode=False,
                      print_results=True)

# Plot the results
plot_summary(summary, mean_reward_episode, ma_window=5,
             title="Easy GridWorld SmartStart Q-Learning Average Reward per Episode")
plot_summary(summary, steps_episode, ma_window=5,
             title="Easy GridWorld SmartStart Q-Learning Steps per Episode")
show_plot()
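Note that both examples reseed the random number generators from system entropy, so no two runs are identical. For a reproducible comparison between plain Q-Learning and SmartStart, seed both generators with a fixed value instead (a minimal sketch; any fixed integer works):

random.seed(0)
np.random.seed(0)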
Experimenter¶
Run Q-Learning with and without SmartStart on the medium grid world, several times each in parallel, and save the training summaries to disk.
import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.smartexploration.smartexploration import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.utilities.experimenter import run_experiment
from smartstart.utilities.utilities import get_data_directory

# Get the path to the data folder in the same directory as this file.
# If the folder does not exist it will be created.
summary_dir = get_data_directory(__file__)


# Define the task function for the experiment
def task(params):
    # Reset the seed for random number generation
    random.seed()
    np.random.seed()

    # Create the environment
    env = GridWorld.generate(GridWorld.MEDIUM)

    # A dict holds the parameters, which makes it easy to ensure both
    # experiments use the same settings
    kwargs = {
        'alpha': 0.1,
        'epsilon': 0.05,
        'num_episodes': 1000,
        'max_steps': 2500,
        'exploration': QLearning.E_GREEDY
    }

    # Initialize the agent; params determines whether SmartStart is used
    if params['use_smart_start']:
        agent = generate_smartstart_object(QLearning, env, **kwargs)
    else:
        agent = QLearning(env, **kwargs)

    # Train the agent; the summary contains the training data. Keep
    # rendering and printing disabled when multiple experiments run in
    # parallel, otherwise they consume a lot of computing power.
    summary = agent.train(render=False,
                          render_episode=False,
                          print_results=False)

    # Save the summary. The post_fix parameter is used to create a unique
    # file name for each run.
    summary.save(directory=summary_dir, post_fix=params['run'])


# Define the parameter grid that is supplied to the run_experiment function
param_grid = {
    'task': task,
    'num_exp': 5,
    'use_smart_start': [True, False]
}

run_experiment(param_grid, n_processes=-1)
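run_experiment calls task once per parameter combination. The exact expansion is an assumption inferred from the keys task reads, but it behaves roughly like the loop below: each value of use_smart_start is repeated num_exp times, params['run'] holds the repetition index, and n_processes=-1 presumably spreads the ten runs over all available cores.

# Hypothetical sequential equivalent of the grid expansion above
# (a sketch, not the library's actual implementation)
for use_smart_start in [True, False]:
    for run in range(5):
        task({'use_smart_start': use_smart_start, 'run': run})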
Plotting Results¶
Load the summaries saved by the Experimenter example and write comparison plots to an img folder.
import os

from smartstart.utilities.plot import plot_summary, \
    mean_reward_std_episode, steps_episode, show_plot
from smartstart.utilities.utilities import get_data_directory

# Get the directory where the summaries are saved. Since it is the same
# folder the Experimenter example writes to, get_data_directory can be
# used again.
summary_dir = get_data_directory(__file__)

# Define the list of files to plot
files = [os.path.join(summary_dir, "QLearning_GridWorldMedium"),
         os.path.join(summary_dir, "SmartStart_QLearning_GridWorldMedium")]
legend = ["Q-Learning", "SmartStart Q-Learning"]

# The plots are saved in the img folder
output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'img')

# Plot the average reward and standard deviation per episode. When an
# output directory is supplied the plots are not rendered with a title;
# the title is used as the file name for the plot instead.
plot_summary(files,
             mean_reward_std_episode,
             ma_window=5,
             title="Q-Learning GridWorldMedium Average Reward per Episode",
             legend=legend,
             output_dir=output_dir)

plot_summary(files,
             steps_episode,
             ma_window=5,
             title="Q-Learning GridWorldMedium Steps per Episode",
             legend=legend,
             format=".png",
             output_dir=output_dir)

show_plot()
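The entries in files are base names without the per-run post_fix or a file extension; plot_summary presumably collects the five runs saved per configuration by the Experimenter example so that mean_reward_std_episode can average across them (an assumption based on the naming used above). To check which summary files actually exist before plotting:

import glob

# List the saved summaries for each base name (assumes the Experimenter
# example has been run; the exact extension depends on the library)
for base in files:
    print(sorted(glob.glob(base + "*")))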