Examples
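
The examples below show how to train a Q-Learning agent on a GridWorld, wrap it with SmartStart, run repeated experiments in parallel, and plot the saved results.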

Q-Learning
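
Train a tabular Q-Learning agent with ε-greedy exploration on the easy GridWorld, render the training live, and plot the average reward and the number of steps per episode.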

import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.environments.gridworld import GridWorld
from smartstart.environments.gridworldvisualizer import GridWorldVisualizer
from smartstart.utilities.plot import plot_summary, show_plot, \
    mean_reward_episode, steps_episode

# Reset the seed for random number generation
random.seed()
np.random.seed()

# Create environment and visualizer
grid_world = GridWorld.generate(GridWorld.EASY)
visualizer = GridWorldVisualizer(grid_world)
visualizer.add_visualizer(GridWorldVisualizer.LIVE_AGENT,
                          GridWorldVisualizer.CONSOLE,
                          GridWorldVisualizer.VALUE_FUNCTION,
                          GridWorldVisualizer.DENSITY)

# Initialize the agent; see the QLearning class for the available parameters
agent = QLearning(grid_world,
                  alpha=0.1,
                  epsilon=0.05,
                  num_episodes=500,
                  max_steps=1000,
                  exploration=QLearning.E_GREEDY)

# Train the agent; the returned summary contains the training data
summary = agent.train(render=True,
                      render_episode=False,
                      print_results=True)

# Plot results
plot_summary(summary, mean_reward_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Average Reward per Episode")
plot_summary(summary, steps_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Steps per Episode")
show_plot()

SmartStart
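
This example is identical to the one above except for a single change: instead of constructing QLearning directly, the agent is created with generate_smartstart_object, which wraps the algorithm with SmartStart exploration.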

import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.smartexploration.smartexploration import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.environments.gridworldvisualizer import GridWorldVisualizer
from smartstart.utilities.plot import plot_summary, show_plot, \
    mean_reward_episode, steps_episode

# Reset the seed for random number generation
random.seed()
np.random.seed()

# Create environment and visualizer
grid_world = GridWorld.generate(GridWorld.EASY)
visualizer = GridWorldVisualizer(grid_world)
visualizer.add_visualizer(GridWorldVisualizer.LIVE_AGENT,
                          GridWorldVisualizer.CONSOLE,
                          GridWorldVisualizer.VALUE_FUNCTION,
                          GridWorldVisualizer.DENSITY)

# Initialize the agent; see the class for the available parameters
agent = generate_smartstart_object(QLearning,
                                   env=grid_world,
                                   alpha=0.1,
                                   epsilon=0.05,
                                   num_episodes=500,
                                   max_steps=1000,
                                   exploration=QLearning.E_GREEDY)

# Train the agent; the returned summary contains the training data
summary = agent.train(render=True,
                      render_episode=False,
                      print_results=True)

# Plot results
plot_summary(summary, mean_reward_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Average Reward per Episode")
plot_summary(summary, steps_episode, ma_window=5,
             title="Easy GridWorld Q-Learning Steps per Episode")
show_plot()

Experimenter
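
Run Q-Learning on the medium GridWorld five times with SmartStart and five times without, in parallel, and save a summary of every run to a data folder next to the script.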

import random

import numpy as np

from smartstart.algorithms.qlearning import QLearning
from smartstart.smartexploration.smartexploration import generate_smartstart_object
from smartstart.environments.gridworld import GridWorld
from smartstart.utilities.experimenter import run_experiment
from smartstart.utilities.utilities import get_data_directory

# Get the path to the data folder in the same directory as this file.
# If the folder does not exist, it will be created
summary_dir = get_data_directory(__file__)


# Define the task function for the experiment
def task(params):
    # Reset the seed for random number generation
    random.seed()
    np.random.seed()

    # Create environment
    env = GridWorld.generate(GridWorld.MEDIUM)

    # Here we use a dict to define the parameters; this makes it easy to
    # ensure both experiments use the same parameters
    kwargs = {
        'alpha': 0.1,
        'epsilon': 0.05,
        'num_episodes': 1000,
        'max_steps': 2500,
        'exploration': QLearning.E_GREEDY
    }

    # Initialize the agent; check params to see whether SmartStart should be used
    if params['use_smart_start']:
        agent = generate_smartstart_object(QLearning, env, **kwargs)
    else:
        agent = QLearning(env, **kwargs)

    # Train the agent; the returned summary contains the training data. Make
    # sure rendering and printing are set to False when multiple experiments
    # run in parallel, otherwise they will consume a lot of computing power.
    summary = agent.train(render=False,
                          render_episode=False,
                          print_results=False)

    # Save the summary. The post_fix parameter gives each run a unique
    # file name.
    summary.save(directory=summary_dir, post_fix=params['run'])


# Define a parameter grid that can be supplied to the run_experiment method
param_grid = {
    'task': task,
    'num_exp': 5,
    'use_smart_start': [True, False]
}

run_experiment(param_grid, n_processes=-1)
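
The grid above expands to 2 × 5 = 10 invocations of task. As a rough mental model (a minimal sketch of the presumed expansion, not run_experiment's actual implementation): every list-valued entry is swept, each combination is repeated num_exp times, and the repetition index reaches the task function as params['run'].

import itertools

# Hypothetical sketch of the grid expansion; the real work happens inside
# smartstart.utilities.experimenter.run_experiment. The 'task' entry is
# omitted for brevity.
grid = {
    'num_exp': 5,
    'use_smart_start': [True, False]
}

num_exp = grid.pop('num_exp')
sweeps = {k: v for k, v in grid.items() if isinstance(v, list)}

tasks = []
for values in itertools.product(*sweeps.values()):
    for run in range(num_exp):
        params = dict(zip(sweeps.keys(), values))
        params['run'] = run
        tasks.append(params)

print(len(tasks))  # 10 task invocations
print(tasks[0])    # {'use_smart_start': True, 'run': 0}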

Plotting Results
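
Load the summaries saved by the experimenter and compare the two agents in one figure per metric: the mean reward with standard deviation per episode, and the number of steps per episode. The plots are saved to an img folder, with the title used as the filename.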

import os

from smartstart.utilities.plot import plot_summary, \
    mean_reward_std_episode, steps_episode, show_plot
from smartstart.utilities.utilities import get_data_directory

# Get the directory where the summaries are saved. Since it is the same data
# folder the experimenter used, we can use the get_data_directory method again
summary_dir = get_data_directory(__file__)

# Define the list of summary files to plot
files = [os.path.join(summary_dir, "QLearning_GridWorldMedium"),
         os.path.join(summary_dir, "SmartStart_QLearning_GridWorldMedium")]

legend = ["Q-Learning", "SmartStart Q-Learning"]
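
# Note: these base names assume the summaries were saved by the experimenter
# example above; judging by the names, the saved file combines the algorithm
# and the environment (plus the post_fix per run), so check the data folder
# for the exact names on disk.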

# We are going to save the plots in the img folder
output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'img')

# Plot the average reward and standard deviation per episode.
# When an output directory is supplied, the plots are not rendered with a
# title; instead, the title is used as the filename for the plot.
plot_summary(files,
             mean_reward_std_episode,
             ma_window=5,
             title="Q-Learning GridWorldMedium Average Reward per Episode",
             legend=legend,
             output_dir=output_dir)

plot_summary(files,
             steps_episode,
             ma_window=5,
             title="Q-Learning GridWorldMedium Steps per Episode",
             legend=legend,
             format=".png",
             output_dir=output_dir)

show_plot()