Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/garage/tf/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from garage.tf.models.base import Model
from garage.tf.models.cnn_model import CNNModel
from garage.tf.models.cnn_model_max_pooling import CNNModelWithMaxPooling
from garage.tf.models.gaussian_lstm_model import GaussianLSTMModel
from garage.tf.models.gaussian_mlp_model import GaussianMLPModel
from garage.tf.models.gru_model import GRUModel
from garage.tf.models.lstm_model import LSTMModel
Expand All @@ -12,6 +13,6 @@

__all__ = [
'CNNModel', 'CNNModelWithMaxPooling', 'LSTMModel', 'Model',
'GaussianMLPModel', 'GRUModel', 'MLPDuelingModel', 'MLPModel',
'NormalizedInputMLPModel', 'Sequential'
'GaussianLSTMModel', 'GaussianMLPModel', 'GRUModel', 'MLPDuelingModel',
'MLPModel', 'NormalizedInputMLPModel', 'Sequential'
]
234 changes: 234 additions & 0 deletions src/garage/tf/models/gaussian_lstm_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
"""GaussianLSTMModel."""
import numpy as np
import tensorflow as tf

from garage.tf.core.lstm import lstm
from garage.tf.core.parameter import parameter
from garage.tf.distributions import DiagonalGaussian
from garage.tf.models import Model


class GaussianLSTMModel(Model):
    """GaussianLSTMModel.

    Builds the TensorFlow graph for a diagonal-Gaussian output distribution
    whose mean is produced by an LSTM. The log-std is either read off the
    same LSTM output (when ``std_share_network`` is True) or provided by a
    separate trainable parameter tensor.

    Args:
        output_dim (int): Output dimension of the model.
        hidden_dim (int): Hidden dimension for LSTM cell for mean.
        name (str): Model name, also the variable scope.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a tf.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            tf.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            tf.Tensor.
        recurrent_nonlinearity (callable): Activation function for recurrent
            layers. It should return a tf.Tensor. Set it to None to
            maintain a linear activation.
        recurrent_w_init (callable): Initializer function for the weight
            of recurrent layer(s). The function should return a
            tf.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a tf.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            tf.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            tf.Tensor.
        hidden_state_init (callable): Initializer function for the
            initial hidden state. The function should return a tf.Tensor.
        hidden_state_init_trainable (bool): Bool for whether the initial
            hidden state is trainable.
        cell_state_init (callable): Initializer function for the
            initial cell state. The function should return a tf.Tensor.
        cell_state_init_trainable (bool): Bool for whether the initial
            cell state is trainable.
        forget_bias (bool): If True, add 1 to the bias of the forget gate at
            initialization. It's used to reduce the scale of forgetting at the
            beginning of the training.
        learn_std (bool): Is std trainable.
        init_std (float): Initial value for std.
        std_share_network (bool): Boolean for whether mean and std share
            the same network.
        layer_normalization (bool): Bool for using layer normalization or not.
    """

    def __init__(self,
                 output_dim,
                 hidden_dim=32,
                 name=None,
                 hidden_nonlinearity=tf.nn.tanh,
                 hidden_w_init=tf.glorot_uniform_initializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 recurrent_nonlinearity=tf.nn.sigmoid,
                 recurrent_w_init=tf.glorot_uniform_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.glorot_uniform_initializer(),
                 output_b_init=tf.zeros_initializer(),
                 hidden_state_init=tf.zeros_initializer(),
                 hidden_state_init_trainable=False,
                 cell_state_init=tf.zeros_initializer(),
                 cell_state_init_trainable=False,
                 forget_bias=True,
                 learn_std=True,
                 init_std=1.0,
                 std_share_network=False,
                 layer_normalization=False):
        super().__init__(name)
        self._output_dim = output_dim
        self._hidden_dim = hidden_dim
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._recurrent_nonlinearity = recurrent_nonlinearity
        self._recurrent_w_init = recurrent_w_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._hidden_state_init = hidden_state_init
        self._hidden_state_init_trainable = hidden_state_init_trainable
        self._cell_state_init = cell_state_init
        self._cell_state_init_trainable = cell_state_init_trainable
        self._forget_bias = forget_bias
        self._layer_normalization = layer_normalization
        self._learn_std = learn_std
        self._std_share_network = std_share_network
        # The std is stored and learned in log space.
        self._init_std_param = np.log(init_std)
        self._initialize()

    def _initialize(self):
        """Create the LSTM cells and output layers used by _build.

        Kept separate from __init__ so __setstate__ can rebuild the
        (unpicklable) Keras layers after unpickling.
        """
        action_dim = self._output_dim
        # Cell used when mean and std share one network
        # (std_share_network=True).
        self._mean_std_lstm_cell = tf.keras.layers.LSTMCell(
            units=self._hidden_dim,
            activation=self._hidden_nonlinearity,
            kernel_initializer=self._hidden_w_init,
            bias_initializer=self._hidden_b_init,
            recurrent_activation=self._recurrent_nonlinearity,
            recurrent_initializer=self._recurrent_w_init,
            unit_forget_bias=self._forget_bias,
            name='mean_std_lstm_layer')
        # Cell used for the mean alone (std_share_network=False).
        self._mean_lstm_cell = tf.keras.layers.LSTMCell(
            units=self._hidden_dim,
            activation=self._hidden_nonlinearity,
            kernel_initializer=self._hidden_w_init,
            bias_initializer=self._hidden_b_init,
            recurrent_activation=self._recurrent_nonlinearity,
            recurrent_initializer=self._recurrent_w_init,
            unit_forget_bias=self._forget_bias,
            name='mean_lstm_layer')
        # Output head emitting mean and log-std concatenated
        # (hence action_dim * 2 units).
        self._mean_std_output_nonlinearity_layer = tf.keras.layers.Dense(
            units=action_dim * 2,
            activation=self._output_nonlinearity,
            kernel_initializer=self._output_w_init,
            bias_initializer=self._output_b_init,
            name='mean_std_output_layer')
        # Output head emitting the mean only.
        self._mean_output_nonlinearity_layer = tf.keras.layers.Dense(
            units=action_dim,
            activation=self._output_nonlinearity,
            kernel_initializer=self._output_w_init,
            bias_initializer=self._output_b_init,
            name='mean_output_layer')

    def network_input_spec(self):
        """Network input spec."""
        return [
            'full_input', 'step_input', 'step_hidden_input', 'step_cell_input'
        ]

    def network_output_spec(self):
        """Network output spec."""
        return [
            'sample', 'mean', 'step_mean', 'log_std', 'step_log_std',
            'step_hidden', 'step_cell', 'init_hidden', 'init_cell', 'dist'
        ]

    def _build(self,
               state_input,
               step_input,
               hidden_input,
               cell_input,
               name=None):
        """Build the distribution graph.

        Args:
            state_input (tf.Tensor): Full-trajectory observation input.
            step_input (tf.Tensor): Single-step observation input.
            hidden_input (tf.Tensor): Step hidden state fed back into the
                LSTM.
            cell_input (tf.Tensor): Step cell state fed back into the LSTM.
            name (str): Inner model name, also the variable scope of the
                inner model; unused here (handled by the base Model).

        Returns:
            tuple: Tensors matching network_output_spec() — sampled action,
                mean, step mean, log std, step log std, step hidden/cell
                states, initial hidden/cell state variables, and the
                DiagonalGaussian distribution object.
        """
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # Mean and std come from a single shared LSTM whose output
                # is [mean, log_std] concatenated along the last axis.
                (outputs, step_outputs, step_hidden, step_cell,
                 hidden_init_var, cell_init_var) = lstm(
                     name='mean_std_network',
                     lstm_cell=self._mean_std_lstm_cell,
                     all_input_var=state_input,
                     step_input_var=step_input,
                     step_hidden_var=hidden_input,
                     step_cell_var=cell_input,
                     hidden_state_init=self._hidden_state_init,
                     hidden_state_init_trainable=self.
                     _hidden_state_init_trainable,
                     cell_state_init=self._cell_state_init,
                     cell_state_init_trainable=self._cell_state_init_trainable,
                     output_nonlinearity_layer=self.
                     _mean_std_output_nonlinearity_layer)
                # Split the concatenated output: first action_dim channels
                # are the mean, the rest are the log std.
                with tf.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # Separate parameterizations: an LSTM for the mean, and a
                # trainable parameter tensor for the (state-independent)
                # log std.
                (mean_var, step_mean_var, step_hidden, step_cell,
                 hidden_init_var, cell_init_var) = lstm(
                     name='mean_network',
                     lstm_cell=self._mean_lstm_cell,
                     all_input_var=state_input,
                     step_input_var=step_input,
                     step_hidden_var=hidden_input,
                     step_cell_var=cell_input,
                     hidden_state_init=self._hidden_state_init,
                     hidden_state_init_trainable=self.
                     _hidden_state_init_trainable,
                     cell_state_init=self._cell_state_init,
                     cell_state_init_trainable=self._cell_state_init_trainable,
                     output_nonlinearity_layer=self.
                     _mean_output_nonlinearity_layer)
                log_std_var = parameter(
                    state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')
                step_log_std_var = parameter(
                    step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='step_log_std_param')

        dist = DiagonalGaussian(self._output_dim)
        # Reparameterized sample: noise is drawn with the leading (batch)
        # dimension dropped and broadcast against step_mean_var.
        rnd = tf.random.normal(shape=step_mean_var.get_shape().as_list()[1:])
        action_var = rnd * tf.exp(step_log_std_var) + step_mean_var

        return (action_var, mean_var, step_mean_var, log_std_var,
                step_log_std_var, step_hidden, step_cell, hidden_init_var,
                cell_init_var, dist)

    def __getstate__(self):
        """Object.__getstate__.

        Drops the Keras layer/cell objects, which are not picklable;
        __setstate__ recreates them via _initialize().
        """
        new_dict = super().__getstate__()
        del new_dict['_mean_std_lstm_cell']
        del new_dict['_mean_lstm_cell']
        del new_dict['_mean_std_output_nonlinearity_layer']
        del new_dict['_mean_output_nonlinearity_layer']
        return new_dict

    def __setstate__(self, state):
        """Object.__setstate__."""
        super().__setstate__(state)
        self._initialize()
5 changes: 4 additions & 1 deletion src/garage/tf/policies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
DiscreteQfDerivedPolicy)
from garage.tf.policies.gaussian_gru_policy import GaussianGRUPolicy
from garage.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy
from garage.tf.policies.gaussian_lstm_policy_with_model import (
GaussianLSTMPolicyWithModel)
from garage.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from garage.tf.policies.gaussian_mlp_policy_with_model import (
GaussianMLPPolicyWithModel)
Expand All @@ -32,5 +34,6 @@
'CategoricalMLPPolicyWithModel', 'ContinuousMLPPolicy',
'DiscreteQfDerivedPolicy', 'DeterministicMLPPolicy',
'DeterministicMLPPolicyWithModel', 'GaussianGRUPolicy',
'GaussianLSTMPolicy', 'GaussianMLPPolicy', 'GaussianMLPPolicyWithModel'
'GaussianLSTMPolicy', 'GaussianLSTMPolicyWithModel', 'GaussianMLPPolicy',
'GaussianMLPPolicyWithModel'
]
16 changes: 14 additions & 2 deletions src/garage/tf/policies/gaussian_lstm_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def __init__(
env_spec,
name='GaussianLSTMPolicy',
hidden_dim=32,
hidden_nonlinearity=tf.tanh,
recurrent_nonlinearity=tf.nn.sigmoid,
recurrent_w_x_init=L.XavierUniformInitializer(),
recurrent_w_h_init=L.OrthogonalInitializer(),
output_nonlinearity=None,
output_w_init=L.XavierUniformInitializer(),
feature_network=None,
state_include_action=True,
hidden_nonlinearity=tf.tanh,
learn_std=True,
init_std=1.0,
output_nonlinearity=None,
lstm_layer_cls=L.LSTMLayer,
use_peepholes=False,
std_share_network=False,
Expand Down Expand Up @@ -79,7 +83,11 @@ def __init__(
output_dim=2 * action_dim,
hidden_dim=hidden_dim,
hidden_nonlinearity=hidden_nonlinearity,
recurrent_nonlinearity=recurrent_nonlinearity,
recurrent_w_x_init=recurrent_w_x_init,
recurrent_w_h_init=recurrent_w_h_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
lstm_layer_cls=lstm_layer_cls,
name='lstm_mean_network',
use_peepholes=use_peepholes,
Expand Down Expand Up @@ -115,7 +123,11 @@ def __init__(
output_dim=action_dim,
hidden_dim=hidden_dim,
hidden_nonlinearity=hidden_nonlinearity,
recurrent_nonlinearity=recurrent_nonlinearity,
recurrent_w_x_init=recurrent_w_x_init,
recurrent_w_h_init=recurrent_w_h_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
lstm_layer_cls=lstm_layer_cls,
name='lstm_mean_network',
use_peepholes=use_peepholes,
Expand Down
Loading