Skip to content

Commit b2983a1

Browse files
committed
Docstring
1 parent 40437df commit b2983a1

4 files changed

Lines changed: 64 additions & 39 deletions

File tree

garage/tf/policies/discrete_qf_derived_policy.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class DiscreteQfDerivedPolicy(Policy2):
1818
Args:
1919
env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
2020
qf (garage.q_functions.QFunction): The q-function used.
21+
name (str): Name of the policy.
2122
"""
2223

2324
def __init__(self, env_spec, qf, name='DiscreteQfDerivedPolicy'):
@@ -45,11 +46,10 @@ def get_action(self, observation):
4546
Get action from this policy for the input observation.
4647
4748
Args:
48-
observation: Observation from environment.
49-
sess: tf.Session provided.
49+
observation (numpy.ndarray): Observation from environment.
5050
5151
Returns:
52-
opt_action: Optimal action from this policy.
52+
Single optimal action from this policy.
5353
5454
"""
5555
q_vals = self._f_qval([observation])
@@ -63,11 +63,10 @@ def get_actions(self, observations):
6363
Get actions from this policy for the input observations.
6464
6565
Args:
66-
observations: Observations from environment.
67-
sess: tf.Session provided.
66+
observations (numpy.ndarray): Observations from environment.
6867
6968
Returns:
70-
opt_actions: Optimal actions from this policy.
69+
Optimal actions from this policy.
7170
7271
"""
7372
q_vals = self._f_qval(observations)

garage/tf/q_functions/base2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class QFunction2:
66
Q-function base class without Parameterized.
77
88
Args:
9-
name: Name of the Q-fucntion.
9+
name (str): Name of the Q-function, also the variable scope.
1010
1111
"""
1212

@@ -21,7 +21,7 @@ def get_qval_sym(self, *input_phs):
2121
All derived classes should implement this function.
2222
2323
Args:
24-
input_phs: List of tf.Tensor inputs. recommended to be positional
24+
input_phs (list[tf.Tensor]): Recommended to be positional
2525
arguments, e.g. def get_qval_sym(self, state_input, action_input).
2626
"""
2727
raise NotImplementedError
@@ -31,7 +31,7 @@ def clone(self, name):
3131
Return a clone of the Q-function.
3232
3333
Args:
34-
name: Name of the newly created q-function.
34+
name (str): Name of the newly created q-function.
3535
"""
3636
raise NotImplementedError
3737

garage/tf/q_functions/discrete_cnn_q_function.py

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,53 @@ class DiscreteCNNQFunction(QFunction2):
1818
of Q(s, a).
1919
2020
Args:
21-
env_spec: environment specification
22-
filter_dims: Dimension of the filters.
23-
num_filters: Number of filters.
24-
strides: The strides of the sliding window.
25-
hidden_sizes: Output dimension of dense layer(s).
26-
name: Variable scope of the cnn.
27-
padding: The type of padding algorithm to use, from "SAME", "VALID".
28-
max_pooling: Boolean for using max pooling layer or not.
29-
pool_shape: Dimension of the pooling layer(s).
30-
hidden_nonlinearity: Activation function for
31-
intermediate dense layer(s).
32-
hidden_w_init: Initializer function for the weight
33-
of intermediate dense layer(s).
34-
hidden_b_init: Initializer function for the bias
35-
of intermediate dense layer(s).
36-
output_nonlinearity: Activation function for
37-
output dense layer.
38-
output_w_init: Initializer function for the weight
39-
of output dense layer(s).
40-
output_b_init: Initializer function for the bias
41-
of output dense layer(s).
42-
dueling: Bool for using dueling network or not.
43-
layer_normalization: Bool for using layer normalization or not.
21+
env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
22+
filter_dims (tuple[int]): Dimension of the filters. For example,
23+
(3, 5) means there are two convolutional layers. The filter for
24+
first layer is of dimension (3 x 3) and the second one is of
25+
dimension (5 x 5).
26+
num_filters (tuple[int]): Number of filters. For example, (3, 32) means
27+
there are two convolutional layers. The filter for the first layer
28+
has 3 channels and the second one has 32 channels.
29+
strides (tuple[int]): The stride of the sliding window. For example,
30+
(1, 2) means there are two convolutional layers. The stride of the
31+
filter for first layer is 1 and that of the second layer is 2.
32+
hidden_sizes (list[int]): Output dimension of dense layer(s).
33+
For example, (32, 32) means the MLP of this q-function consists of
34+
two hidden layers, each with 32 hidden units.
35+
name (str): Variable scope of the cnn.
36+
padding (str): The type of padding algorithm to use,
37+
either 'SAME' or 'VALID'.
38+
max_pooling (bool): Boolean for using max pooling layer or not.
39+
pool_shapes (tuple[int]): Dimension of the pooling layer(s). For
40+
example, (2, 2) means that all the pooling layers have
41+
shape (2, 2).
42+
pool_strides (tuple[int]): The strides of the pooling layer(s). For
43+
example, (2, 2) means that all the pooling layers have
44+
strides (2, 2).
45+
cnn_hidden_nonlinearity (callable): Activation function for
46+
intermediate dense layer(s) in the CNN. It should return a
47+
tf.Tensor. Set it to None to maintain a linear activation.
48+
hidden_nonlinearity (callable): Activation function for intermediate
49+
dense layer(s) in the MLP. It should return a tf.Tensor. Set it to
50+
None to maintain a linear activation.
51+
hidden_w_init (callable): Initializer function for the weight
52+
of intermediate dense layer(s) in the MLP. The function should
53+
return a tf.Tensor.
54+
hidden_b_init (callable): Initializer function for the bias
55+
of intermediate dense layer(s) in the MLP. The function should
56+
return a tf.Tensor.
57+
output_nonlinearity (callable): Activation function for output dense
58+
layer in the MLP. It should return a tf.Tensor. Set it to None
59+
to maintain a linear activation.
60+
output_w_init (callable): Initializer function for the weight
61+
of output dense layer(s) in the MLP. The function should return
62+
a tf.Tensor.
63+
output_b_init (callable): Initializer function for the bias
64+
of output dense layer(s) in the MLP. The function should return
65+
a tf.Tensor.
66+
dueling (bool): Bool for using dueling network or not.
67+
layer_normalization (bool): Bool for using layer normalization or not.
4468
"""
4569

4670
def __init__(self,

garage/tf/q_functions/discrete_mlp_q_function.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ class DiscreteMLPQFunction(QFunction2):
1515
input state and action. It uses an MLP to fit the function Q(s, a).
1616
1717
Args:
18-
env_spec: Environment specification.
19-
name: Name of the q-function, also serves as the variable scope.
20-
hidden_sizes: Output dimension of dense layer(s).
18+
env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
19+
name (str): Name of the q-function, also serves as the variable scope.
20+
hidden_sizes (list[int]): Output dimension of dense layer(s).
21+
For example, (32, 32) means the MLP of this q-function consists of
22+
two hidden layers, each with 32 hidden units.
2123
hidden_nonlinearity (callable): Activation function for intermediate
2224
dense layer(s). It should return a tf.Tensor. Set it to
2325
None to maintain a linear activation.
@@ -36,7 +38,7 @@ class DiscreteMLPQFunction(QFunction2):
3638
output_b_init (callable): Initializer function for the bias
3739
of output dense layer(s). The function should return a
3840
tf.Tensor.
39-
layer_normalization: Bool for using layer normalization or not.
41+
layer_normalization (bool): Bool for using layer normalization.
4042
"""
4143

4244
def __init__(self,
@@ -114,8 +116,8 @@ def get_qval_sym(self, state_input, name):
114116
Symbolic graph for q-network.
115117
116118
Args:
117-
state_input: The state input tf.Tensor to the network.
118-
name: Network variable scope.
119+
state_input (tf.Tensor): The state input tf.Tensor to the network.
120+
name (str): Network variable scope.
119121
120122
Returns:
121123
The tf.Tensor output of Discrete MLP QFunction.
@@ -128,7 +130,7 @@ def clone(self, name):
128130
Return a clone of the Q-function.
129131
130132
Args:
131-
name: Name of the newly created q-function.
133+
name (str): Name of the newly created q-function.
132134
"""
133135
return self.__class__(
134136
name=name,

0 commit comments

Comments
 (0)