@@ -18,29 +18,53 @@ class DiscreteCNNQFunction(QFunction2):
     of Q(s, a).
 
     Args:
-        env_spec: environment specification
-        filter_dims: Dimension of the filters.
-        num_filters: Number of filters.
-        strides: The strides of the sliding window.
-        hidden_sizes: Output dimension of dense layer(s).
-        name: Variable scope of the cnn.
-        padding: The type of padding algorithm to use, from "SAME", "VALID".
-        max_pooling: Boolean for using max pooling layer or not.
-        pool_shape: Dimension of the pooling layer(s).
-        hidden_nonlinearity: Activation function for
-            intermediate dense layer(s).
-        hidden_w_init: Initializer function for the weight
-            of intermediate dense layer(s).
-        hidden_b_init: Initializer function for the bias
-            of intermediate dense layer(s).
-        output_nonlinearity: Activation function for
-            output dense layer.
-        output_w_init: Initializer function for the weight
-            of output dense layer(s).
-        output_b_init: Initializer function for the bias
-            of output dense layer(s).
-        dueling: Bool for using dueling network or not.
-        layer_normalization: Bool for using layer normalization or not.
+        env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
+        filter_dims (tuple[int]): Dimension of the filters. For example,
+            (3, 5) means there are two convolutional layers. The filter for
+            the first layer is of dimension (3 x 3) and the second one is of
+            dimension (5 x 5).
+        num_filters (tuple[int]): Number of filters. For example, (3, 32) means
+            there are two convolutional layers. The filter for the first layer
+            has 3 channels and the second one has 32 channels.
+        strides (tuple[int]): The stride of the sliding window. For example,
+            (1, 2) means there are two convolutional layers. The stride of the
+            filter for the first layer is 1 and that of the second layer is 2.
+        hidden_sizes (list[int]): Output dimension of dense layer(s).
+            For example, (32, 32) means the MLP of this q-function consists of
+            two hidden layers, each with 32 hidden units.
+        name (str): Variable scope of the CNN.
+        padding (str): The type of padding algorithm to use,
+            either 'SAME' or 'VALID'.
+        max_pooling (bool): Whether to use a max pooling layer.
+        pool_shapes (tuple[int]): Dimension of the pooling layer(s). For
+            example, (2, 2) means that all the pooling layers have
+            shape (2, 2).
+        pool_strides (tuple[int]): The strides of the pooling layer(s). For
+            example, (2, 2) means that all the pooling layers have
+            strides (2, 2).
+        cnn_hidden_nonlinearity (callable): Activation function for
+            intermediate dense layer(s) in the CNN. It should return a
+            tf.Tensor. Set it to None to maintain a linear activation.
+        hidden_nonlinearity (callable): Activation function for intermediate
+            dense layer(s) in the MLP. It should return a tf.Tensor. Set it to
+            None to maintain a linear activation.
+        hidden_w_init (callable): Initializer function for the weight
+            of intermediate dense layer(s) in the MLP. The function should
+            return a tf.Tensor.
+        hidden_b_init (callable): Initializer function for the bias
+            of intermediate dense layer(s) in the MLP. The function should
+            return a tf.Tensor.
+        output_nonlinearity (callable): Activation function for output dense
+            layer in the MLP. It should return a tf.Tensor. Set it to None
+            to maintain a linear activation.
+        output_w_init (callable): Initializer function for the weight
+            of output dense layer(s) in the MLP. The function should return
+            a tf.Tensor.
+        output_b_init (callable): Initializer function for the bias
+            of output dense layer(s) in the MLP. The function should return
+            a tf.Tensor.
+        dueling (bool): Whether to use a dueling network.
+        layer_normalization (bool): Whether to use layer normalization.
     """
 
     def __init__(self,
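As a side note on the convention the new docstring describes: `filter_dims`, `num_filters`, and `strides` are parallel tuples, with index `i` of each describing the `i`-th convolutional layer. A minimal sketch of how the tuples line up, using the example values from the docstring (this only illustrates the pairing, it is not a call into the garage API):

```python
# Hypothetical example values, matching the docstring's examples:
# two conv layers with 3x3/5x5 filters, 3/32 output channels, strides 1/2.
filter_dims = (3, 5)
num_filters = (3, 32)
strides = (1, 2)

# zip pairs up index i of each tuple into the spec for conv layer i.
layers = [
    {'filter': f'{k}x{k}', 'channels': n, 'stride': s}
    for k, n, s in zip(filter_dims, num_filters, strides)
]
for i, spec in enumerate(layers):
    print(f'conv{i}: {spec}')
```

All three tuples must therefore have the same length; the number of convolutional layers is implied by it.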