agent: "IPPO" # The agent name.
env_name: "atari" # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1 # The random seed of the environment.
obs_type: "grayscale_image" # Choice for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84] # Default is 210 x 160 in gym[Atari].
num_stack: 4 # Frame-stack trick.
frame_skip: 4 # Frame-skip trick.
noop_max: 30 # Do the no-op action for a number of steps in [1, noop_max].
learner: "IPPO_Learner" # The learner name.
policy: "Categorical_MAAC_Policy" # The policy name.
representation: "Basic_CNN" # The representation name.
vectorize: "SubprocVecMultiAgentEnv" # The method to vectorize the environments so that they can run in parallel.
runner: "MARL" # The runner.

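These environment settings describe the usual Atari preprocessing pipeline. Below is a minimal sketch of that pipeline, assuming the PettingZoo Atari `basketball_pong_v3` environment and SuperSuit wrappers; the framework's own wrapper chain (including how it handles `noop_max`, which is not shown here) may differ.

```python
from pettingzoo.atari import basketball_pong_v3
import supersuit as ss

# env_id / obs_type: the multi-agent Pong variant with grayscale observations.
env = basketball_pong_v3.parallel_env(obs_type="grayscale_image")
env = ss.frame_skip_v0(env, 4)                 # frame_skip: 4
env = ss.resize_v1(env, x_size=84, y_size=84)  # img_size: [84, 84]
env = ss.frame_stack_v1(env, 4)                # num_stack: 4

observations, infos = env.reset(seed=1)        # env_seed: 1 (newer PettingZoo reset API)
```
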
# The following arguments are for the "Basic_CNN" representation.
use_cnn: True # Whether to use convolutional neural networks as representation.
filters: [32, 32, 64, 64] # The number of output channels of each convolutional layer.
kernels: [8, 4, 4, 4] # The kernel size of each convolutional layer.
strides: [4, 2, 2, 2] # The stride of each convolutional layer.

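The three lists above define one convolutional layer per entry. The sketch below builds the implied stack in PyTorch to show how the flattened feature size comes out; the padding choice is an assumption, so the framework's Basic_CNN may produce a different size.

```python
import torch
import torch.nn as nn

filters, kernels, strides = [32, 32, 64, 64], [8, 4, 4, 4], [4, 2, 2, 2]

# Build one Conv2d + ReLU per entry; padding = kernel // 2 is an assumption here.
layers, in_channels = [], 4  # 4 stacked grayscale frames
for out_channels, k, s in zip(filters, kernels, strides):
    layers += [nn.Conv2d(in_channels, out_channels, k, stride=s, padding=k // 2), nn.ReLU()]
    in_channels = out_channels
cnn = nn.Sequential(*layers, nn.Flatten())

# Infer the flattened representation size for an 84 x 84 input (img_size: [84, 84]).
with torch.no_grad():
    feat_dim = cnn(torch.zeros(1, 4, 84, 84)).shape[-1]
print(feat_dim)  # size of the feature vector fed to the actor/critic heads
```
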
# Recurrent settings for the "Basic_RNN" representation.
use_rnn: False # Whether to use a recurrent neural network as the representation (the representation should be "Basic_RNN").
rnn: "GRU" # The type of recurrent layer.
fc_hidden_sizes: [64, 64, 64] # The hidden sizes of the feed-forward layers in the RNN representation.
recurrent_hidden_size: 64 # The hidden size of the recurrent layer.
N_recurrent_layers: 1 # The number of recurrent layers.
dropout: 0 # Dropout probability in [0, 1]: the probability of an element being zeroed.
normalize: "LayerNorm" # Layer normalization.
initialize: "orthogonal" # Network initializer.
gain: 0.01 # Gain value for network initialization.

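When use_rnn is enabled (with representation: "Basic_RNN"), these settings describe feed-forward layers followed by a recurrent layer and layer normalization. A hypothetical PyTorch sketch under those assumptions; the framework's actual module may differ.

```python
import torch
import torch.nn as nn

class RNNRepresentation(nn.Module):
    """Sketch: fc_hidden_sizes -> GRU (recurrent_hidden_size) -> LayerNorm."""
    def __init__(self, input_dim, fc_hidden_sizes=(64, 64, 64),
                 recurrent_hidden_size=64, num_layers=1, dropout=0.0):
        super().__init__()
        mlp, last = [], input_dim
        for h in fc_hidden_sizes:
            mlp += [nn.Linear(last, h), nn.ReLU()]
            last = h
        self.mlp = nn.Sequential(*mlp)
        self.gru = nn.GRU(last, recurrent_hidden_size, num_layers,
                          batch_first=True, dropout=dropout)
        self.norm = nn.LayerNorm(recurrent_hidden_size)

    def forward(self, x, h0=None):
        # x: [batch, time, input_dim]; h0: optional initial hidden state.
        out, hn = self.gru(self.mlp(x), h0)
        return self.norm(out), hn
```
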
representation_hidden_size: [64, ] # A list of hidden units for each layer of the Basic_MLP representation networks.
actor_hidden_size: [64, ] # A list of hidden units for each layer of the actor network.
critic_hidden_size: [64, ] # A list of hidden units for each layer of the critic network.
activation: "relu" # The activation function of each hidden layer.
activation_action: "sigmoid" # The activation function for the last layer of the actor.
use_parameter_sharing: True # Whether to use parameter sharing for all agents' policies.
use_actions_mask: False # Whether to use an action mask for unavailable actions.

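Parameter sharing is commonly implemented by passing every agent's observation through a single network, with a one-hot agent index appended so the shared weights can still produce agent-specific behaviour. The sketch below illustrates that standard trick; whether this framework appends the index exactly this way is an assumption.

```python
import torch

def shared_policy_inputs(obs_per_agent):
    """obs_per_agent: dict mapping agent name -> 1-D observation tensor."""
    n_agents = len(obs_per_agent)
    inputs = []
    for i, (name, obs) in enumerate(obs_per_agent.items()):
        one_hot = torch.zeros(n_agents)
        one_hot[i] = 1.0
        inputs.append(torch.cat([obs, one_hot]))  # [obs_dim + n_agents]
    # One batch goes through a single shared actor/critic network.
    return torch.stack(inputs)
```
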
seed: 1 # Random seed.
parallels: 16 # The number of environments to run in parallel.
buffer_size: 3200 # Number of transitions (use_rnn is False) or episodes (use_rnn is True) in the buffer.
n_epochs: 10 # Number of epochs to train.
n_minibatch: 1 # Number of minibatches to sample and train on. batch_size = buffer_size // n_minibatch.
learning_rate: 0.0007 # Learning rate.
weight_decay: 0 # The weight decay (L2 regularization) coefficient of the optimizer.

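The comment on n_minibatch defines the minibatch size, so the values above imply the following update arithmetic (assuming buffer_size counts transitions across all parallel environments, which is an interpretation, not something the file states):

```python
buffer_size, n_minibatch, n_epochs, parallels = 3200, 1, 10, 16

batch_size = buffer_size // n_minibatch       # 3200 transitions per minibatch (from the comment above)
rollout_length = buffer_size // parallels     # 200 steps per parallel env per rollout (assumption)
updates_per_rollout = n_epochs * n_minibatch  # 10 gradient updates per collected buffer
```
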
vf_coef: 0.5 # Coefficient factor for critic loss.
ent_coef: 0.01 # Coefficient factor for entropy loss.
target_kl: 0.25 # For MAPPO_KL learner.
clip_range: 0.2 # The clip range for the ratio in the MAPPO_Clip learner.
gamma: 0.99 # Discount factor.

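vf_coef, ent_coef, and clip_range enter the standard PPO-clip objective. A sketch of that loss follows; it shows how the coefficients combine, not necessarily the exact code of the IPPO_Learner.

```python
import torch
import torch.nn.functional as F

def ppo_loss(log_prob, old_log_prob, advantage, value, value_target, entropy,
             clip_range=0.2, vf_coef=0.5, ent_coef=0.01):
    ratio = torch.exp(log_prob - old_log_prob)
    surr1 = ratio * advantage
    surr2 = torch.clamp(ratio, 1 - clip_range, 1 + clip_range) * advantage
    policy_loss = -torch.min(surr1, surr2).mean()     # clipped surrogate
    value_loss = F.mse_loss(value, value_target)      # weighted by vf_coef
    return policy_loss + vf_coef * value_loss - ent_coef * entropy.mean()
```
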
# Tricks
use_linear_lr_decay: False # Whether to use linear learning rate decay.
end_factor_lr_decay: 0.5 # The end factor of the learning rate scheduler.
use_global_state: False # Whether to use the global state to replace merged observations.
use_value_clip: True # Limit the value range.
value_clip_range: 0.2 # The value clip range.
use_value_norm: True # Use running mean and std to normalize the value targets.
use_huber_loss: True # True: use Huber loss; False: use MSE loss.
huber_delta: 10.0 # The threshold at which to change between delta-scaled L1 and L2 loss (for Huber loss).
use_advnorm: True # Whether to use advantage normalization.
use_gae: True # Use the GAE trick.
gae_lambda: 0.95 # The GAE lambda.
use_grad_clip: True # Whether to clip the gradient norm.
grad_clip_norm: 10.0 # The max norm of the gradient.
clip_type: 1 # Gradient clip type for MindSpore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm().

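Several of these tricks are easiest to read as code: GAE with gamma and gae_lambda, and the value-clip / Huber-loss combination controlled by use_value_clip, value_clip_range, use_huber_loss, and huber_delta. A sketch under those settings; the framework's implementation may differ in details such as masking and reduction.

```python
import torch
import torch.nn.functional as F

def compute_gae(rewards, values, dones, last_value, gamma=0.99, gae_lambda=0.95):
    """rewards, values, dones: tensors of shape [T]; last_value: bootstrap value."""
    advantages = torch.zeros_like(rewards)
    gae, next_value = 0.0, last_value
    for t in reversed(range(rewards.shape[0])):
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        gae = delta + gamma * gae_lambda * (1 - dones[t]) * gae
        advantages[t] = gae
        next_value = values[t]
    return advantages, advantages + values  # advantages and value targets (returns)

def clipped_value_loss(value, old_value, target, clip_range=0.2, delta=10.0):
    """Value clipping combined with Huber loss (huber_delta: 10.0)."""
    clipped = old_value + torch.clamp(value - old_value, -clip_range, clip_range)
    loss = torch.max(F.huber_loss(value, target, delta=delta, reduction="none"),
                     F.huber_loss(clipped, target, delta=delta, reduction="none"))
    return loss.mean()
```
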
running_steps: 10000000 # The total number of running steps.
eval_interval: 100000 # The number of steps between two consecutive evaluations.
test_episode: 5 # The number of episodes to test in each evaluation period.

log_dir: "logs/ippo/" # Directory to save logs.
model_dir: "models/ippo/" # Directory to save model checkpoints.
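
For reference, a minimal sketch of consuming this file as a plain YAML config; the filename `ippo_atari.yaml` is hypothetical, and the framework that ships this config will have its own loader and runner.

```python
import yaml
from types import SimpleNamespace

# Load the config into an attribute-style namespace, the way runners usually consume it.
with open("ippo_atari.yaml") as f:  # hypothetical filename for this file
    config = SimpleNamespace(**yaml.safe_load(f))

print(config.agent, config.env_id, config.learning_rate)
# Logs are written under config.log_dir ("logs/ippo/") and checkpoints under
# config.model_dir ("models/ippo/"); the surrounding framework creates these directories.
```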