
Commit 43e6960

[Add] CNN representation for other MARL algorithms (QMIX, WQMIX, etc.)
1 parent 4181533 commit 43e6960

File tree

11 files changed: +416 −30 lines

xuance/configs/ippo/atari.yaml

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
agent: "IPPO"  # The agent name.
env_name: "atari"  # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1  # The random seed of the environment.
obs_type: "grayscale_image"  # Choices for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84]  # The default observation size is 210 x 160 in gym[Atari].
num_stack: 4  # Frame-stacking trick.
frame_skip: 4  # Frame-skipping trick.
noop_max: 30  # Do the no-op action for a number of steps in [1, noop_max].
learner: "IPPO_Learner"  # The learner name.
policy: "Categorical_MAAC_Policy"  # The policy name.
representation: "Basic_CNN"  # The representation name.
vectorize: "SubprocVecMultiAgentEnv"  # The method to vectorize the environments so that they can run in parallel.
runner: "MARL"  # The runner.

# The following three arguments are for the "Basic_CNN" representation.
use_cnn: True  # Whether to use convolutional neural networks as the representation.
filters: [32, 32, 64, 64]
kernels: [8, 4, 4, 4]
strides: [4, 2, 2, 2]

# Recurrent settings for the Basic_RNN representation.
use_rnn: False  # Whether to use a recurrent neural network as the representation (the representation should be "Basic_RNN").
rnn: "GRU"  # The type of recurrent layer.
fc_hidden_sizes: [64, 64, 64]  # The hidden sizes of the feed-forward layers in the RNN representation.
recurrent_hidden_size: 64  # The hidden size of the recurrent layer.
N_recurrent_layers: 1  # The number of recurrent layers.
dropout: 0  # Dropout should be a number in the range [0, 1]: the probability of an element being zeroed.
normalize: "LayerNorm"  # Layer normalization.
initialize: "orthogonal"  # Network initializer.
gain: 0.01  # Gain value for network initialization.

representation_hidden_size: [64, ]  # A list of hidden units for each layer of the Basic_MLP representation network.
actor_hidden_size: [64, ]  # A list of hidden units for each layer of the actor network.
critic_hidden_size: [64, ]  # A list of hidden units for each layer of the critic network.
activation: "relu"  # The activation function of each hidden layer.
activation_action: "sigmoid"  # The activation function for the last layer of the actor.
use_parameter_sharing: True  # Whether to use parameter sharing for all agents' policies.
use_actions_mask: False  # Whether to use an action mask for unavailable actions.

seed: 1  # Random seed.
parallels: 16  # The number of environments to run in parallel.
buffer_size: 3200  # Number of transitions (use_rnn is False) or episodes (use_rnn is True) in the replay buffer.
n_epochs: 10  # Number of epochs to train.
n_minibatch: 1  # Number of minibatches to sample and train on. batch_size = buffer_size // n_minibatch.
learning_rate: 0.0007  # Learning rate.
weight_decay: 0  # Weight-decay (L2 regularization) coefficient for the optimizer.

vf_coef: 0.5  # Coefficient factor for the critic loss.
ent_coef: 0.01  # Coefficient factor for the entropy loss.
target_kl: 0.25  # For the MAPPO_KL learner.
clip_range: 0.2  # The clip range for the ratio in the MAPPO_Clip learner.
gamma: 0.99  # Discount factor.

# Tricks
use_linear_lr_decay: False  # Whether to use linear learning-rate decay.
end_factor_lr_decay: 0.5  # The end factor for the learning-rate scheduler.
use_global_state: False  # Whether to use the global state to replace merged observations.
use_value_clip: True  # Limit the value range.
value_clip_range: 0.2  # The value clip range.
use_value_norm: True  # Use running mean and std to normalize returns.
use_huber_loss: True  # True: use Huber loss; False: use MSE loss.
huber_delta: 10.0  # The threshold at which to change between delta-scaled L1 and L2 loss (for Huber loss).
use_advnorm: True  # Whether to use advantage normalization.
use_gae: True  # Use the GAE trick.
gae_lambda: 0.95  # The GAE lambda.
use_grad_clip: True  # Whether to clip gradients.
grad_clip_norm: 10.0  # The max norm of the gradient.
clip_type: 1  # Gradient clip for MindSpore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm().

running_steps: 10000000  # The total number of running steps.
eval_interval: 100000  # The number of steps between two evaluations.
test_episode: 5  # The number of episodes to test in each evaluation period.

log_dir: "logs/ippo/"
model_dir: "models/ippo/"

xuance/configs/iql/atari.yaml

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@ use_cnn: True # Whether to use convolutional neural networks as representation.
 filters: [32, 32, 64, 64]
 kernels: [8, 4, 4, 4]
 strides: [4, 2, 2, 2]
-actor_hidden_size: [128, 128]  # A list of hidden units for each layer of actor network.
-critic_hidden_size: [128, 128]  # A list of hidden units for each layer of critic network.

 use_rnn: False  # Whether to use recurrent neural networks.
 rnn: "GRU"  # Choice of recurrent networks: GRU or LSTM.

xuance/configs/mappo/atari.yaml

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
agent: "MAPPO"  # The agent name.
env_name: "atari"  # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1  # The random seed of the environment.
obs_type: "grayscale_image"  # Choices for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84]  # The default observation size is 210 x 160 in gym[Atari].
num_stack: 4  # Frame-stacking trick.
frame_skip: 4  # Frame-skipping trick.
noop_max: 30  # Do the no-op action for a number of steps in [1, noop_max].
learner: "MAPPO_Clip_Learner"  # The learner name.
policy: "Categorical_MAAC_Policy"  # The policy name.
representation: "Basic_CNN"  # The representation name.
vectorize: "SubprocVecMultiAgentEnv"  # The method to vectorize the environments so that they can run in parallel.
runner: "MARL"  # The runner.

# The following three arguments are for the "Basic_CNN" representation.
use_cnn: True  # Whether to use convolutional neural networks as the representation.
filters: [32, 32, 64, 64]
kernels: [8, 4, 4, 4]
strides: [4, 2, 2, 2]

# Recurrent settings for the Basic_RNN representation.
use_rnn: False  # Whether to use a recurrent neural network as the representation (the representation should be "Basic_RNN").
rnn: "GRU"  # The type of recurrent layer.
fc_hidden_sizes: [64, 64, 64]  # The hidden sizes of the feed-forward layers in the RNN representation.
recurrent_hidden_size: 64  # The hidden size of the recurrent layer.
N_recurrent_layers: 1  # The number of recurrent layers.
dropout: 0  # Dropout should be a number in the range [0, 1]: the probability of an element being zeroed.
normalize: "LayerNorm"  # Layer normalization.
initialize: "orthogonal"  # Network initializer.
gain: 0.01  # Gain value for network initialization.

representation_hidden_size: [64, ]  # A list of hidden units for each layer of the Basic_MLP representation network.
actor_hidden_size: [64, ]  # A list of hidden units for each layer of the actor network.
critic_hidden_size: [64, ]  # A list of hidden units for each layer of the critic network.
activation: "relu"  # The activation function of each hidden layer.
activation_action: "sigmoid"  # The activation function for the last layer of the actor.
use_parameter_sharing: True  # Whether to use parameter sharing for all agents' policies.
use_actions_mask: False  # Whether to use an action mask for unavailable actions.

seed: 1  # Random seed.
parallels: 16  # The number of environments to run in parallel.
buffer_size: 400  # Number of transitions (use_rnn is False) or episodes (use_rnn is True) in the replay buffer.
n_epochs: 1  # Number of epochs to train.
n_minibatch: 1  # Number of minibatches to sample and train on. batch_size = buffer_size // n_minibatch.
learning_rate: 0.0007  # Learning rate.
weight_decay: 0  # Weight-decay (L2 regularization) coefficient for the optimizer.

vf_coef: 0.5  # Coefficient factor for the critic loss.
ent_coef: 0.01  # Coefficient factor for the entropy loss.
target_kl: 0.25  # For the MAPPO_KL learner.
clip_range: 0.2  # Ratio clip range, for the MAPPO_Clip learner.
clip_type: 1  # Gradient clip for MindSpore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm().
gamma: 0.95  # Discount factor.

# Tricks
use_linear_lr_decay: False  # Whether to use linear learning-rate decay.
end_factor_lr_decay: 0.5  # The end factor for the learning-rate scheduler.
use_global_state: False  # Whether to use the global state to replace merged observations.
use_value_clip: True  # Limit the value range.
value_clip_range: 0.2  # The value clip range.
use_value_norm: True  # Use running mean and std to normalize returns.
use_huber_loss: True  # True: use Huber loss; False: use MSE loss.
huber_delta: 10.0  # The threshold at which to change between delta-scaled L1 and L2 loss (for Huber loss).
use_advnorm: True  # Whether to use advantage normalization.
use_gae: True  # Use the GAE trick.
gae_lambda: 0.95  # The GAE lambda.
use_grad_clip: True  # Whether to clip gradients.
grad_clip_norm: 10.0  # The max norm of the gradient.

running_steps: 10000000  # The total number of running steps.
eval_interval: 100000  # The number of steps between two evaluations.
test_episode: 5  # The number of episodes to test in each evaluation period.

log_dir: "logs/mappo/"
model_dir: "models/mappo/"

xuance/configs/qmix/atari.yaml

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
agent: "QMIX"  # The MARL algorithm name.
env_name: "atari"  # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1  # The random seed of the environment.
obs_type: "grayscale_image"  # Choices for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84]  # The default observation size is 210 x 160 in gym[Atari].
num_stack: 4  # Frame-stacking trick.
frame_skip: 4  # Frame-skipping trick.
noop_max: 30  # Do the no-op action for a number of steps in [1, noop_max].
learner: "QMIX_Learner"
policy: "Mixing_Q_network"
representation: "Basic_CNN"
vectorize: "DummyVecMultiAgentEnv"
runner: "MARL"  # The runner.

# The following three arguments are for the "Basic_CNN" representation.
use_cnn: True  # Whether to use convolutional neural networks as the representation.
filters: [32, 32, 64, 64]
kernels: [8, 4, 4, 4]
strides: [4, 2, 2, 2]

use_rnn: False  # Whether to use recurrent neural networks.
rnn: "GRU"  # Choice of recurrent networks: GRU or LSTM.
N_recurrent_layers: 1  # Number of recurrent layers.
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
dropout: 0  # Dropout should be a number in the range [0, 1]: the probability of an element being zeroed.

representation_hidden_size: [64, ]
q_hidden_size: [64, ]  # The units for each hidden layer of the Q-network.
activation: "relu"  # The activation function of each hidden layer.
use_parameter_sharing: False
use_actions_mask: False

hidden_dim_mixing_net: 128  # Hidden units of the mixing network.
hidden_dim_hyper_net: 128  # Hidden units of the hypernetwork.

seed: 1069
parallels: 5
buffer_size: 500000
batch_size: 32
learning_rate: 0.0001
gamma: 0.99  # Discount factor.
double_q: True  # Use double Q-learning.

start_greedy: 0.5
end_greedy: 0.05
decay_step_greedy: 1000000
start_training: 100  # Start training after this many steps.
running_steps: 50000000  # The total number of running steps.
training_frequency: 100
sync_frequency: 500

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 500000
test_episode: 5
log_dir: "logs/qmix/"
model_dir: "models/qmix/"

xuance/configs/vdac/atari.yaml

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
agent: "VDAC"
env_name: "atari"  # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1  # The random seed of the environment.
obs_type: "grayscale_image"  # Choices for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84]  # The default observation size is 210 x 160 in gym[Atari].
num_stack: 4  # Frame-stacking trick.
frame_skip: 4  # Frame-skipping trick.
noop_max: 30  # Do the no-op action for a number of steps in [1, noop_max].
learner: "VDAC_Learner"
policy: "Categorical_MAAC_Policy"
representation: "Basic_CNN"
vectorize: "DummyVecMultiAgentEnv"
runner: "MARL"  # The runner.

# The following three arguments are for the "Basic_CNN" representation.
use_cnn: True  # Whether to use convolutional neural networks as the representation.
filters: [32, 32, 64, 64]
kernels: [8, 4, 4, 4]
strides: [4, 2, 2, 2]

# Recurrent settings for the Basic_RNN representation.
use_rnn: False  # Whether to use recurrent neural networks.
rnn: "GRU"  # The type of recurrent layer.
fc_hidden_sizes: [64, 64, 64]  # The hidden sizes of the feed-forward layers in the RNN representation.
recurrent_hidden_size: 64  # The hidden size of the recurrent layer.
N_recurrent_layers: 1  # The number of recurrent layers.
dropout: 0  # Dropout should be a number in the range [0, 1]: the probability of an element being zeroed.
normalize: "LayerNorm"  # Layer normalization.
initialize: "orthogonal"  # Network initializer.
gain: 0.01  # Gain value for network initialization.

representation_hidden_size: [64, ]  # A list of hidden units for each layer of the Basic_MLP representation network.
actor_hidden_size: [64, ]  # A list of hidden units for each layer of the actor network.
critic_hidden_size: [64, ]  # A list of hidden units for each layer of the critic network.
activation: "relu"  # The activation function of each hidden layer.
activation_action: "sigmoid"  # The activation function for the last layer of the actor.
use_parameter_sharing: True  # Whether to use parameter sharing for all agents' policies.
use_actions_mask: False  # Whether to use an action mask for unavailable actions.

mixer: "VDN"  # Choices: VDN (sum), QMIX (monotonic).
hidden_dim_mixing_net: 32  # Hidden units of the mixing network (when mixer is QMIX).
hidden_dim_hyper_net: 32  # Hidden units of the hypernetwork (when mixer is QMIX).

seed: 1  # Random seed.
parallels: 16  # The number of environments to run in parallel.
buffer_size: 32  # Number of transitions (use_rnn is False) or episodes (use_rnn is True) in the replay buffer.
n_epochs: 1  # Number of epochs to train.
n_minibatch: 1  # Number of minibatches to sample and train on. batch_size = buffer_size // n_minibatch.
learning_rate: 0.0005  # Learning rate.
weight_decay: 0  # Weight-decay (L2 regularization) coefficient for the optimizer.

vf_coef: 0.1  # Coefficient factor for the critic loss.
ent_coef: 0.01  # Coefficient factor for the entropy loss.
gamma: 0.99  # Discount factor.

# Tricks
use_linear_lr_decay: False  # Whether to use linear learning-rate decay.
end_factor_lr_decay: 0.5  # The end factor for the learning-rate scheduler.
use_global_state: True  # Whether to use the global state to replace merged observations.
use_value_clip: False  # Limit the value range.
value_clip_range: 0.2  # The value clip range.
use_value_norm: False  # Use running mean and std to normalize returns.
use_huber_loss: False  # True: use Huber loss; False: use MSE loss.
huber_delta: 10.0  # The threshold at which to change between delta-scaled L1 and L2 loss (for Huber loss).
use_advnorm: False  # Whether to use advantage normalization.
use_gae: True  # Use the GAE trick.
gae_lambda: 0.8  # The GAE lambda.
use_grad_clip: True  # Whether to clip gradients.
grad_clip_norm: 10.0  # The max norm of the gradient.
clip_type: 1  # Gradient clip for MindSpore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm().

running_steps: 10000000  # The total number of running steps.
eval_interval: 100000  # The number of steps between two evaluations.
test_episode: 5  # The number of episodes to test in each evaluation period.

log_dir: "logs/vdac/"
model_dir: "models/vdac/"

xuance/configs/vdn/atari.yaml

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@ use_cnn: True # Whether to use convolutional neural networks as representation.
 filters: [32, 32, 64, 64]
 kernels: [8, 4, 4, 4]
 strides: [4, 2, 2, 2]
-actor_hidden_size: [128, 128]  # A list of hidden units for each layer of actor network.
-critic_hidden_size: [128, 128]  # A list of hidden units for each layer of critic network.

 use_rnn: False  # Whether to use recurrent neural networks.
 rnn: "GRU"  # Choice of recurrent networks: GRU or LSTM.

xuance/configs/wqmix/atari.yaml

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
agent: "OWQMIX"  # The MARL algorithm name.
env_name: "atari"  # Name of the environment.
env_id: "basketball_pong_v3"
env_seed: 1  # The random seed of the environment.
obs_type: "grayscale_image"  # Choices for the Atari env: ram, rgb_image, grayscale_image.
img_size: [84, 84]  # The default observation size is 210 x 160 in gym[Atari].
num_stack: 4  # Frame-stacking trick.
frame_skip: 4  # Frame-skipping trick.
noop_max: 30  # Do the no-op action for a number of steps in [1, noop_max].
learner: "WQMIX_Learner"
policy: "Weighted_Mixing_Q_network"
representation: "Basic_CNN"
vectorize: "DummyVecMultiAgentEnv"
runner: "MARL"  # The runner.

# The following three arguments are for the "Basic_CNN" representation.
use_cnn: True  # Whether to use convolutional neural networks as the representation.
filters: [32, 32, 64, 64]
kernels: [8, 4, 4, 4]
strides: [4, 2, 2, 2]

use_rnn: False  # Whether to use recurrent neural networks.
rnn: "GRU"  # Choice of recurrent networks: GRU or LSTM.
N_recurrent_layers: 1  # Number of recurrent layers.
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
dropout: 0  # Dropout should be a number in the range [0, 1]: the probability of an element being zeroed.

representation_hidden_size: [128, ]  # For the Basic_MLP representation.
q_hidden_size: [128, ]  # The units for each hidden layer of the Q-network.
activation: "relu"  # The activation function of each hidden layer.
alpha: 0.1

hidden_dim_mixing_net: 32  # Hidden units of the mixing network.
hidden_dim_hyper_net: 64  # Hidden units of the hypernetwork.
hidden_dim_ff_mix_net: 256  # Hidden units of the feed-forward mixing network.

seed: 1
parallels: 16
buffer_size: 100000
batch_size: 256
learning_rate: 0.001
gamma: 0.99  # Discount factor.
double_q: True  # Use double Q-learning.

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 5000000
start_training: 100  # Start training after this many steps.
running_steps: 10000000  # 10M
training_frequency: 25
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5
use_parameter_sharing: True
use_actions_mask: False

eval_interval: 100000
test_episode: 5
log_dir: "logs/wqmix/"
model_dir: "models/wqmix/"
