We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7f2bb74 commit 881903e (Copy full SHA for 881903e)
ppo_gae_discrete.py
@@ -75,7 +75,9 @@ def train_net(self):
75
advantage_lst.append([advantage])
76
advantage_lst.reverse()
77
advantage = torch.tensor(advantage_lst, dtype=torch.float)
78
-
+ # Normalize advantages to zero mean and unit std; this can significantly improve training efficiency and stability
79
+ advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-5)
80
+
81
pi = self.pi(s, softmax_dim=-1)
82
dist_entropy = Categorical(pi).entropy()
83
pi_a = pi.gather(1,a)
@@ -126,4 +128,4 @@ def main():
126
128
env.close()
127
129
130
if __name__ == '__main__':
- main()
131
+ main()
0 commit comments