Skip to content

Commit 4f19354

Browse files
committed
modify rdpg
1 parent fd20263 commit 4f19354

File tree

9 files changed

+10
-10
lines changed

9 files changed

+10
-10
lines changed
-18 Bytes
Binary file not shown.

common/policy_networks.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,8 @@ def __init__(self, state_space, action_space, hidden_dim, action_range=1., init_
104104
self.linear4 = nn.Linear(hidden_dim, self._action_dim) # output dim = dim of action
105105

106106
# weights initialization
107-
self.linear3.weight.data.uniform_(-init_w, init_w)
108-
self.linear3.bias.data.uniform_(-init_w, init_w)
107+
self.linear4.weight.data.uniform_(-init_w, init_w)
108+
self.linear4.bias.data.uniform_(-init_w, init_w)
109109

110110

111111
def forward(self, state, last_action, hidden_in):
@@ -127,7 +127,7 @@ def forward(self, state, last_action, hidden_in):
127127
# merged
128128
merged_branch=torch.cat([fc_branch, lstm_branch], -1)
129129
x = activation(self.linear3(merged_branch))
130-
x = F.tanh(self.linear4(x)).clone()
130+
x = F.tanh(self.linear4(x))
131131
x = x.permute(1,0,2) # permute back
132132

133133
return x, lstm_hidden # lstm_hidden is actually tuple: (hidden, cell)
@@ -196,7 +196,7 @@ def forward(self, state, last_action, hidden_in):
196196
# hidden only for initialization, later on hidden states are passed automatically for sequential data
197197
x, lstm_hidden = self.lstm1(x, hidden_in) # no activation after lstm
198198
x = activation(self.linear2(x))
199-
x = F.tanh(self.linear3(x)).clone()
199+
x = F.tanh(self.linear3(x))
200200
x = x.permute(1,0,2) # permute back
201201

202202
return x, lstm_hidden # lstm_hidden is actually tuple: (hidden, cell)

ddpg.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -258,8 +258,8 @@ def _reverse_action(self, action):
258258
action_dim = env.num_actions
259259
state_dim = env.num_observations
260260
elif ENV == 'Pendulum':
261-
# env = NormalizedActions(gym.make("Pendulum-v0"))
262-
env = gym.make("Pendulum-v0")
261+
env = NormalizedActions(gym.make("Pendulum-v0"))
262+
# env = gym.make("Pendulum-v0")
263263
action_dim = env.action_space.shape[0]
264264
state_dim = env.observation_space.shape[0]
265265
elif ENV == 'HalfCheetah':

ddpg_v2.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,8 +165,8 @@ def _reverse_action(self, action):
165165
state_space = spaces.Box(low=-np.inf, high=np.inf, shape=(env.num_observations, ))
166166

167167
elif ENV == 'Pendulum':
168-
# env = NormalizedActions(gym.make("Pendulum-v0"))
169-
env = gym.make("Pendulum-v0")
168+
env = NormalizedActions(gym.make("Pendulum-v0"))
169+
# env = gym.make("Pendulum-v0")
170170
action_space = env.action_space
171171
state_space = env.observation_space
172172
hidden_dim = 64

model/rdpg_policy

0 Bytes
Binary file not shown.

model/rdpg_q

0 Bytes
Binary file not shown.

model/rdpg_target_q

0 Bytes
Binary file not shown.

rdpg.png

11 KB
Loading

rdpg.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,8 @@ def _reverse_action(self, action):
179179
state_space = spaces.Box(low=-np.inf, high=np.inf, shape=(env.num_observations, ))
180180

181181
elif ENV == 'Pendulum':
182-
# env = NormalizedActions(gym.make("Pendulum-v0"))
183-
env = gym.make("Pendulum-v0")
182+
env = NormalizedActions(gym.make("Pendulum-v0"))
183+
# env = gym.make("Pendulum-v0")
184184
action_space = env.action_space
185185
state_space = env.observation_space
186186
hidden_dim = 64

0 commit comments

Comments (0)