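Fix FishLeg auxiliary-update calls and tidy the autoencoder example.

- src/optim/FishLeg/fishleg.py (update_aux): pass the whole `data` batch to `draw` instead of `data[0]`, call `nll(model, samples)` without the extra `data[1]` argument, remove the `qg = group["Qv"](grad_norm)` computation, and compute `g_norm = torch.sqrt(g2)`.
- src/optim/FishLeg/fishleg_layers.py (diagQ): build `L` and `R` directly from `fishleg_aux` without the `sqrt(scale)` factor; also adds a `print(L)` debugging statement.
- examples/autoencoder.py: normalise quoting, whitespace, and indentation; enumerate batches from 1 in the first training loop; print the optimizer's `fish_lr`, `beta`, `aux_lr`, `damping`, and `sgd_lr` after construction.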

jamie-mcg · jamie-mcg · commit 68a2e80466a7 · 2023-05-30T22:25:54.000+08:00
diff --git a/examples/autoencoder.py b/examples/autoencoder.py
@@ -126,9 +126,7 @@ def __getitem__(self, idx):
         if torch.is_tensor(idx):
             idx = idx.tolist()
 
-        return torch.tensor(self._images[idx]), torch.tensor(
-            self._labels[idx]
-        )
+        return torch.tensor(self._images[idx]), torch.tensor(self._labels[idx])
 
 
 def _read32(bytestream):
@@ -225,19 +223,19 @@ class DataSets(object):
 
         SOURCE_URL = "http://www.cs.toronto.edu/~jmartens/"
         TRAIN_IMAGES = "newfaces_rot_single.mat"
-        
+
         local_file = maybe_download(SOURCE_URL, TRAIN_IMAGES, train_dir)
         print(f"Data read from {local_file}")
-        
-        numpy_file = os.path.dirname(local_file) + '/faces.npy'
+
+        numpy_file = os.path.dirname(local_file) + "/faces.npy"
         if os.path.exists(numpy_file):
             images_ = np.load(numpy_file)
         else:
             import mat4py
 
             images_ = mat4py.loadmat(local_file)
             images_ = np.asarray(images_["newfaces_single"])
-        
+
             images_ = np.transpose(images_)
             np.save(numpy_file, images_)
             print(f"Data saved to {numpy_file}")
@@ -276,10 +274,9 @@ class DataSets(object):
 if __name__ == "__main__":
 
     argparser = argparse.ArgumentParser()
-    argparser.add_argument('--exp', type=str, help='which dataset', default='FACES')
+    argparser.add_argument("--exp", type=str, help="which dataset", default="FACES")
     args = argparser.parse_args()
 
-
     seed = 13
     torch.manual_seed(seed)
     torch.backends.cudnn.benchmark = False
@@ -291,7 +288,7 @@ class DataSets(object):
     print("device", device)
 
     ## Hyperparams
-    if args.exp == 'FACES':
+    if args.exp == "FACES":
 
         batch_size = 100
         epochs = 5
@@ -304,8 +301,8 @@ class DataSets(object):
         damping = 1.0
 
         dataset = read_data_sets("FACES", "../data/", if_autoencoder=True)
-    
-    if args.exp == 'MNIST':
+
+    if args.exp == "MNIST":
         batch_size = 100
         epochs = 10
         eta_adam = 1e-4
@@ -321,19 +318,23 @@ class DataSets(object):
     ## Dataset
     train_dataset = dataset.train
     test_dataset = dataset.test
-    if args.exp == 'FACES':
-        likelihood = FISH_LIKELIHOODS['fixedgaussian'](sigma=1.0, device=device)
+    if args.exp == "FACES":
+        likelihood = FISH_LIKELIHOODS["fixedgaussian"](sigma=1.0, device=device)
+
         def mse(model, data):
             data_x, data_y = data
             pred_y = model.forward(data_x)
-            return torch.mean(torch.square(pred_y-data_y))
-    if args.exp == 'MNIST':
-        likelihood = FISH_LIKELIHOODS['bernoulli'](device=device)
+            return torch.mean(torch.square(pred_y - data_y))
+
+    if args.exp == "MNIST":
+        likelihood = FISH_LIKELIHOODS["bernoulli"](device=device)
+
         def mse(model, data):
             data_x, data_y = data
             pred_y = model.forward(data_x)
             pred_y = torch.sigmoid(pred_y)
-            return torch.mean(torch.square(pred_y-data_y))
+            return torch.mean(torch.square(pred_y - data_y))
+
     def nll(model, data):
         data_x, data_y = data
         pred_y = model.forward(data_x)
@@ -344,7 +345,6 @@ def draw(model, data):
         pred_y = model.forward(data_x)
         return (data_x, likelihood.draw(pred_y))
 
-
     train_loader = torch.utils.data.DataLoader(
         train_dataset, batch_size=batch_size, shuffle=True
     )
@@ -360,43 +360,42 @@ def draw(model, data):
         test_dataset, batch_size=1000, shuffle=False
     )
 
-    
-    if args.exp == 'FACES':
+    if args.exp == "FACES":
         model = nn.Sequential(
-        nn.Linear(625, 2000),
-        nn.ReLU(),
-        nn.Linear(2000, 1000),
-        nn.ReLU(),
-        nn.Linear(1000, 500),
-        nn.ReLU(),
-        nn.Linear(500, 30),
-        nn.Linear(30, 500),
-        nn.ReLU(),
-        nn.Linear(500, 1000),
-        nn.ReLU(),
-        nn.Linear(1000, 2000),
-        nn.ReLU(),
-        nn.Linear(2000, 625),
+            nn.Linear(625, 2000),
+            nn.ReLU(),
+            nn.Linear(2000, 1000),
+            nn.ReLU(),
+            nn.Linear(1000, 500),
+            nn.ReLU(),
+            nn.Linear(500, 30),
+            nn.Linear(30, 500),
+            nn.ReLU(),
+            nn.Linear(500, 1000),
+            nn.ReLU(),
+            nn.Linear(1000, 2000),
+            nn.ReLU(),
+            nn.Linear(2000, 625),
         ).to(device)
-    
-    if args.exp == 'MNIST':
+
+    if args.exp == "MNIST":
         model = nn.Sequential(
-        nn.Linear(784, 1000, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(1000, 500, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(500, 250, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(250, 30, dtype=torch.float32),
-        nn.Linear(30, 250, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(250, 500, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(500, 1000, dtype=torch.float32),
-        nn.ReLU(),
-        nn.Linear(1000, 784, dtype=torch.float32),
+            nn.Linear(784, 1000, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(1000, 500, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(500, 250, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(250, 30, dtype=torch.float32),
+            nn.Linear(30, 250, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(250, 500, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(500, 1000, dtype=torch.float32),
+            nn.ReLU(),
+            nn.Linear(1000, 784, dtype=torch.float32),
         ).to(device)
-    
+
     model_adam = copy.deepcopy(model)
 
     print("lr fl={}, lr sgd={}, lr aux={}".format(eta_fl, eta_sgd, aux_eta))
@@ -417,10 +416,15 @@ def draw(model, data):
         damping=damping,
         pre_aux_training=0,
         sgd_lr=eta_sgd,
-        initialization='normal',
+        initialization="normal",
         device=device,
     )
 
+    print(opt.__dict__["fish_lr"])
+    print(opt.__dict__["beta"])
+    print(opt.__dict__["aux_lr"])
+    print(opt.__dict__["damping"])
+    print(opt.__dict__["sgd_lr"])
 
     FL_time = []
     LOSS = []
@@ -429,7 +433,7 @@ def draw(model, data):
     iteration = 0
     for e in range(1, epochs + 1):
         print("######## EPOCH", e)
-        for n, (batch_data, batch_labels) in enumerate(train_loader):
+        for n, (batch_data, batch_labels) in enumerate(train_loader, start=1):
             iteration += 1
             batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
             opt.zero_grad()
@@ -440,16 +444,18 @@ def draw(model, data):
             if n % 50 == 0:
                 FL_time.append(time.time() - st)
                 LOSS.append(loss.detach().cpu().numpy())
-                
+
                 test_batch_data, test_batch_labels = next(iter(test_loader))
-                test_batch_data, test_batch_labels = test_batch_data.to(device), test_batch_labels.to(device)
+                test_batch_data, test_batch_labels = test_batch_data.to(
+                    device
+                ), test_batch_labels.to(device)
                 test_loss = mse(opt.model, (test_batch_data, test_batch_labels))
-                    
+
                 TEST_LOSS.append(test_loss.detach().cpu().numpy())
 
                 print(n, LOSS[-1], TEST_LOSS[-1])
-    
-    fig, axs = plt.subplots(1,2, figsize=(10,5))
+
+    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
     axs[0].plot(FL_time, LOSS, label="Fishleg")  # color=colors_group[i])
     axs[1].plot(
         FL_time, TEST_LOSS, label="Fishleg"
@@ -478,21 +484,23 @@ def draw(model, data):
             opt.step()
 
             if n % 50 == 0:
-                FL_time.append(time.time()-st)
+                FL_time.append(time.time() - st)
                 LOSS.append(loss.detach().cpu().numpy())
                 test_batch_data, test_batch_labels = next(iter(test_loader_adam))
-                test_batch_data, test_batch_labels = test_batch_data.to(device), test_batch_labels.to(device)
+                test_batch_data, test_batch_labels = test_batch_data.to(
+                    device
+                ), test_batch_labels.to(device)
                 test_loss = mse(model_adam, (test_batch_data, test_batch_labels))
                 TEST_LOSS.append(test_loss.detach().cpu().numpy())
 
                 print(n, LOSS[-1], TEST_LOSS[-1])
 
     axs[0].plot(FL_time, LOSS, label="Adam")
     axs[1].plot(FL_time, TEST_LOSS, label="Adam")
-    
+
     axs[0].legend()
     axs[1].legend()
 
-    axs[0].set_title('Training Loss')
-    axs[1].set_title('Test MSE')
+    axs[0].set_title("Training Loss")
+    axs[1].set_title("Test MSE")
     fig.savefig("result/result.png", dpi=300)
diff --git a/src/optim/FishLeg/fishleg.py b/src/optim/FishLeg/fishleg.py
@@ -304,7 +304,7 @@ def update_aux(self) -> None:
         self.aux_opt.zero_grad()
         with torch.no_grad():
             self.store_g = False
-            samples = self.draw(self.model, data[0])
+            samples = self.draw(self.model, data)
             self.store_g = True
 
         g2 = 0.0
@@ -315,13 +315,14 @@ def update_aux(self) -> None:
             else:
                 grad_norm = [0 * p.grad.data for p in group["params"]]
 
-            qg = group["Qv"](grad_norm)
+        g_norm = torch.sqrt(g2)
+        # print(g_norm)
 
         self.zero_grad()
         # How to better implement this?
         # The hook is not updated here, locally, only the gradient to the parameters g.grad is updated
         self.store_g = False
-        self.nll(self.model, samples, data[1]).backward()
+        self.nll(self.model, samples).backward()
         self.store_g = True
 
         gm_norm = 0.0
diff --git a/src/optim/FishLeg/fishleg_layers.py b/src/optim/FishLeg/fishleg_layers.py
@@ -146,6 +146,7 @@ def diagQ(self) -> Tensor:
         the Kronecker product.
 
         """
-        L = torch.sqrt(self.fishleg_aux["scale"]) * self.fishleg_aux["L"]
-        R = torch.sqrt(self.fishleg_aux["scale"]) * self.fishleg_aux["R"]
+        L = self.fishleg_aux["L"]
+        R = self.fishleg_aux["R"]
+        print(L)
         return torch.kron(torch.sum(R * R, axis=1), torch.sum(L * L, axis=1))