Add MNIST

david-baek · david-baek · commit 06e7c204d84d · 2025-01-23T14:56:53.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,6 @@ results
 scratch*.ipynb
 */slurm*.out
 
-figure
+figures
+harmonic_archive.zip
+data
diff --git a/environment.yaml b/environment.yaml
@@ -107,4 +107,4 @@ dependencies:
       - tqdm==4.67.1
       - triton==3.1.0
       - tzdata==2024.2
-prefix: /home/gridsan/dbaek/.conda/envs/crystal
+prefix: /om/user/dbaek/.conda/envs/crystal
diff --git a/notebooks/case_study_circle.ipynb b/notebooks/case_study_circle.ipynb
@@ -50,18 +50,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_3578131/2344840747.py:40: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "/tmp/ipykernel_3613932/1967124822.py:42: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
       "  weights = torch.load(f'../results/case_study_{config_list[i-3][0]}_{config_list[i-3][1]}.pt')\n",
-      "/tmp/ipykernel_3578131/2344840747.py:40: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "/tmp/ipykernel_3613932/1967124822.py:42: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
       "  weights = torch.load(f'../results/case_study_{config_list[i-3][0]}_{config_list[i-3][1]}.pt')\n",
-      "/tmp/ipykernel_3578131/2344840747.py:40: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "/tmp/ipykernel_3613932/1967124822.py:42: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
       "  weights = torch.load(f'../results/case_study_{config_list[i-3][0]}_{config_list[i-3][1]}.pt')\n"
      ]
     },
@@ -82,6 +82,8 @@
     "import matplotlib.pyplot as plt\n",
     "from sklearn.decomposition import PCA\n",
     "from adjustText import adjust_text\n",
+    "import json\n",
+    "import torch\n",
     "\n",
     "plt.rcParams.update({'font.size': 13})\n",
     "\n",
@@ -144,6 +146,13 @@
     "plt.subplots_adjust(wspace=0.0, hspace=0.3)\n",
     "plt.savefig('../figures/circle_case_study.pdf', bbox_inches='tight')"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/notebooks/final_figures.ipynb b/notebooks/final_figures.ipynb
diff --git a/notebooks/mnist.ipynb b/notebooks/mnist.ipynb
diff --git a/scripts/HM_equiv.sh b/scripts/HM_equiv.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-#SBATCH -t 16:00:00
+#SBATCH -t 23:59:00
 #SBATCH --gres=gpu:a100:1
 
 python ../src/run_exp.py --data_id equivalence --model_id H_MLP
-
+python ../src/run_exp.py --data_id circle --model_id H_MLP
diff --git a/scripts/HM_lattice.sh b/scripts/HM_lattice.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
-#SBATCH -t 16:00:00
+#SBATCH -t 23:59:00
 #SBATCH --gres=gpu:a100:1
 
 python ../src/run_exp.py --data_id lattice --model_id H_MLP
-
+python ../src/run_exp.py --data_id family_tree --model_id H_MLP
+python ../src/run_exp.py --data_id equivalence --model_id H_MLP
+python ../src/run_exp.py --data_id circle --model_id H_MLP
diff --git a/scripts/M_equiv.sh b/scripts/M_equiv.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
-#SBATCH -t 16:00:00
+#SBATCH -t 23:59:00
 #SBATCH --gres=gpu:a100:1
 
 python ../src/run_exp.py --data_id equivalence --model_id standard_MLP
+python ../src/run_exp.py --data_id circle --model_id standard_MLP
 
diff --git a/scripts/M_lattice.sh b/scripts/M_lattice.sh
@@ -3,4 +3,7 @@
 #SBATCH --gres=gpu:a100:1
 
 python ../src/run_exp.py --data_id lattice --model_id standard_MLP
+python ../src/run_exp.py --data_id family_tree --model_id standard_MLP
+python ../src/run_exp.py --data_id equivalence --model_id standard_MLP
+python ../src/run_exp.py --data_id circle --model_id standard_MLP
 
diff --git a/src/run_exp.py b/src/run_exp.py
@@ -35,7 +35,7 @@
 
 data_size = 1000
 train_ratio = 0.8
-embd_dim = 10
+embd_dim = 16
 
 lr = 0.002
 weight_decay = 0.01
@@ -58,7 +58,7 @@
 results_root = "../results"
 
 current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-results_root = f"{results_root}/{seed}-{data_id}-{model_id}"
+results_root = f"{results_root}/{current_datetime}-{seed}-{data_id}-{model_id}"
 os.mkdir(results_root)
 
 param_dict_json = {k: v for k, v in param_dict.items() if k != 'device'} #  since torch.device is not JSON serializable
@@ -138,42 +138,42 @@
 #         json.dump(metric_dict, f, indent=4)
 
 # ## Exp3: Metric vs Train Fraction (fixed dataset size)
-# print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)")
-# train_ratio_list = np.arange(1, 10) / 10
-# data_size = 1000
-# for i in tqdm(range(len(train_ratio_list))):
-#     train_ratio = train_ratio_list[i]
-#     param_dict = {
-#         'seed': seed,
-#         'data_id': data_id,
-#         'data_size': data_size,
-#         'train_ratio': train_ratio,
-#         'model_id': model_id,
-#         'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
-#         'embd_dim': embd_dim,
-#         'n_exp': n_exp,
-#         'lr': lr,
-#         'weight_decay':weight_decay
-#     }
-#     print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
-#     ret_dic = train_single_model(param_dict)
-#     model = ret_dic['model']
-#     dataset = ret_dic['dataset']
+print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)")
+train_ratio_list = np.arange(1, 10) / 10
+data_size = 1000
+for i in tqdm(range(len(train_ratio_list))):
+    train_ratio = train_ratio_list[i]
+    param_dict = {
+        'seed': seed,
+        'data_id': data_id,
+        'data_size': data_size,
+        'train_ratio': train_ratio,
+        'model_id': model_id,
+        'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
+        'embd_dim': embd_dim,
+        'n_exp': n_exp,
+        'lr': lr,
+        'weight_decay':weight_decay
+    }
+    print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
+    ret_dic = train_single_model(param_dict)
+    model = ret_dic['model']
+    dataset = ret_dic['dataset']
 
-#     torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
-#     with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
-#         json.dump(ret_dic["results"], f, indent=4)
+    torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
+    with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
+        json.dump(ret_dic["results"], f, indent=4)
 
-#     if data_id == "family_tree":
-#         aux_info["dict_level"] = dataset['dict_level']
+    if data_id == "family_tree":
+        aux_info["dict_level"] = dataset['dict_level']
     
-#     if hasattr(model.embedding, 'weight'):
-#         metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
-#     else:
-#         metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
+    if hasattr(model.embedding, 'weight'):
+        metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
+    else:
+        metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
 
-#     with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f:
-#         json.dump(metric_dict, f, indent=4)
+    with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f:
+        json.dump(metric_dict, f, indent=4)
 
 ## Exp4: Grokking plot: Run with different seeds
 print(f"Experiment 4: Train with different seeds")
diff --git a/src/utils/driver.py b/src/utils/driver.py
@@ -80,7 +80,7 @@ def train_single_model(param_dict: dict):
         dataset = family_tree_dataset_2(p=127, num=data_size, seed=seed, device=device)
     elif data_id == "equivalence":
         input_token = 2
-        dataset = mod_equiv_dataset(p=50, num=data_size, seed=seed, device=device)
+        dataset = mod_equiv_dataset(p=40, num=data_size, seed=seed, device=device)
     elif data_id == "circle":
         dataset = modular_addition_dataset(p=31, num=data_size, seed=seed, device=device)
     elif data_id=="permutation":
diff --git a/src/utils/visualization.py b/src/utils/visualization.py