|
99 | 99 | visualize_embedding(model.embedding.data.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}", save_path=f"{results_root}/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None, color_dict = False if data_id == "permutation" else True, adjust_overlapping_text = False) |
100 | 100 |
|
101 | 101 |
|
102 | | -## Exp2: Metric vs Overall Dataset Size (fixed train-test split) |
103 | | -print(f"Experiment 2: Metric vs Overall Dataset Size (fixed train-test split)") |
104 | | -data_size_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000] |
105 | | -for i in tqdm(range(len(data_size_list))): |
106 | | - data_size = data_size_list[i] |
107 | | - param_dict = { |
108 | | - 'seed': seed, |
109 | | - 'data_id': data_id, |
110 | | - 'data_size': data_size, |
111 | | - 'train_ratio': train_ratio, |
112 | | - 'model_id': model_id, |
113 | | - 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
114 | | - 'embd_dim': embd_dim, |
115 | | - 'n_exp': n_exp, |
116 | | - 'lr': lr, |
117 | | - 'weight_decay':weight_decay |
118 | | - } |
119 | | - |
120 | | - print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
121 | | - ret_dic = train_single_model(param_dict) |
122 | | - model = ret_dic['model'] |
123 | | - dataset = ret_dic['dataset'] |
124 | | - |
125 | | - torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
126 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
127 | | - json.dump(ret_dic["results"], f, indent=4) |
| 102 | +# ## Exp2: Metric vs Overall Dataset Size (fixed train-test split) |
| 103 | +# print(f"Experiment 2: Metric vs Overall Dataset Size (fixed train-test split)") |
| 104 | +# data_size_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000] |
| 105 | +# for i in tqdm(range(len(data_size_list))): |
| 106 | +# data_size = data_size_list[i] |
| 107 | +# param_dict = { |
| 108 | +# 'seed': seed, |
| 109 | +# 'data_id': data_id, |
| 110 | +# 'data_size': data_size, |
| 111 | +# 'train_ratio': train_ratio, |
| 112 | +# 'model_id': model_id, |
| 113 | +# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
| 114 | +# 'embd_dim': embd_dim, |
| 115 | +# 'n_exp': n_exp, |
| 116 | +# 'lr': lr, |
| 117 | +# 'weight_decay':weight_decay |
| 118 | +# } |
| 119 | + |
| 120 | +# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
| 121 | +# ret_dic = train_single_model(param_dict) |
| 122 | +# model = ret_dic['model'] |
| 123 | +# dataset = ret_dic['dataset'] |
| 124 | + |
| 125 | +# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
| 126 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
| 127 | +# json.dump(ret_dic["results"], f, indent=4) |
128 | 128 |
|
129 | | - if data_id == "family_tree": |
130 | | - aux_info["dict_level"] = dataset['dict_level'] |
| 129 | +# if data_id == "family_tree": |
| 130 | +# aux_info["dict_level"] = dataset['dict_level'] |
131 | 131 |
|
132 | | - if hasattr(model.embedding, 'weight'): |
133 | | - metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
134 | | - else: |
135 | | - metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
136 | | - |
137 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f: |
138 | | - json.dump(metric_dict, f, indent=4) |
139 | | - |
140 | | -## Exp3: Metric vs Train Fraction (fixed dataset size) |
141 | | -print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)") |
142 | | -train_ratio_list = np.arange(1, 10) / 10 |
143 | | -data_size = 1000 |
144 | | -for i in tqdm(range(len(train_ratio_list))): |
145 | | - train_ratio = train_ratio_list[i] |
146 | | - param_dict = { |
147 | | - 'seed': seed, |
148 | | - 'data_id': data_id, |
149 | | - 'data_size': data_size, |
150 | | - 'train_ratio': train_ratio, |
151 | | - 'model_id': model_id, |
152 | | - 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
153 | | - 'embd_dim': embd_dim, |
154 | | - 'n_exp': n_exp, |
155 | | - 'lr': lr, |
156 | | - 'weight_decay':weight_decay |
157 | | - } |
158 | | - print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
159 | | - ret_dic = train_single_model(param_dict) |
160 | | - model = ret_dic['model'] |
161 | | - dataset = ret_dic['dataset'] |
162 | | - |
163 | | - torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
164 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
165 | | - json.dump(ret_dic["results"], f, indent=4) |
166 | | - |
167 | | - if data_id == "family_tree": |
168 | | - aux_info["dict_level"] = dataset['dict_level'] |
| 132 | +# if hasattr(model.embedding, 'weight'): |
| 133 | +# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
| 134 | +# else: |
| 135 | +# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
| 136 | + |
| 137 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f: |
| 138 | +# json.dump(metric_dict, f, indent=4) |
| 139 | + |
| 140 | +# ## Exp3: Metric vs Train Fraction (fixed dataset size) |
| 141 | +# print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)") |
| 142 | +# train_ratio_list = np.arange(1, 10) / 10 |
| 143 | +# data_size = 1000 |
| 144 | +# for i in tqdm(range(len(train_ratio_list))): |
| 145 | +# train_ratio = train_ratio_list[i] |
| 146 | +# param_dict = { |
| 147 | +# 'seed': seed, |
| 148 | +# 'data_id': data_id, |
| 149 | +# 'data_size': data_size, |
| 150 | +# 'train_ratio': train_ratio, |
| 151 | +# 'model_id': model_id, |
| 152 | +# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
| 153 | +# 'embd_dim': embd_dim, |
| 154 | +# 'n_exp': n_exp, |
| 155 | +# 'lr': lr, |
| 156 | +# 'weight_decay':weight_decay |
| 157 | +# } |
| 158 | +# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
| 159 | +# ret_dic = train_single_model(param_dict) |
| 160 | +# model = ret_dic['model'] |
| 161 | +# dataset = ret_dic['dataset'] |
| 162 | + |
| 163 | +# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
| 164 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
| 165 | +# json.dump(ret_dic["results"], f, indent=4) |
| 166 | + |
| 167 | +# if data_id == "family_tree": |
| 168 | +# aux_info["dict_level"] = dataset['dict_level'] |
169 | 169 |
|
170 | | - if hasattr(model.embedding, 'weight'): |
171 | | - metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
172 | | - else: |
173 | | - metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
| 170 | +# if hasattr(model.embedding, 'weight'): |
| 171 | +# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
| 172 | +# else: |
| 173 | +# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
174 | 174 |
|
175 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f: |
176 | | - json.dump(metric_dict, f, indent=4) |
| 175 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f: |
| 176 | +# json.dump(metric_dict, f, indent=4) |
177 | 177 |
|
178 | 178 | ## Exp4: Grokking plot: Run with different seeds |
179 | 179 | print(f"Experiment 4: Train with different seeds") |
|
215 | 215 | with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f: |
216 | 216 | json.dump(metric_dict, f, indent=4) |
217 | 217 |
|
218 | | -#Exp5: N Exponent value plot: Run with different n values, plot test accuracy vs. and explained variance vs. |
219 | | - |
220 | | -print(f"Experiment 5: Train with different exponent values") |
221 | | -n_list = np.arange(1, 17, dtype=int) |
222 | | - |
223 | | -for i in tqdm(range(len(n_list))): |
224 | | - n_exp = n_list[i] |
225 | | - data_size = 1000 |
226 | | - train_ratio = 0.8 |
227 | | - |
228 | | - param_dict = { |
229 | | - 'seed': seed, |
230 | | - 'data_id': data_id, |
231 | | - 'data_size': data_size, |
232 | | - 'train_ratio': train_ratio, |
233 | | - 'model_id': model_id, |
234 | | - 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
235 | | - 'embd_dim': embd_dim, |
236 | | - 'n_exp': n_exp |
237 | | - } |
238 | | - print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
| 218 | +# #Exp5: N Exponent value plot: Run with different n values, plot test accuracy vs. and explained variance vs. |
| 219 | + |
| 220 | +# print(f"Experiment 5: Train with different exponent values") |
| 221 | +# n_list = np.arange(1, 17, dtype=int) |
| 222 | + |
| 223 | +# for i in tqdm(range(len(n_list))): |
| 224 | +# n_exp = n_list[i] |
| 225 | +# data_size = 1000 |
| 226 | +# train_ratio = 0.8 |
| 227 | + |
| 228 | +# param_dict = { |
| 229 | +# 'seed': seed, |
| 230 | +# 'data_id': data_id, |
| 231 | +# 'data_size': data_size, |
| 232 | +# 'train_ratio': train_ratio, |
| 233 | +# 'model_id': model_id, |
| 234 | +# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
| 235 | +# 'embd_dim': embd_dim, |
| 236 | +# 'n_exp': n_exp |
| 237 | +# } |
| 238 | +# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
239 | 239 |
|
240 | | - ret_dic = train_single_model(param_dict) |
241 | | - model = ret_dic['model'] |
242 | | - dataset = ret_dic['dataset'] |
243 | | - torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
244 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
245 | | - json.dump(ret_dic["results"], f, indent=4) |
246 | | - |
247 | | - if data_id == "family_tree": |
248 | | - aux_info["dict_level"] = dataset['dict_level'] |
249 | | - |
250 | | - if hasattr(model.embedding, 'weight'): |
251 | | - metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
252 | | - else: |
253 | | - metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
254 | | - |
255 | | - with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f: |
256 | | - json.dump(metric_dict, f, indent=4) |
| 240 | +# ret_dic = train_single_model(param_dict) |
| 241 | +# model = ret_dic['model'] |
| 242 | +# dataset = ret_dic['dataset'] |
| 243 | +# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
| 244 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
| 245 | +# json.dump(ret_dic["results"], f, indent=4) |
| 246 | + |
| 247 | +# if data_id == "family_tree": |
| 248 | +# aux_info["dict_level"] = dataset['dict_level'] |
| 249 | + |
| 250 | +# if hasattr(model.embedding, 'weight'): |
| 251 | +# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
| 252 | +# else: |
| 253 | +# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
| 254 | + |
| 255 | +# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f: |
| 256 | +# json.dump(metric_dict, f, indent=4) |
257 | 257 |
|
0 commit comments