|
35 | 35 |
|
36 | 36 | data_size = 1000 |
37 | 37 | train_ratio = 0.8 |
38 | | -embd_dim = 10 |
| 38 | +embd_dim = 16 |
39 | 39 |
|
40 | 40 | lr = 0.002 |
41 | 41 | weight_decay = 0.01 |
|
58 | 58 | results_root = "../results" |
59 | 59 |
|
60 | 60 | current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") |
61 | | -results_root = f"{results_root}/{seed}-{data_id}-{model_id}" |
| 61 | +results_root = f"{results_root}/{current_datetime}-{seed}-{data_id}-{model_id}" |
62 | 62 | os.mkdir(results_root) |
63 | 63 |
|
64 | 64 | param_dict_json = {k: v for k, v in param_dict.items() if k != 'device'} # since torch.device is not JSON serializable |
|
138 | 138 | # json.dump(metric_dict, f, indent=4) |
139 | 139 |
|
140 | 140 | # ## Exp3: Metric vs Train Fraction (fixed dataset size) |
141 | | -# print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)") |
142 | | -# train_ratio_list = np.arange(1, 10) / 10 |
143 | | -# data_size = 1000 |
144 | | -# for i in tqdm(range(len(train_ratio_list))): |
145 | | -# train_ratio = train_ratio_list[i] |
146 | | -# param_dict = { |
147 | | -# 'seed': seed, |
148 | | -# 'data_id': data_id, |
149 | | -# 'data_size': data_size, |
150 | | -# 'train_ratio': train_ratio, |
151 | | -# 'model_id': model_id, |
152 | | -# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
153 | | -# 'embd_dim': embd_dim, |
154 | | -# 'n_exp': n_exp, |
155 | | -# 'lr': lr, |
156 | | -# 'weight_decay':weight_decay |
157 | | -# } |
158 | | -# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
159 | | -# ret_dic = train_single_model(param_dict) |
160 | | -# model = ret_dic['model'] |
161 | | -# dataset = ret_dic['dataset'] |
| 141 | +print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)") |
| 142 | +train_ratio_list = np.arange(1, 10) / 10 |
| 143 | +data_size = 1000 |
| 144 | +for i in tqdm(range(len(train_ratio_list))): |
| 145 | + train_ratio = train_ratio_list[i] |
| 146 | + param_dict = { |
| 147 | + 'seed': seed, |
| 148 | + 'data_id': data_id, |
| 149 | + 'data_size': data_size, |
| 150 | + 'train_ratio': train_ratio, |
| 151 | + 'model_id': model_id, |
| 152 | + 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'), |
| 153 | + 'embd_dim': embd_dim, |
| 154 | + 'n_exp': n_exp, |
| 155 | + 'lr': lr, |
| 156 | + 'weight_decay':weight_decay |
| 157 | + } |
| 158 | + print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}") |
| 159 | + ret_dic = train_single_model(param_dict) |
| 160 | + model = ret_dic['model'] |
| 161 | + dataset = ret_dic['dataset'] |
162 | 162 |
|
163 | | -# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
164 | | -# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
165 | | -# json.dump(ret_dic["results"], f, indent=4) |
| 163 | + torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt") |
| 164 | + with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f: |
| 165 | + json.dump(ret_dic["results"], f, indent=4) |
166 | 166 |
|
167 | | -# if data_id == "family_tree": |
168 | | -# aux_info["dict_level"] = dataset['dict_level'] |
| 167 | + if data_id == "family_tree": |
| 168 | + aux_info["dict_level"] = dataset['dict_level'] |
169 | 169 |
|
170 | | -# if hasattr(model.embedding, 'weight'): |
171 | | -# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
172 | | -# else: |
173 | | -# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
| 170 | + if hasattr(model.embedding, 'weight'): |
| 171 | + metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info) |
| 172 | + else: |
| 173 | + metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info) |
174 | 174 |
|
175 | | -# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f: |
176 | | -# json.dump(metric_dict, f, indent=4) |
| 175 | + with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f: |
| 176 | + json.dump(metric_dict, f, indent=4) |
177 | 177 |
|
178 | 178 | ## Exp4: Grokking plot: Run with different seeds |
179 | 179 | print(f"Experiment 4: Train with different seeds") |
|
0 commit comments