Skip to content

Commit b9fe0c2

Browse files
committed
Comment out experiments (Exp2, Exp3, Exp5) to run on freja
1 parent bacc7f3 commit b9fe0c2

File tree

1 file changed

+109
-109
lines changed

1 file changed

+109
-109
lines changed

src/run_exp.py

Lines changed: 109 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -99,81 +99,81 @@
9999
visualize_embedding(model.embedding.data.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}", save_path=f"{results_root}/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None, color_dict = False if data_id == "permutation" else True, adjust_overlapping_text = False)
100100

101101

102-
## Exp2: Metric vs Overall Dataset Size (fixed train-test split)
103-
print(f"Experiment 2: Metric vs Overall Dataset Size (fixed train-test split)")
104-
data_size_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
105-
for i in tqdm(range(len(data_size_list))):
106-
data_size = data_size_list[i]
107-
param_dict = {
108-
'seed': seed,
109-
'data_id': data_id,
110-
'data_size': data_size,
111-
'train_ratio': train_ratio,
112-
'model_id': model_id,
113-
'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
114-
'embd_dim': embd_dim,
115-
'n_exp': n_exp,
116-
'lr': lr,
117-
'weight_decay':weight_decay
118-
}
119-
120-
print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
121-
ret_dic = train_single_model(param_dict)
122-
model = ret_dic['model']
123-
dataset = ret_dic['dataset']
124-
125-
torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
126-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
127-
json.dump(ret_dic["results"], f, indent=4)
102+
# ## Exp2: Metric vs Overall Dataset Size (fixed train-test split)
103+
# print(f"Experiment 2: Metric vs Overall Dataset Size (fixed train-test split)")
104+
# data_size_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
105+
# for i in tqdm(range(len(data_size_list))):
106+
# data_size = data_size_list[i]
107+
# param_dict = {
108+
# 'seed': seed,
109+
# 'data_id': data_id,
110+
# 'data_size': data_size,
111+
# 'train_ratio': train_ratio,
112+
# 'model_id': model_id,
113+
# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
114+
# 'embd_dim': embd_dim,
115+
# 'n_exp': n_exp,
116+
# 'lr': lr,
117+
# 'weight_decay':weight_decay
118+
# }
119+
120+
# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
121+
# ret_dic = train_single_model(param_dict)
122+
# model = ret_dic['model']
123+
# dataset = ret_dic['dataset']
124+
125+
# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
126+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
127+
# json.dump(ret_dic["results"], f, indent=4)
128128

129-
if data_id == "family_tree":
130-
aux_info["dict_level"] = dataset['dict_level']
129+
# if data_id == "family_tree":
130+
# aux_info["dict_level"] = dataset['dict_level']
131131

132-
if hasattr(model.embedding, 'weight'):
133-
metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
134-
else:
135-
metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
136-
137-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f:
138-
json.dump(metric_dict, f, indent=4)
139-
140-
## Exp3: Metric vs Train Fraction (fixed dataset size)
141-
print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)")
142-
train_ratio_list = np.arange(1, 10) / 10
143-
data_size = 1000
144-
for i in tqdm(range(len(train_ratio_list))):
145-
train_ratio = train_ratio_list[i]
146-
param_dict = {
147-
'seed': seed,
148-
'data_id': data_id,
149-
'data_size': data_size,
150-
'train_ratio': train_ratio,
151-
'model_id': model_id,
152-
'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
153-
'embd_dim': embd_dim,
154-
'n_exp': n_exp,
155-
'lr': lr,
156-
'weight_decay':weight_decay
157-
}
158-
print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
159-
ret_dic = train_single_model(param_dict)
160-
model = ret_dic['model']
161-
dataset = ret_dic['dataset']
162-
163-
torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
164-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
165-
json.dump(ret_dic["results"], f, indent=4)
166-
167-
if data_id == "family_tree":
168-
aux_info["dict_level"] = dataset['dict_level']
132+
# if hasattr(model.embedding, 'weight'):
133+
# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
134+
# else:
135+
# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
136+
137+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f:
138+
# json.dump(metric_dict, f, indent=4)
139+
140+
# ## Exp3: Metric vs Train Fraction (fixed dataset size)
141+
# print(f"Experiment 3: Metric vs Train Fraction (fixed dataset size)")
142+
# train_ratio_list = np.arange(1, 10) / 10
143+
# data_size = 1000
144+
# for i in tqdm(range(len(train_ratio_list))):
145+
# train_ratio = train_ratio_list[i]
146+
# param_dict = {
147+
# 'seed': seed,
148+
# 'data_id': data_id,
149+
# 'data_size': data_size,
150+
# 'train_ratio': train_ratio,
151+
# 'model_id': model_id,
152+
# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
153+
# 'embd_dim': embd_dim,
154+
# 'n_exp': n_exp,
155+
# 'lr': lr,
156+
# 'weight_decay':weight_decay
157+
# }
158+
# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
159+
# ret_dic = train_single_model(param_dict)
160+
# model = ret_dic['model']
161+
# dataset = ret_dic['dataset']
162+
163+
# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
164+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
165+
# json.dump(ret_dic["results"], f, indent=4)
166+
167+
# if data_id == "family_tree":
168+
# aux_info["dict_level"] = dataset['dict_level']
169169

170-
if hasattr(model.embedding, 'weight'):
171-
metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
172-
else:
173-
metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
170+
# if hasattr(model.embedding, 'weight'):
171+
# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
172+
# else:
173+
# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
174174

175-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f:
176-
json.dump(metric_dict, f, indent=4)
175+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_metric.json", "w") as f:
176+
# json.dump(metric_dict, f, indent=4)
177177

178178
## Exp4: Grokking plot: Run with different seeds
179179
print(f"Experiment 4: Train with different seeds")
@@ -215,43 +215,43 @@
215215
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f:
216216
json.dump(metric_dict, f, indent=4)
217217

218-
#Exp5: N Exponent value plot: Run with different n values, plot test accuracy vs. n and explained variance vs. n.
219-
220-
print(f"Experiment 5: Train with different exponent values")
221-
n_list = np.arange(1, 17, dtype=int)
222-
223-
for i in tqdm(range(len(n_list))):
224-
n_exp = n_list[i]
225-
data_size = 1000
226-
train_ratio = 0.8
227-
228-
param_dict = {
229-
'seed': seed,
230-
'data_id': data_id,
231-
'data_size': data_size,
232-
'train_ratio': train_ratio,
233-
'model_id': model_id,
234-
'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
235-
'embd_dim': embd_dim,
236-
'n_exp': n_exp
237-
}
238-
print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
218+
# #Exp5: N Exponent value plot: Run with different n values, plot test accuracy vs. n and explained variance vs. n.
219+
220+
# print(f"Experiment 5: Train with different exponent values")
221+
# n_list = np.arange(1, 17, dtype=int)
222+
223+
# for i in tqdm(range(len(n_list))):
224+
# n_exp = n_list[i]
225+
# data_size = 1000
226+
# train_ratio = 0.8
227+
228+
# param_dict = {
229+
# 'seed': seed,
230+
# 'data_id': data_id,
231+
# 'data_size': data_size,
232+
# 'train_ratio': train_ratio,
233+
# 'model_id': model_id,
234+
# 'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
235+
# 'embd_dim': embd_dim,
236+
# 'n_exp': n_exp
237+
# }
238+
# print(f"Training model with seed {seed}, data_id {data_id}, model_id {model_id}, n_exp {n_exp}, embd_dim {embd_dim}")
239239

240-
ret_dic = train_single_model(param_dict)
241-
model = ret_dic['model']
242-
dataset = ret_dic['dataset']
243-
torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
244-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
245-
json.dump(ret_dic["results"], f, indent=4)
246-
247-
if data_id == "family_tree":
248-
aux_info["dict_level"] = dataset['dict_level']
249-
250-
if hasattr(model.embedding, 'weight'):
251-
metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
252-
else:
253-
metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
254-
255-
with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f:
256-
json.dump(metric_dict, f, indent=4)
240+
# ret_dic = train_single_model(param_dict)
241+
# model = ret_dic['model']
242+
# dataset = ret_dic['dataset']
243+
# torch.save(model.state_dict(), f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.pt")
244+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}_train_results.json", "w") as f:
245+
# json.dump(ret_dic["results"], f, indent=4)
246+
247+
# if data_id == "family_tree":
248+
# aux_info["dict_level"] = dataset['dict_level']
249+
250+
# if hasattr(model.embedding, 'weight'):
251+
# metric_dict = crystal_metric(model.embedding.weight.cpu().detach(), data_id, aux_info)
252+
# else:
253+
# metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
254+
255+
# with open(f"{results_root}/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_{n_exp}.json", "w") as f:
256+
# json.dump(metric_dict, f, indent=4)
257257

0 commit comments

Comments
 (0)