
Commit c4dac27
Updated visualization notebook
1 parent 456ea77 commit c4dac27

File tree

6 files changed: +950 −18 lines changed


notebooks/plot_runs.ipynb

Lines changed: 915 additions & 4 deletions
Large diffs are not rendered by default.

scripts/u_circle_new.sh

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+#!/bin/bash
+#SBATCH -t 16:00:00
+#SBATCH -p tegmark
+#SBATCH --gres=gpu:a100:1
+
+for ARG in $(python -c "import numpy as np; print(' '.join(map(str, np.linspace(0, 1000, 20, dtype=int))))"); do
+    echo "Running with seed $ARG:"
+    python ../src/unit_exp.py --data_id circle --model_id H_transformer --seed $ARG
+    echo
+done

scripts/u_family_new.sh

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+#!/bin/bash
+#SBATCH -t 16:00:00
+#SBATCH --gres=gpu:a100:1
+
+for ARG in $(python -c "import numpy as np; print(' '.join(map(str, np.linspace(0, 1000, 20, dtype=int))))"); do
+    echo "Running with seed $ARG:"
+    python ../src/unit_exp.py --data_id family_tree --model_id H_transformer --seed $ARG
+    echo
+done
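Both new scripts drive the same 20-seed sweep; the seed list comes from an inline NumPy one-liner. As a quick sanity check of what that expression expands to (an illustrative sketch, not part of the commit):

```python
import numpy as np

# np.linspace(0, 1000, 20, dtype=int) takes 20 evenly spaced floats over
# [0, 1000] (step 1000/19 ~ 52.6) and truncates them to integers, so the
# seeds are 0, 52, 105, ..., 947, 1000 -- deterministic and duplicate-free.
seeds = np.linspace(0, 1000, 20, dtype=int)
print(" ".join(map(str, seeds)))  # exactly the string the bash for-loop iterates over
```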

src/unit_exp.py

Lines changed: 6 additions & 6 deletions
@@ -51,15 +51,15 @@
 print(f"Experiment 1: Visualize Embeddings")
 model = ret_dic['model']
 dataset = ret_dic['dataset']
-#torch.save(model.state_dict(), f"../results/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.pt")
+torch.save(model.state_dict(), f"../results/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_d=sqrtembed_1.pt")
 
 if hasattr(model.embedding, 'weight'):
-    visualize_embedding(model.embedding.weight.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}", save_path=f"../results/unit_tests/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None)
+    visualize_embedding(model.embedding.weight.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}", save_path=f"../results/unit_tests/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_new.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None)
 else:
-    visualize_embedding(model.embedding.data.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}", save_path=f"../results/unit_tests/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None)
+    visualize_embedding(model.embedding.data.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}", save_path=f"../results/unit_tests/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_new.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None)
 
-with open(f"../results/unit_tests/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_train_results.json", "w") as f:
-    json.dump(ret_dic["results"], f, indent=4)
+with open(f"../results/unit_tests/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_train_results_new.json", "w") as f:
+    json.dump(ret_dic["results"], f, indent=4)
 
 aux_info = {}
 if data_id == "lattice":
@@ -80,6 +80,6 @@
 else:
     metric_dict = crystal_metric(model.embedding.data.cpu(), data_id, aux_info)
 
-with open(f"../results/unit_tests/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.json", "w") as f:
+with open(f"../results/unit_tests/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}_new.json", "w") as f:
     json.dump(metric_dict, f, indent=4)
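With this change the run artifacts move to new names: checkpoints gain a `_d=sqrtembed_1` suffix and the PNG/JSON outputs gain `_new`, so any downstream consumer (e.g. the updated plotting notebook) must build paths the same way. A minimal loading sketch, where the concrete hyperparameter values are placeholders (assumptions, not taken from this diff):

```python
import torch

# Placeholder values -- substitute the ones used for the run of interest.
seed, data_id, model_id, data_size, train_ratio = 0, "circle", "H_transformer", 384, 0.8
run = f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}"

# Checkpoints are now saved with the "_d=sqrtembed_1" suffix:
state_dict = torch.load(f"../results/{run}_d=sqrtembed_1.pt", map_location="cpu")
# model.load_state_dict(state_dict)  # after re-instantiating the model as in driver.py
```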

src/utils/driver.py

Lines changed: 6 additions & 4 deletions
@@ -4,6 +4,8 @@
 from src.utils.model import *
 import os
 
+import numpy as np
+
 def set_seed(seed: int) -> None:
     """
     Sets the seed to make everything deterministic, for reproducibility of experiments
@@ -85,17 +87,17 @@ def train_single_model(param_dict: dict):
         weight_tied = True
         hidden_size = 100
         shp = [input_token * embd_dim, hidden_size, embd_dim, vocab_size]
-        model = MLP_HS(shp=shp, vocab_size=vocab_size, embd_dim=embd_dim, input_token=input_token, weight_tied=weight_tied, seed=seed).to(device)
+        model = MLP_HS(shp=shp, vocab_size=vocab_size, embd_dim=embd_dim, input_token=input_token, weight_tied=weight_tied, seed=seed, n=np.sqrt(embd_dim), init_scale=1).to(device)
     elif model_id == "standard_MLP":
         unembd = True
         weight_tied = True
         hidden_size = 100
         shp = [input_token * embd_dim, hidden_size, embd_dim, vocab_size]
-        model = MLP(shp=shp, vocab_size=vocab_size, embd_dim=embd_dim, input_token=input_token, unembd=unembd, weight_tied=weight_tied, seed=seed).to(device)
+        model = MLP(shp=shp, vocab_size=vocab_size, embd_dim=embd_dim, input_token=input_token, unembd=unembd, weight_tied=weight_tied, seed=seed, init_scale=1).to(device)
     elif model_id == "H_transformer":
-        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, seq_len=input_token, seed=seed, use_dist_layer=True).to(device)
+        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, n_dist=np.sqrt(embd_dim), seq_len=input_token, seed=seed, use_dist_layer=True, init_scale=1).to(device)
     elif model_id == "standard_transformer":
-        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, seq_len=input_token, seed=seed, use_dist_layer=False).to(device)
+        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, seq_len=input_token, seed=seed, use_dist_layer=False, init_scale=1).to(device)
     else:
         raise ValueError(f"Unknown model_id: {model_id}")

src/utils/model.py

Lines changed: 4 additions & 4 deletions
@@ -212,7 +212,7 @@ def __init__(self, shp, vocab_size, embd_dim, input_token=2, init_scale=1., weig
             linear_list.append(DistLayer(shp[i], shp[i+1], n=n))
 
         self.embedding = nn.Embedding(vocab_size, embd_dim)
-        nn.init.normal_(self.embedding.weight, mean=0, std=1/np.sqrt(embd_dim))
+        nn.init.normal_(self.embedding.weight, mean=0, std=1/np.sqrt(embd_dim)*init_scale)
         #self.embedding = torch.nn.Parameter(torch.normal(0,1/torch.tensor(embd_dim),size=(vocab_size, embd_dim))*init_scale)
         # self.embedding = torch.nn.Parameter(torch.normal(0,1,size=(vocab_size, embd_dim))*init_scale)
         self.linears = nn.ModuleList(linear_list)
@@ -256,15 +256,15 @@ def pred_logit(self, x):
 
 # 2-Layer Transformer Model with Explicit Residual Connections
 class ToyTransformer(customNNModule):
-    def __init__(self, vocab_size, d_model, nhead, num_layers, seq_len=16, use_dist_layer=False, seed=0):
+    def __init__(self, vocab_size, d_model, nhead, num_layers, seq_len=16, init_scale=1., use_dist_layer=False, seed=0, n_dist=1.):
         super(ToyTransformer, self).__init__()
 
         torch.manual_seed(seed)
         np.random.seed(seed)
 
 
         self.embedding = nn.Embedding(vocab_size, d_model)
-        nn.init.normal_(self.embedding.weight, mean=0, std=1/np.sqrt(d_model))
+        nn.init.normal_(self.embedding.weight, mean=0, std=1/np.sqrt(d_model)*init_scale)
         self.positional_encoding = nn.Parameter(torch.randn(seq_len, d_model))
 
         # Define transformer encoder layers
@@ -275,7 +275,7 @@ def __init__(self, vocab_size, d_model, nhead, num_layers, seq_len = 16, use_dis
         ])
         self.use_dist_layer = use_dist_layer
         if use_dist_layer:
-            self.dist = DistLayer(d_model, vocab_size, n=1., eps=1e-4, bias=False)
+            self.dist = DistLayer(d_model, vocab_size, n=n_dist, eps=1e-4, bias=False)
         self.fc = nn.Linear(d_model, vocab_size)
         self.vocab_size = vocab_size
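The common thread in both hunks is that `init_scale` now actually reaches the embedding initializer: the std becomes `init_scale / sqrt(d_model)` instead of a fixed `1 / sqrt(d_model)` (so `init_scale=1`, as passed by driver.py, preserves the old behavior). A quick empirical check of the scaling (a standalone sketch, not repo code):

```python
import numpy as np
import torch
import torch.nn as nn

d_model, vocab_size, init_scale = 64, 100, 1.0

emb = nn.Embedding(vocab_size, d_model)
nn.init.normal_(emb.weight, mean=0, std=1 / np.sqrt(d_model) * init_scale)

# Empirical std should be close to init_scale / sqrt(d_model) = 0.125 here.
print(emb.weight.std().item())
```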
