Commit 3ccb489
Minor tweaks
1 parent f1e99d3 commit 3ccb489

5 files changed: +25 -17 lines changed


scripts/u_circle.sh

Lines changed: 6 additions & 6 deletions
@@ -1,10 +1,10 @@
 #!/bin/bash
 #SBATCH -t 16:00:00
-#SBATCH --gres=gpu:1
+#SBATCH --gres=gpu:a100:1
 #SBATCH -n 16

-python ../src/unit_exp.py --data_id circle --model_id standard_transformer
-python ../src/unit_exp.py --data_id circle --model_id H_transformer
-python ../src/unit_exp.py --data_id circle --model_id standard_MLP
-python ../src/unit_exp.py --data_id circle --model_id H_MLP
-
+for ARG in $(python -c "import numpy as np; print(' '.join(map(str, np.linspace(49, 100, 20, dtype=int))))"); do
+    echo "Running with seed $ARG:"
+    python ../src/unit_exp.py --data_id circle --model_id H_transformer --seed $ARG
+    echo
+done
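For reference, the new loop sweeps 20 integer seeds between 49 and 100. A standalone sketch (not part of the commit) that re-runs the inline expression the script embeds, to preview the seed list:

    import numpy as np

    # dtype=int truncates the evenly spaced floats, so this prints
    # 49 51 54 57 ... 97 100 -- one value per run in the sweep
    seeds = np.linspace(49, 100, 20, dtype=int)
    print(' '.join(map(str, seeds)))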

src/run_exp.py

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@
 model_id_choices = ["H_MLP", "standard_MLP", "H_transformer", "standard_transformer"]
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Experiment')
-    parser.add_argument('--seed', type=int, default=29, help='random seed')
+    parser.add_argument('--seed', type=int, default=49, help='random seed')
     parser.add_argument('--data_id', type=str, required=True, choices=data_id_choices, help='Data ID')
     parser.add_argument('--model_id', type=str, required=True, choices=model_id_choices, help='Model ID')

@@ -74,7 +74,7 @@

     ## Exp2: Metric vs Overall Dataset Size (fixed train-test split)
     print(f"Experiment 2: Metric vs Overall Dataset Size (fixed train-test split)")
-    data_size_list = [400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
+    data_size_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
     for i in tqdm(range(len(data_size_list))):
         data_size = data_size_list[i]
         param_dict = {
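With the parser above, the script can be invoked the same way u_circle.sh drives unit_exp.py; the seed flag is optional and now defaults to 49. An illustrative invocation (not from the commit), run from the scripts/ directory:

    python ../src/run_exp.py --data_id circle --model_id H_transformer --seed 49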

src/unit_exp.py

Lines changed: 2 additions & 2 deletions
@@ -51,7 +51,7 @@
 print(f"Experiment 1: Visualize Embeddings")
 model = ret_dic['model']
 dataset = ret_dic['dataset']
-torch.save(model.state_dict(), f"../results/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.pt")
+#torch.save(model.state_dict(), f"../results/{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.pt")

 if hasattr(model.embedding, 'weight'):
     visualize_embedding(model.embedding.weight.cpu(), title=f"{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}", save_path=f"../results/unit_tests/emb_{seed}_{data_id}_{model_id}_{data_size}_{train_ratio}.png", dict_level = dataset['dict_level'] if 'dict_level' in dataset else None)

@@ -71,7 +71,7 @@
 elif data_id == "equivalence":
     aux_info["mod"] = 5
 elif data_id == "circle":
-    aux_info["p"] = 31
+    aux_info["p"] = 17
 else:
     raise ValueError(f"Unknown data_id: {data_id}")

src/utils/driver.py

Lines changed: 4 additions & 4 deletions
@@ -91,21 +91,21 @@ def train_single_model(param_dict: dict):
         shp = [input_token * embd_dim, hidden_size, embd_dim, vocab_size]
         model = MLP(shp=shp, vocab_size=vocab_size, embd_dim=embd_dim, input_token=input_token, unembd=unembd, weight_tied=weight_tied, seed=seed).to(device)
     elif model_id == "H_transformer":
-        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=8, num_layers=1, seq_len=input_token, use_dist_layer=True).to(device)
+        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, seq_len=input_token, seed=seed, use_dist_layer=True).to(device)
     elif model_id == "standard_transformer":
-        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=8, num_layers=1, seq_len=input_token, use_dist_layer=False).to(device)
+        model = ToyTransformer(vocab_size=vocab_size, d_model=embd_dim, nhead=2, num_layers=2, seq_len=input_token, seed=seed, use_dist_layer=False).to(device)
     else:
         raise ValueError(f"Unknown model_id: {model_id}")

     # define dataloader
-    batch_size = 16
+    batch_size = 32
     train_dataset = ToyDataset(dataset['train_data_id'], dataset['train_label'])
     test_dataset = ToyDataset(dataset['test_data_id'], dataset['test_label'])
     train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
     test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

     ret_dic = {}
-    ret_dic["results"] = model.train(param_dict={'num_epochs': 4000, 'learning_rate': 0.001, 'train_dataloader': train_dataloader, 'test_dataloader': test_dataloader, 'device': device})
+    ret_dic["results"] = model.train(param_dict={'num_epochs': 7000, 'learning_rate': 0.002, 'train_dataloader': train_dataloader, 'test_dataloader': test_dataloader, 'device': device})
     ret_dic["model"] = model
     ret_dic["dataset"] = dataset

src/utils/model.py

Lines changed: 11 additions & 3 deletions
@@ -5,6 +5,8 @@
 import numpy as np
 import math

+import sys
+
 from tqdm import tqdm

 class customNNModule(nn.Module):

@@ -34,7 +36,7 @@ def train(self, param_dict: dict):
        counter = 0

        optimizer = optim.AdamW(self.parameters(), lr=learning_rate, weight_decay=0.01)
-        lamb_reg = 0.1
+        lamb_reg = 0.01
        for epoch in tqdm(range(num_epochs)):
            train_loss = 0
            train_correct = 0

@@ -86,6 +88,7 @@ def train(self, param_dict: dict):

            if (epoch + 1) % 50 == 0 and verbose:
                print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss / len(train_dataloader):.4f}, Train Acc: {train_correct / train_total:.4f}, Test Loss: {test_loss / len(test_dataloader):.4f}, Test Acc: {test_correct / test_total:.4f}")
+                sys.stdout.flush()

            train_losses.append(train_loss / len(train_dataloader))
            test_losses.append(test_loss / len(test_dataloader))

@@ -252,16 +255,21 @@ def pred_logit(self, x):

 # 2-Layer Transformer Model with Explicit Residual Connections
 class ToyTransformer(customNNModule):
-    def __init__(self, vocab_size, d_model, nhead, num_layers, seq_len = 16, use_dist_layer = False):
+    def __init__(self, vocab_size, d_model, nhead, num_layers, seq_len = 16, use_dist_layer = False, seed=0):
         super(ToyTransformer, self).__init__()
+
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+
+
         self.embedding = nn.Embedding(vocab_size, d_model)
         nn.init.normal_(self.embedding.weight, mean=0, std=1/np.sqrt(d_model))
         self.positional_encoding = nn.Parameter(torch.randn(seq_len, d_model))

         # Define transformer encoder layers
         self.layers = nn.ModuleList([
             nn.TransformerEncoderLayer(
-                d_model=d_model, nhead=nhead, dim_feedforward=64, batch_first=True
+                d_model=d_model, nhead=nhead, dim_feedforward=d_model*4, batch_first=True
             ) for _ in range(num_layers)
         ])
         self.use_dist_layer = use_dist_layer