-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtraining.py
More file actions
34 lines (29 loc) · 1.12 KB
/
training.py
File metadata and controls
34 lines (29 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
""" Tiny Word Embedding Trainer (inputs-only update) """
import numpy as np
# --- Vocabulary and Corpus ---
vocab = [
    "apple", "mango", "orange", "fruit",
    "broccoli", "potato", "spinach", "vegetable",
]
# (input word, target word) training pairs. Only the input word's embedding
# is updated for each pair, so "fruit" and "vegetable" (which appear only as
# targets) keep their random initial vectors.
pairs = [
    ("apple", "fruit"),
    ("mango", "fruit"),
    ("orange", "fruit"),
    ("broccoli", "vegetable"),
    ("potato", "vegetable"),
    ("spinach", "vegetable"),
]

# --- Hyperparameters ---
V = len(vocab)  # vocabulary size
N = 2           # embedding dimensions
epochs = 20     # training epochs
lr = 0.05       # learning rate

# --- Initialize embeddings randomly ---
one_hot = np.eye(V)                   # row k is the one-hot vector of vocab[k]
W = np.random.uniform(-1, 1, (V, N))  # one N-dimensional embedding per word

# Word -> row index, built once so the training loop does not rescan `vocab`
# with list.index() for every pair in every epoch.
index_of = {word: k for k, word in enumerate(vocab)}

# --- Training Loop ---
for epoch in range(epochs):
    for w1, w2 in pairs:
        i, j = index_of[w1], index_of[w2]
        h = W[i]                    # embedding of input word (one_hot[i] @ W)
        z = h @ W.T                 # raw scores for all vocab words, shape (V,)
        error = z - one_hot[j]      # simple prediction error vs. one-hot target
        # Gradient of 0.5 * ||z - one_hot[j]||^2 with respect to h, treating
        # the scoring matrix as fixed; only the input word's row is changed.
        W[i] -= lr * (W.T @ error)

print(W)  # final embeddings