forked from karpathy/nanochat
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalohai.yaml
More file actions
113 lines (107 loc) · 3.44 KB
/
valohai.yaml
File metadata and controls
113 lines (107 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
- step:
    # CPU-only demo pipeline: tokenizer -> tiny 4-layer base model -> midtraining -> SFT -> report.
    # Results are intentionally poor; this step exists to exercise every code path on cheap hardware.
    name: runcpu
    image: docker.io/valohai/nanochat:0.0.2
    environment: aws-eu-west-1-c5-24xlarge
    inputs:
      # Pre-packaged evaluation data, downloaded by Valohai before the commands run.
      - name: eval-bundle
        default: https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
    command:
      - uv venv
      - uv sync --extra cpu
      - source .venv/bin/activate
      # Route all nanochat artifacts into Valohai's output directory so they are persisted.
      - export NANOCHAT_BASE_DIR="/valohai/outputs/"
      # - bash speedrun.sh
      - python -m nanochat.report reset
      # train tokenizer on ~1B characters
      - python -m nanochat.dataset -n 4
      - python -m scripts.tok_train --max_chars=1000000000
      - python -m scripts.tok_eval
      # train a very small 4 layer model on the CPU
      # each optimization step processes a single sequence of 1024 tokens
      # we only run 50 steps of optimization (bump this to get better results)
      # {parameters} expands to CLI flags built from the `parameters` section below.
      - python -m scripts.base_train {parameters}
      - python -m scripts.base_loss --device_batch_size=1 --split_tokens=4096
      - python -m scripts.base_eval --max-per-task=16
      # midtraining
      - python -m scripts.mid_train --max_seq_len=1024 --device_batch_size=1 --eval_every=50 --eval_tokens=4096 --total_batch_size=1024 --num_iterations=100
      # eval results will be terrible, this is just to execute the code paths.
      # note that we lower the execution memory limit to 1MB to avoid warnings on smaller systems
      # NOTE(review): no flag on the chat_eval line below actually sets a memory limit —
      # confirm whether a flag was dropped, or delete the comment above.
      - python -m scripts.chat_eval --source=mid --max-new-tokens=128 --max-problems=20
      # SFT
      - python -m scripts.chat_sft --device_batch_size=1 --target_examples_per_step=4 --num_iterations=100 --eval_steps=4 --eval_metrics_max_problems=16
      # Chat CLI
      # python -m scripts.chat_cli -p "Why is the sky blue?"
      # Chat Web
      # python -m scripts.chat_web
      - python -m nanochat.report generate
    # These feed {parameters} in the base_train command; the later stages (mid_train,
    # chat_sft) hardcode their own values and ignore this section.
    parameters:
      - name: depth
        type: integer
        default: 4
      - name: max_seq_len
        type: integer
        default: 1024
      - name: device_batch_size
        type: integer
        default: 1
      - name: total_batch_size
        type: integer
        default: 1024
      - name: eval_every
        type: integer
        default: 50
      - name: eval_tokens
        type: integer
        default: 4096
      - name: core_metric_every
        type: integer
        default: 50
      - name: core_metric_max_per_task
        type: integer
        default: 12
      - name: sample_every
        type: integer
        default: 50
      - name: num_iterations
        type: integer
        default: 50
- step:
    # Same image/environment as `runcpu`, but delegates the whole pipeline to speedrun.sh.
    # Renamed from the duplicate "runcpu": step names must be unique within a valohai.yaml,
    # otherwise the two definitions collide.
    name: runcpu-speedrun
    image: docker.io/valohai/nanochat:0.0.2
    environment: aws-eu-west-1-c5-24xlarge
    inputs:
      # Pre-packaged evaluation data, downloaded by Valohai before the commands run.
      - name: eval-bundle
        default: https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
    command:
      - bash ./speedrun.sh
    # NOTE(review): {parameters} is never interpolated in the command above, so these
    # values are not passed to speedrun.sh — confirm whether they are intentional
    # (e.g. kept only for UI visibility) or dead configuration.
    parameters:
      - name: depth
        type: integer
        default: 4
      - name: max_seq_len
        type: integer
        default: 1024
      - name: device_batch_size
        type: integer
        default: 1
      - name: total_batch_size
        type: integer
        default: 1024
      - name: eval_every
        type: integer
        default: 50
      - name: eval_tokens
        type: integer
        default: 4096
      - name: core_metric_every
        type: integer
        default: 50
      - name: core_metric_max_per_task
        type: integer
        default: 12
      - name: sample_every
        type: integer
        default: 50
      - name: num_iterations
        type: integer
        default: 50