Skip to content

Commit 695a7ee

Browse files
committed
update ctr config
1 parent 02b8e0b commit 695a7ee

File tree

10 files changed

+123
-66
lines changed

10 files changed

+123
-66
lines changed

docker/local.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ shift
3737

3838
port=8000
3939
bin="muli/parameter-server /build/ps"
40-
bin_v="-v /home/muli/work/ps/build:/build"
40+
# bin_v="-v /home/muli/work/ps/build:/build"
4141
app_v="-v $app:/app.conf"
4242
data_v="-v $data:/data -v $model:/model"
4343
mount="$bin_v $app_v $data_v"

docker/rm_local.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
2+
docker rm -f $(docker ps -a -q)

example/linear/README.org

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
1-
Examples run applications on data RCV1.
1+
* Sample configurations to run linear method
22

3-
First, download the dataset by =./rcv1_small.sh= or =./rcv1_large.sh=
3+
** Data
44

5-
Then we can run the system by either building the binary or downloading a docker images.
5+
Use scripts such as =rcv1/download.sh= and =ctr/download.sh= to download the data.
66

7-
*Run in local*
7+
** Run in local machine
8+
9+
The system can be run by either building the binary or downloading a docker image.
10+
11+
*by building the binary*
812

913
Use the binary you compiled to run l1-regularized logistic regression:
1014

1115
#+BEGIN_SRC bash
12-
# block coordinate descent with 1 server and 4 workers:
13-
../../../script/local.sh ../../../build/ps 1 4 -app_file batch_l1lr.conf
14-
# online gradient descent with 1 server and 4 workers:
15-
../../../script/local.sh ../../../build/ps 1 4 -app_file online_l1lr.conf
16-
# test the trained models
17-
../../../script/local.sh ../../../build/ps 0 0 -app_file eval_batch.conf
18-
../../../script/local.sh ../../../build/ps 0 0 -app_file eval_online.conf
16+
# run block coordinate descent with 2 servers and 2 workers:
17+
../../script/local.sh ../../build/ps 2 2 -app_file ctr/batch_l1lr.conf
18+
# evaluate the model
19+
../../script/local.sh ../../build/ps 0 0 -app_file ctr/eval_batch.conf
1920
#+END_SRC
2021

21-
Or run the same application by [[www.docker.com][docker:]]
22+
*by [[www.docker.com][docker]]*
2223

2324
#+BEGIN_SRC bash
24-
sudo ../../../docker/local.sh 1 1 batch_l1lr.conf data model
25+
sudo ../../docker/local.sh 2 2 ctr/batch_l1lr.conf data model
2526
#+END_SRC

example/linear/ctr/batch_l1lr.conf

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,19 @@
1-
#j configuration to run l1-regularized logistic regression on the ctr dataset
21
linear_method {
32

43
training_data {
54
format: TEXT
65
text: SPARSE_BINARY
7-
# file: "data/train/part.*"
8-
file: "/home/muli/work/data/ctra/train/part.*"
9-
}
6+
file: "data/ctr/train/part.*"
107

11-
# training_data {
12-
# format: TEXT
13-
# text: ADFEA
14-
# max_num_files_per_worker: 10
15-
# # file: "/user/muli/ctrb/part.*"
16-
# # hdfs {
17-
# # # "which hadoop" returns /usr/bin/hadoop
18-
# # home: "/usr"
19-
# # }
20-
# file: "/home/muli/work/data/ctrd/part.*"
8+
# If the data is placed on hdfs and HADOOP_HOME="/usr"
9+
# hdfs {
10+
# home: "/usr"
2111
# }
12+
}
2213

2314
model_output {
2415
format: TEXT
25-
file: "model/ctr_batch_l1lr"
16+
file: "model/ctr_batch"
2617
}
2718

2819
loss {
@@ -52,52 +43,57 @@ epsilon : 2e-5
5243
# blocks. A larger ratio often accelerates convergence; however, it may slow
5344
# down the system performance because of the increased number of global barriers.
5445
feature_block_ratio : 4
46+
5547
# The maximal number of blocks that can be updated in parallel (bounded-delay
5648
# consistency). A larger delay may slow down the convergence rate, but improves
5749
# the system performance.
5850
max_block_delay: 8
5951

6052
# important feature groups, update them earlier to get a better model
6153
# initialization.
62-
prior_fea_group: 127 # the bias feature (all one)
63-
prior_fea_group: 120 # the position rank feature
54+
prior_fea_group: 127
55+
prior_fea_group: 120
6456

6557
# Features which occur <= *tail_feature_freq* times will be filtered before
6658
# training. It saves both memory and bandwidth.
67-
tail_feature_freq: 10
59+
tail_feature_freq: 4
60+
6861
# It controls the countmin size. We filter the tail features by countmin, which
6962
# is more efficient than hash, but still is the memory bottleneck for servers. A
7063
# smaller ratio reduces the memory footprint, but may increase the size of
7164
# filtered feature.
65+
7266
countmin_n_ratio: .66
7367

7468
# In preprocessing, feature groups are processed one by one. It is the main memory
7569
# bottleneck for workers. This number controls how many feature groups can be in
7670
# memory at the same time. A smaller number reduces the workers' memory
7771
# footprint, but may slow down the preprocessing speed.
78-
max_num_parallel_groups_in_preprocessing: 1000
7972

80-
# A random order accelerate the convergence. Turn it off only when debugging.
81-
random_feature_block_order : true
73+
# max_num_parallel_groups_in_preprocessing: 1000
8274

8375
# During preprocessing, each (text) file is parsed and then written into the local
8476
# cache in binary format to save memory. These data are then used by the
8577
# preprocessing stage, and also can be re-used when running next time.
8678
local_cache {
8779
format: BIN
88-
file: "/tmp/ctrc/"
80+
file: "data/cache/ctr_train_"
8981
}
9082

9183
# Parameters used by the trust region method. The change of w_i (the i-th
9284
# parameter) is bounded by [-delta_i, delta_i], where delta_i is an adaptive
9385
# value according to the convergence. The initial value of delta_i is
9486
# *delta_init_value* and maximal value is *delta_max_value*. You can increase
9587
# these parameters for easy datasets.
96-
[PS.LM.delta_init_value] : 1
97-
[PS.LM.delta_max_value] : 5
88+
89+
# [PS.LM.delta_init_value] : 1
90+
# [PS.LM.delta_max_value] : 5
91+
9892
# This parameter controls the aggressiveness of the KKT filter. Increasing this
9993
# number will decrease the effect of the KKT filter. A very large number, such as
10094
# 1e20 will turn off the KKT filter.
101-
[PS.LM.kkt_filter_threshold_ratio] : 10
95+
96+
# [PS.LM.kkt_filter_threshold_ratio] : 10
10297
}
98+
10399
}

example/linear/ctr/eval_batch.conf

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@ linear_method {
33
validation_data {
44
format: TEXT
55
text: SPARSE_BINARY
6-
# file: "data/train/part.*"
7-
file: "/home/muli/work/data/ctra/test/part.*"
6+
file: "data/ctr/test/part.*"
87
}
98

109
model_input {
1110
format: TEXT
12-
file: "model/ctr_batch_l1lr.*"
11+
file: "model/ctr_batch.*"
1312
}
1413

1514
}

example/linear/ctr/eval_online.conf

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,13 @@ linear_method {
22

33
validation_data {
44
format: TEXT
5-
text: ADFEA
6-
max_num_files_per_worker: 10
7-
file: "/home/muli/work/data/ctrd/part.*"
5+
text: SPARSE_BINARY
6+
file: "data/ctr/test/part.*"
87
}
98

109
model_input {
1110
format: TEXT
12-
# file: "../output/ctr_online.*"
13-
file: "/home/muli/work/ps_bak/output/ctr.*"
11+
file: "model/ctr_online.*"
1412
}
1513

1614
}

example/linear/ctr/online_l1lr.conf

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ linear_method {
33
training_data {
44
format: TEXT
55
text: SPARSE_BINARY
6-
# file: "data/train/part.*"
7-
file: "/home/muli/work/data/ctra/train/part.*"
6+
file: "data/ctr/train/part.*"
87
ignore_feature_group: true
98
}
109

@@ -17,13 +16,14 @@ loss {
1716
type: LOGIT
1817
}
1918

20-
# coef * |w|_1
19+
# lambda_0 * |w|_1 + lambda_1 * |w|^2_2
2120
penalty {
2221
type: L1
2322
lambda: 10
2423
lambda: 1
2524
}
2625

26+
# lr = alpha / (beta + x), where x depends on the progress
2727
learning_rate {
2828
type: DECAY
2929
alpha: .01
@@ -32,10 +32,22 @@ beta: 10
3232

3333
async_sgd {
3434
algo: FTRL
35+
36+
# The size of minibatch
3537
minibatch : 10000
38+
39+
# The number of data passes
40+
num_data_pass: 10
41+
42+
# Features which occur <= *tail_feature_freq* times will be filtered before
43+
# training. It saves both memory and bandwidth.
3644
tail_feature_freq : 4
37-
countmin_n : 1e9
38-
report_interval: 1
45+
46+
# It controls the countmin size. We filter the tail features by countmin, which
47+
# is more efficient than hash, but still is the memory bottleneck for servers. A
48+
# smaller ratio reduces the memory footprint, but may increase the size of
49+
# filtered feature.
50+
countmin_n : 1e8
3951
}
4052

4153
}

src/app/linear_method/async_sgd.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,23 @@ class AsyncSGDWorker : public ISGDCompNode, public LinearMethod {
197197
void updateModel(const SGDCall& call) {
198198
const auto& sgd = conf_.async_sgd();
199199
MinibatchReader<V> reader;
200-
reader.setReader(call.data(), sgd.minibatch(), sgd.data_buf());
200+
201+
// random shuffle the file order
202+
int n = std::max(sgd.num_data_pass(), 1);
203+
int m = call.data().file_size();
204+
std::vector<int> idx(m);
205+
for (int i = 0; i < m; ++i) idx[i] = i;
206+
207+
DataConfig data = call.data();
208+
data.clear_file();
209+
for (int i = 0; i < n; ++i) {
210+
std::random_shuffle(idx.begin(), idx.end());
211+
for (int j = 0; j < m; ++j) {
212+
data.add_file(call.data().file(idx[j]));
213+
}
214+
}
215+
216+
reader.setReader(data, sgd.minibatch(), sgd.data_buf());
201217
reader.setFilter(sgd.countmin_n(), sgd.countmin_k(), sgd.tail_feature_freq());
202218
reader.start();
203219

src/app/linear_method/proto/linear.pb.h

Lines changed: 45 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)