|
1 | | -# configuration to run l1-regularized logistic regression on the ctr dataset |
2 | 1 | linear_method { |
3 | 2 |
|
4 | 3 | training_data { |
5 | 4 | format: TEXT |
6 | 5 | text: SPARSE_BINARY |
7 | | -# file: "data/train/part.*" |
8 | | -file: "/home/muli/work/data/ctra/train/part.*" |
9 | | -} |
| 6 | +file: "data/ctr/train/part.*" |
10 | 7 |
|
11 | | -# training_data { |
12 | | -# format: TEXT |
13 | | -# text: ADFEA |
14 | | -# max_num_files_per_worker: 10 |
15 | | -# # file: "/user/muli/ctrb/part.*" |
16 | | -# # hdfs { |
17 | | -# # # "which hadoop" returns /usr/bin/hadoop |
18 | | -# # home: "/usr" |
19 | | -# # } |
20 | | -# file: "/home/muli/work/data/ctrd/part.*" |
| 8 | +# If the data is placed on hdfs and HADOOP_HOME="/usr" |
| 9 | +# hdfs { |
| 10 | +# home: "/usr" |
21 | 11 | # } |
| 12 | +} |
22 | 13 |
|
23 | 14 | model_output { |
24 | 15 | format: TEXT |
25 | | -file: "model/ctr_batch_l1lr" |
| 16 | +file: "model/ctr_batch" |
26 | 17 | } |
27 | 18 |
|
28 | 19 | loss { |
@@ -52,52 +43,57 @@ epsilon : 2e-5 |
52 | 43 | # blocks. A larger ratio often accelerate the convergence, however, it may slow |
53 | 44 | # down the system performance because of the increased number of global barriers. |
54 | 45 | feature_block_ratio : 4 |
| 46 | + |
55 | 47 | # The maximal number of blocks that can be updated in parallel (bounded-delay |
56 | 48 | # consistency). A larger delay may slow down the convergence rate, but improves |
57 | 49 | # the system performance. |
58 | 50 | max_block_delay: 8 |
59 | 51 |
|
60 | 52 | # important feature groups, update them earlier to get a better model |
61 | 53 | # initialization. |
62 | | -prior_fea_group: 127 # the bias feature (all one) |
63 | | -prior_fea_group: 120 # the position rank feature |
| 54 | +prior_fea_group: 127 |
| 55 | +prior_fea_group: 120 |
64 | 56 |
|
65 | 57 | # features which occur <= *tail_feature_freq* will be filtered before |
66 | 58 | # training. it saves both memory and bandwidth. |
67 | | -tail_feature_freq: 10 |
| 59 | +tail_feature_freq: 4 |
| 60 | + |
68 | 61 | # It controls the countmin size. We filter the tail features by countmin, which |
69 | 62 | # is more efficient than hash, but still is the memory bottleneck for servers. A |
70 | 63 | # smaller ratio reduces the memory footprint, but may increase the size of |
71 | 64 | # filtered feature. |
| 65 | + |
72 | 66 | countmin_n_ratio: .66 |
73 | 67 |
|
74 | 68 | # In preprocessing, feature group is processed one by one. It is the main memory |
75 | 69 | # bottleneck for workers. This number controls how many feature groups can be in |
76 | 70 | # memory at the same time. A smaller number reduces the workers' memory |
77 | 71 | # footprint, but may slow down the preprocessing speed. |
78 | | -max_num_parallel_groups_in_preprocessing: 1000 |
79 | 72 |
|
80 | | -# A random order accelerate the convergence. Turn it off only when debugging. |
81 | | -random_feature_block_order : true |
| 73 | +# max_num_parallel_groups_in_preprocessing: 1000 |
82 | 74 |
|
83 | 75 | # During preprocessing, each (text) file is parsed and then write into the local |
84 | 76 | # cache in binary format to save the memory. These data are then used by the |
85 | 77 | # preprocessing stage, and also can be re-used when running next time. |
86 | 78 | local_cache { |
87 | 79 | format: BIN |
88 | | -file: "/tmp/ctrc/" |
| 80 | +file: "data/cache/ctr_train_" |
89 | 81 | } |
90 | 82 |
|
91 | 83 | # Parameters used by the trust region method. The change of w_i (the i-th |
92 | 84 | # parameter) is bounded by [-delta_i, delta_i], where delta_i is an adaptive |
93 | 85 | # value according to the convergence. The initial value of delta_i is |
94 | 86 | # *delta_init_value* and maximal value is *delta_max_value*. You can increase |
95 | 87 | # these parameters for easy datasets. |
96 | | -[PS.LM.delta_init_value] : 1 |
97 | | -[PS.LM.delta_max_value] : 5 |
| 88 | + |
| 89 | +# [PS.LM.delta_init_value] : 1 |
| 90 | +# [PS.LM.delta_max_value] : 5 |
| 91 | + |
98 | 92 | # This parameter controls the aggressiveness of the KKT filter. Increasing this |
99 | 93 | # number will decrease the effect of KKT filter. a very large number, such as |
100 | 94 | # 1e20 will turn off the KKT filter. |
101 | | -[PS.LM.kkt_filter_threshold_ratio] : 10 |
| 95 | + |
| 96 | +# [PS.LM.kkt_filter_threshold_ratio] : 10 |
102 | 97 | } |
| 98 | + |
103 | 99 | } |
0 commit comments