user-defined filters in conf

mli · mli · commit 29b0076007ce · 2015-03-27T01:53:53.000-04:00
diff --git a/src/app/linear_method/async_sgd.h b/src/app/linear_method/async_sgd.h
@@ -60,7 +60,6 @@ class AsyncSGDServer : public ISGDCompNode {
       //   model_ = new KVStore<Key, V, AdaGradEntry<V>, SGDState<V>>();
       // }
     }
-    // CHECK_NOTNULL(model_)->set_state(state);
   }
 
   virtual ~AsyncSGDServer() {
@@ -139,7 +138,7 @@ class AsyncSGDServer : public ISGDCompNode {
     V z = 0;
     V sqrt_n = 0;
 
-    void Get(V const* data, void* state) {
+    void Set(const V* data, void* state) {
       SGDState* st = (SGDState*) state;
       // update model
       V w_old = w;
@@ -155,11 +154,15 @@ class AsyncSGDServer : public ISGDCompNode {
       st->UpdateWeight(w, w_old);
     }
 
-    void Set(V* data, void* state) { *data = w; }
+    void Get(V* data, void* state) { *data = w; }
   };
 
 };
 
+/**
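+ * @brief A worker node of asynchronous SGD: it pulls the weights, computes
+ * minibatch gradients, and pushes them back through model_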
+ */
 template <typename V>
 class AsyncSGDWorker : public ISGDCompNode {
  public:
@@ -191,6 +194,11 @@ class AsyncSGDWorker : public ISGDCompNode {
   }
 
  private:
+  /**
+   * @brief Process a workload (a data file); blocks until it is finished
+   *
+   * @param load the workload to process
+   */
   void UpdateModel(const Workload& load) {
     LOG(INFO) << MyNodeID() << ": accept workload " << load.id();
     VLOG(1) << "workload data: " << load.data().ShortDebugString();
@@ -213,13 +221,20 @@ class AsyncSGDWorker : public ISGDCompNode {
 
       // pull the weight
       auto req = Parameter::Request(id);
+      for (int i = 0; i < conf_.pull_filter_size(); ++i) {
+        *req.add_filter() = conf_.pull_filter(i);
+      }
       model_.Pull(req, key, [this, id]() { ComputeGradient(id); });
     }
 
     while (processed_batch_ < id) { usleep(500); }
     LOG(INFO) << MyNodeID() << ": finished workload " << load.id();
   }
 
+  /**
+   * @brief Compute the gradient of a minibatch and push it through model_
+   * @param id minibatch id
+   */
   void ComputeGradient(int id) {
     mu_.lock();
     auto Y = data_[id].first;
@@ -248,19 +263,17 @@ class AsyncSGDWorker : public ISGDCompNode {
 
     // push the gradient
     auto req = Parameter::Request(id);
-    // LL << grad;
+    for (int i = 0; i < conf_.push_filter_size(); ++i) {
+      // copy each user-defined push filter; key caching always clears its cache when done
+      auto filter = conf_.push_filter(i);
+      if (filter.type() == FilterConfig::KEY_CACHING) {
+        filter.set_clear_cache_if_done(true);
+      }
+      *req.add_filter() = filter;
+    }
     model_.Push(req, model_[id].key, grad);
     model_.clear(id);
 
-
-    // msg->add_filter(FilterConfig::KEY_CACHING)->set_clear_cache_if_done(true);
-    // int nbytes = conf_.async_sgd().fixing_float_by_nbytes();
-    // if (nbytes) {
-    //   auto conf = msg->add_filter(FilterConfig::FIXING_FLOAT)->add_fixed_point();
-    //   conf->set_num_bytes(nbytes);
-    // }
-
-
     ++ processed_batch_;
   }
 
diff --git a/src/app/linear_method/proto/linear.proto b/src/app/linear_method/proto/linear.proto
@@ -1,7 +1,7 @@
 package PS.LM;
 import "data/proto/data.proto";
 import "learner/proto/bcd.proto";
-
+import "filter/proto/filter.proto";
 message Config {
   optional DataConfig training_data = 1;
   optional DataConfig validation_data = 2;
@@ -17,6 +17,8 @@ message Config {
   optional SGDConfig async_sgd = 17;
   optional BCDConfig darlin = 15;
 
+  repeated FilterConfig push_filter = 13;
+  repeated FilterConfig pull_filter = 14;
 }
 
 extend BCDConfig {
@@ -62,9 +64,6 @@ message SGDConfig {
   // filtered feature.
   optional float countmin_n = 8 [default = 1e8];
   optional int32 countmin_k = 7 [default = 2];
-
-  // if > 0, then use *fixing_float_by_nbytes* bytes to encode float during communication
-  optional int32 fixing_float_by_nbytes = 13 [default = 0];
 }
 
 message LossConfig {
diff --git a/src/filter/fixing_float.h b/src/filter/fixing_float.h
@@ -30,19 +30,23 @@ class FixingFloatFilter : public Filter {
       auto type = msg->task.value_type(i);
       if (type == DataType::FLOAT) {
         CHECK_GT(filter_conf->fixed_point_size(), k);
-        msg->value[i] = convert<float>(msg->value[i], encode, filter_conf->mutable_fixed_point(k++));
+        msg->value[i] = convert<float>(
+            msg->value[i], encode, filter_conf->num_bytes(),
+            filter_conf->mutable_fixed_point(k++));
       }
       if (type == DataType::DOUBLE) {
         CHECK_GT(filter_conf->fixed_point_size(), k);
-        msg->value[i] = convert<double>(msg->value[i], encode, filter_conf->mutable_fixed_point(k++));
+        msg->value[i] = convert<double>(
+            msg->value[i], encode, filter_conf->num_bytes(),
+            filter_conf->mutable_fixed_point(k++));
       }
     }
   }
 
   // decode / encode an array
   template <typename V>
-  SArray<char> convert(const SArray<char>& array, bool encode, FilterConfig::FixedFloatConfig* conf) {
-    int nbytes = conf->num_bytes();
+  SArray<char> convert(const SArray<char>& array, bool encode, int nbytes,
+                       FilterConfig::FixedFloatConfig* conf) {
     CHECK_GT(nbytes, 0);
     CHECK_LT(nbytes, 8);
     double ratio = static_cast<double>(1 << (nbytes*8)) - 2;
diff --git a/src/filter/proto/filter.proto b/src/filter/proto/filter.proto
@@ -14,10 +14,11 @@ message FilterConfig {
   // if the task is done, then clear the cache (to save memory)
   optional bool clear_cache_if_done = 20 [default = false];
 
+  optional int32 num_bytes = 5 [default = 3];
+
   message FixedFloatConfig {
     optional float min_value = 1 [default = -1];
     optional float max_value = 2 [default = 1];
-    optional int32 num_bytes = 3 [default = 3];
   }
   repeated FixedFloatConfig fixed_point = 4;