diff --git a/scripts/builtin/applyAndEvaluate.dml b/scripts/builtin/applyAndEvaluate.dml
index 646c71891c8..96e199d803a 100644
--- a/scripts/builtin/applyAndEvaluate.dml
+++ b/scripts/builtin/applyAndEvaluate.dml
@@ -99,10 +99,10 @@ return (Matrix[Double] result)
   if(max(eYtrain) == min(eYtrain)) 
     stop("Y contains only one class")
 
-  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   trainAccuracy = as.scalar(score[1, 1])
   
-  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   testAccuracy = as.scalar(score[1, 1])
 
   
@@ -172,9 +172,9 @@ return(Double dirtyScore)
   mask = as.matrix(metaList['mask']) 
   mask = ifelse(sum(mask == dmask) < ncol(mask), matrix(1, rows=1, cols=ncol(mask)), mask)
   [eXtrain, eXtest] = recodeData(X, Xtest, mask, FALSE, "recode")
-  eXtrain = replace(target=eXtrain, pattern=NaN, replacement=1)
-  eXtest = replace(target=eXtest, pattern=NaN, replacement=1)
+  eXtrain = replace(target=eXtrain, pattern=NaN, replacement=0)
+  eXtest = replace(target=eXtest, pattern=NaN, replacement=0)
   [eXtrain, eXtest] = recodeData(as.frame(eXtrain), as.frame(eXtest), mask, FALSE, "dummycode")
-  score = eval(evaluationFunc, list(X=eXtrain, Y=Y, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=Y, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   dirtyScore = as.scalar(score[1, 1])
 }
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 74c1aebf104..22ad7b58e53 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -300,16 +300,17 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
         {
           pipList = list(lp = lp, ph = ph_pip[i], hp = hp_matrix, flags = no_of_flag_vars)
           [evalFunOutput, hpForPruning, changesByOp] = crossV(X=X, y=Y, cvk=cvk, evalFunHp=evalFunHp, pipList=pipList, metaList=metaList, hpForPruning=hpForPruning, 
-          changesByOp=changesByOp, evalFunc=evaluationFunc, trainML = 0)
+          changesByOp=changesByOp, evalFunc=evaluationFunc)
         }
         else 
         {
-          [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp] = executePipeline(logical=lp, pipeline=ph_pip[i], X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList,
-            hyperParameters=hp_matrix, hpForPruning=hpForPruning, changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE)
+          [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp] = executePipeline(logical=lp, pipeline=ph_pip[i], 
+            X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList,  hyperParameters=hp_matrix, hpForPruning=hpForPruning,
+            changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE)
           if(max(eYtrain) == min(eYtrain)) 
             print("Y contains only one class")
           else 
-            evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
+            evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
         }
 
         # evalFunOutput = eval(evaluationFunc, argList)  
@@ -564,9 +565,8 @@ return (Matrix[Double] features)
   # OHE features 
   OHE = sum(colMaxs(X) * mask)
   features[1, 10] = OHE
-  tab = table(Y, 1)
-  distVal = nrow(tab)
-  if(nrow(Y) > 1 &  distVal <= 10)
+
+  if(nrow(Y) > 1 &  min(Y) >= 1)
   {
     ctab = table(Y, 1)
     features[1, 11] = nrow(ctab) # number of classes
@@ -630,7 +630,7 @@ return (String s)
 }
 
 crossV = function(Matrix[double] X, Matrix[double] y, Integer cvk, Matrix[Double] evalFunHp, List[Unknown] pipList, List[Unknown] metaList,
-  Matrix[Double] hpForPruning = as.matrix(0), Matrix[Double] changesByOp = as.matrix(0), String evalFunc, Integer trainML = 0) 
+  Matrix[Double] hpForPruning = as.matrix(0), Matrix[Double] changesByOp = as.matrix(0), String evalFunc) 
 return (Matrix[Double] output, Matrix[Double] hpForPruning, Matrix[Double] changesByOp)
 {
   accuracyMatrix = matrix(0, cvk, 1)
@@ -679,7 +679,7 @@ return (Matrix[Double] output, Matrix[Double] hpForPruning, Matrix[Double] chang
         changesByOp=changesByOp, flagsCount=as.scalar(pipList['flags']), test=TRUE, verbose=FALSE)
     }
     # print("test out: "+nrow(testy))
-    res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = trainML))
+    res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp))
     accuracyMatrix[i] = res[1, 1]
     evalFunHp = res[, 2:ncol(res)]
   }
diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml
index 3a0358a2697..5eacc558158 100644
--- a/scripts/builtin/executePipeline.dml
+++ b/scripts/builtin/executePipeline.dml
@@ -83,7 +83,7 @@ s_executePipeline = function(Frame[String] logical = as.frame("NULL"), Frame[Str
     op = as.scalar(pipeline[1,i])
     lgOp = as.scalar(logical[1,i])
     
-    if(test == FALSE | lgOp != "CI") {
+    if(lgOp != "CI") {
        Xclone = X 
       [hp, dataFlag, yFlag, executeFlag] = matrixToList(X, Y, mask, FD, hyperParameters[i], flagsCount, op)
       if(executeFlag == 1) {
@@ -228,6 +228,9 @@ return (Matrix[Double] X)
     cat = removeEmpty(target=X, margin="cols", select = mask)
     # round categorical (if there is any floating  point)
     cat = round(cat)
+    less_than_1_mask = cat < 1
+    less_than_1 = less_than_1_mask * 9999
+    cat = (cat * (less_than_1_mask == 0)) +  less_than_1
     # reconstruct original X
     X = X * (mask == 0)
     q = table(seq(1, ncol(cat)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", 
@@ -485,11 +488,9 @@ return(Matrix[Double] hpForPruning, Matrix[Double] changesByOp)
 ########################################################
 # The function will flip the noisy labels
 ########################################################
-flipLabels = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
+flipLabels = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Integer maxIter =10, Boolean verbose = FALSE)
 return (Matrix[Double] XY)
 {
-
-  print("---- starting flip labels ---")
   max_y = max(Y)
   if(min(Y) != max(Y))
   {
@@ -498,7 +499,7 @@ return (Matrix[Double] XY)
     inc = ((yhat != Y) & (rowMaxs(prob) > threshold))
     Xcor = removeEmpty(target = X, margin = "rows", select = (inc==0))
     Ycor = removeEmpty(target = Y, margin = "rows", select = (inc==0))
-    while(sum(inc) > 0)
+    while(sum(inc) > 0 & maxIter > 0)
     {
       # print("inc vector "+toString(inc))
       Xinc = removeEmpty(target = X, margin = "rows", select = inc)
@@ -512,9 +513,95 @@ return (Matrix[Double] XY)
       Ycor = rbind(Ycor, YcorI)
       X = Xinc
       Y = Yinc
+      print("maxIter: "+maxIter)
+      maxIter = maxIter - 1
     }
     XY = cbind(Xcor, Ycor)
   }
   else 
     XY = cbind(X, Y)
+}
+
+#######################################################################
+# function frequency conversion
+# Inputs: The input dataset X, and  mask of the columns
+# Output: categorical columns are replaced with their frequencies
+#######################################################################
+
+frequencyEncoding = function(Matrix[Double] X, Matrix[Double] mask)
+return (Matrix[Double] freqX) {
+  # Replaces every column flagged with 1 in mask (read as mask[1, i]) by the number of
+  # rows that share the same category code; all other columns are copied unchanged.
+  freqX = X
+  # NaN cells are counted as code 1 (presumably because table() needs positive codes - confirm)
+  X = replace(target=X, pattern=NaN, replacement=1)
+  if(sum(mask) > 0)
+  {
+    parfor(i in 1:ncol(mask))
+    {
+      if(as.scalar(mask[1, i]) == 1)
+      {
+        catCol = X[, i]
+        maxCode = max(catCol)
+        # occurrences of every code found in this column
+        counts = table(catCol, 1)
+        # indicator of each row's own code, built by comparing against 1..maxCode
+        codes = matrix(0, nrow(catCol), maxCode) + t(seq(1, maxCode))
+        oneHot = codes == catCol
+        # keep only the count that belongs to the row's own code
+        freqX[, i] = rowSums(oneHot * t(counts))
+      }
+    }
+  }
+}
+
+#######################################################################
+# function Weight of evidence / information gain
+# Inputs: The input dataset X, the labels Y, and mask of the columns
+# Output: categorical columns are replaced with the label entropy of their category; Y is appended as last column
+#######################################################################
+
+WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask)
+return (Matrix[Double] output) {
+  # Replaces each column flagged with 1 in mask by the getEntropy(column, Y) value of the
+  # row's category code, then appends Y as the last column of the result.
+  encoded = X
+  X = replace(target=X, pattern=NaN, replacement=1)
+  if(sum(mask) > 0)
+  {
+    parfor(i in 1:ncol(mask))
+    {
+      if(as.scalar(mask[1, i]) == 1)
+      {
+        catCol = X[, i]
+        maxCode = max(catCol)
+        perCode = getEntropy(catCol, Y)
+        # indicator of each row's own code, used to pick its entry out of perCode
+        oneHot = (matrix(0, nrow(catCol), maxCode) + t(seq(1, maxCode))) == catCol
+        encoded[, i] = rowSums(oneHot * perCode)
+      }
+    }
+  }
+  output = cbind(encoded, Y)
+}
+
+
+getEntropy = function(Matrix[Double] eX, Matrix[Double] eY)
+return(Matrix[Double] entropyMatrix)
+{
+  # Base-2 Shannon entropy of the eY distribution inside each category of eX.
+  # Both inputs go straight into table(), so they presumably hold positive integer codes.
+  # Returns a 1 x nrow(table(eX, eY)) row vector with one entropy value per category row.
+  tab = table(eX, eY)
+  entropyMatrix = matrix(0, rows=1, cols=nrow(tab))
+  catTotal = rowSums(tab)
+  for(i in 1:nrow(tab))
+  {
+    # class probabilities inside category i (all NaN when the category never occurs)
+    prob = (tab[i,]/catTotal[i])
+    # zero out NaN terms (0*log(0), 0/0) one by one, so that a single empty class
+    # no longer wipes out the contribution of the remaining classes
+    terms = replace(target=prob * log(prob, 2), pattern=NaN, replacement=0)
+    entropyMatrix[1, i] = -sum(terms)
+  }
 }
\ No newline at end of file
diff --git a/scripts/builtin/topk_cleaning.dml b/scripts/builtin/topk_cleaning.dml
index e9aebafe76e..07bf98b96ad 100644
--- a/scripts/builtin/topk_cleaning.dml
+++ b/scripts/builtin/topk_cleaning.dml
@@ -113,36 +113,39 @@ s_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
   # # # create logical pipeline seeds
   logicalSeedCI =  frame([
                    "4", "ED", "MVI", "OTLR", "EC", "0", "0", "0", "0",
-                   "5", "ED", "MVI", "CI", "SCALE","DUMMY","0", "0", "0", 
-                   "5", "OTLR", "EC", "CI", "SCALE", "DUMMY", "0","0", "0",
-                   "7", "MVI", "OTLR", "ED", "EC", "SCALE", "CI", "DUMMY", "0",
+                   "5", "ED", "EC", "SCALE", "CI","DUMMY","0", "0", "0", 
+                   "5", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0","0", "0",
+                   "8", "ED", "MVI", "OTLR", "ED", "EC", "SCALE", "CI", "DUMMY",
                    "5", "ED",  "MVI",  "SCALE", "CI", "DUMMY", "0", "0", "0",
                    "4", "MVI", "SCALE", "CI", "DUMMY", "0", "0", "0", "0", 
-                   "4", "ED", "EC", "CI", "DUMMY", "0", "0", "0", "0",
-                   "4", "MVI", "OTLR", "CI", "DUMMY", "0", "0", "0", "0",
-                   "6", "MVI", "OTLR", "EC", "CI", "SCALE", "DUMMY", "0", "0",
+                   "6", "ED", "MVI", "EC", "SCALE", "CI", "DUMMY", "0", "0",
+                   "6", "MVI", "OTLR","EC", "SCALE", "CI", "DUMMY", "0", "0",
+                   "7", "OTLR", "MVI", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0",
                    "7", "ED", "MVI", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0"
                    ], rows=10, cols=9)  
                    
-  logicalSeedNoCI = frame([
-                   "4", "ED", "MVI", "OTLR", "EC", "0", "0",
-                   "3", "ED", "MVI", "DUMMY", "0","0","0", 
-                   "3", "OTLR", "EC", "DUMMY", "0","0","0",
-                   "5", "MVI", "OTLR", "ED", "EC", "DUMMY", "0", 
-                   "3", "ED",  "MVI", "DUMMY", "0", "0", "0",
-                   "3", "MVI", "SCALE", "DUMMY", "0", "0", "0", 
-                   "3", "ED", "EC", "DUMMY", "0", "0", "0",
-                   "3", "MVI", "OTLR", "DUMMY", "0", "0", "0", 
-                   "4", "MVI", "OTLR", "EC", "DUMMY", "0", "0", 
-                   "6", "ED", "MVI", "OTLR", "EC", "SCALE", "DUMMY"
-                   ], rows=10, cols=7) 
+  logicalSeedNoCI =  frame([
+                   "3", "ED", "MVI", "OTLR", "EC", "0", "0", "0",
+                   "4", "ED", "EC", "SCALE", "DUMMY","0", "0", "0", 
+                   "4", "OTLR", "EC", "SCALE", "DUMMY", "0","0", "0",
+                   "7", "ED", "MVI", "OTLR", "ED", "EC", "SCALE", "DUMMY",
+                   "4", "ED",  "MVI",  "SCALE", "DUMMY", "0", "0", "0",
+                   "3", "MVI", "SCALE", "DUMMY", "0", "0", "0", "0", 
+                   "5", "ED", "MVI", "EC", "SCALE", "DUMMY", "0", "0",
+                   "5", "MVI", "OTLR","EC", "SCALE", "DUMMY", "0", "0",
+                   "6", "OTLR", "MVI", "OTLR", "EC", "SCALE", "DUMMY", "0",
+                   "6", "ED", "MVI", "OTLR", "EC", "SCALE", "DUMMY", "0"
+                   ], rows=10, cols=8)  
                    
-  tab = table(eYtrain, 1)
-  dist = nrow(tab)
-  if(nrow(eYtrain) > 0 & dist < 15)
+  if(min(eYtrain) >= 1) {
+    tab = table(eYtrain, 1)
+    dist = nrow(tab)
+  }
+  if(nrow(eYtrain) > 0 & min(eYtrain) >= 1 & dist <= 15)
     logical = logicalSeedCI
-  else 
+  else {
     logical = logicalSeedNoCI
+  }
   idx = as.integer(as.scalar(logical[1, 1])) + 1
   category = logical[1, 2:idx]
   
@@ -150,7 +153,7 @@ s_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
   [bestLogical, score] = lg::enumerateLogical(X=eXtrain, y=eYtrain, Xtest=eXtest, ytest=eYtest, cmr=cmr, 
     cat=category, population=logical[2:nrow(logical)], max_iter=ceil(resource_val/topK), metaList = metaList,
     evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, primitives=primitives, param=parameters,
-    num_inst=3 , num_exec=2, cv=cv, cvk=cvk, verbose=TRUE, ctx=ctx)
+    num_inst=nrow(primitives), num_exec=ceil(resource_val/topK), cv=cv, cvk=cvk, verbose=TRUE, ctx=ctx)
   t6 = time(); print("---- finalized in: "+(t6-t5)/1e9+"s");
 
   topKPipelines = as.frame("NULL"); topKHyperParams = matrix(0,0,0); topKScores = matrix(0,0,0); features = as.frame("NULL")
@@ -231,8 +234,8 @@ return(Double dirtyScore, Matrix[Double] evalFunHp)
   mask = as.matrix(metaList['mask']) 
   mask = ifelse(sum(mask == dmask) < ncol(mask), matrix(1, rows=1, cols=ncol(mask)), mask)
   [eXtrain, eXtest] = recodeData(X, Xtest, mask, cv, "recode")
-  eXtrain = replace(target=eXtrain, pattern=NaN, replacement = 1)
-  eXtest = replace(target=eXtest, pattern=NaN, replacement = 1)
+  eXtrain = replace(target=eXtrain, pattern=NaN, replacement = 0)
+  eXtest = replace(target=eXtest, pattern=NaN, replacement = 0)
   dirtyScore = 100
   print(prefix+" sample from train data and dummy code");
   [eXtrain, Ytrain] =  utils::doSample(eXtrain, Y, sample, TRUE)
@@ -242,10 +245,10 @@ return(Double dirtyScore, Matrix[Double] evalFunHp)
   print(prefix+" hyper-parameter tuning");
   if(cv) {
     score = crossV(X=eXtrain, y=Ytrain, cvk=cvk, evalFunHp=evalFunHp,
-      pipList=pipList, metaList=metaList, evalFunc=evaluationFunc, trainML = 1)
+      pipList=pipList, metaList=metaList, evalFunc=evaluationFunc)
   }
   else {
-    score = eval(evaluationFunc, list(X=eXtrain, Y=Ytrain, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 1))
+    score = eval(evaluationFunc, list(X=eXtrain, Y=Ytrain, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   }
 
   dirtyScore = as.scalar(score[1, 1])
diff --git a/scripts/pipelines/properties/param.csv b/scripts/pipelines/properties/param.csv
index c99a18eb317..cde9c2605ab 100644
--- a/scripts/pipelines/properties/param.csv
+++ b/scripts/pipelines/properties/param.csv
@@ -6,15 +6,17 @@ normalize,0,0,0,0,0,0,,,,,,,,,,,,
 imputeByMean,0,1,0,0,0,2,,,,,,,,,,,,
 imputeByMedian,0,1,0,0,0,2,,,,,,,,,,,,
 mice,2,1,0,0,1,2,INT,FP,1,3,0.5,1,,,,,,
-abstain,1,0,0,1,1,2,FP,0.6,0.9,,,,,,,,,
-flipLabels,1,0,0,1,1,2,FP,0.6,0.9,,,,,,,,,
+abstain,1,0,0,1,1,2,FP,0.6,0.8,,,,,,,,,
+flipLabels,2,0,0,1,1,2,FP,INT,0.6,0.9,1,20,,,,,,
 SMOTE,1,1,0,1,1,2,INT,100,500,,,,,,,,,
 m_pca,3,0,0,0,0,2,INT,BOOL,BOOL,100,200,0,1,0,0,,,
 ppca,4,0,0,0,1,2,INT,INT,FP,FP,100,200,1,10,1.00E-09,1.00E-06,1.00E-02,1.00E-01
 fillDefault,0,0,0,0,0,2,,,,,,,,,,,,
 dummycoding,0,1,0,0,0,2,,,,,,,,,,,,
+frequencyEncoding,0,1,0,0,0,2,,,,,,,,,,,,
+WoE,0,1,0,1,0,2,,,,,,,,,,,,
 scale,2,0,0,0,0,0,BOOL,BOOL,0,1,0,1,,,,,,
 forward_fill,1,0,0,0,1,2,BOOL,0,1,,,,,,,,,
 imputeByFd,1,0,1,0,0,1,FP,0.6,0.9,,,,,,,,,
-underSampling,1,0,0,1,0,2,FP,0.6,0.99,,,,,,,,,
 wtomeklink,0,0,0,1,0,2,,,,,,,,,,,,
+underSampling,1,0,0,1,0,2,FP,0.1,0.6,,,,,,,,,
diff --git a/scripts/pipelines/properties/primitives.csv b/scripts/pipelines/properties/primitives.csv
index 0afcc52e2b4..53d916079bd 100644
--- a/scripts/pipelines/properties/primitives.csv
+++ b/scripts/pipelines/properties/primitives.csv
@@ -1,7 +1,7 @@
 ED,MVI,OTLR,EC,SCALE,CI,DUMMY,DIM
 imputeByFd,imputeByMean,winsorize,imputeByMean,scale,abstain,dummycoding,m_pca
-outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,,ppca
-outlierByIQR,mice,outlierByIQR,fillDefault,,SMOTE,,
+outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,frequencyEncoding,ppca
+outlierByIQR,mice,outlierByIQR,fillDefault,,SMOTE,WoE,
 ,fillDefault,,,,flipLabels,,
-,imputeByFd,,,,,,
+,imputeByFd,,,,underSampling,,
 ,forward_fill,,,,,,
diff --git a/scripts/pipelines/properties/testPrimitives.csv b/scripts/pipelines/properties/testPrimitives.csv
index c1e743396b3..a5cdc3ed8e2 100644
--- a/scripts/pipelines/properties/testPrimitives.csv
+++ b/scripts/pipelines/properties/testPrimitives.csv
@@ -1,3 +1,3 @@
 ED,MVI,OTLR,EC,SCALE,CI,DUMMY,DIM
 ,imputeByMean,winsorize,imputeByMean,scale,abstain,dummycoding,m_pca
-outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,,ppca
+outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,underSampling,frequencyEncoding,ppca
diff --git a/src/test/scripts/functions/pipelines/applyEvaluateTest.dml b/src/test/scripts/functions/pipelines/applyEvaluateTest.dml
index 6edd23904ca..a4e1c8c7e9b 100644
--- a/src/test/scripts/functions/pipelines/applyEvaluateTest.dml
+++ b/src/test/scripts/functions/pipelines/applyEvaluateTest.dml
@@ -60,7 +60,7 @@ trainData = F[1:split,]
 testData = F[split+1:nrow(F),]
 
 
-result = applyAndEvaluate(trainData, testData, metaInfo, lg, pip[1,], hp[1,], "evalML", matrix("1 1e-3 1e-9 100", rows=1, cols=4), TRUE, FALSE)
+result = applyAndEvaluate(trainData, testData, metaInfo, lg, pip[1,], hp[1,], "evalML", evalHp, TRUE, FALSE)
 
 header = frame(["dirty acc", "train acc", "test acc"], rows=1, cols=3)
 result = as.frame(result)
@@ -75,13 +75,13 @@ write(result, $6)
 # UDF for evaluation  
 # choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
 evalML = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
-  Matrix[Double] evalFunHp, Boolean trainML = FALSE)
+  Matrix[Double] evalFunHp)
   
 return(Matrix[Double] accuracy)
 {
 
   beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), 
-    maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
+    maxi=1000, maxii=100, verbose=FALSE);
   [prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
   a = getAccuracy(Ytest, yhat, TRUE)
   print("accuracy: "+ accuracy+", accuracy weighted: "+a)
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
index 8350d69b5a7..746303da873 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
@@ -1,3 +1,3 @@
-77.42222222222223
-77.15555555555555
-76.97777777777777
+93.69369369369369
+93.69369369369369
+93.69369369369369
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv.mtd
deleted file mode 100644
index d3f8f295c66..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv.mtd
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "data_type": "matrix",
-    "value_type": "double",
-    "rows": 3,
-    "cols": 1,
-    "nnz": 3,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv b/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv
index 3c0b94009e2..14992b730f3 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv
@@ -1 +1 @@
-74.13333333333333
\ No newline at end of file
+90.990990990991
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv.mtd
deleted file mode 100644
index 4689778cab9..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv.mtd
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "data_type": "scalar",
-    "value_type": "double",
-    "format": "text",
-    "author": "olga_ovcharenko",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv b/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv
index b0891774a7a..c3223bad408 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv
@@ -1 +1 @@
-10.0,0.001,1.0E-9,1000.0
+2.0,0.001,1.0
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv.mtd
deleted file mode 100644
index 98f02f09ea8..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/evalHp.csv.mtd
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "data_type": "matrix",
-    "value_type": "double",
-    "rows": 1,
-    "cols": 4,
-    "nnz": 4,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/featureFrame.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/featureFrame.csv.mtd
deleted file mode 100644
index f73d79482b1..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/featureFrame.csv.mtd
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "data_type": "frame",
-    "schema": "STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,",
-    "rows": 1,
-    "cols": 18,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
index c1201c5b388..51db8062157 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
@@ -1,3 +1,3 @@
-48.0,1.0,0.6455927908212413,0,0,1.0,0,0,1.0,0,0,0,1.0,0,0,0,2.0,1.0,0.7028229812430514,0,0,1.0,0,0,1.0,2.0,0,0,0,0,0,0,0,1.0,0.7518372764174678,0,0,0,1.0,1.0,2.0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-48.0,1.0,0.6687888403388711,0,0,1.0,0,0,1.0,0,0,0,1.0,0,0,0,2.0,1.0,0.8636413728699717,0,0,1.0,0,0,1.0,2.0,0,1.0,0,0,0,0,0,1.0,0.6999444414086964,0,0,0,1.0,1.0,2.0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-54.0,1.0,0.8858480964079888,0,0,0,1.0,0,0,1.0,0,0,0,0,1.0,0,0,0,2.0,3.0,7.0,1.0,1.0,0,0,0,1.0,0,2.0,0,0,0,0,0,0,0,0,1.0,0.8436419752757551,0,0,0,0,1.0,1.0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+56.0,0,0,0,1.0,0,0,0,2.0,0,0,0,1.0,0,0,0,2.0,2.0,0.01012948685771077,0.9700112361003191,0,0,0,1.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,1.0,0,0,0,0,0,1.0,0.7879135917206637,0,0,0,1.0,1.0,2.0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+56.0,0,0,0,1.0,0,0,0,2.0,0,0,0,1.0,0,0,0,2.0,2.0,0.03120261172075603,0.9862240788883125,0,0,0,1.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,1.0,0,0,0,0,0,1.0,0.6444173997759863,0,0,0,1.0,1.0,2.0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+56.0,0,0,0,1.0,0,0,0,2.0,0,0,0,1.0,0,0,0,2.0,2.0,0.03697717557557067,0.9732999162362644,0,0,0,1.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,1.0,0,0,0,0,0,1.0,0.6848186130743412,0,0,0,1.0,1.0,2.0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv.mtd
deleted file mode 100644
index 80fe788bfb1..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv.mtd
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "data_type": "matrix",
-    "value_type": "double",
-    "rows": 3,
-    "cols": 60,
-    "nnz": 28,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv b/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv
index b6c716bf08d..e2f5bc4d986 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv
@@ -1 +1 @@
-ED,MVI,ED,SCALE,CI,DUMMY
+EC,MVI,OTLR,EC,SCALE,CI,DUMMY
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv.mtd
deleted file mode 100644
index 241a6a065db..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/lp.csv.mtd
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "data_type": "frame",
-    "schema": "STRING,STRING,STRING,STRING,",
-    "rows": 1,
-    "cols": 4,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
index 1367ddfbcd7..37f2ffbfff4 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
@@ -1,3 +1,3 @@
-imputeByFd,imputeByMean,imputeByFd,scale,flipLabels,dummycoding
-imputeByFd,imputeByMean,imputeByFd,scale,flipLabels,dummycoding
-imputeByFd,imputeByMean,outlierBySd,scale,abstain,dummycoding
+imputeByMean,imputeByMean,winsorize,imputeByMedian,scale,abstain,dummycoding
+imputeByMean,imputeByMean,winsorize,imputeByMedian,scale,abstain,dummycoding
+imputeByMean,imputeByMean,winsorize,imputeByMedian,scale,abstain,dummycoding
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv.mtd
deleted file mode 100644
index 33bc1d4d7e5..00000000000
--- a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv.mtd
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "data_type": "frame",
-    "schema": "STRING,STRING,STRING,STRING,",
-    "rows": 3,
-    "cols": 4,
-    "format": "csv",
-    "author": "olga_ovcharenko",
-    "header": false,
-    "sep": ",",
-    "created": "2021-09-15 13:08:58 CEST"
-}
\ No newline at end of file
diff --git a/src/test/scripts/functions/pipelines/topkLogicalTest.dml b/src/test/scripts/functions/pipelines/topkLogicalTest.dml
index 481fb66135b..fdabe020ac5 100644
--- a/src/test/scripts/functions/pipelines/topkLogicalTest.dml
+++ b/src/test/scripts/functions/pipelines/topkLogicalTest.dml
@@ -107,7 +107,7 @@ write(result , $O)
 # UDF for evaluation  
 # choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
 evalML = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
-  Matrix[Double] evalFunHp, Boolean trainML = FALSE)
+  Matrix[Double] evalFunHp)
   
 return(Matrix[Double] accuracy)
 {
diff --git a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
index 1ba5bdaca67..91d186c1334 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
@@ -24,7 +24,7 @@ source("scripts/pipelines/scripts/utils.dml") as utils;
 
 # read the inputs
 F = read($dirtyData, data_type="frame", format="csv", header=TRUE, 
-  naStrings= ["NA", "null","  ","NaN", "nan", "", "?", "99999"]);
+  naStrings= ["NA", "null","  ","NaN", "nan", "", " ", "_nan_", "inf", "?", "NAN", "99999"]);
 
 metaInfo = read($metaData, data_type="frame", format="csv", header=FALSE);
 primitives = read($primitives, data_type = "frame", format="csv", header= TRUE)
@@ -57,7 +57,7 @@ metaInfo = metaInfo[, 2:ncol(metaInfo)]
 
 # [topKPipelines, topKHyperParams, topKScores, bestLogical, features, dirtyScore, evalHp] = 
 result = topk_cleaning(dataTrain=trainData, dataTest=testData, metaData=metaInfo, primitives=primitives, parameters=param,
-  cmr=matrix("2 0.7 1", rows=1, cols=3), evaluationFunc=evalFunc, evalFunHp=as.matrix(0),
+  cmr=matrix("2 0.7 1", rows=1, cols=3), evaluationFunc=evalFunc, evalFunHp=as.matrix(NaN),
   topK=topK, resource_val=resources, cv=testCV, cvk=cvk, sample=sample, isLastLabel=TRUE, correctTypos=FALSE, output=output) 
 
 write(result, $O)
@@ -66,15 +66,15 @@ write(result, $O)
 # UDF for evaluation  
 # choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
 evalClassification = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
-  Matrix[Double] evalFunHp, Integer trainML)
+  Matrix[Double] evalFunHp)
   
 return(Matrix[Double] output)
 {
-  if(trainML == 1)
+  if(is.na(as.scalar(evalFunHp[1,1])))
   {
-    params = list("icpt", "reg", "tol", "maxii")
-    paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3));
-    trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE);
+    params = list("icpt", "reg", "tol")
+    paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5));
+    trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=1000, maxii=100, verbose=FALSE);
     [B1, opt] = utils::topk_gridSearch(X=X, y=Y, Xtest=Xtest, ytest=Ytest, train="multiLogReg", predict="accuracy", numB=ncol(X)+1, cv=FALSE, cvk=0,
       params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=FALSE);
     evalFunHp = as.matrix(opt)  
@@ -86,7 +86,7 @@ return(Matrix[Double] output)
   }
   else {
     beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), 
-      maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
+      maxi=1000, maxii=100, verbose=FALSE);
     [prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
     a = getAccuracy(Ytest, yhat, TRUE)
     print("accuracy: "+toString(accuracy)+" weighted accuracy: "+a)
diff --git a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
index a797db206db..7682ae444b1 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
@@ -23,7 +23,7 @@ source("scripts/pipelines/scripts/utils.dml") as utils;
 
 # read the inputs
 F = read($dirtyData, data_type="frame", format="csv", header=TRUE, 
-  naStrings= ["NA", "null","  ","NaN", "nan", "", "?", "99999"]);
+  naStrings= ["NA", "null","  ","NaN", "nan", "", " ", "_nan_", "inf", "?", "NAN", "99999"]);
 F = F[,2:ncol(F)]
 primitives = read($primitives, data_type = "frame", format="csv", header= TRUE)
 param = read($parameters, data_type = "frame", format="csv", header= TRUE)
@@ -59,10 +59,10 @@ write(result, $O)
 # UDF for evaluation  
 # choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
 evalRegression = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
-  Matrix[Double] evalFunHp, Boolean trainML = FALSE)
+  Matrix[Double] evalFunHp)
 return(Matrix[Double] output)
 {
-  if(trainML == 1)
+  if(is.na(as.scalar(evalFunHp[1,1])))
   {
     # do the gridsearch for hyper-parameters
     params = list("icpt","reg", "tol", "maxi");
@@ -82,5 +82,5 @@ wmape = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Integer i
   # loss = as.matrix(sum((y - X%*%B)^2));
   pred = lmPredict(X=X, B=B, ytest=y, icpt=icpt);
   WMAPE = sum(abs(y - pred))/sum(abs(y)) #this will give the lose into range of [0,1]
-  loss = as.matrix(WMAPE) 
+  loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE))  
 }