apache · Shafaq-Siddiqi · Dec 30, 2021 · Dec 16, 2021
diff --git a/scripts/builtin/applyAndEvaluate.dml b/scripts/builtin/applyAndEvaluate.dml
@@ -99,10 +99,10 @@ return (Matrix[Double] result)
   if(max(eYtrain) == min(eYtrain)) 
     stop("Y contains only one class")
 
-  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   trainAccuracy = as.scalar(score[1, 1])
 
-  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   testAccuracy = as.scalar(score[1, 1])
 
 
@@ -172,9 +172,9 @@ return(Double dirtyScore)
   mask = as.matrix(metaList['mask']) 
   mask = ifelse(sum(mask == dmask) < ncol(mask), matrix(1, rows=1, cols=ncol(mask)), mask)
   [eXtrain, eXtest] = recodeData(X, Xtest, mask, FALSE, "recode")
-  eXtrain = replace(target=eXtrain, pattern=NaN, replacement=1)
-  eXtest = replace(target=eXtest, pattern=NaN, replacement=1)
+  eXtrain = replace(target=eXtrain, pattern=NaN, replacement=0)
+  eXtest = replace(target=eXtest, pattern=NaN, replacement=0)
   [eXtrain, eXtest] = recodeData(as.frame(eXtrain), as.frame(eXtest), mask, FALSE, "dummycode")
-  score = eval(evaluationFunc, list(X=eXtrain, Y=Y, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = FALSE))
+  score = eval(evaluationFunc, list(X=eXtrain, Y=Y, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   dirtyScore = as.scalar(score[1, 1])
 }
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
@@ -300,16 +300,17 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
         {
           pipList = list(lp = lp, ph = ph_pip[i], hp = hp_matrix, flags = no_of_flag_vars)
           [evalFunOutput, hpForPruning, changesByOp] = crossV(X=X, y=Y, cvk=cvk, evalFunHp=evalFunHp, pipList=pipList, metaList=metaList, hpForPruning=hpForPruning, 
-          changesByOp=changesByOp, evalFunc=evaluationFunc, trainML = 0)
+          changesByOp=changesByOp, evalFunc=evaluationFunc)
         }
         else 
         {
-          [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp] = executePipeline(logical=lp, pipeline=ph_pip[i], X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList,
-            hyperParameters=hp_matrix, hpForPruning=hpForPruning, changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE)
+          [eXtrain, eYtrain, eXtest, eYtest, Tr, hpForPruning, changesByOp] = executePipeline(logical=lp, pipeline=ph_pip[i], 
+            X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, metaList=metaList,  hyperParameters=hp_matrix, hpForPruning=hpForPruning,
+            changesByOp=changesByOp, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE)
           if(max(eYtrain) == min(eYtrain)) 
             print("Y contains only one class")
           else 
-            evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
+            evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
         }
 
         # evalFunOutput = eval(evaluationFunc, argList)  
@@ -564,9 +565,8 @@ return (Matrix[Double] features)
   # OHE features 
   OHE = sum(colMaxs(X) * mask)
   features[1, 10] = OHE
-  tab = table(Y, 1)
-  distVal = nrow(tab)
-  if(nrow(Y) > 1 &  distVal <= 10)
+
+  if(nrow(Y) > 1 &  min(Y) >= 1)
   {
     ctab = table(Y, 1)
     features[1, 11] = nrow(ctab) # number of classes
@@ -630,7 +630,7 @@ return (String s)
 }
 
 crossV = function(Matrix[double] X, Matrix[double] y, Integer cvk, Matrix[Double] evalFunHp, List[Unknown] pipList, List[Unknown] metaList,
-  Matrix[Double] hpForPruning = as.matrix(0), Matrix[Double] changesByOp = as.matrix(0), String evalFunc, Integer trainML = 0) 
+  Matrix[Double] hpForPruning = as.matrix(0), Matrix[Double] changesByOp = as.matrix(0), String evalFunc) 
 return (Matrix[Double] output, Matrix[Double] hpForPruning, Matrix[Double] changesByOp)
 {
   accuracyMatrix = matrix(0, cvk, 1)
@@ -679,7 +679,7 @@ return (Matrix[Double] output, Matrix[Double] hpForPruning, Matrix[Double] chang
         changesByOp=changesByOp, flagsCount=as.scalar(pipList['flags']), test=TRUE, verbose=FALSE)
     }
     # print("test out: "+nrow(testy))
-    res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = trainML))
+    res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp))
     accuracyMatrix[i] = res[1, 1]
     evalFunHp = res[, 2:ncol(res)]
   }

diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml
@@ -83,7 +83,7 @@ s_executePipeline = function(Frame[String] logical = as.frame("NULL"), Frame[Str
     op = as.scalar(pipeline[1,i])
     lgOp = as.scalar(logical[1,i])
 
-    if(test == FALSE | lgOp != "CI") {
+    if(lgOp != "CI") {
        Xclone = X 
       [hp, dataFlag, yFlag, executeFlag] = matrixToList(X, Y, mask, FD, hyperParameters[i], flagsCount, op)
       if(executeFlag == 1) {
@@ -228,6 +228,9 @@ return (Matrix[Double] X)
     cat = removeEmpty(target=X, margin="cols", select = mask)
     # round categorical (if there is any floating  point)
     cat = round(cat)
+    less_than_1_mask = cat < 1
+    less_than_1 = less_than_1_mask * 9999
+    cat = (cat * (less_than_1_mask == 0)) +  less_than_1
     # reconstruct original X
     X = X * (mask == 0)
     q = table(seq(1, ncol(cat)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", 
@@ -485,11 +488,9 @@ return(Matrix[Double] hpForPruning, Matrix[Double] changesByOp)
 ########################################################
 # The function will flip the noisy labels
 ########################################################
-flipLabels = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE)
+flipLabels = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Integer maxIter =10, Boolean verbose = FALSE)
 return (Matrix[Double] XY)
 {
-
-  print("---- starting flip labels ---")
   max_y = max(Y)
   if(min(Y) != max(Y))
   {
@@ -498,7 +499,7 @@ return (Matrix[Double] XY)
     inc = ((yhat != Y) & (rowMaxs(prob) > threshold))
     Xcor = removeEmpty(target = X, margin = "rows", select = (inc==0))
     Ycor = removeEmpty(target = Y, margin = "rows", select = (inc==0))
-    while(sum(inc) > 0)
+    while(sum(inc) > 0 & maxIter > 0)
     {
       # print("inc vector "+toString(inc))
       Xinc = removeEmpty(target = X, margin = "rows", select = inc)
@@ -512,9 +513,95 @@ return (Matrix[Double] XY)
       Ycor = rbind(Ycor, YcorI)
       X = Xinc
       Y = Yinc
+      print("maxIter: "+maxIter)
+      maxIter = maxIter - 1
     }
     XY = cbind(Xcor, Ycor)
   }
   else 
     XY = cbind(X, Y)
+}
+
+#######################################################################
+# frequencyEncoding: replace each code in the masked columns by its row count
+# Inputs: dataset X (NaN is counted as code 1) and mask, whose first row flags columns with 1
+# Output: freqX, a copy of the original X with the flagged columns overwritten by counts
+#######################################################################
+
+frequencyEncoding = function(Matrix[Double] X, Matrix[Double] mask)
+return (Matrix[Double] freqX) {
+
+  freqX = X
+  X = replace(target=X, pattern=NaN, replacement=1)
+  if(sum(mask) > 0)
+  {
+
+    parfor(i in 1:ncol(mask))
+    {
+      if(as.scalar(mask[1, i]) == 1)
+      {
+        Y = X[, i]
+        # occurrences of every code in this column; assumes codes are integers >= 1 (TODO confirm)
+        valueCount = table(Y, 1)
+        resp = matrix(0, nrow(Y), max(Y))
+        resp = (resp + t(seq(1, max(Y)))) == Y
+        # resp is now a one-hot indicator: cell (r, c) is 1 iff row r holds code c
+        # weighting by the counts leaves one non-zero entry per row, picked up by rowSums
+        resp = resp * t(valueCount)
+        freqX[, i] = rowSums(resp)
+      }
+
+    }
+  }
+}
+
+#######################################################################
+# WoE: replace each code in the masked columns by the label entropy of that code
+# Inputs: dataset X, labels Y, and mask, whose first row flags columns with 1
+# Output: the encoded copy of X with Y appended as the last column
+#######################################################################
+
+WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask)
+return (Matrix[Double] output) {
+  # NOTE(review): values come from getEntropy (entropy per code), not a log-odds weight of evidence
+  freqX = X
+  X = replace(target=X, pattern=NaN, replacement=1)
+  if(sum(mask) > 0)
+  {
+    parfor(i in 1:ncol(mask))
+    {
+      if(as.scalar(mask[1, i]) == 1)
+      {
+        L = X[, i]
+        entropy = getEntropy(L, Y)
+        resp = matrix(0, nrow(L), max(L))
+        resp = (resp + t(seq(1, max(L)))) == L
+        resp = resp * entropy
+        freqX[, i] = rowSums(resp)
+      }
+
+    }
+  }
+  output = cbind(freqX, Y)
+}
+
+
+getEntropy = function(Matrix[Double] eX, Matrix[Double] eY)
+return(Matrix[Double] entropyMatrix)
+{
+  # Returns a 1 x nrow(tab) row vector: base-2 entropy of eY within each code of eX
+  tab = table(eX, eY)
+  # tab(r, c) counts the rows where eX == r and eY == c (contingency table)
+  entropyMatrix = matrix(0, rows=1, cols=nrow(tab))
+  catTotal = rowSums(tab)
+  for(i in 1:nrow(tab))
+  {
+    # despite its name, "entropy" holds the label proportions within code i
+    entropy =  (tab[i,]/catTotal[i])
+    # base-2 Shannon entropy of those proportions; a NaN result is mapped to 0 below
+    catEntropy = sum(-entropy * log(entropy, 2))
+    catEntropy = ifelse(is.na(catEntropy), 0, catEntropy)
+    # NOTE(review): if 0*log(0) yields NaN, any empty label cell zeroes the whole sum - confirm
+    entropyMatrix[1, i] = catEntropy
+  }  
 }
diff --git a/scripts/builtin/topk_cleaning.dml b/scripts/builtin/topk_cleaning.dml
@@ -113,44 +113,47 @@ s_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
   # # # create logical pipeline seeds
   logicalSeedCI =  frame([
                    "4", "ED", "MVI", "OTLR", "EC", "0", "0", "0", "0",
-                   "5", "ED", "MVI", "CI", "SCALE","DUMMY","0", "0", "0", 
-                   "5", "OTLR", "EC", "CI", "SCALE", "DUMMY", "0","0", "0",
-                   "7", "MVI", "OTLR", "ED", "EC", "SCALE", "CI", "DUMMY", "0",
+                   "5", "ED", "EC", "SCALE", "CI","DUMMY","0", "0", "0", 
+                   "5", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0","0", "0",
+                   "8", "ED", "MVI", "OTLR", "ED", "EC", "SCALE", "CI", "DUMMY",
                    "5", "ED",  "MVI",  "SCALE", "CI", "DUMMY", "0", "0", "0",
                    "4", "MVI", "SCALE", "CI", "DUMMY", "0", "0", "0", "0", 
-                   "4", "ED", "EC", "CI", "DUMMY", "0", "0", "0", "0",
-                   "4", "MVI", "OTLR", "CI", "DUMMY", "0", "0", "0", "0",
-                   "6", "MVI", "OTLR", "EC", "CI", "SCALE", "DUMMY", "0", "0",
+                   "6", "ED", "MVI", "EC", "SCALE", "CI", "DUMMY", "0", "0",
+                   "6", "MVI", "OTLR","EC", "SCALE", "CI", "DUMMY", "0", "0",
+                   "7", "OTLR", "MVI", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0",
                    "7", "ED", "MVI", "OTLR", "EC", "SCALE", "CI", "DUMMY", "0"
                    ], rows=10, cols=9)  
 
-  logicalSeedNoCI = frame([
-                   "4", "ED", "MVI", "OTLR", "EC", "0", "0",
-                   "3", "ED", "MVI", "DUMMY", "0","0","0", 
-                   "3", "OTLR", "EC", "DUMMY", "0","0","0",
-                   "5", "MVI", "OTLR", "ED", "EC", "DUMMY", "0", 
-                   "3", "ED",  "MVI", "DUMMY", "0", "0", "0",
-                   "3", "MVI", "SCALE", "DUMMY", "0", "0", "0", 
-                   "3", "ED", "EC", "DUMMY", "0", "0", "0",
-                   "3", "MVI", "OTLR", "DUMMY", "0", "0", "0", 
-                   "4", "MVI", "OTLR", "EC", "DUMMY", "0", "0", 
-                   "6", "ED", "MVI", "OTLR", "EC", "SCALE", "DUMMY"
-                   ], rows=10, cols=7) 
+  logicalSeedNoCI =  frame([
+                   "3", "ED", "MVI", "OTLR", "EC", "0", "0", "0",
+                   "4", "ED", "EC", "SCALE", "DUMMY","0", "0", "0", 
+                   "4", "OTLR", "EC", "SCALE", "DUMMY", "0","0", "0",
+                   "7", "ED", "MVI", "OTLR", "ED", "EC", "SCALE", "DUMMY",
+                   "4", "ED",  "MVI",  "SCALE", "DUMMY", "0", "0", "0",
+                   "3", "MVI", "SCALE", "DUMMY", "0", "0", "0", "0", 
+                   "5", "ED", "MVI", "EC", "SCALE", "DUMMY", "0", "0",
+                   "5", "MVI", "OTLR","EC", "SCALE", "DUMMY", "0", "0",
+                   "6", "OTLR", "MVI", "OTLR", "EC", "SCALE", "DUMMY", "0",
+                   "6", "ED", "MVI", "OTLR", "EC", "SCALE", "DUMMY", "0"
+                   ], rows=10, cols=8)  
 
-  tab = table(eYtrain, 1)
-  dist = nrow(tab)
-  if(nrow(eYtrain) > 0 & dist < 15)
+  if(min(eYtrain) >= 1) {
+    tab = table(eYtrain, 1)
+    dist = nrow(tab)
+  }
+  if(nrow(eYtrain) > 0 & min(eYtrain) >= 1 & dist <= 15)
     logical = logicalSeedCI
-  else 
+  else {
     logical = logicalSeedNoCI
+  }
   idx = as.integer(as.scalar(logical[1, 1])) + 1
   category = logical[1, 2:idx]
 
   print("-- Cleaning - Enum Logical Pipelines: ");
   [bestLogical, score] = lg::enumerateLogical(X=eXtrain, y=eYtrain, Xtest=eXtest, ytest=eYtest, cmr=cmr, 
     cat=category, population=logical[2:nrow(logical)], max_iter=ceil(resource_val/topK), metaList = metaList,
     evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, primitives=primitives, param=parameters,
-    num_inst=3 , num_exec=2, cv=cv, cvk=cvk, verbose=TRUE, ctx=ctx)
+    num_inst=nrow(primitives), num_exec=ceil(resource_val/topK), cv=cv, cvk=cvk, verbose=TRUE, ctx=ctx)
   t6 = time(); print("---- finalized in: "+(t6-t5)/1e9+"s");
 
   topKPipelines = as.frame("NULL"); topKHyperParams = matrix(0,0,0); topKScores = matrix(0,0,0); features = as.frame("NULL")
@@ -231,8 +234,8 @@ return(Double dirtyScore, Matrix[Double] evalFunHp)
   mask = as.matrix(metaList['mask']) 
   mask = ifelse(sum(mask == dmask) < ncol(mask), matrix(1, rows=1, cols=ncol(mask)), mask)
   [eXtrain, eXtest] = recodeData(X, Xtest, mask, cv, "recode")
-  eXtrain = replace(target=eXtrain, pattern=NaN, replacement = 1)
-  eXtest = replace(target=eXtest, pattern=NaN, replacement = 1)
+  eXtrain = replace(target=eXtrain, pattern=NaN, replacement = 0)
+  eXtest = replace(target=eXtest, pattern=NaN, replacement = 0)
   dirtyScore = 100
   print(prefix+" sample from train data and dummy code");
   [eXtrain, Ytrain] =  utils::doSample(eXtrain, Y, sample, TRUE)
@@ -242,10 +245,10 @@ return(Double dirtyScore, Matrix[Double] evalFunHp)
   print(prefix+" hyper-parameter tuning");
   if(cv) {
     score = crossV(X=eXtrain, y=Ytrain, cvk=cvk, evalFunHp=evalFunHp,
-      pipList=pipList, metaList=metaList, evalFunc=evaluationFunc, trainML = 1)
+      pipList=pipList, metaList=metaList, evalFunc=evaluationFunc)
   }
   else {
-    score = eval(evaluationFunc, list(X=eXtrain, Y=Ytrain, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 1))
+    score = eval(evaluationFunc, list(X=eXtrain, Y=Ytrain, Xtest=eXtest, Ytest=Ytest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
   }
 
   dirtyScore = as.scalar(score[1, 1])

diff --git a/scripts/pipelines/properties/param.csv b/scripts/pipelines/properties/param.csv
@@ -6,15 +6,17 @@ normalize,0,0,0,0,0,0,,,,,,,,,,,,
 imputeByMean,0,1,0,0,0,2,,,,,,,,,,,,
 imputeByMedian,0,1,0,0,0,2,,,,,,,,,,,,
 mice,2,1,0,0,1,2,INT,FP,1,3,0.5,1,,,,,,
-abstain,1,0,0,1,1,2,FP,0.6,0.9,,,,,,,,,
-flipLabels,1,0,0,1,1,2,FP,0.6,0.9,,,,,,,,,
+abstain,1,0,0,1,1,2,FP,0.6,0.8,,,,,,,,,
+flipLabels,2,0,0,1,1,2,FP,INT,0.6,0.9,1,20,,,,,,
 SMOTE,1,1,0,1,1,2,INT,100,500,,,,,,,,,
 m_pca,3,0,0,0,0,2,INT,BOOL,BOOL,100,200,0,1,0,0,,,
 ppca,4,0,0,0,1,2,INT,INT,FP,FP,100,200,1,10,1.00E-09,1.00E-06,1.00E-02,1.00E-01
 fillDefault,0,0,0,0,0,2,,,,,,,,,,,,
 dummycoding,0,1,0,0,0,2,,,,,,,,,,,,
+frequencyEncoding,0,1,0,0,0,2,,,,,,,,,,,,
+WoE,0,1,0,1,0,2,,,,,,,,,,,,
 scale,2,0,0,0,0,0,BOOL,BOOL,0,1,0,1,,,,,,
 forward_fill,1,0,0,0,1,2,BOOL,0,1,,,,,,,,,
 imputeByFd,1,0,1,0,0,1,FP,0.6,0.9,,,,,,,,,
-underSampling,1,0,0,1,0,2,FP,0.6,0.99,,,,,,,,,
 wtomeklink,0,0,0,1,0,2,,,,,,,,,,,,
+underSampling,1,0,0,1,0,2,FP,0.1,0.6,,,,,,,,,
diff --git a/scripts/pipelines/properties/primitives.csv b/scripts/pipelines/properties/primitives.csv
@@ -1,7 +1,7 @@
 ED,MVI,OTLR,EC,SCALE,CI,DUMMY,DIM
 imputeByFd,imputeByMean,winsorize,imputeByMean,scale,abstain,dummycoding,m_pca
-outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,,ppca
-outlierByIQR,mice,outlierByIQR,fillDefault,,SMOTE,,
+outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,frequencyEncoding,ppca
+outlierByIQR,mice,outlierByIQR,fillDefault,,SMOTE,WoE,
 ,fillDefault,,,,flipLabels,,
-,imputeByFd,,,,,,
+,imputeByFd,,,,underSampling,,
 ,forward_fill,,,,,,
diff --git a/scripts/pipelines/properties/testPrimitives.csv b/scripts/pipelines/properties/testPrimitives.csv
@@ -1,3 +1,3 @@
 ED,MVI,OTLR,EC,SCALE,CI,DUMMY,DIM
 ,imputeByMean,winsorize,imputeByMean,scale,abstain,dummycoding,m_pca
-outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,wtomeklink,,ppca
+outlierBySd,imputeByMedian,outlierBySd,imputeByMedian,,underSampling,frequencyEncoding,ppca
diff --git a/src/test/scripts/functions/pipelines/applyEvaluateTest.dml b/src/test/scripts/functions/pipelines/applyEvaluateTest.dml
@@ -60,7 +60,7 @@ trainData = F[1:split,]
 testData = F[split+1:nrow(F),]
 
 
-result = applyAndEvaluate(trainData, testData, metaInfo, lg, pip[1,], hp[1,], "evalML", matrix("1 1e-3 1e-9 100", rows=1, cols=4), TRUE, FALSE)
+result = applyAndEvaluate(trainData, testData, metaInfo, lg, pip[1,], hp[1,], "evalML", evalHp, TRUE, FALSE)
 
 header = frame(["dirty acc", "train acc", "test acc"], rows=1, cols=3)
 result = as.frame(result)
@@ -75,13 +75,13 @@ write(result, $6)
 # UDF for evaluation  
 # choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
 evalML = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
-  Matrix[Double] evalFunHp, Boolean trainML = FALSE)
+  Matrix[Double] evalFunHp)
 
 return(Matrix[Double] accuracy)
 {
 
   beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), 
-    maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
+    maxi=1000, maxii=100, verbose=FALSE);
   [prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
   a = getAccuracy(Ytest, yhat, TRUE)
   print("accuracy: "+ accuracy+", accuracy weighted: "+a)

diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
@@ -1,3 +1,3 @@
-77.42222222222223
-77.15555555555555
-76.97777777777777
+93.69369369369369
+93.69369369369369
+93.69369369369369
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv.mtd b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv.mtd
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv b/src/test/scripts/functions/pipelines/intermediates/classification/dirtyScore.csv
@@ -1 +1 @@
-74.13333333333333
+90.990990990991