Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,4 @@ venv/*
# resource optimization
scripts/resource/output
*.pem
ADDED_Testing/hello.dml
181 changes: 181 additions & 0 deletions scripts/builtin/raGroupby_exp1.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This raGroupby function takes a matrix dataset as input and performs the
# relational operation groupby on it
#
# INPUT:
# ------------------------------------------------------------------------------
# X Matrix of input data [shape: N x M]
# col Integer indicating the column index on which to execute the groupby
# method Groupby implementation method (nested-loop, permutation-matrix)
# ------------------------------------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------------------------------------
# Y Matrix of grouped data with one row per distinct value of column col:
#   the group key followed by the remaining columns of the rows of that group,
#   padded with zeros [shape: G x (R*(M-1)+1), G = number of groups,
#   R = number of rows in the largest group]
# ------------------------------------------------------------------------------

m_raGroupby_new = function (Matrix[Double] X, Integer col, String method)
  return (Matrix[Double] Y)
{
  # Groups the rows of X by the value found in column `col`.
  # Y has one row per distinct key: the key in column 1, followed by the
  # remaining (non-key) columns of every row of that group, flattened row by
  # row and padded with zeros up to the width of the largest group.
  # `method` selects the implementation: "nested-loop" or "permutation-matrix";
  # any other value stops with an error.

  if (method == "nested-loop") {
    # Distinct keys in the order returned by unique(); this is the row order of Y
    uniqueValues = unique(X[, col])
    numGroups = nrow(uniqueValues)

    # Entry k holds the row of uniqueValues (and therefore of Y) that contains
    # the k-th smallest key, so sorted groups can be written back directly
    sortedKeyRows = order(target = uniqueValues, by = 1, decreasing = FALSE, index.return = TRUE)

    # The largest group determines the width of Y
    maxRowsInGroup = max(table(X[, col], 1))
    payloadCols = ncol(X) - 1

    # Zero matrix that receives the group data; column 1 is the group key
    Y = matrix(0, numGroups, maxRowsInGroup * payloadCols + 1)
    Y[, 1] = uniqueValues

    # Sort the input by key so that the rows of each group are contiguous
    Xordered = order(target = X, by = col, decreasing = FALSE, index.return = FALSE)
    numRows = nrow(Xordered)

    # Remove the key column once; the first and last column need their own
    # case because an empty index range such as 1:0 is not a valid slice
    if (col == 1) {
      Xpayload = Xordered[, 2:ncol(X)]
    }
    else if (col == ncol(X)) {
      Xpayload = Xordered[, 1:(col - 1)]
    }
    else {
      Xpayload = cbind(Xordered[, 1:(col - 1)], Xordered[, (col + 1):ncol(X)])
    }

    remainingGroups = numGroups  # groups that still have to be copied
    Ypos = 1                     # position in the ascending key order
    YrowStart = 1                # first row of the current group in Xordered
    i = 1                        # scan position in Xordered

    while (remainingGroups > 0) {
      targetRow = as.integer(as.scalar(sortedKeyRows[Ypos, 1]))
      currentGroup = as.scalar(uniqueValues[targetRow, 1])

      # Count the contiguous rows that belong to the current group
      amountRows = 0
      scanning = TRUE
      while (scanning) {
        if (i > numRows) {
          scanning = FALSE
        }
        else if (as.scalar(Xordered[i, col]) == currentGroup) {
          amountRows = amountRows + 1
          i = i + 1
        }
        else {
          scanning = FALSE
        }
      }

      if (amountRows > 0) {
        # Flatten the rows of the group (row by row) into a single output row
        YrowEnd = YrowStart + amountRows - 1
        groupRows = Xpayload[YrowStart:YrowEnd, ]
        newRowColumns = amountRows * payloadCols
        newRow = matrix(groupRows, rows = 1, cols = newRowColumns)
        Y[targetRow, 2:(newRowColumns + 1)] = newRow
      }

      # Continue with the next group
      remainingGroups = remainingGroups - 1
      Ypos = Ypos + 1
      YrowStart = YrowStart + amountRows
    }
  }
  else if (method == "permutation-matrix") {
    # Extract the grouping column and its distinct values
    key = X[, col]
    key_unique = unique(X[, col])
    numGroups = nrow(key_unique)

    # Compare every distinct key (rows) against every input row (columns)
    key_compare = key_unique %*% matrix(1, rows=1, cols=nrow(X))
    key_matrix = matrix(1, rows=numGroups, cols=1) %*% t(key)

    # Group index (row of key_unique) for every row of X
    groupIndex = rowIndexMax(t(key_compare == key_matrix))

    # The largest group determines the width of Y
    maxRowsInGroup = max(table(X[, col], 1))
    totalCells = maxRowsInGroup * (ncol(X) - 1) + 1

    # Running position of each row inside its own group
    indexWithInGroups = cumsum(t(table(groupIndex, seq(1, nrow(X)), numGroups, nrow(X))))
    selectedMatrix = table(seq(1, nrow(X)), groupIndex, nrow(X), numGroups)

    # Target slot of every row: a block of maxRowsInGroup slots per group
    colIndex = groupIndex * maxRowsInGroup - maxRowsInGroup + rowSums(indexWithInGroups * selectedMatrix)

    # Permutation matrix; the output dimensions are given explicitly because
    # table() would otherwise stop at max(colIndex) and drop the unused
    # trailing slots whenever the last group is smaller than the largest one
    P = table(seq(1, nrow(X)), colIndex, nrow(X), numGroups * maxRowsInGroup)

    # Copy all tuples to their slots with a single matrix multiplication
    Y_temp = t(P) %*% X

    # Remove the key column from Y_temp
    if (col == 1) {
      Y_temp_reduce = Y_temp[, (col + 1):ncol(Y_temp)]
    }
    else if (col == ncol(X)) {
      Y_temp_reduce = Y_temp[, 1:(col - 1)]
    }
    else {
      Y_temp_reduce = cbind(Y_temp[, 1:(col - 1)], Y_temp[, (col + 1):ncol(Y_temp)])
    }

    # Assemble the output: key column plus one flattened slot block per group
    Y = matrix(0, rows=numGroups, cols=totalCells)
    Y[, 1] = key_unique
    Y[, 2:ncol(Y)] = matrix(Y_temp_reduce, rows=numGroups, cols=totalCells-1)
  }
  else {
    # Without this branch Y would silently stay unassigned
    stop("raGroupby: unsupported method '" + method + "' (expected 'nested-loop' or 'permutation-matrix')")
  }
}

9 changes: 9 additions & 0 deletions testing/groupby_new_time.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Driver for the experimental groupby (presumably used for timing, per the
# file name): random 10000 x 5 input whose cells are rounded to whole numbers
# between 1 and 5, grouped on column 2 with the nested-loop method.
raw_values = rand(rows = 10000, cols = 5, min = 1, max = 5)
data = round(raw_values)

# bring the experimental implementation into scope as namespace ra_new
source("scripts/builtin/raGroupby_exp1.dml") as ra_new

# run the grouping; the result is not inspected any further in this script
grouped = ra_new::m_raGroupby_new(data, 2, "nested-loop")

8 changes: 8 additions & 0 deletions testing/groupby_old_time.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Driver for the existing groupby builtin (presumably used for timing, per the
# file name): random 10000 x 5 input whose cells are rounded to whole numbers
# between 1 and 5, grouped on column 2 with the nested-loop method.
raw_values = rand(rows = 10000, cols = 5, min = 1, max = 5)
data = round(raw_values)

# bring the existing implementation into scope as namespace ra_old
source("scripts/builtin/raGroupby.dml") as ra_old

# run the grouping; the result is not inspected any further in this script
grouped = ra_old::m_raGroupby(data, 2, "nested-loop")
51 changes: 51 additions & 0 deletions testing/testing.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Runs the existing and the experimental nested-loop groupby on the same small
# random input, reports every cell in which the two results differ, and prints
# the dimensions of both results.

# create an input matrix (5 x 5, cells rounded to whole numbers between 1 and 5)
input_matrix = round(rand(rows = 5, cols = 5, min = 1, max = 5))

# load functions
source("scripts/builtin/raGroupby.dml") as ra_old
source("scripts/builtin/raGroupby_exp1.dml") as ra_new

# use the initial ra_groupby function
old_func = ra_old::m_raGroupby(input_matrix, 2, "nested-loop")

# use the new ra_groupby function
new_func = ra_new::m_raGroupby_new(input_matrix, 2, "nested-loop")

old_rows = nrow(old_func)
old_cols = ncol(old_func)

# check if the new function still operates correctly; the cell-by-cell
# comparison is only valid when both results have the same shape, otherwise
# indexing new_func with the bounds of old_func can run out of range
if ((old_rows == nrow(new_func)) & (old_cols == ncol(new_func))) {
  for (i in 1:old_rows) {
    for (j in 1:old_cols) {
      old_val = as.scalar(old_func[i,j])
      new_val = as.scalar(new_func[i,j])
      if (old_val != new_val) {
        print("The values are not identical")
        print("The index is i x j")
        print(i)
        print(j)
      }
      else {
        if (j == 1) {
          print("The first value was correct")
        }
        if (j == old_cols) {
          print("The last value was correct")
        }
      }
    }
  }
}
else {
  print("The matrix dimensions are not identical")
}

print("The amount of rows in the old matrix")
print(nrow(old_func))

print("The amount of col in the old matrix")
print(ncol(old_func))

print("The amount of rows * columns in the old matrix")
print(ncol(old_func)*nrow(old_func))

print("The amount of rows in the new matrix")
print(nrow(new_func))

print("The amount of col in the new matrix")
print(ncol(new_func))

print("The amount of rows * columns in the new matrix")
print(ncol(new_func)*nrow(new_func))
Loading