Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import org.apache.sysds.runtime.compress.lib.CLALibRexpand;
import org.apache.sysds.runtime.compress.lib.CLALibScalar;
import org.apache.sysds.runtime.compress.lib.CLALibSlice;
import org.apache.sysds.runtime.compress.lib.CLALibSort;
import org.apache.sysds.runtime.compress.lib.CLALibSquash;
import org.apache.sysds.runtime.compress.lib.CLALibTSMM;
import org.apache.sysds.runtime.compress.lib.CLALibTernaryOp;
Expand Down Expand Up @@ -847,9 +848,8 @@ public CmCovObject covOperations(COVOperator op, MatrixBlock that, MatrixBlock w
}

@Override
public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) {
MatrixBlock right = getUncompressed(weights);
return getUncompressed("sortOperations").sortOperations(right, result);
public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result, int k) {
// Delegates entirely to CLALibSort.sort, handing over this block together with the caller's arguments.
// NOTE(review): k is presumably the degree of parallelism - confirm against CLALibSort.sort.
return CLALibSort.sort(this, weights, result, k);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,16 @@ public AColGroup[] splitReshapePushDown(final int multiplier, final int nRow, fi
return splitReshape(multiplier, nRow, nColOrg);
}

/**
* Sort the values of this column group according to double comparison operations and return the result as another
* compressed group.
*
* The group is sorted on its own, independently of every other column group, so callers must not rely on the
* rows of the returned group lining up with the rows of other groups.
*
* Implementations may return this instance when no reordering is required, and may throw a
* NotImplementedException when sorting is not supported for the encoding or for groups with more than one column.
*
* @return The sorted group
*/
public abstract AColGroup sort();

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -769,4 +769,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	// Slice the dictionary down to the selected columns and wrap it in a new constant group that is
	// addressed by the new column ids.
	final int nCol = getNumCols();
	return ColGroupConst.create(newColumnIDs, _dict.sliceColumns(selectedColumns, nCol));
}

@Override
public AColGroup sort() {
// No reordering is performed; the same instance is returned.
// NOTE(review): presumably valid because every row of a constant group holds the same tuple - confirm.
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1178,4 +1178,23 @@ public AColGroup convertToDeltaDDC() {
public AColGroup convertToDDCLZW() {
// Builds a ColGroupDDCLZW from this group's column indexes, dictionary and row-to-dictionary mapping.
// NOTE(review): the trailing null is presumably the optional cached counts - confirm against ColGroupDDCLZW.create.
return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null);
}

@Override
public AColGroup sort() {
	// TODO restore support for run length encoding to exploit the runs

	// Occurrences per dictionary entry, indexed by dictionary id.
	final int[] occurrences = getCounts();
	// Sort index of the dictionary: position i holds the dictionary id to emit as the i-th run.
	final int[] order = _dict.sort();

	final AMapToData sortedMap = MapToFactory.create(_data.size(), occurrences.length);
	int pos = 0;
	for(int i = 0; i < occurrences.length; i++) {
		// Write one contiguous run of this dictionary id, as long as its occurrence count.
		final int id = order[i];
		final int runEnd = pos + occurrences[id];
		while(pos < runEnd)
			sortedMap.set(pos++, id);
	}

	// The dictionary is reused unchanged, so the counts (indexed by dictionary id) remain valid.
	return ColGroupDDC.create(_colIndexes, _dict, sortedMap, occurrences);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -571,4 +571,23 @@ public String toString() {
sb.append(Arrays.toString(_reference));
return sb.toString();
}

@Override
public AColGroup sort() {
	// TODO restore support for run length encoding.

	// Occurrences per dictionary entry, indexed by dictionary id.
	final int[] occurrences = getCounts();
	// Sort index of the dictionary: position i holds the dictionary id to emit as the i-th run.
	final int[] order = _dict.sort();

	final AMapToData sortedMap = MapToFactory.create(_data.size(), occurrences.length);
	int pos = 0;
	for(int i = 0; i < occurrences.length; i++) {
		// Write one contiguous run of this dictionary id, as long as its occurrence count.
		final int id = order[i];
		final int runEnd = pos + occurrences[id];
		while(pos < runEnd)
			sortedMap.set(pos++, id);
	}

	// Dictionary and reference are passed through unchanged; only the row mapping is rebuilt.
	return ColGroupDDCFOR.create(_colIndexes, _dict, sortedMap, occurrences, _reference);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1022,4 +1022,10 @@ protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs,
ColGroupDDC g = (ColGroupDDC) convertToDDC();
return g.removeEmptyColsSubset(newColumnIDs, selectedColumns);
}

@Override
public AColGroup sort() {
	// Sorting is not done on this encoding directly: convert to DDC and sort that representation.
	return ((ColGroupDDC) convertToDDC()).sort();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -488,4 +488,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut){
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns){
// selectedColumns is not consulted; the result is simply a new empty group over the new column ids.
return new ColGroupEmpty(newColumnIDs);
}

@Override
public AColGroup sort() {
// No reordering is performed; the same instance is returned.
// NOTE(review): presumably valid because an empty group has no values to order - confirm.
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -750,4 +750,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns){
// Not supported for this column group type; always throws.
// NOTE(review): the message names 'removeEmptyColumns' although this method is removeEmptyColsSubset.
throw new NotImplementedException("Unimplemented method 'removeEmptyColumns'");
}

@Override
public AColGroup sort() {
// Sorting is not supported for this column group type; always throws.
throw new NotImplementedException("Unimplemented method 'sort'");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -741,4 +741,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns){
// Not supported for this column group type; always throws.
// NOTE(review): the message names 'removeEmptyColumns' although this method is removeEmptyColsSubset.
throw new NotImplementedException("Unimplemented method 'removeEmptyColumns'");
}

/**
 * Sorting is not supported for this column group type.
 *
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public AColGroup sort() {
	// Carry a message, like the other unimplemented stubs, so the failing operation is identifiable from the
	// exception alone.
	throw new NotImplementedException("Unimplemented method 'sort'");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1200,4 +1200,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns){
// Not supported for this column group type; always throws.
// NOTE(review): the message names 'removeEmptyColumns' although this method is removeEmptyColsSubset.
throw new NotImplementedException("Unimplemented method 'removeEmptyColumns'");
}

/**
 * Sorting is not supported for this column group type.
 *
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public AColGroup sort() {
	// Carry a message, like the other unimplemented stubs, so the failing operation is identifiable from the
	// exception alone.
	throw new NotImplementedException("Unimplemented method 'sort'");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -903,4 +903,50 @@ public String toString() {
sb.append(_data.toString());
return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();
	// TODO restore support for run length encoding.

	// Occurrences per dictionary entry, indexed by dictionary id.
	final int[] occurrences = getCounts();
	// Sort index of the dictionary: position i holds the dictionary id to emit as the i-th run.
	final int[] order = _dict.sort();

	// Locate the first position in the sort index whose value is not below the default value; the block of
	// default rows is placed directly in front of it.
	// todo use binary search for minor improvements.
	final double defaultValue = _defaultTuple[0];
	int firstAfterDefault = occurrences.length;
	for(int i = 0; i < order.length; i++) {
		if(_dict.getValue(order[i], 0, 1) >= defaultValue) {
			firstAfterDefault = i;
			break;
		}
	}

	final int nNonDefault = _data.size();
	final int nDefault = _numRows - nNonDefault;
	final AMapToData sortedMap = MapToFactory.create(nNonDefault, occurrences.length);
	final int[] rowOffsets = new int[nNonDefault];

	int pos = 0;
	for(int i = 0; i < occurrences.length; i++) {
		final int id = order[i];
		// Runs in front of the default block keep their position; runs behind it are pushed back by the
		// number of default rows.
		final int shift = i < firstAfterDefault ? 0 : nDefault;
		for(int j = 0; j < occurrences[id]; j++) {
			rowOffsets[pos] = pos + shift;
			sortedMap.set(pos++, id);
		}
	}

	final AOffset sortedOffsets = OffsetFactory.createOffset(rowOffsets);
	return ColGroupSDC.create(_colIndexes, _numRows, _dict, _defaultTuple, sortedOffsets, sortedMap, occurrences);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -651,4 +651,49 @@ public String toString() {
return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();
	// TODO restore support for run length encoding.

	// Occurrences per dictionary entry, indexed by dictionary id.
	final int[] occurrences = getCounts();
	// Sort index of the dictionary: position i holds the dictionary id to emit as the i-th run.
	final int[] order = _dict.sort();

	// Locate the first position in the sort index whose dictionary value is non-negative; the block of default
	// rows is placed directly in front of it.
	// NOTE(review): comparing against 0 presumably works because dictionary values are stored relative to the
	// reference, which acts as the default - confirm.
	// todo use binary search for minor improvements.
	int firstAfterDefault = occurrences.length;
	for(int i = 0; i < order.length; i++) {
		if(_dict.getValue(order[i], 0, 1) >= 0) {
			firstAfterDefault = i;
			break;
		}
	}

	final int nNonDefault = _data.size();
	final int nDefault = _numRows - nNonDefault;
	final AMapToData sortedMap = MapToFactory.create(nNonDefault, occurrences.length);
	final int[] rowOffsets = new int[nNonDefault];

	int pos = 0;
	for(int i = 0; i < occurrences.length; i++) {
		final int id = order[i];
		// Runs in front of the default block keep their position; runs behind it are pushed back by the
		// number of default rows.
		final int shift = i < firstAfterDefault ? 0 : nDefault;
		for(int j = 0; j < occurrences[id]; j++) {
			rowOffsets[pos] = pos + shift;
			sortedMap.set(pos++, id);
		}
	}

	final AOffset sortedOffsets = OffsetFactory.createOffset(rowOffsets);
	return ColGroupSDCFOR.create(_colIndexes, _numRows, _dict, sortedOffsets, sortedMap, occurrences, _reference);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -746,4 +746,24 @@ public String toString() {
sb.append(_indexes.toString());
return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();

	// There is exactly one non-default value, so the sorted column is one contiguous block of that value: in
	// front of the default rows when it is smaller than the default, behind them otherwise.
	final int[] occurrences = getCounts();
	final int nNonDefault = occurrences[0];
	final int nDefault = _numRows - nNonDefault;
	final boolean behindDefault = _dict.getValue(0, 0, 1) >= _defaultTuple[0];
	final int firstRow = behindDefault ? nDefault : 0;

	// Consecutive row indexes starting at the first row of the block.
	final int[] rowOffsets = new int[nNonDefault];
	for(int j = 0, row = firstRow; j < nNonDefault; j++, row++)
		rowOffsets[j] = row;

	final AOffset sortedOffsets = OffsetFactory.createOffset(rowOffsets);
	return ColGroupSDCSingle.create(_colIndexes, _numRows, _dict, _defaultTuple, sortedOffsets, occurrences);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1071,4 +1071,24 @@ public String toString() {
sb.append(_indexes.toString());
return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();

	// There is exactly one non-default value, so the sorted column is one contiguous block of that value: in
	// front of the zeros (default) when it is negative, behind them otherwise.
	final int[] occurrences = getCounts();
	final int nNonDefault = occurrences[0];
	final int nDefault = _numRows - nNonDefault;
	final boolean behindZeros = _dict.getValue(0, 0, 1) >= 0;
	final int firstRow = behindZeros ? nDefault : 0;

	// Consecutive row indexes starting at the first row of the block.
	final int[] rowOffsets = new int[nNonDefault];
	for(int j = 0, row = firstRow; j < nNonDefault; j++, row++)
		rowOffsets[j] = row;

	final AOffset sortedOffsets = OffsetFactory.createOffset(rowOffsets);
	return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, _dict, sortedOffsets, occurrences);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1113,4 +1113,49 @@ public String toString() {
sb.append(_data);
return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();
	// TODO restore support for run length encoding.

	// Occurrences per dictionary entry, indexed by dictionary id.
	final int[] occurrences = getCounts();
	// Sort index of the dictionary: position i holds the dictionary id to emit as the i-th run.
	final int[] order = _dict.sort();

	// Locate the first position in the sort index whose value is non-negative; the block of default rows is
	// placed directly in front of it.
	// NOTE(review): the default is presumably zero for this group type, hence the comparison against 0 - confirm.
	// todo use binary search for minor improvements.
	int firstAfterDefault = occurrences.length;
	for(int i = 0; i < order.length; i++) {
		if(_dict.getValue(order[i], 0, 1) >= 0) {
			firstAfterDefault = i;
			break;
		}
	}

	final int nNonDefault = _data.size();
	final int nDefault = _numRows - nNonDefault;
	final AMapToData sortedMap = MapToFactory.create(nNonDefault, occurrences.length);
	final int[] rowOffsets = new int[nNonDefault];

	int pos = 0;
	for(int i = 0; i < occurrences.length; i++) {
		final int id = order[i];
		// Runs in front of the default block keep their position; runs behind it are pushed back by the
		// number of default rows.
		final int shift = i < firstAfterDefault ? 0 : nDefault;
		for(int j = 0; j < occurrences[id]; j++) {
			rowOffsets[pos] = pos + shift;
			sortedMap.set(pos++, id);
		}
	}

	final AOffset sortedOffsets = OffsetFactory.createOffset(rowOffsets);
	return ColGroupSDCZeros.create(_colIndexes, _numRows, _dict, sortedOffsets, sortedMap, occurrences);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.apache.sysds.runtime.functionobjects.Multiply;
import org.apache.sysds.runtime.functionobjects.ReduceAll;
import org.apache.sysds.runtime.functionobjects.ReduceRow;
import org.apache.sysds.runtime.functionobjects.SortIndex;
import org.apache.sysds.runtime.functionobjects.ValueFunction;
import org.apache.sysds.runtime.instructions.cp.CmCovObject;
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
Expand All @@ -65,6 +66,7 @@
import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.matrix.operators.CMOperator;
import org.apache.sysds.runtime.matrix.operators.ReorgOperator;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
import org.apache.sysds.utils.stats.InfrastructureAnalyzer;
Expand Down Expand Up @@ -1331,4 +1333,14 @@ public String toString() {

return sb.toString();
}

@Override
public AColGroup sort() {
	if(getNumCols() > 1)
		throw new NotImplementedException();
	// sortOperations builds a value/weight table for quantiles, so it is not used here; an ascending sort of the
	// single column is done by reordering the rows with a reorg operation.
	// NOTE(review): SortIndex(1, false, false) is presumably (column 1, not decreasing, return values rather than
	// indexes) and the trailing 1 of ReorgOperator presumably the thread count - confirm.
	final ReorgOperator sortRows = new ReorgOperator(new SortIndex(1, false, false), 1);
	final MatrixBlock sorted = _data.reorgOperations(sortRows, new MatrixBlock(), 0, 0, 0);
	return create(sorted, _colIndexes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -293,4 +293,9 @@ public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns){
// Not supported for this column group type; always throws.
// NOTE(review): the message names 'removeEmptyColumns' although this method is removeEmptyColsSubset.
throw new NotImplementedException("Unimplemented method 'removeEmptyColumns'");
}

@Override
public AColGroup sort() {
// Sorting is not supported for this column group type; always throws.
throw new NotImplementedException("Unimplemented method 'sort'");
}
}
Loading
Loading