Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.apache.sysds.runtime.compress.lib.CLALibMMChain;
import org.apache.sysds.runtime.compress.lib.CLALibMatrixMult;
import org.apache.sysds.runtime.compress.lib.CLALibMerge;
import org.apache.sysds.runtime.compress.lib.CLALibRemoveEmpty;
import org.apache.sysds.runtime.compress.lib.CLALibReplace;
import org.apache.sysds.runtime.compress.lib.CLALibReorg;
import org.apache.sysds.runtime.compress.lib.CLALibReshape;
Expand Down Expand Up @@ -871,9 +872,7 @@ public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts, Matr

/**
 * Remove empty rows or columns from this compressed matrix block.
 *
 * The call is delegated to {@link CLALibRemoveEmpty#rmempty} with this compressed block as input, rather than
 * first materializing an uncompressed copy via {@code getUncompressed()}. All arguments are forwarded unchanged.
 *
 * @param ret         The output block argument, forwarded as is
 * @param rows        Forwarded as is; presumably true to remove rows and false to remove columns (confirm against
 *                    MatrixBlock.removeEmptyOperations)
 * @param emptyReturn Forwarded as is
 * @param select      Forwarded as is; presumably an optional selection vector
 * @return The result produced by {@code CLALibRemoveEmpty.rmempty}
 */
@Override
public MatrixBlock removeEmptyOperations(MatrixBlock ret, boolean rows, boolean emptyReturn, MatrixBlock select) {
	return CLALibRemoveEmpty.rmempty(this, ret, rows, emptyReturn, select);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P;
import org.apache.sysds.runtime.compress.CompressionSettings;
import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P;
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex.SliceResult;
Expand All @@ -41,6 +41,7 @@
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
import org.apache.sysds.runtime.compress.lib.CLALibCombineGroups;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
Expand Down Expand Up @@ -401,8 +402,9 @@ public final AColGroup rightMultByMatrix(MatrixBlock right) {
* @param cru The right hand side column upper
* @param nRows The number of rows in this column group
*/
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru){
throw new NotImplementedException("not supporting right Decompressing Multiply on class: " + this.getClass().getSimpleName());
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) {
throw new NotImplementedException(
"not supporting right Decompressing Multiply on class: " + this.getClass().getSimpleName());
}

/**
Expand Down Expand Up @@ -806,7 +808,7 @@ public final void selectionMultiply(MatrixBlock selection, P[] points, MatrixBlo
else
denseSelection(selection, points, ret, rl, ru);
}

/**
* Get an approximate sparsity of this column group
*
Expand Down Expand Up @@ -981,4 +983,70 @@ public String toString() {
sb.append(_colIndexes);
return sb.toString();
}

/**
 * Return a column group containing only the rows selected in the given boolean vector.
 *
 * Whenever possible only modify the index structure, not the dictionary of the column groups.
 *
 * The returned group is not necessarily a new allocation: implementations whose encoding does not depend on the
 * rows (for instance the constant and empty groups) return themselves.
 *
 * @param selectV The selection vector; presumably {@code selectV[r]} is true for each row r to keep (TODO confirm
 *                against the callers)
 * @param rOut    The number of rows in the output
 * @return The column group restricted to the selected rows
 */
public abstract AColGroup removeEmptyRows(boolean[] selectV, int rOut);

/**
 * Build the column group that remains after dropping every column not selected in the given boolean vector.
 *
 * Each kept column is renumbered to the count of selected columns that precede it in the selection vector. If all
 * columns of this group are kept only the column index is replaced, otherwise the subclass slices its
 * dictionaries via {@link #removeEmptyColsSubset}.
 *
 * @param selectV The selection vector
 * @return The new column group, or {@code null} if no column of this group is selected
 */
public AColGroup removeEmptyCols(boolean[] selectV) {
	// Nothing of this group survives the selection.
	if(!inSelection(selectV))
		return null;

	// keptLocal: positions inside this group that survive, keptGlobal: their column ids in the output.
	final IntArrayList keptLocal = new IntArrayList();
	final IntArrayList keptGlobal = new IntArrayList();

	// Only scan up to the last column of this group (or the end of the selection vector, whichever is first).
	final int limit = Math.min(selectV.length, _colIndexes.get(_colIndexes.size() - 1) + 1);

	int outCol = 0; // number of selected columns seen so far == output id of the next selected column
	int ownPos = 0; // position of the next not yet visited column of this group
	int col = 0;
	while(col < limit) {
		final boolean keep = selectV[col];
		if(col == _colIndexes.get(ownPos)) {
			if(keep) {
				keptLocal.appendValue(ownPos);
				keptGlobal.appendValue(outCol);
			}
			ownPos++;
		}
		if(keep)
			outCol++;
		col++;
	}

	final IColIndex newColumnIDs = ColIndexFactory.create(keptGlobal);
	final boolean allColumnsKept = newColumnIDs.size() == _colIndexes.size();
	return allColumnsKept ? copyAndSet(newColumnIDs) : removeEmptyColsSubset(newColumnIDs, keptLocal);
}

/**
 * Using the selection of columns, slice out those and return in a new column group with the given column indexes.
 * Ideally this method should only modify the dictionaries.
 *
 * This is called from {@code removeEmptyCols} only when some, but not all, of the columns of this group are
 * selected. Both arguments are filled in lock step by that caller, so they have the same number of entries.
 *
 * @param newColumnIDs    the new column indexes
 * @param selectedColumns The selected columns of this column group, given as positions inside this group
 *                        (guaranteed &lt; current number of columns)
 * @return A new Column group
 */
protected abstract AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns);

/**
 * Check if at least one column of this group is selected.
 *
 * Column indexes at or beyond the length of the selection vector count as not selected. This keeps the check
 * consistent with {@code removeEmptyCols}, which only scans up to the length of the selection vector, and avoids
 * an ArrayIndexOutOfBoundsException when the vector is shorter than the last column index of this group.
 *
 * @param selection The selection vector
 * @return true if any column index of this group is marked true in the selection
 */
private boolean inSelection(boolean[] selection) {
	final int nCol = _colIndexes.size();
	for(int i = 0; i < nCol; i++) {
		final int col = _colIndexes.get(i);
		if(col < selection.length && selection[col])
			return true;
	}
	return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ public int getNumValues() {
* produce an overhead in cases where the count is calculated, but the overhead will be limited to number of distinct
* tuples in the dictionary.
*
* The returned counts always contains the number of zero tuples as well if there are some contained, even if they
* are not materialized.
*
* @return The count of each value in the MatrixBlock.
*/
Expand Down Expand Up @@ -212,6 +210,7 @@ public void clear() {
counts = null;
}


@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,4 +402,5 @@ protected IDictionary combineDictionaries(int nCol, List<AColGroup> right) {
// The sparsity reported for this column group is taken directly from its dictionary.
public double getSparsity() {
return _dict.getSparsity();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,22 @@ private final void leftMultByMatrixNoPreAggRowsDense(MatrixBlock mb, double[] re
*/
protected abstract void multiplyScalar(double v, double[] resV, int offRet, AIterator it);

/**
 * Decompress into the given sparse block, selecting the routine based on how the dictionary is stored.
 *
 * For a dictionary that is not a {@code MatrixBlockDictionary}, or one backed by a dense matrix block, the dense
 * values are decompressed using the provided iterator. For a sparse matrix block dictionary the sparse routine is
 * called, which currently does not receive the provided iterator.
 *
 * @param sb   The sparse block to decompress into
 * @param rl   Forwarded unchanged; presumably the first row (inclusive)
 * @param ru   Forwarded unchanged; presumably the last row (exclusive)
 * @param offR Forwarded unchanged; presumably the row offset in the output
 * @param offC Forwarded unchanged; presumably the column offset in the output
 * @param it   The iterator provided by the caller
 */
public void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC, AIterator it) {
	if(!(_dict instanceof MatrixBlockDictionary)) {
		decompressToSparseBlockDenseDictionaryWithProvidedIterator(sb, rl, ru, offR, offC, _dict.getValues(), it);
		return;
	}

	// The dictionary is never empty.
	final MatrixBlock dictBlock = ((MatrixBlockDictionary) _dict).getMatrixBlock();
	if(dictBlock.isInSparseFormat()) {
		// TODO make sparse decompression where the iterator is known in argument
		decompressToSparseBlockSparseDictionary(sb, rl, ru, offR, offC, dictBlock.getSparseBlock());
		return;
	}

	final double[] denseValues = dictBlock.getDenseBlockValues();
	decompressToSparseBlockDenseDictionaryWithProvidedIterator(sb, rl, ru, offR, offC, denseValues, it);
}

public void decompressToDenseBlock(DenseBlock db, int rl, int ru, int offR, int offC, AIterator it) {
if(_dict instanceof MatrixBlockDictionary) {
final MatrixBlockDictionary md = (MatrixBlockDictionary) _dict;
Expand All @@ -223,6 +239,9 @@ public void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru,
decompressToDenseBlockDenseDictionaryWithProvidedIterator(db, rl, ru, offR, offC, _dict.getValues(), it);
}

/**
 * Decompress into a sparse block from dense dictionary values, using the iterator supplied by the caller.
 *
 * NOTE(review): the meaning of rl, ru, offR and offC is inferred from their names only - confirm against the
 * implementations.
 *
 * @param db     The sparse block to decompress into (named db although the type is SparseBlock)
 * @param rl     Presumably the row to start at (inclusive)
 * @param ru     Presumably the row to end at (exclusive)
 * @param offR   Presumably the row offset into the output block
 * @param offC   Presumably the column offset into the output block
 * @param values The dense dictionary values; decompressToSparseBlock passes either the dense values of the
 *               dictionary matrix block or {@code _dict.getValues()}
 * @param it     The iterator provided by the caller
 */
public abstract void decompressToSparseBlockDenseDictionaryWithProvidedIterator(SparseBlock db, int rl, int ru,
int offR, int offC, double[] values, AIterator it);

/**
 * Decompress into a dense block from dense dictionary values, using the iterator supplied by the caller.
 *
 * NOTE(review): the meaning of rl, ru, offR and offC is inferred from their names only - confirm against the
 * implementations.
 *
 * @param db     The dense block to decompress into
 * @param rl     Presumably the row to start at (inclusive)
 * @param ru     Presumably the row to end at (exclusive)
 * @param offR   Presumably the row offset into the output block
 * @param offC   Presumably the column offset into the output block
 * @param values The dense dictionary values
 * @param it     The iterator provided by the caller
 */
public abstract void decompressToDenseBlockDenseDictionaryWithProvidedIterator(DenseBlock db, int rl, int ru,
int offR, int offC, double[] values, AIterator it);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
import org.apache.sysds.runtime.compress.lib.CLALibLeftMultBy;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
Expand Down Expand Up @@ -527,7 +528,7 @@ public CmCovObject centralMoment(CMOperator op, int nRows) {
@Override
public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) {
IDictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size());
if(d == null){
if(d == null) {
if(max <= 0)
return null;
return ColGroupEmpty.create(max);
Expand Down Expand Up @@ -758,4 +759,14 @@ public AColGroup combineWithSameIndex(int nRow, int nCol, List<AColGroup> right)
// This column group type always answers true here.
// NOTE(review): presumably this lets a right multiplication by an identity matrix reuse the group without
// copying - confirm against the caller of this hook.
protected boolean allowShallowIdentityRightMult() {
return true;
}

/**
 * Row removal returns this same instance; no new column group is allocated and both arguments are ignored.
 *
 * NOTE(review): presumably valid because a constant group stores no per-row data - confirm.
 *
 * @param selectV The selection vector (unused)
 * @param rOut    The number of rows in the output (unused)
 * @return this column group
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
return this;
}

/**
 * Create a constant column group over the given column indexes, with the dictionary sliced down to the selected
 * columns of this group.
 *
 * @param newColumnIDs    The column indexes of the returned group
 * @param selectedColumns The positions inside this group of the columns to keep
 * @return The group returned by {@code ColGroupConst.create} for the sliced dictionary
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	final int nColBefore = getNumCols();
	final IDictionary slicedDict = _dict.sliceColumns(selectedColumns, nColBefore);
	return ColGroupConst.create(newColumnIDs, slicedDict);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import java.util.List;
import java.util.concurrent.ExecutorService;

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
Expand Down Expand Up @@ -56,6 +54,7 @@
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
Expand All @@ -71,6 +70,9 @@
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
import org.jboss.netty.handler.codec.compression.CompressionException;

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;

/**
* Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC).
*/
Expand Down Expand Up @@ -672,7 +674,8 @@ private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, i
}
}

final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) {
final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen,
DoubleVector vVec) {
vVec = vVec.broadcast(aa);
final int offj = k * jd;
final int end = endT + offj;
Expand Down Expand Up @@ -1095,6 +1098,21 @@ public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, E
return res;
}

/**
 * Create a DDC group with the same column indexes and dictionary as this group, where only the row mapping is
 * replaced by {@code _data.removeEmpty(selectV, rOut)}.
 *
 * The last argument given to {@code ColGroupDDC.create} is null.
 * NOTE(review): presumably that argument is the cached counts, which no longer apply once rows are removed -
 * confirm against the signature of create.
 *
 * @param selectV The selection vector, forwarded to the mapping
 * @param rOut    The number of rows in the output, forwarded to the mapping
 * @return The column group created for the reduced mapping
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
return ColGroupDDC.create(_colIndexes, _dict, _data.removeEmpty(selectV, rOut), null);
}

// This column group type always answers true here.
// NOTE(review): presumably this lets a right multiplication by an identity matrix reuse the group without
// copying - confirm against the caller of this hook.
@Override
protected boolean allowShallowIdentityRightMult() {
return true;
}

/**
 * Create a DDC group over the given column indexes, with the dictionary sliced down to the selected columns of
 * this group. The row mapping {@code _data} is passed on unchanged.
 *
 * @param newColumnIDs    The column indexes of the returned group
 * @param selectedColumns The positions inside this group of the columns to keep
 * @return The group returned by {@code ColGroupDDC.create} for the sliced dictionary
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	final int nColBefore = getNumCols();
	return ColGroupDDC.create(
		newColumnIDs,
		_dict.sliceColumns(selectedColumns, nColBefore),
		_data,
		null);
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Expand All @@ -1104,11 +1122,6 @@ public String toString() {
return sb.toString();
}

// This column group type always answers true here.
// NOTE(review): presumably this lets a right multiplication by an identity matrix reuse the group without
// copying - confirm against the caller of this hook.
@Override
protected boolean allowShallowIdentityRightMult() {
return true;
}

public AColGroup convertToDeltaDDC() {
int numCols = _colIndexes.size();
int numRows = _data.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.compress.utils.Util;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
Expand Down Expand Up @@ -546,6 +547,20 @@ protected boolean allowShallowIdentityRightMult() {
return false;
}

/**
 * Create a DDCFOR group with the same column indexes, dictionary and reference as this group, where only the row
 * mapping is replaced by {@code _data.removeEmpty(selectV, rOut)}.
 *
 * The fourth argument given to {@code ColGroupDDCFOR.create} is null.
 * NOTE(review): presumably that argument is the cached counts, which no longer apply once rows are removed -
 * confirm against the signature of create.
 *
 * @param selectV The selection vector, forwarded to the mapping
 * @param rOut    The number of rows in the output, forwarded to the mapping
 * @return The column group created for the reduced mapping
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
return ColGroupDDCFOR.create(_colIndexes, _dict, _data.removeEmpty(selectV, rOut), null, _reference);
}

/**
 * Create a DDCFOR group over the given column indexes, keeping only the selected columns of this group.
 *
 * Both the reference values and the dictionary are reduced to the selected columns, while the row mapping
 * {@code _data} is passed on unchanged.
 *
 * @param newColumnIDs    The column indexes of the returned group
 * @param selectedColumns The positions inside this group of the columns to keep
 * @return The group returned by {@code ColGroupDDCFOR.create} for the sliced dictionary and reference
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	// Pick the reference value of every kept column, in the order of the selection.
	final int nKept = selectedColumns.size();
	final double[] keptReference = new double[nKept];
	int k = 0;
	while(k < nKept) {
		keptReference[k] = _reference[selectedColumns.get(k)];
		k++;
	}

	return ColGroupDDCFOR.create(newColumnIDs, _dict.sliceColumns(selectedColumns, getNumCols()), _data, null,
		keptReference);
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1009,4 +1009,17 @@ protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
for(int rix = rl; rix < ru; rix++)
c[rix] *= preAgg[it.next()];
}

/**
 * Remove rows by first converting this group with {@code convertToDDC()} and then delegating to the row removal
 * of the resulting DDC group.
 *
 * @param selectV The selection vector, forwarded unchanged
 * @param rOut    The number of rows in the output, forwarded unchanged
 * @return The result of {@code ColGroupDDC.removeEmptyRows} on the converted group
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
	return ((ColGroupDDC) convertToDDC()).removeEmptyRows(selectV, rOut);
}

/**
 * Slice columns by first converting this group with {@code convertToDDC()} and then delegating to the column
 * slicing of the resulting DDC group.
 *
 * @param newColumnIDs    The column indexes of the returned group, forwarded unchanged
 * @param selectedColumns The positions inside this group of the columns to keep, forwarded unchanged
 * @return The result of {@code ColGroupDDC.removeEmptyColsSubset} on the converted group
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs,
	org.apache.sysds.runtime.compress.utils.IntArrayList selectedColumns) {
	return ((ColGroupDDC) convertToDDC()).removeEmptyColsSubset(newColumnIDs, selectedColumns);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
Expand Down Expand Up @@ -476,4 +477,15 @@ public AColGroup combineWithSameIndex(int nRow, int nCol, List<AColGroup> right)

return new ColGroupEmpty(combinedIndex);
}

/**
 * Row removal returns this same instance; no new column group is allocated and both arguments are ignored.
 *
 * NOTE(review): presumably valid because an empty group stores no per-row data - confirm.
 *
 * @param selectV The selection vector (unused)
 * @param rOut    The number of rows in the output (unused)
 * @return this column group
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut){
return this;
}


/**
 * Create a new empty column group over the given column indexes. The selected positions are not needed, since no
 * dictionary is sliced here.
 *
 * @param newColumnIDs    The column indexes of the returned group
 * @param selectedColumns The positions inside this group of the columns to keep (unused)
 * @return A new {@code ColGroupEmpty} with the given column indexes
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	final AColGroup emptySubset = new ColGroupEmpty(newColumnIDs);
	return emptySubset;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ public static long getExactSizeOnDisk(List<AColGroup> colGroups) {
}
ret += grp.getExactSizeOnDisk();
}
if(LOG.isWarnEnabled())
LOG.warn(" duplicate dicts on exact Size on Disk : " + (colGroups.size() - dicts.size()) );


return ret;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
Expand Down Expand Up @@ -740,4 +741,13 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
throw new NotImplementedException("Unimplemented method 'splitReshape'");
}

/**
 * Row removal is not implemented for this column group type.
 *
 * @param selectV The selection vector (unused)
 * @param rOut    The number of rows in the output (unused)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public AColGroup removeEmptyRows(boolean[] selectV, int rOut) {
	final String message = "Unimplemented method 'removeEmptyRows'";
	throw new NotImplementedException(message);
}

/**
 * Column slicing is not implemented for this column group type.
 *
 * The exception message names this method, so a failure can be traced to the right place.
 *
 * @param newColumnIDs    The column indexes of the requested group (unused)
 * @param selectedColumns The positions inside this group of the columns to keep (unused)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
protected AColGroup removeEmptyColsSubset(IColIndex newColumnIDs, IntArrayList selectedColumns) {
	throw new NotImplementedException("Unimplemented method 'removeEmptyColsSubset'");
}
}
Loading
Loading