Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1201,8 +1201,9 @@ public void examSparsity(boolean allowCSR, int k) {
}

@Override
public void sparseToDense(int k) {
// do nothing
public MatrixBlock sparseToDense(int k) {
// a compressed block has no sparse representation to convert; return unchanged
return this;
}

@Override
Expand Down Expand Up @@ -1235,16 +1236,6 @@ public double interQuartileMean() {
return getUncompressed("interQuartileMean").interQuartileMean();
}

@Override
public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) {
	// No compressed-specific implementation: run the pick on the block handed back by getUncompressed.
	final MatrixBlock uncompressed = getUncompressed("pickValues");
	return uncompressed.pickValues(quantiles, ret);
}

@Override
public double pickValue(double quantile, boolean average) {
	// No compressed-specific implementation: run the pick on the block handed back by getUncompressed.
	final MatrixBlock uncompressed = getUncompressed("pickValue");
	return uncompressed.pickValue(quantile, average);
}

@Override
public double sumWeightForQuantile() {
return getUncompressed("sumWeightForQuantile").sumWeightForQuantile();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1387,12 +1387,13 @@ public void denseToSparse(boolean allowCSR, int k){
LibMatrixDenseToSparse.denseToSparse(this, allowCSR, k);
}

public final void sparseToDense() {
sparseToDense(1);
public final MatrixBlock sparseToDense() {
return sparseToDense(1);
}

public void sparseToDense(int k) {
public MatrixBlock sparseToDense(int k) {
LibMatrixSparseToDense.sparseToDense(this, k);
return this;
}

/**
Expand Down Expand Up @@ -4650,7 +4651,7 @@ public final MatrixBlock sortOperations(MatrixValue weights){
return sortOperations(weights, null);
}

public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) {
public final MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) {
return sortOperations(weights, result, 1);
}

Expand Down Expand Up @@ -4754,7 +4755,17 @@ public static double computeIQMCorrection(double sum, double sum_wt,
return (sum + q25Part*q25Val - q75Part*q75Val) / (sum_wt*0.5);
}

public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) {
/**
* Pick the quantiles out of this matrix. If this matrix contains two columns it is weighted quantile picking.
* If a single column it is unweighted.
*
* Note the values are assumed to be sorted.
*
* @param quantiles The quantiles to pick
* @param ret The result matrix
* @return The result matrix
*/
public final MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) {
return pickValues(quantiles, ret, false);
}

Expand All @@ -4779,16 +4790,62 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av
return output;
}

/**
* Pick the median value from this matrix. If this matrix has two columns it is weighted picking using the
* weight column, otherwise it is unweighted over the single column.
*
* Note the values are assumed to be sorted.
*
* @return The median value
*/
public double median() {
	final boolean weighted = getNumColumns() != 1;
	if(weighted) {
		// Weighted input: request averaging only when the total weight is even.
		final double weightSum = sumWeightForQuantile();
		return pickValue(0.5, weightSum % 2 == 0);
	}
	// Single column: request averaging only when the number of rows is even.
	return pickValue(0.5, getNumRows() % 2 == 0);
}


/**
* Pick a specific quantile from this matrix. If this matrix has two columns it is weighted picking, otherwise it is unweighted.
*
* Note the values are assumed to be sorted.
*
* @param quantile The quantile to pick
* @return The quantile
*/
public final double pickValue(double quantile){
	// Convenience overload: averaging is never requested on this path.
	final boolean average = false;
	return pickValue(quantile, average);
}

public double pickValue(double quantile, boolean average) {
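/**
* Pick a specific quantile from this matrix. If this matrix has a single column the picking is unweighted;
* otherwise it is weighted.
*
* Note the values are assumed to be sorted.
*
* @param quantile The quantile to pick
* @param average Whether averaging of adjacent values is requested. Averaging is only applied when the number of
* values (unweighted) or the sum of the weights (weighted) is even.
* @return The value picked for the given quantile
*/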
public final double pickValue(double quantile, boolean average) {
	// Exactly one column means plain values; any other column count is routed to the weighted picker.
	final boolean unweighted = this.getNumColumns() == 1;
	return unweighted ? pickUnweightedValue(quantile, average) : pickWeightedValue(quantile, average);
}

/**
 * Pick a quantile from column 0, treating every row as one observation.
 *
 * The 1-based rank is {@code ceil(quantile * rows)} and the picked row is {@code rank - 1}, clamped to the valid
 * row range. When averaging is requested, the row count is even, and the rank lies strictly between 0 and the row
 * count, the picked row is averaged with the row that follows it.
 *
 * @param quantile The quantile to pick
 * @param average  Whether averaging with the following row is requested
 * @return The picked value, or the mean of the picked value and its successor
 */
private double pickUnweightedValue(double quantile, boolean average) {
	final int n = getNumRows();
	final int rank = (int) Math.ceil(quantile * n); // 1-based rank
	// Same clamp nesting as before: lower bound applied first, then the upper bound.
	final int idx = Math.min(n - 1, Math.max(0, rank - 1));
	final double picked = get(idx, 0);

	// Averaging needs an even row count and a successor row to average with.
	final boolean useAverage = average && (n % 2 == 0) && rank > 0 && rank < n;
	if(!useAverage)
		return picked;
	return (picked + get(idx + 1, 0)) / 2;
}

private double pickWeightedValue(double quantile, boolean average) {
double sum_wt = sumWeightForQuantile();

// do averaging only if it is asked for; and sum_wt is even
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,40 @@ public void quantileWeightedFallback() {
TestUtils.compareMatrices(expected, actual, 0.0, "weighted sortOperations fallback");
}

@Test
public void pickDirectlyOnCompressedColumnDDC() {
	// Single generated column, compressed with the DDC scheme before picking.
	final MatrixBlock input = generate(ROWS, 1, 8, 1.0, 1, 50, 7);
	runDirectPick(input, CompressionType.DDC);
}

@Test
public void pickDirectlyOnCompressedColumnSDCZeros() {
	// Single generated column, compressed with the SDC scheme before picking.
	final MatrixBlock input = generate(ROWS, 1, 6, 0.2, 1, 40, 23);
	runDirectPick(input, CompressionType.SDC);
}

@Test
public void pickDirectlyOnCompressedColumnWithNegatives() {
	// Value bounds of -20 and 20 are passed to the generator; compressed with the SDC scheme before picking.
	final MatrixBlock input = generate(ROWS, 1, 8, 0.3, -20, 20, 41);
	runDirectPick(input, CompressionType.SDC);
}

/**
* Quantile picking normally runs on the uncompressed value/weight table produced by sortOperations, so the
* inherited (no longer overridden) pickValue path is never reached on a compressed block through that flow. This
* exercises it directly: the single column is sorted while staying compressed, then pickValue is invoked on the
* CompressedMatrixBlock itself and must match the uncompressed sorted column element for element. median() is not
* used here because it requires the two-column weighted representation.
*/
private void runDirectPick(MatrixBlock mb, CompressionType ct) {
	// Build the uncompressed reference from a copy BEFORE compressing. Compression may consume its input
	// (runQuantile takes the same precaution), so mb must not be read again once compress() has run.
	MatrixBlock sortedU = new MatrixBlock(mb).reorgOperations(ASC, new MatrixBlock(), 0, 0, 0);

	CompressedMatrixBlock cmb = compress(mb, ct);
	MatrixBlock sortedC = cmb.reorgOperations(ASC, new MatrixBlock(), 0, 0, 0);
	assertTrue("Expected the sorted result to stay compressed for " + ct, sortedC instanceof CompressedMatrixBlock);

	// Each quantile is picked with and without averaging and must agree exactly with the reference.
	for(double q : new double[] {0.0, 0.25, 0.5, 0.75, 0.9, 1.0}) {
		assertEquals("pick q=" + q + " " + ct, sortedU.pickValue(q, false), sortedC.pickValue(q, false), 0.0);
		assertEquals("pick avg q=" + q + " " + ct, sortedU.pickValue(q, true), sortedC.pickValue(q, true), 0.0);
	}
}

private void runQuantile(MatrixBlock mb, CompressionType ct) {
// reference is computed on a copy because compression may consume the input.
MatrixBlock expected = new MatrixBlock(mb).sortOperations(null, new MatrixBlock(), 1);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.sysds.test.component.matrix;

import static org.junit.Assert.assertEquals;

import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.junit.Test;

/**
* Tests the single-column (unweighted) branch of {@link MatrixBlock#pickValue(double, boolean)} and
* {@link MatrixBlock#median()}. The values are assumed to be sorted in ascending order, mirroring the contract used
* by the quantile pick instructions. The unweighted branch uses the same ceil-based rank as the two-column weighted
* branch (with an implicit weight of 1 per value), so a single column yields the same quantile as the equivalent
* (value, weight) representation. The two-column (weighted) branch is exercised separately through the compressed
* sort tests.
*/
public class QuantilePickTest {

	/** Comparison tolerance: every picked value must match its expectation exactly. */
	private static final double EXACT = 0;

	/** Ascending fixture with an odd number of values. */
	private static final double[] ODD = {10, 20, 30, 40, 50};

	/** Ascending fixture with an even number of values. */
	private static final double[] EVEN = {10, 20, 30, 40};

	/** Ascending fixture with leading zeros, used together with the sparse flag. */
	private static final double[] WITH_ZEROS = {0, 0, 10, 20, 30};

	/**
	 * Build an n x 1 block holding the given values in order.
	 *
	 * @param values The column content, one value per row
	 * @param sparse The sparse flag handed to the MatrixBlock constructor
	 * @return The populated block with its non-zero count recomputed
	 */
	private static MatrixBlock singleColumn(double[] values, boolean sparse) {
		final MatrixBlock column = new MatrixBlock(values.length, 1, sparse);
		int row = 0;
		for(double value : values)
			column.set(row++, 0, value);
		column.recomputeNonZeros();
		return column;
	}

	/** Shorthand for a column built with the sparse flag off. */
	private static MatrixBlock denseColumn(double[] values) {
		return singleColumn(values, false);
	}

	@Test
	public void pickOddLengthNoAverage() {
		// Picked index is ceil(quantile * 5) - 1, never below 0.
		final MatrixBlock col = denseColumn(ODD);
		assertEquals("q=0.0", 10, col.pickValue(0.0, false), EXACT); // rank 0 -> index 0
		assertEquals("q=0.2", 10, col.pickValue(0.2, false), EXACT); // rank 1 -> index 0
		assertEquals("q=0.5", 30, col.pickValue(0.5, false), EXACT); // rank 3 -> index 2
		assertEquals("q=0.75", 40, col.pickValue(0.75, false), EXACT); // rank 4 -> index 3
		assertEquals("q=1.0", 50, col.pickValue(1.0, false), EXACT); // rank 5 -> index 4
	}

	@Test
	public void pickOddLengthAverageSuppressed() {
		// With an odd row count the averaging request is ignored, so results equal the non-averaged picks.
		final MatrixBlock col = denseColumn(ODD);
		assertEquals("q=0.5 avg", 30, col.pickValue(0.5, true), EXACT);
		assertEquals("q=0.75 avg", 40, col.pickValue(0.75, true), EXACT);
	}

	@Test
	public void pickEvenLengthAverage() {
		// With an even row count the picked row is averaged with the row after it.
		final MatrixBlock col = denseColumn(EVEN);
		assertEquals("q=0.25 avg", 15, col.pickValue(0.25, true), EXACT); // rank 1 -> rows 0 and 1
		assertEquals("q=0.375 avg", 25, col.pickValue(0.375, true), EXACT); // rank 2 -> rows 1 and 2
		assertEquals("q=0.5 avg", 25, col.pickValue(0.5, true), EXACT); // rank 2 -> rows 1 and 2
		assertEquals("q=0.75 avg", 35, col.pickValue(0.75, true), EXACT); // rank 3 -> rows 2 and 3
	}

	@Test
	public void pickEvenLengthNoAverage() {
		final MatrixBlock col = denseColumn(EVEN);
		assertEquals("q=0.25", 10, col.pickValue(0.25, false), EXACT); // rank 1 -> index 0
		assertEquals("q=0.5", 20, col.pickValue(0.5, false), EXACT); // rank 2 -> index 1
		assertEquals("q=0.75", 30, col.pickValue(0.75, false), EXACT); // rank 3 -> index 2
	}

	@Test
	public void pickAverageClampedAtTop() {
		// Once the rank reaches the last row there is no following row, so no averaging takes place.
		final MatrixBlock evenCol = denseColumn(EVEN);
		assertEquals("even q=0.95 avg", 40, evenCol.pickValue(0.95, true), EXACT); // rank 4 -> index 3
		assertEquals("even q=1.0 avg", 40, evenCol.pickValue(1.0, true), EXACT);
		final MatrixBlock oddCol = denseColumn(ODD);
		assertEquals("odd q=0.95 avg", 50, oddCol.pickValue(0.95, true), EXACT); // odd count -> no averaging
	}

	@Test
	public void pickSingleElement() {
		// A one-row column returns its only value for every quantile and for the median.
		final MatrixBlock col = denseColumn(new double[] {42});
		assertEquals("q=0.0", 42, col.pickValue(0.0, false), EXACT);
		assertEquals("q=0.5", 42, col.pickValue(0.5, false), EXACT);
		assertEquals("q=1.0", 42, col.pickValue(1.0, false), EXACT);
		assertEquals("q=0.5 avg", 42, col.pickValue(0.5, true), EXACT);
		assertEquals("median", 42, col.median(), EXACT);
	}

	@Test
	public void pickSparseSingleColumnWithZeros() {
		// Ascending values with leading zeros, built with the sparse flag on.
		final MatrixBlock col = singleColumn(WITH_ZEROS, true);
		assertEquals("q=0.0", 0, col.pickValue(0.0, false), EXACT); // rank 0 -> index 0, a zero
		assertEquals("q=0.5", 10, col.pickValue(0.5, false), EXACT); // rank 3 -> index 2
		assertEquals("q=0.75", 20, col.pickValue(0.75, false), EXACT); // rank 4 -> index 3
		assertEquals("q=1.0", 30, col.pickValue(1.0, false), EXACT); // rank 5 -> index 4
	}

	@Test
	public void medianSingleColumn() {
		// Odd count: the middle value. Even count: the mean of the two middle values.
		assertEquals("odd median", 30, denseColumn(ODD).median(), EXACT);
		assertEquals("even median", 25, denseColumn(EVEN).median(), EXACT);
		assertEquals("sparse median", 10, singleColumn(WITH_ZEROS, true).median(), EXACT);
	}

	@Test
	public void pickSingleColumnMatchesDenseAndSparse() {
		// The same values must give identical picks whether the sparse flag is on or off.
		final double[] values = {-5, -1, 0, 2, 7, 9};
		final MatrixBlock dense = singleColumn(values, false);
		final MatrixBlock sparse = singleColumn(values, true);
		final double[] quantiles = {0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0};
		final boolean[] averaging = {false, true};
		for(double q : quantiles) {
			for(boolean avg : averaging) {
				assertEquals("q=" + q + " avg=" + avg, dense.pickValue(q, avg), sparse.pickValue(q, avg), EXACT);
			}
		}
	}
}
Loading