diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index dae13ed9f94..3a79443157b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -1201,8 +1201,9 @@ public void examSparsity(boolean allowCSR, int k) { } @Override - public void sparseToDense(int k) { - // do nothing + public MatrixBlock sparseToDense(int k) { + // a compressed block has no sparse representation to convert; return unchanged + return this; } @Override @@ -1235,16 +1236,6 @@ public double interQuartileMean() { return getUncompressed("interQuartileMean").interQuartileMean(); } - @Override - public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { - return getUncompressed("pickValues").pickValues(quantiles, ret); - } - - @Override - public double pickValue(double quantile, boolean average) { - return getUncompressed("pickValue").pickValue(quantile, average); - } - @Override public double sumWeightForQuantile() { return getUncompressed("sumWeightForQuantile").sumWeightForQuantile(); diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index b1c06cdd51e..361d190bd02 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -1387,12 +1387,13 @@ public void denseToSparse(boolean allowCSR, int k){ LibMatrixDenseToSparse.denseToSparse(this, allowCSR, k); } - public final void sparseToDense() { - sparseToDense(1); + public final MatrixBlock sparseToDense() { + return sparseToDense(1); } - public void sparseToDense(int k) { + public MatrixBlock sparseToDense(int k) { LibMatrixSparseToDense.sparseToDense(this, k); + return this; } /** @@ -4650,7 +4651,7 @@ public final 
MatrixBlock sortOperations(MatrixValue weights){ return sortOperations(weights, null); } - public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) { + public final MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) { return sortOperations(weights, result, 1); } @@ -4754,7 +4755,17 @@ public static double computeIQMCorrection(double sum, double sum_wt, return (sum + q25Part*q25Val - q75Part*q75Val) / (sum_wt*0.5); } - public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { + /** + * Pick the quantiles out of this matrix. If this matrix contains two columns it is weighted quantile picking. + * If a single column it is unweighted. + * + * Note the values are assumed to be sorted. + * + * @param quantiles The quantiles to pick + * @param ret The result matrix + * @return The result matrix + */ + public final MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { return pickValues(quantiles, ret, false); } @@ -4779,16 +4790,62 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av return output; } + /** + * Pick the median value from this matrix. If this matrix has two columns it is weighted picking using the + * weight column, otherwise it is unweighted over the single column. + * + * Note the values are assumed to be sorted. + * + * @return The median value + */ public double median() { + if(getNumColumns() == 1) + return pickValue(0.5, getNumRows() % 2 == 0); double sum_wt = sumWeightForQuantile(); return pickValue(0.5, sum_wt%2==0); } - + + /** + * Pick a specific quantile from this matrix. If this matrix has two columns it is weighted picking, otherwise it is unweighted. + * + * Note the values are assumed to be sorted. 
+ * + * @param quantile The quantile to pick + * @return The quantile + */ public final double pickValue(double quantile){ return pickValue(quantile, false); } - public double pickValue(double quantile, boolean average) { + /** + * Pick a specific quantile from this matrix. If this matrix has two columns it is weighted picking, otherwise it is unweighted. + * + * Note the values are assumed to be sorted. + * + * @param quantile The quantile to pick + * @param average If the quantile is averaged. + * @return The quantile + */ + public final double pickValue(double quantile, boolean average) { + if(this.getNumColumns() == 1) + return pickUnweightedValue(quantile, average); + return pickWeightedValue(quantile, average); + } + + private double pickUnweightedValue(double quantile, boolean average) { + // Mirror the weighted convention (pickWeightedValue) with an implicit weight of 1 per value, so a single + // column yields the same quantile as the equivalent two-column (value, weight) representation: take the + // ceil-based rank and average with the next value only if requested, rows is even, and 0 < rank < rows. 
+ final int rows = getNumRows(); + average = average && (rows % 2 == 0); + final int pos = (int) Math.ceil(quantile * rows); // 1-based rank + final int i = Math.min(Math.max(pos - 1, 0), rows - 1); + if(average && pos > 0 && pos < rows) + return (get(i, 0) + get(i + 1, 0)) / 2; + return get(i, 0); + } + + private double pickWeightedValue(double quantile, boolean average) { double sum_wt = sumWeightForQuantile(); // do averaging only if it is asked for; and sum_wt is even diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java index 7ab7187eb78..083a29f965b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java @@ -203,6 +203,40 @@ public void quantileWeightedFallback() { TestUtils.compareMatrices(expected, actual, 0.0, "weighted sortOperations fallback"); } + @Test + public void pickDirectlyOnCompressedColumnDDC() { + runDirectPick(generate(ROWS, 1, 8, 1.0, 1, 50, 7), CompressionType.DDC); + } + + @Test + public void pickDirectlyOnCompressedColumnSDCZeros() { + runDirectPick(generate(ROWS, 1, 6, 0.2, 1, 40, 23), CompressionType.SDC); + } + + @Test + public void pickDirectlyOnCompressedColumnWithNegatives() { + runDirectPick(generate(ROWS, 1, 8, 0.3, -20, 20, 41), CompressionType.SDC); + } + + /** + * Quantile picking normally runs on the uncompressed value/weight table produced by sortOperations, so the + * inherited (no longer overridden) pickValue path is never reached on a compressed block through that flow. This + * exercises it directly: the single column is sorted while staying compressed, then pickValue is invoked on the + * CompressedMatrixBlock itself and must match pickValue on the uncompressed sorted column for every quantile. + * median() is not exercised here; its single-column path is covered on uncompressed blocks by QuantilePickTest. 
+ */ + private void runDirectPick(MatrixBlock mb, CompressionType ct) { + CompressedMatrixBlock cmb = compress(mb, ct); + MatrixBlock sortedC = cmb.reorgOperations(ASC, new MatrixBlock(), 0, 0, 0); + assertTrue("Expected the sorted result to stay compressed for " + ct, sortedC instanceof CompressedMatrixBlock); + MatrixBlock sortedU = mb.reorgOperations(ASC, new MatrixBlock(), 0, 0, 0); + + for(double q : new double[] {0.0, 0.25, 0.5, 0.75, 0.9, 1.0}) { + assertEquals("pick q=" + q + " " + ct, sortedU.pickValue(q, false), sortedC.pickValue(q, false), 0.0); + assertEquals("pick avg q=" + q + " " + ct, sortedU.pickValue(q, true), sortedC.pickValue(q, true), 0.0); + } + } + private void runQuantile(MatrixBlock mb, CompressionType ct) { // reference is computed on a copy because compression may consume the input. MatrixBlock expected = new MatrixBlock(mb).sortOperations(null, new MatrixBlock(), 1); diff --git a/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java new file mode 100644 index 00000000000..472b61d8cd6 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.matrix; + +import static org.junit.Assert.assertEquals; + +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +/** + * Tests the single-column (unweighted) branch of {@link MatrixBlock#pickValue(double, boolean)} and + * {@link MatrixBlock#median()}. The values are assumed to be sorted in ascending order, mirroring the contract used + * by the quantile pick instructions. The unweighted branch uses the same ceil-based rank as the two-column weighted + * branch (with an implicit weight of 1 per value), so a single column yields the same quantile as the equivalent + * (value, weight) representation. The two-column (weighted) branch is exercised separately through the compressed + * sort tests. + */ +public class QuantilePickTest { + + private static MatrixBlock singleColumn(double[] values, boolean sparse) { + MatrixBlock mb = new MatrixBlock(values.length, 1, sparse); + for(int i = 0; i < values.length; i++) + mb.set(i, 0, values[i]); + mb.recomputeNonZeros(); + return mb; + } + + @Test + public void pickOddLengthNoAverage() { + // rank = ceil(quantile * 5), value at (rank-1). + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals("q=0.0", 10, mb.pickValue(0.0, false), 0); // rank 0 -> idx 0 + assertEquals("q=0.2", 10, mb.pickValue(0.2, false), 0); // rank ceil(1.0)=1 -> idx 0 + assertEquals("q=0.5", 30, mb.pickValue(0.5, false), 0); // rank ceil(2.5)=3 -> idx 2 + assertEquals("q=0.75", 40, mb.pickValue(0.75, false), 0); // rank ceil(3.75)=4 -> idx 3 + assertEquals("q=1.0", 50, mb.pickValue(1.0, false), 0); // rank ceil(5.0)=5 -> idx 4 + } + + @Test + public void pickOddLengthAverageSuppressed() { + // Odd number of values -> averaging is suppressed, so average matches no-average. 
+ MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals("q=0.5 avg", 30, mb.pickValue(0.5, true), 0); + assertEquals("q=0.75 avg", 40, mb.pickValue(0.75, true), 0); + } + + @Test + public void pickEvenLengthAverage() { + // Even number of values -> averaging of adjacent order statistics applies. + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40}, false); + assertEquals("q=0.25 avg", 15, mb.pickValue(0.25, true), 0); // rank 1 -> (idx0+idx1)/2 + assertEquals("q=0.375 avg", 25, mb.pickValue(0.375, true), 0); // rank ceil(1.5)=2 -> (idx1+idx2)/2 + assertEquals("q=0.5 avg", 25, mb.pickValue(0.5, true), 0); // rank 2 -> (idx1+idx2)/2 + assertEquals("q=0.75 avg", 35, mb.pickValue(0.75, true), 0); // rank 3 -> (idx2+idx3)/2 + } + + @Test + public void pickEvenLengthNoAverage() { + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40}, false); + assertEquals("q=0.25", 10, mb.pickValue(0.25, false), 0); // rank 1 -> idx 0 + assertEquals("q=0.5", 20, mb.pickValue(0.5, false), 0); // rank 2 -> idx 1 + assertEquals("q=0.75", 30, mb.pickValue(0.75, false), 0); // rank 3 -> idx 2 + } + + @Test + public void pickAverageClampedAtTop() { + // Top quantile: rank reaches the last element so there is no successor to average with. 
+ MatrixBlock even = singleColumn(new double[] {10, 20, 30, 40}, false); + assertEquals("even q=0.95 avg", 40, even.pickValue(0.95, true), 0); // rank ceil(3.8)=4 -> idx 3, no avg + assertEquals("even q=1.0 avg", 40, even.pickValue(1.0, true), 0); + MatrixBlock odd = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals("odd q=0.95 avg", 50, odd.pickValue(0.95, true), 0); // odd -> avg suppressed + } + + @Test + public void pickSingleElement() { + MatrixBlock mb = singleColumn(new double[] {42}, false); + assertEquals("q=0.0", 42, mb.pickValue(0.0, false), 0); + assertEquals("q=0.5", 42, mb.pickValue(0.5, false), 0); + assertEquals("q=1.0", 42, mb.pickValue(1.0, false), 0); + assertEquals("q=0.5 avg", 42, mb.pickValue(0.5, true), 0); + assertEquals("median", 42, mb.median(), 0); + } + + @Test + public void pickSparseSingleColumnWithZeros() { + // Sorted ascending including leading zeros, stored sparse. + MatrixBlock mb = singleColumn(new double[] {0, 0, 10, 20, 30}, true); + assertEquals("q=0.0", 0, mb.pickValue(0.0, false), 0); // rank 0 -> idx 0 (zero) + assertEquals("q=0.5", 10, mb.pickValue(0.5, false), 0); // rank ceil(2.5)=3 -> idx 2 + assertEquals("q=0.75", 20, mb.pickValue(0.75, false), 0); // rank ceil(3.75)=4 -> idx 3 + assertEquals("q=1.0", 30, mb.pickValue(1.0, false), 0); // rank 5 -> idx 4 + } + + @Test + public void medianSingleColumn() { + // Odd length -> middle element; even length -> average of the two middle elements. 
+ assertEquals("odd median", 30, singleColumn(new double[] {10, 20, 30, 40, 50}, false).median(), 0); + assertEquals("even median", 25, singleColumn(new double[] {10, 20, 30, 40}, false).median(), 0); + assertEquals("sparse median", 10, singleColumn(new double[] {0, 0, 10, 20, 30}, true).median(), 0); + } + + @Test + public void pickSingleColumnMatchesDenseAndSparse() { + double[] v = {-5, -1, 0, 2, 7, 9}; + MatrixBlock dense = singleColumn(v, false); + MatrixBlock sparse = singleColumn(v, true); + for(double q : new double[] {0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0}) + for(boolean avg : new boolean[] {false, true}) + assertEquals("q=" + q + " avg=" + avg, dense.pickValue(q, avg), sparse.pickValue(q, avg), 0); + } +}