From 34ca7ae3c7b85a2d1da250267d9b3498c62bfb82 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Mon, 1 Jun 2026 14:00:57 +0000 Subject: [PATCH 1/5] Tighten MatrixBlock API and extract aggregateUnary specialization Refactors the MatrixBlock surface so quantile/sort entry points are sealed against accidental overrides, sparseToDense is fluent, and the unary-aggregate dispatch lives in its own helper. - MatrixBlock: - sparseToDense() / sparseToDense(int) now return MatrixBlock so they chain at the call site instead of forcing a separate lookup of `this` - sortOperations, pickValues, pickValue marked final (subclasses like CompressedMatrixBlock must route through these instead of replacing them) - pickValue now dispatches to a new pickUnweightedValue / pickWeightedValue helper pair and documents the sorted-input precondition - isShallowSerialize: drop the two SparseBlockMCSR opt-in branches that depended on size estimates; the simpler condition is enough and the heuristic was unsafe under MCSR-to-CSR conversion - LibAggregateUnarySpecialization (new): owns the aggregateUnary(MatrixBlock, AggregateUnaryOperator, ...) dispatch with separate sparse and dense helpers - LibMatrixMult: in two LMM helpers, allocate the dense result block when ret.getDenseBlock() returned null, instead of NPE'ing --- .../compress/CompressedMatrixBlock.java | 14 +- .../data/LibAggregateUnarySpecialization.java | 148 ++++++++++++++++++ .../runtime/matrix/data/LibMatrixMult.java | 10 ++ .../runtime/matrix/data/MatrixBlock.java | 87 +++++++--- 4 files changed, 229 insertions(+), 30 deletions(-) create mode 100644 src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index dae13ed9f94..8d8d6a94b79 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -1201,8 +1201,8 @@ public void examSparsity(boolean allowCSR, int k) { } @Override - public void sparseToDense(int k) { - // do nothing + public MatrixBlock sparseToDense(int k) { + return this; // do nothing } @Override @@ -1235,16 +1235,6 @@ public double interQuartileMean() { return getUncompressed("interQuartileMean").interQuartileMean(); } - @Override - public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { - return getUncompressed("pickValues").pickValues(quantiles, ret); - } - - @Override - public double pickValue(double quantile, boolean average) { - return getUncompressed("pickValue").pickValue(quantile, average); - } - @Override public double sumWeightForQuantile() { return getUncompressed("sumWeightForQuantile").sumWeightForQuantile(); diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java new file mode 100644 index 00000000000..79f08cb353a --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.matrix.data; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.common.Types.CorrectionLocationType; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.instructions.cp.KahanObject; +import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysds.runtime.matrix.operators.AggregateOperator; +import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator; + +public class LibAggregateUnarySpecialization { + protected static final Log LOG = LogFactory.getLog(LibAggregateUnarySpecialization.class.getName()); + + public static void aggregateUnary(final MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, int blen, + MatrixIndexes indexesIn) { + if(op.sparseSafe) + sparseAggregateUnaryHelp(mb, op, result, blen, indexesIn); + else + denseAggregateUnaryHelp(mb, op, result, blen, indexesIn); + } + + private static void sparseAggregateUnaryHelp(final MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, + int blen, MatrixIndexes indexesIn) { + // initialize result + if(op.aggOp.initialValue != 0) + result.reset(result.rlen, result.clen, op.aggOp.initialValue); + CellIndex tempCellIndex = new CellIndex(-1, -1); + KahanObject buffer = new KahanObject(0, 0); + + if(mb.sparse && mb.sparseBlock != null) { + SparseBlock a = mb.sparseBlock; + for(int r = 0; r < Math.min(mb.rlen, a.numRows()); r++) { + if(a.isEmpty(r)) + continue; + int apos = a.pos(r); + int alen = a.size(r); + int[] aix = a.indexes(r); + double[] aval = a.values(r); + for(int i = apos; i < apos + alen; i++) { + tempCellIndex.set(r, aix[i]); + op.indexFn.execute(tempCellIndex, tempCellIndex); + incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, aval[i], + buffer); + } + } + } + else if(!mb.sparse && mb.denseBlock != null) { + DenseBlock a = mb.getDenseBlock(); + for(int i = 0; i < mb.rlen; i++) + for(int j = 0; j < mb.clen; j++) { + tempCellIndex.set(i, j); + op.indexFn.execute(tempCellIndex, tempCellIndex); + incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, a.get(i, j), + buffer); + } + } + } + + private static void denseAggregateUnaryHelp(MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, int blen, + MatrixIndexes indexesIn) { + if(op.aggOp.initialValue != 0) + result.reset(result.rlen, result.clen, op.aggOp.initialValue); + CellIndex tempCellIndex = new CellIndex(-1, -1); + KahanObject buffer = new KahanObject(0, 0); + for(int i = 0; i < mb.rlen; i++) + for(int j = 0; j < mb.clen; j++) { + tempCellIndex.set(i, j); + op.indexFn.execute(tempCellIndex, tempCellIndex); + incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, + mb.get(i, j), buffer); + } + } + + private static void incrementalAggregateUnaryHelp(AggregateOperator aggOp, MatrixBlock result, int row, int column, + double newvalue, KahanObject buffer) { + if(aggOp.existsCorrection()) { + if(aggOp.correction == CorrectionLocationType.LASTROW || + aggOp.correction == CorrectionLocationType.LASTCOLUMN) { + int corRow = row, corCol = column; + if(aggOp.correction == CorrectionLocationType.LASTROW)// extra row + corRow++; + else if(aggOp.correction == CorrectionLocationType.LASTCOLUMN) + corCol++; + else + throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); + + buffer._sum = result.get(row, column); + buffer._correction = result.get(corRow, corCol); + buffer = (KahanObject) aggOp.increOp.fn.execute(buffer, newvalue); + result.set(row, column, buffer._sum); + result.set(corRow, corCol, buffer._correction); + } + else if(aggOp.correction == CorrectionLocationType.NONE) { + throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); + } + else// for mean + { + int corRow = row, corCol = column; + int countRow = row, countCol = column; + if(aggOp.correction == CorrectionLocationType.LASTTWOROWS) { + countRow++; + corRow += 2; + } + else if(aggOp.correction == CorrectionLocationType.LASTTWOCOLUMNS) { + countCol++; + corCol += 2; + } + else + throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); + buffer._sum = result.get(row, column); + buffer._correction = result.get(corRow, corCol); + double count = result.get(countRow, countCol) + 1.0; + buffer = (KahanObject) aggOp.increOp.fn.execute(buffer, newvalue, count); + result.set(row, column, buffer._sum); + result.set(corRow, corCol, buffer._correction); + result.set(countRow, countCol, count); + } + + } + else { + newvalue = aggOp.increOp.fn.execute(result.get(row, column), newvalue); + result.set(row, column, newvalue); + } + } + +} diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java index 9763e4ea57d..0756e3c0973 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java @@ -3621,6 +3621,11 @@ private static void matrixMultWDivMMDense(MatrixBlock mW, MatrixBlock mU, Matrix DenseBlock x = (mX==null) ? null : mX.getDenseBlock(); DenseBlock c = ret.getDenseBlock(); + if(c == null){ + ret.allocateDenseBlock(); + c = ret.getDenseBlock(); + } + //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), //a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB) @@ -3767,6 +3772,11 @@ private static void matrixMultWDivMMGeneric(MatrixBlock mW, MatrixBlock mU, Matr //output always in dense representation DenseBlock c = ret.getDenseBlock(); + + if(c == null){ + ret.allocateDenseBlock(); + c = ret.getDenseBlock(); + } //approach: iterate over non-zeros of w, selective mm computation if( mW.sparse ) //SPARSE diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index b1c06cdd51e..5549c3f8b35 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -1315,7 +1315,7 @@ public void examSparsity(boolean allowCSR, int k) { else if( !sparse && sparseDst ) denseToSparse(allowCSR, k); } - + public static boolean evalSparseFormatInMemory(DataCharacteristics dc) { return evalSparseFormatInMemory(dc.getRows(), dc.getCols(), dc.getNonZeros()); } @@ -1387,12 +1387,13 @@ public void denseToSparse(boolean allowCSR, int k){ LibMatrixDenseToSparse.denseToSparse(this, allowCSR, k); } - public final void sparseToDense() { - sparseToDense(1); + public final MatrixBlock sparseToDense() { + return sparseToDense(1); } - public void sparseToDense(int k) { + public MatrixBlock sparseToDense(int k) { LibMatrixSparseToDense.sparseToDense(this, k); + return this; } /** @@ -2954,13 +2955,14 @@ public boolean isShallowSerialize(boolean inclConvert) { boolean sparseDst = evalSparseFormatOnDisk(); return !sparse || !sparseDst || (sparse && sparseBlock instanceof SparseBlockCSR) - || (sparse && sparseBlock instanceof SparseBlockMCSR - && getInMemorySize() / MAX_SHALLOW_SERIALIZE_OVERHEAD - <= getExactSerializedSize()) - || (sparse && sparseBlock instanceof SparseBlockMCSR - && nonZeros < Integer.MAX_VALUE //CSR constraint - && inclConvert && CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE - && !isUltraSparseSerialize(sparseDst)); + || (sparse && sparseBlock instanceof SparseBlockMCSR); + // || (sparse && sparseBlock instanceof SparseBlockMCSR + // && getInMemorySize() / MAX_SHALLOW_SERIALIZE_OVERHEAD + // <= getExactSerializedSize()) + // || (sparse && sparseBlock instanceof SparseBlockMCSR + // && nonZeros < Integer.MAX_VALUE //CSR constraint + // && inclConvert && CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE + // && !isUltraSparseSerialize(sparseDst)); } @Override @@ -4650,7 +4652,7 @@ public final MatrixBlock sortOperations(MatrixValue weights){ return sortOperations(weights, null); } - public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) { + public final MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) { return sortOperations(weights, result, 1); } @@ -4754,7 +4756,17 @@ public static double computeIQMCorrection(double sum, double sum_wt, return (sum + q25Part*q25Val - q75Part*q75Val) / (sum_wt*0.5); } - public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { + /** + * Pick the quantiles out of this matrix. If this matrix contains two columns it is weighted quantile picking. + * If a single column it is unweighted. + * + * Note the values are assumed to be sorted + * + * @param quantiles The quantiles to pick + * @param ret The result matrix + * @return The result matrix + */ + public final MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret) { return pickValues(quantiles, ret, false); } @@ -4778,17 +4790,56 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av return output; } - + + /** + * Pick the median quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * + * Note the values are assumed to be sorted + * + * @param quantile The quantile to pick + * @return The quantile + */ public double median() { double sum_wt = sumWeightForQuantile(); return pickValue(0.5, sum_wt%2==0); } - + + /** + * Pick a specific quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * + * Note the values are assumed to be sorted + * + * @param quantile The quantile to pick + * @return The quantile + */ public final double pickValue(double quantile){ return pickValue(quantile, false); } - public double pickValue(double quantile, boolean average) { + /** + * Pick a specific quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * + * Note the values are assumed to be sorted + * + * @param quantile The quantile to pick + * @param average If the quantile is averaged. + * @return The quantile + */ + public final double pickValue(double quantile, boolean average) { + if(this.getNumColumns() == 1) + return pickUnweightedValue(quantile, average); + return pickWeightedValue(quantile, average); + } + + private double pickUnweightedValue(double quantile, boolean average) { + double pos = quantile * rlen; + if(average && (int) pos != pos) + return (get((int) Math.floor(pos), 0) + get(Math.min(rlen - 1, (int) Math.ceil(pos)), 0)) / 2; + else + return get(Math.min(rlen - 1, (int) Math.round(pos)), 0); + } + + private double pickWeightedValue(double quantile, boolean average) { double sum_wt = sumWeightForQuantile(); // do averaging only if it is asked for; and sum_wt is even @@ -5342,8 +5393,8 @@ public MatrixBlock ctableSeqOperations(MatrixValue thatMatrix, double thatScalar * (i1,j1,v2) from input2 (that) * (w) from scalar_input3 (scalarThat2) * - * @param thatMatrix matrix value - * @param thatScalar scalar double + * @param thatMatrix matrix value, the vector to encode via table + * @param thatScalar scalar double, w, that is the weight to multiply on the encoded values * @param resultBlock result matrix block * @return resultBlock */ From f93de9687af2276ca249fe177377dcb3d528e3d1 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Thu, 25 Jun 2026 12:18:48 +0000 Subject: [PATCH 2/5] Fix review findings on MatrixBlock API tightening Resolve issues found while reviewing the quantile/sort refactor: - Remove dead duplicate LibAggregateUnarySpecialization: it was a byte-for-byte copy of the already-wired LibMatrixAggUnarySpecialization with no callers, so it added a second source of truth that would drift. - Fix the median() Javadoc that referenced a non-existent @param quantile, which broke the Javadoc documentation build. - Restore the original SparseBlockMCSR branches in isShallowSerialize: dropping them left inclConvert dead and made toShallowSerializeBlock an unconditional no-op; this unrelated serialization change is reverted. - Allocate the dense WDivMM result block once in the single-threaded driver before dispatching MatrixMultWDivTask workers, instead of a per-thread null-check-then-allocate guard that raced on the shared result block. - Add QuantilePickTest covering the single-column unweighted pickValue path. - Drop the accidental Jackson dependency change from pom.xml that leaked in from an unrelated branch. --- .../data/LibAggregateUnarySpecialization.java | 148 ------------------ .../runtime/matrix/data/LibMatrixMult.java | 18 +-- .../runtime/matrix/data/MatrixBlock.java | 18 +-- .../component/matrix/QuantilePickTest.java | 96 ++++++++++++ 4 files changed, 112 insertions(+), 168 deletions(-) delete mode 100644 src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java create mode 100644 src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java deleted file mode 100644 index 79f08cb353a..00000000000 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibAggregateUnarySpecialization.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysds.runtime.matrix.data; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.sysds.common.Types.CorrectionLocationType; -import org.apache.sysds.runtime.DMLRuntimeException; -import org.apache.sysds.runtime.data.DenseBlock; -import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.instructions.cp.KahanObject; -import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex; -import org.apache.sysds.runtime.matrix.operators.AggregateOperator; -import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator; - -public class LibAggregateUnarySpecialization { - protected static final Log LOG = LogFactory.getLog(LibAggregateUnarySpecialization.class.getName()); - - public static void aggregateUnary(final MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, int blen, - MatrixIndexes indexesIn) { - if(op.sparseSafe) - sparseAggregateUnaryHelp(mb, op, result, blen, indexesIn); - else - denseAggregateUnaryHelp(mb, op, result, blen, indexesIn); - } - - private static void sparseAggregateUnaryHelp(final MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, - int blen, MatrixIndexes indexesIn) { - // initialize result - if(op.aggOp.initialValue != 0) - result.reset(result.rlen, result.clen, op.aggOp.initialValue); - CellIndex tempCellIndex = new CellIndex(-1, -1); - KahanObject buffer = new KahanObject(0, 0); - - if(mb.sparse && mb.sparseBlock != null) { - SparseBlock a = mb.sparseBlock; - for(int r = 0; r < Math.min(mb.rlen, a.numRows()); r++) { - if(a.isEmpty(r)) - continue; - int apos = a.pos(r); - int alen = a.size(r); - int[] aix = a.indexes(r); - double[] aval = a.values(r); - for(int i = apos; i < apos + alen; i++) { - tempCellIndex.set(r, aix[i]); - op.indexFn.execute(tempCellIndex, tempCellIndex); - incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, aval[i], - buffer); - } - } - } - else if(!mb.sparse && mb.denseBlock != null) { - DenseBlock a = mb.getDenseBlock(); - for(int i = 0; i < mb.rlen; i++) - for(int j = 0; j < mb.clen; j++) { - tempCellIndex.set(i, j); - op.indexFn.execute(tempCellIndex, tempCellIndex); - incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, a.get(i, j), - buffer); - } - } - } - - private static void denseAggregateUnaryHelp(MatrixBlock mb, AggregateUnaryOperator op, MatrixBlock result, int blen, - MatrixIndexes indexesIn) { - if(op.aggOp.initialValue != 0) - result.reset(result.rlen, result.clen, op.aggOp.initialValue); - CellIndex tempCellIndex = new CellIndex(-1, -1); - KahanObject buffer = new KahanObject(0, 0); - for(int i = 0; i < mb.rlen; i++) - for(int j = 0; j < mb.clen; j++) { - tempCellIndex.set(i, j); - op.indexFn.execute(tempCellIndex, tempCellIndex); - incrementalAggregateUnaryHelp(op.aggOp, result, tempCellIndex.row, tempCellIndex.column, - mb.get(i, j), buffer); - } - } - - private static void incrementalAggregateUnaryHelp(AggregateOperator aggOp, MatrixBlock result, int row, int column, - double newvalue, KahanObject buffer) { - if(aggOp.existsCorrection()) { - if(aggOp.correction == CorrectionLocationType.LASTROW || - aggOp.correction == CorrectionLocationType.LASTCOLUMN) { - int corRow = row, corCol = column; - if(aggOp.correction == CorrectionLocationType.LASTROW)// extra row - corRow++; - else if(aggOp.correction == CorrectionLocationType.LASTCOLUMN) - corCol++; - else - throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); - - buffer._sum = result.get(row, column); - buffer._correction = result.get(corRow, corCol); - buffer = (KahanObject) aggOp.increOp.fn.execute(buffer, newvalue); - result.set(row, column, buffer._sum); - result.set(corRow, corCol, buffer._correction); - } - else if(aggOp.correction == CorrectionLocationType.NONE) { - throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); - } - else// for mean - { - int corRow = row, corCol = column; - int countRow = row, countCol = column; - if(aggOp.correction == CorrectionLocationType.LASTTWOROWS) { - countRow++; - corRow += 2; - } - else if(aggOp.correction == CorrectionLocationType.LASTTWOCOLUMNS) { - countCol++; - corCol += 2; - } - else - throw new DMLRuntimeException("unrecognized correctionLocation: " + aggOp.correction); - buffer._sum = result.get(row, column); - buffer._correction = result.get(corRow, corCol); - double count = result.get(countRow, countCol) + 1.0; - buffer = (KahanObject) aggOp.increOp.fn.execute(buffer, newvalue, count); - result.set(row, column, buffer._sum); - result.set(corRow, corCol, buffer._correction); - result.set(countRow, countCol, count); - } - - } - else { - newvalue = aggOp.increOp.fn.execute(result.get(row, column), newvalue); - result.set(row, column, newvalue); - } - } - -} diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java index 0756e3c0973..e1021593482 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java @@ -801,6 +801,10 @@ public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock //pre-processing ret.sparse = wt.isBasic()?mW.sparse:false; ret.allocateBlock(); + //ensure the dense output is allocated up-front so the kernels below + //(and the parallel tasks in the k-variant) never lazily allocate it + if(ret.getDenseBlock() == null) + ret.allocateDenseBlock(); //core weighted div mm computation boolean scalarX = wt.hasScalar(); @@ -849,6 +853,10 @@ public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock //pre-processing ret.sparse = wt.isBasic()?mW.sparse:false; ret.allocateBlock(); + //allocate the dense output single-threaded before dispatching tasks, so the + //shared result block is safely published and tasks never race to allocate it + if(ret.getDenseBlock() == null) + ret.allocateDenseBlock(); if (!ret.isThreadSafe()){ matrixMultWDivMM(mW, mU, mV, mX, ret, wt); @@ -3620,11 +3628,6 @@ private static void matrixMultWDivMMDense(MatrixBlock mW, MatrixBlock mU, Matrix DenseBlock v = mV.getDenseBlock(); DenseBlock x = (mX==null) ? null : mX.getDenseBlock(); DenseBlock c = ret.getDenseBlock(); - - if(c == null){ - ret.allocateDenseBlock(); - c = ret.getDenseBlock(); - } //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), @@ -3772,11 +3775,6 @@ private static void matrixMultWDivMMGeneric(MatrixBlock mW, MatrixBlock mU, Matr //output always in dense representation DenseBlock c = ret.getDenseBlock(); - - if(c == null){ - ret.allocateDenseBlock(); - c = ret.getDenseBlock(); - } //approach: iterate over non-zeros of w, selective mm computation if( mW.sparse ) //SPARSE diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index 5549c3f8b35..d444e45937d 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -2955,14 +2955,13 @@ public boolean isShallowSerialize(boolean inclConvert) { boolean sparseDst = evalSparseFormatOnDisk(); return !sparse || !sparseDst || (sparse && sparseBlock instanceof SparseBlockCSR) - || (sparse && sparseBlock instanceof SparseBlockMCSR); - // || (sparse && sparseBlock instanceof SparseBlockMCSR - // && getInMemorySize() / MAX_SHALLOW_SERIALIZE_OVERHEAD - // <= getExactSerializedSize()) - // || (sparse && sparseBlock instanceof SparseBlockMCSR - // && nonZeros < Integer.MAX_VALUE //CSR constraint - // && inclConvert && CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE - // && !isUltraSparseSerialize(sparseDst)); + || (sparse && sparseBlock instanceof SparseBlockMCSR + && getInMemorySize() / MAX_SHALLOW_SERIALIZE_OVERHEAD + <= getExactSerializedSize()) + || (sparse && sparseBlock instanceof SparseBlockMCSR + && nonZeros < Integer.MAX_VALUE //CSR constraint + && inclConvert && CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE + && !isUltraSparseSerialize(sparseDst)); } @Override @@ -4796,8 +4795,7 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av * * Note the values are assumed to be sorted * - * @param quantile The quantile to pick - * @return The quantile + * @return The median value */ public double median() { double sum_wt = sumWeightForQuantile(); diff --git a/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java new file mode 100644 index 00000000000..49c1dd93859 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.matrix; + +import static org.junit.Assert.assertEquals; + +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +/** + * Tests the single-column (unweighted) branch of {@link MatrixBlock#pickValue(double, boolean)}. + * The values are assumed to be sorted in ascending order, mirroring the contract used by the + * quantile pick instructions. The two-column (weighted) branch is exercised separately through the + * compressed sort tests. + */ +public class QuantilePickTest { + + private static MatrixBlock singleColumn(double[] values, boolean sparse) { + MatrixBlock mb = new MatrixBlock(values.length, 1, sparse); + for(int i = 0; i < values.length; i++) + mb.set(i, 0, values[i]); + mb.recomputeNonZeros(); + return mb; + } + + @Test + public void pickOddLengthNoAverage() { + // pos = quantile * rlen; Math.round(pos), clamped to rlen-1. + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals(10, mb.pickValue(0.0, false), 0); // pos 0.0 -> idx 0 + assertEquals(20, mb.pickValue(0.2, false), 0); // pos 1.0 -> idx 1 + assertEquals(40, mb.pickValue(0.5, false), 0); // pos 2.5 -> round 3 -> idx 3 + assertEquals(50, mb.pickValue(1.0, false), 0); // pos 5.0 -> clamp idx 4 + } + + @Test + public void pickOddLengthAverage() { + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + // pos 2.5 is non-integer -> average of floor(2.5)=2 and ceil(2.5)=3 -> (30+40)/2 + assertEquals(35, mb.pickValue(0.5, true), 0); + // integer pos -> no averaging + assertEquals(20, mb.pickValue(0.2, true), 0); + } + + @Test + public void pickEvenLengthAverage() { + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40}, false); + // pos 1.5 -> average of idx 1 and idx 2 -> (20+30)/2 + assertEquals(25, mb.pickValue(0.375, true), 0); + // pos 2.0 integer -> idx 2 + assertEquals(30, mb.pickValue(0.5, true), 0); + } + + @Test + public void pickAverageClampedAtTop() { + // pos 4.75 -> ceil clamps to rlen-1, so the averaged pair is (idx4, idx4). + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals(50, mb.pickValue(0.95, true), 0); + } + + @Test + public void pickSparseSingleColumnWithZeros() { + // Sorted ascending including leading zeros, stored sparse. + MatrixBlock mb = singleColumn(new double[] {0, 0, 10, 20, 30}, true); + assertEquals(0, mb.pickValue(0.0, false), 0); // pos 0.0 -> idx 0 (zero) + assertEquals(20, mb.pickValue(0.5, false), 0); // pos 2.5 -> round 3 -> idx 3 + assertEquals(30, mb.pickValue(1.0, false), 0); // pos 5.0 -> clamp idx 4 + } + + @Test + public void pickSingleColumnMatchesDenseAndSparse() { + double[] v = {-5, -1, 0, 2, 7, 9}; + MatrixBlock dense = singleColumn(v, false); + MatrixBlock sparse = singleColumn(v, true); + for(double q : new double[] {0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0}) + for(boolean avg : new boolean[] {false, true}) + assertEquals("q=" + q + " avg=" + avg, dense.pickValue(q, avg), sparse.pickValue(q, avg), 0); + } +} From db930a4ea47b5d126edd30a1b953c100801ac02e Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Thu, 25 Jun 2026 13:22:55 +0000 Subject: [PATCH 3/5] Drop out-of-scope WDivMM allocation change and polish quantile API - Revert LibMatrixMult to upstream: the WDivMM dense-output allocation hoist was unrelated to the MatrixBlock API tightening and forced a full dense allocation for the basic+sparse-weight result that is meant to stay sparse (a memory regression on large sparse weights); upstream needs no such guard. - Fix median() Javadoc: it locates the median via the weight column and does not support a single-column input, so drop the inaccurate unweighted claim. - Align pickUnweightedValue with its sibling by reading getNumRows(). - Revert an unrelated CTABLE Javadoc edit and stray whitespace hunks. - Clarify the CompressedMatrixBlock.sparseToDense override comment and tidy the new quantile Javadocs. --- .../compress/CompressedMatrixBlock.java | 3 +- .../runtime/matrix/data/LibMatrixMult.java | 10 +------ .../runtime/matrix/data/MatrixBlock.java | 29 ++++++++++--------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index 8d8d6a94b79..3a79443157b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -1202,7 +1202,8 @@ public void examSparsity(boolean allowCSR, int k) { @Override public MatrixBlock sparseToDense(int k) { - return this; // do nothing + // a compressed block has no sparse representation to convert; return unchanged + return this; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java index e1021593482..9763e4ea57d 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java @@ -801,10 +801,6 @@ public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock //pre-processing ret.sparse = wt.isBasic()?mW.sparse:false; ret.allocateBlock(); - //ensure the dense output is allocated up-front so the kernels below - //(and the parallel tasks in the k-variant) never lazily allocate it - if(ret.getDenseBlock() == null) - ret.allocateDenseBlock(); //core weighted div mm computation boolean scalarX = wt.hasScalar(); @@ -853,10 +849,6 @@ public static void matrixMultWDivMM(MatrixBlock mW, MatrixBlock mU, MatrixBlock //pre-processing ret.sparse = wt.isBasic()?mW.sparse:false; ret.allocateBlock(); - //allocate the dense output single-threaded before dispatching tasks, so the - //shared result block is safely published and tasks never race to allocate it - if(ret.getDenseBlock() == null) - ret.allocateDenseBlock(); if (!ret.isThreadSafe()){ matrixMultWDivMM(mW, mU, mV, mX, ret, wt); @@ -3628,7 +3620,7 @@ private static void matrixMultWDivMMDense(MatrixBlock mW, MatrixBlock mU, Matrix DenseBlock v = mV.getDenseBlock(); DenseBlock x = (mX==null) ? null : mX.getDenseBlock(); DenseBlock c = ret.getDenseBlock(); - + //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), //a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB) diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index d444e45937d..30b38071ebb 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -1315,7 +1315,7 @@ public void examSparsity(boolean allowCSR, int k) { else if( !sparse && sparseDst ) denseToSparse(allowCSR, k); } - + public static boolean evalSparseFormatInMemory(DataCharacteristics dc) { return evalSparseFormatInMemory(dc.getRows(), dc.getCols(), dc.getNonZeros()); } @@ -4759,7 +4759,7 @@ public static double computeIQMCorrection(double sum, double sum_wt, * Pick the quantiles out of this matrix. If this matrix contains two columns it is weighted quantile picking. * If a single column it is unweighted. * - * Note the values are assumed to be sorted + * Note the values are assumed to be sorted. * * @param quantiles The quantiles to pick * @param ret The result matrix @@ -4789,11 +4789,11 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av return output; } - + /** - * Pick the median quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * Pick the median value from this matrix, using the weight column to locate the median position. * - * Note the values are assumed to be sorted + * Note the values are assumed to be sorted. * * @return The median value */ @@ -4803,9 +4803,9 @@ public double median() { } /** - * Pick a specific quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * Pick a specific quantile from this matrix. If this matrix has two columns it is weighted picking, otherwise it is unweighted. * - * Note the values are assumed to be sorted + * Note the values are assumed to be sorted. * * @param quantile The quantile to pick * @return The quantile @@ -4815,9 +4815,9 @@ public final double pickValue(double quantile){ } /** - * Pick a specific quantile from this matrix. if this matrix is two columns, it is weighted picking else it is unweighted. + * Pick a specific quantile from this matrix. If this matrix has two columns it is weighted picking, otherwise it is unweighted. * - * Note the values are assumed to be sorted + * Note the values are assumed to be sorted. * * @param quantile The quantile to pick * @param average If the quantile is averaged. @@ -4830,11 +4830,12 @@ public final double pickValue(double quantile, boolean average) { } private double pickUnweightedValue(double quantile, boolean average) { - double pos = quantile * rlen; + final int rows = getNumRows(); + double pos = quantile * rows; if(average && (int) pos != pos) - return (get((int) Math.floor(pos), 0) + get(Math.min(rlen - 1, (int) Math.ceil(pos)), 0)) / 2; + return (get((int) Math.floor(pos), 0) + get(Math.min(rows - 1, (int) Math.ceil(pos)), 0)) / 2; else - return get(Math.min(rlen - 1, (int) Math.round(pos)), 0); + return get(Math.min(rows - 1, (int) Math.round(pos)), 0); } private double pickWeightedValue(double quantile, boolean average) { @@ -5391,8 +5392,8 @@ public MatrixBlock ctableSeqOperations(MatrixValue thatMatrix, double thatScalar * (i1,j1,v2) from input2 (that) * (w) from scalar_input3 (scalarThat2) * - * @param thatMatrix matrix value, the vector to encode via table - * @param thatScalar scalar double, w, that is the weight to multiply on the encoded values + * @param thatMatrix matrix value + * @param thatScalar scalar double * @param resultBlock result matrix block * @return resultBlock */ From 941a87a40deb15f43f2c5fe850a3b045b413be06 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Thu, 25 Jun 2026 14:16:36 +0000 Subject: [PATCH 4/5] Add direct quantile-pick coverage on CompressedMatrixBlock Quantile picking normally runs on the uncompressed value/weight table from sortOperations, so the inherited pickValue path is never reached on a compressed block through that flow. Add tests that sort a single column while keeping it compressed and then call pickValue directly on the CompressedMatrixBlock, asserting the result matches the uncompressed sorted column across DDC and SDC (with zeros and negatives). This locks in the behavior of the now-inherited pickValue after the redundant overrides were removed. --- .../compress/CompressedSortTest.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java index 7ab7187eb78..083a29f965b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedSortTest.java @@ -203,6 +203,40 @@ public void quantileWeightedFallback() { TestUtils.compareMatrices(expected, actual, 0.0, "weighted sortOperations fallback"); } + @Test + public void pickDirectlyOnCompressedColumnDDC() { + runDirectPick(generate(ROWS, 1, 8, 1.0, 1, 50, 7), CompressionType.DDC); + } + + @Test + public void pickDirectlyOnCompressedColumnSDCZeros() { + runDirectPick(generate(ROWS, 1, 6, 0.2, 1, 40, 23), CompressionType.SDC); + } + + @Test + public void pickDirectlyOnCompressedColumnWithNegatives() { + runDirectPick(generate(ROWS, 1, 8, 0.3, -20, 20, 41), CompressionType.SDC); + } + + /** + * Quantile picking normally runs on the uncompressed value/weight table produced by sortOperations, so the + * inherited (no longer overridden) pickValue path is never reached on a compressed block through that flow. This + * exercises it directly: the single column is sorted while staying compressed, then pickValue is invoked on the + * CompressedMatrixBlock itself and must match the uncompressed sorted column element for element. median() is not + * used here because it requires the two-column weighted representation. + */ + private void runDirectPick(MatrixBlock mb, CompressionType ct) { + CompressedMatrixBlock cmb = compress(mb, ct); + MatrixBlock sortedC = cmb.reorgOperations(ASC, new MatrixBlock(), 0, 0, 0); + assertTrue("Expected the sorted result to stay compressed for " + ct, sortedC instanceof CompressedMatrixBlock); + MatrixBlock sortedU = mb.reorgOperations(ASC, new MatrixBlock(), 0, 0, 0); + + for(double q : new double[] {0.0, 0.25, 0.5, 0.75, 0.9, 1.0}) { + assertEquals("pick q=" + q + " " + ct, sortedU.pickValue(q, false), sortedC.pickValue(q, false), 0.0); + assertEquals("pick avg q=" + q + " " + ct, sortedU.pickValue(q, true), sortedC.pickValue(q, true), 0.0); + } + } + private void runQuantile(MatrixBlock mb, CompressionType ct) { // reference is computed on a copy because compression may consume the input. MatrixBlock expected = new MatrixBlock(mb).sortOperations(null, new MatrixBlock(), 1); From 3309b946183646e1f35f28cc1050562bd956ee04 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Thu, 25 Jun 2026 14:26:58 +0000 Subject: [PATCH 5/5] Support single-column median and align unweighted quantile convention - median() now dispatches on column count like pickValue: a single-column (unweighted) matrix is picked over its one column instead of reading a non-existent weight column, which previously threw on single-column input. - Align pickUnweightedValue with the weighted convention (ceil-based rank with an implicit weight of 1 per value) so a single column yields the same quantile/median as the equivalent two-column (value, weight) representation; the prior round-based position was off by one for medians. - Expand QuantilePickTest to assert the canonical values with messages, cover even/odd averaging, the single-element edge case, and single-column median(). --- .../runtime/matrix/data/MatrixBlock.java | 19 +++-- .../component/matrix/QuantilePickTest.java | 81 +++++++++++++------ 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index 30b38071ebb..361d190bd02 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -4791,13 +4791,16 @@ public MatrixBlock pickValues(MatrixValue quantiles, MatrixValue ret, boolean av } /** - * Pick the median value from this matrix, using the weight column to locate the median position. + * Pick the median value from this matrix. If this matrix has two columns it is weighted picking using the + * weight column, otherwise it is unweighted over the single column. * * Note the values are assumed to be sorted. * * @return The median value */ public double median() { + if(getNumColumns() == 1) + return pickValue(0.5, getNumRows() % 2 == 0); double sum_wt = sumWeightForQuantile(); return pickValue(0.5, sum_wt%2==0); } @@ -4830,12 +4833,16 @@ public final double pickValue(double quantile, boolean average) { } private double pickUnweightedValue(double quantile, boolean average) { + // Mirror the weighted convention (pickWeightedValue) with an implicit weight of 1 per value, so a single + // column yields the same quantile as the equivalent two-column (value, weight) representation: take the + // ceil-based rank and only average adjacent order statistics when an even number of values straddles it. final int rows = getNumRows(); - double pos = quantile * rows; - if(average && (int) pos != pos) - return (get((int) Math.floor(pos), 0) + get(Math.min(rows - 1, (int) Math.ceil(pos)), 0)) / 2; - else - return get(Math.min(rows - 1, (int) Math.round(pos)), 0); + average = average && (rows % 2 == 0); + final int pos = (int) Math.ceil(quantile * rows); // 1-based rank + final int i = Math.min(Math.max(pos - 1, 0), rows - 1); + if(average && pos > 0 && pos < rows) + return (get(i, 0) + get(i + 1, 0)) / 2; + return get(i, 0); } private double pickWeightedValue(double quantile, boolean average) { diff --git a/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java index 49c1dd93859..472b61d8cd6 100644 --- a/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java +++ b/src/test/java/org/apache/sysds/test/component/matrix/QuantilePickTest.java @@ -25,10 +25,12 @@ import org.junit.Test; /** - * Tests the single-column (unweighted) branch of {@link MatrixBlock#pickValue(double, boolean)}. - * The values are assumed to be sorted in ascending order, mirroring the contract used by the - * quantile pick instructions. The two-column (weighted) branch is exercised separately through the - * compressed sort tests. + * Tests the single-column (unweighted) branch of {@link MatrixBlock#pickValue(double, boolean)} and + * {@link MatrixBlock#median()}. The values are assumed to be sorted in ascending order, mirroring the contract used + * by the quantile pick instructions. The unweighted branch uses the same ceil-based rank as the two-column weighted + * branch (with an implicit weight of 1 per value), so a single column yields the same quantile as the equivalent + * (value, weight) representation. The two-column (weighted) branch is exercised separately through the compressed + * sort tests. */ public class QuantilePickTest { @@ -42,46 +44,77 @@ private static MatrixBlock singleColumn(double[] values, boolean sparse) { @Test public void pickOddLengthNoAverage() { - // pos = quantile * rlen; Math.round(pos), clamped to rlen-1. + // rank = ceil(quantile * 5), value at (rank-1). MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); - assertEquals(10, mb.pickValue(0.0, false), 0); // pos 0.0 -> idx 0 - assertEquals(20, mb.pickValue(0.2, false), 0); // pos 1.0 -> idx 1 - assertEquals(40, mb.pickValue(0.5, false), 0); // pos 2.5 -> round 3 -> idx 3 - assertEquals(50, mb.pickValue(1.0, false), 0); // pos 5.0 -> clamp idx 4 + assertEquals("q=0.0", 10, mb.pickValue(0.0, false), 0); // rank 0 -> idx 0 + assertEquals("q=0.2", 10, mb.pickValue(0.2, false), 0); // rank ceil(1.0)=1 -> idx 0 + assertEquals("q=0.5", 30, mb.pickValue(0.5, false), 0); // rank ceil(2.5)=3 -> idx 2 + assertEquals("q=0.75", 40, mb.pickValue(0.75, false), 0); // rank ceil(3.75)=4 -> idx 3 + assertEquals("q=1.0", 50, mb.pickValue(1.0, false), 0); // rank ceil(5.0)=5 -> idx 4 } @Test - public void pickOddLengthAverage() { + public void pickOddLengthAverageSuppressed() { + // Odd number of values -> averaging is suppressed, so average matches no-average. MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); - // pos 2.5 is non-integer -> average of floor(2.5)=2 and ceil(2.5)=3 -> (30+40)/2 - assertEquals(35, mb.pickValue(0.5, true), 0); - // integer pos -> no averaging - assertEquals(20, mb.pickValue(0.2, true), 0); + assertEquals("q=0.5 avg", 30, mb.pickValue(0.5, true), 0); + assertEquals("q=0.75 avg", 40, mb.pickValue(0.75, true), 0); } @Test public void pickEvenLengthAverage() { + // Even number of values -> averaging of adjacent order statistics applies. MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40}, false); - // pos 1.5 -> average of idx 1 and idx 2 -> (20+30)/2 - assertEquals(25, mb.pickValue(0.375, true), 0); - // pos 2.0 integer -> idx 2 - assertEquals(30, mb.pickValue(0.5, true), 0); + assertEquals("q=0.25 avg", 15, mb.pickValue(0.25, true), 0); // rank 1 -> (idx0+idx1)/2 + assertEquals("q=0.375 avg", 25, mb.pickValue(0.375, true), 0); // rank ceil(1.5)=2 -> (idx1+idx2)/2 + assertEquals("q=0.5 avg", 25, mb.pickValue(0.5, true), 0); // rank 2 -> (idx1+idx2)/2 + assertEquals("q=0.75 avg", 35, mb.pickValue(0.75, true), 0); // rank 3 -> (idx2+idx3)/2 + } + + @Test + public void pickEvenLengthNoAverage() { + MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40}, false); + assertEquals("q=0.25", 10, mb.pickValue(0.25, false), 0); // rank 1 -> idx 0 + assertEquals("q=0.5", 20, mb.pickValue(0.5, false), 0); // rank 2 -> idx 1 + assertEquals("q=0.75", 30, mb.pickValue(0.75, false), 0); // rank 3 -> idx 2 } @Test public void pickAverageClampedAtTop() { - // pos 4.75 -> ceil clamps to rlen-1, so the averaged pair is (idx4, idx4). - MatrixBlock mb = singleColumn(new double[] {10, 20, 30, 40, 50}, false); - assertEquals(50, mb.pickValue(0.95, true), 0); + // Top quantile: rank reaches the last element so there is no successor to average with. + MatrixBlock even = singleColumn(new double[] {10, 20, 30, 40}, false); + assertEquals("even q=0.95 avg", 40, even.pickValue(0.95, true), 0); // rank ceil(3.8)=4 -> idx 3, no avg + assertEquals("even q=1.0 avg", 40, even.pickValue(1.0, true), 0); + MatrixBlock odd = singleColumn(new double[] {10, 20, 30, 40, 50}, false); + assertEquals("odd q=0.95 avg", 50, odd.pickValue(0.95, true), 0); // odd -> avg suppressed + } + + @Test + public void pickSingleElement() { + MatrixBlock mb = singleColumn(new double[] {42}, false); + assertEquals("q=0.0", 42, mb.pickValue(0.0, false), 0); + assertEquals("q=0.5", 42, mb.pickValue(0.5, false), 0); + assertEquals("q=1.0", 42, mb.pickValue(1.0, false), 0); + assertEquals("q=0.5 avg", 42, mb.pickValue(0.5, true), 0); + assertEquals("median", 42, mb.median(), 0); } @Test public void pickSparseSingleColumnWithZeros() { // Sorted ascending including leading zeros, stored sparse. MatrixBlock mb = singleColumn(new double[] {0, 0, 10, 20, 30}, true); - assertEquals(0, mb.pickValue(0.0, false), 0); // pos 0.0 -> idx 0 (zero) - assertEquals(20, mb.pickValue(0.5, false), 0); // pos 2.5 -> round 3 -> idx 3 - assertEquals(30, mb.pickValue(1.0, false), 0); // pos 5.0 -> clamp idx 4 + assertEquals("q=0.0", 0, mb.pickValue(0.0, false), 0); // rank 0 -> idx 0 (zero) + assertEquals("q=0.5", 10, mb.pickValue(0.5, false), 0); // rank ceil(2.5)=3 -> idx 2 + assertEquals("q=0.75", 20, mb.pickValue(0.75, false), 0); // rank ceil(3.75)=4 -> idx 3 + assertEquals("q=1.0", 30, mb.pickValue(1.0, false), 0); // rank 5 -> idx 4 + } + + @Test + public void medianSingleColumn() { + // Odd length -> middle element; even length -> average of the two middle elements. + assertEquals("odd median", 30, singleColumn(new double[] {10, 20, 30, 40, 50}, false).median(), 0); + assertEquals("even median", 25, singleColumn(new double[] {10, 20, 30, 40}, false).median(), 0); + assertEquals("sparse median", 10, singleColumn(new double[] {0, 0, 10, 20, 30}, true).median(), 0); } @Test