diff --git a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateMatch.java b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateMatch.java
index 7979481ec8ae..cc34ed0899c6 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateMatch.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/DruidPredicateMatch.java
@@ -20,7 +20,10 @@
package org.apache.druid.query.filter;
/**
- * Three-value logic result for matching values with predicates produced by {@link DruidPredicateFactory}
+ * Three-value logic result for matching values with predicates produced by {@link DruidPredicateFactory}.
+ *
+ * Also serves as a general 3VL atom for filter-tree composition where a sub-expression can be provably true,
+ * provably false, or undecidable.
*
* @see DruidObjectPredicate
* @see DruidLongPredicate
@@ -58,4 +61,35 @@ public static DruidPredicateMatch of(boolean val)
}
return FALSE;
}
+
+ public static DruidPredicateMatch and(DruidPredicateMatch a, DruidPredicateMatch b)
+ {
+ if (a == FALSE || b == FALSE) {
+ return FALSE;
+ }
+ if (a == TRUE && b == TRUE) {
+ return TRUE;
+ }
+ return UNKNOWN;
+ }
+
+ public static DruidPredicateMatch or(DruidPredicateMatch a, DruidPredicateMatch b)
+ {
+ if (a == TRUE || b == TRUE) {
+ return TRUE;
+ }
+ if (a == FALSE && b == FALSE) {
+ return FALSE;
+ }
+ return UNKNOWN;
+ }
+
+ public static DruidPredicateMatch not(DruidPredicateMatch a)
+ {
+ return switch (a) {
+ case TRUE -> FALSE;
+ case FALSE -> TRUE;
+ default -> UNKNOWN;
+ };
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/ConcatenatingCursor.java b/processing/src/main/java/org/apache/druid/segment/ConcatenatingCursor.java
new file mode 100644
index 000000000000..2cfef8eec930
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/ConcatenatingCursor.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment;
+
+import com.google.common.base.Supplier;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.segment.projections.ClusteringColumnSelectorFactory;
+
+import javax.annotation.Nullable;
+import java.util.List;
+
+/**
+ * {@link Cursor} that walks a sequence of per-group cursors back-to-back, presenting them to the caller as a single
+ * cursor over a clustered base table. On group transitions the wrapper {@link ColumnSelectorFactory} updates its
+ * underlying delegate and clustering values so previously-acquired delegating selectors observe the new group's data
+ * on their next access.
+ *
+ * Each entry in {@link #holderSuppliers} is a lazy producer of a {@link CursorHolder} for one cluster group. The
+ * outer {@link CursorHolder} owns the lifecycle of the per-group holders.
+ */
+public final class ConcatenatingCursor implements Cursor
+{
+ private final List<Supplier<CursorHolder>> holderSuppliers;
+ private final List<Object[]> clusteringValuesByGroup;
+ private final ClusteringColumnSelectorFactory wrapperFactory;
+
+ private int currentIdx;
+ @Nullable
+ private Cursor currentCursor;
+ private boolean initialized;
+
+ public ConcatenatingCursor(
+ List<Supplier<CursorHolder>> holderSuppliers,
+ List<Object[]> clusteringValuesByGroup,
+ ClusteringColumnSelectorFactory wrapperFactory
+ )
+ {
+ if (holderSuppliers.size() != clusteringValuesByGroup.size()) {
+ throw DruidException.defensive(
+ "holderSuppliers size [%s] must equal clusteringValuesByGroup size [%s]",
+ holderSuppliers.size(),
+ clusteringValuesByGroup.size()
+ );
+ }
+ if (holderSuppliers.isEmpty()) {
+ throw DruidException.defensive("ConcatenatingCursor requires at least one cluster group");
+ }
+ this.holderSuppliers = holderSuppliers;
+ this.clusteringValuesByGroup = clusteringValuesByGroup;
+ this.wrapperFactory = wrapperFactory;
+ this.currentIdx = -1;
+ }
+
+ private void initializeIfNeeded()
+ {
+ if (initialized) {
+ return;
+ }
+ initialized = true;
+ advanceToNextNonEmptyGroup();
+ }
+
+ /**
+ * Open the next group whose cursor has at least one row. Sets {@code currentCursor = null} when all groups are
+ * exhausted.
+ */
+ private void advanceToNextNonEmptyGroup()
+ {
+ while (++currentIdx < holderSuppliers.size()) {
+ final CursorHolder holder = holderSuppliers.get(currentIdx).get();
+ final Cursor cursor = holder.asCursor();
+ if (cursor != null && !cursor.isDone()) {
+ currentCursor = cursor;
+ wrapperFactory.setDelegate(cursor.getColumnSelectorFactory(), clusteringValuesByGroup.get(currentIdx));
+ return;
+ }
+ // Group has no rows after filter application; try the next.
+ }
+ currentCursor = null;
+ }
+
+ @Override
+ public ColumnSelectorFactory getColumnSelectorFactory()
+ {
+ initializeIfNeeded();
+ return wrapperFactory;
+ }
+
+ @Override
+ public void advance()
+ {
+ initializeIfNeeded();
+ if (currentCursor == null) {
+ return;
+ }
+ currentCursor.advance();
+ if (currentCursor.isDone()) {
+ advanceToNextNonEmptyGroup();
+ }
+ }
+
+ @Override
+ public void advanceUninterruptibly()
+ {
+ initializeIfNeeded();
+ if (currentCursor == null) {
+ return;
+ }
+ currentCursor.advanceUninterruptibly();
+ if (currentCursor.isDone()) {
+ advanceToNextNonEmptyGroup();
+ }
+ }
+
+ @Override
+ public boolean isDone()
+ {
+ initializeIfNeeded();
+ return currentCursor == null;
+ }
+
+ @Override
+ public boolean isDoneOrInterrupted()
+ {
+ return isDone() || Thread.currentThread().isInterrupted();
+ }
+
+ @Override
+ public void reset()
+ {
+ currentIdx = -1;
+ currentCursor = null;
+ initialized = false;
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexIO.java b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
index f246421f8415..ba9d6d455378 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java
@@ -72,9 +72,11 @@
import org.apache.druid.segment.file.SegmentFileMetadata;
import org.apache.druid.segment.projections.AggregateProjectionSchema;
import org.apache.druid.segment.projections.BaseTableProjectionSchema;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
import org.apache.druid.segment.projections.ConstantTimeColumn;
import org.apache.druid.segment.projections.ProjectionMetadata;
import org.apache.druid.segment.projections.Projections;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
import org.apache.druid.segment.serde.ComplexColumnPartSupplier;
import org.apache.druid.segment.serde.FloatNumericColumnSupplier;
import org.apache.druid.segment.serde.LongNumericColumnSupplier;
@@ -952,16 +954,33 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen
// projections can omit a __time column, but one still has to exist, so we use the interval start to make a
// constant for this case
final long intervalStartMillis = Intervals.of(metadata.getInterval()).getStartMillis();
- // read base table projection columns, which are shared with other projections
- final Map<String, Supplier<ColumnHolder>> baseColumns = readProjectionColumns(
- metadata,
- baseProjection,
- fileMapper,
- Map.of(),
- intervalStartMillis,
- lazy,
- loadFailed
- );
+
+ // For clustered base tables the columns are always accessed through cluster groups, skip reading any base
+ // columns so we don't try to map files that don't exist
+ final boolean isClusteredSummary = baseSchema instanceof ClusteredValueGroupsBaseTableSchema;
+ final ClusteredValueGroupsBaseTableSchema clusteredBaseSummary;
+ final Map<String, Supplier<ColumnHolder>> baseColumns;
+ if (isClusteredSummary) {
+ clusteredBaseSummary = (ClusteredValueGroupsBaseTableSchema) baseSchema;
+ if (clusteredBaseSummary.getSharedColumns().isEmpty()) {
+ baseColumns = Map.of();
+ } else {
+ throw DruidException.defensive(
+ "Reading clustered segments with non-empty sharedColumns is not yet supported"
+ );
+ }
+ } else {
+ clusteredBaseSummary = null;
+ baseColumns = readProjectionColumns(
+ metadata,
+ baseProjection,
+ fileMapper,
+ Map.of(),
+ intervalStartMillis,
+ lazy,
+ loadFailed
+ );
+ }
final Map<String, Map<String, Supplier<ColumnHolder>>> projectionsColumns = new LinkedHashMap<>();
final List<AggregateProjectionMetadata> aggProjections = new ArrayList<>(metadata.getProjections().size() - 1);
@@ -972,6 +991,13 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen
first = false;
continue;
}
+ if (!(projectionSpec.getSchema() instanceof AggregateProjectionSchema)) {
+ throw DruidException.defensive(
+ "Unexpected projection[%s] with type[%s]; only aggregate projections are valid as top-level entries",
+ projectionSpec.getSchema().getName(),
+ projectionSpec.getSchema().getClass()
+ );
+ }
final Map<String, Supplier<ColumnHolder>> projectionColumns = readProjectionColumns(
metadata,
projectionSpec,
@@ -983,31 +1009,51 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen
);
projectionsColumns.put(projectionSpec.getSchema().getName(), projectionColumns);
- if (projectionSpec.getSchema() instanceof AggregateProjectionSchema) {
- aggProjections.add(
- new AggregateProjectionMetadata(
- (AggregateProjectionSchema) projectionSpec.getSchema(),
- projectionSpec.getNumRows()
- )
- );
- } else {
- throw DruidException.defensive(
- "Unexpected projection[%s] with type[%s]",
- projectionSpec.getSchema().getName(),
- projectionSpec.getSchema().getClass()
- );
+ aggProjections.add(
+ new AggregateProjectionMetadata(
+ (AggregateProjectionSchema) projectionSpec.getSchema(),
+ projectionSpec.getNumRows()
+ )
+ );
+ }
+
+ final List<Map<String, Supplier<ColumnHolder>>> clusterGroupColumnsList;
+ if (isClusteredSummary) {
+ final List<TableClusterGroupSpec> nestedGroups = clusteredBaseSummary.getClusterGroups();
+ clusterGroupColumnsList = new ArrayList<>(nestedGroups.size());
+ for (int i = 0; i < nestedGroups.size(); i++) {
+ clusterGroupColumnsList.add(readClusterGroupColumns(
+ metadata,
+ clusteredBaseSummary,
+ i,
+ fileMapper,
+ intervalStartMillis,
+ lazy,
+ loadFailed
+ ));
}
+ } else {
+ clusterGroupColumnsList = List.of();
}
+
final Metadata reconstructedMetadata = baseSchema.asMetadata(aggProjections);
+ // For clustered segments, the top-level index has no per-column data of its own, so pass an empty dimensions
+ // list so the SimpleQueryableIndex precondition passes and dimension-handler init has nothing to materialize
+ final Indexed<String> dimensionsIndex = isClusteredSummary
+ ? new ListIndexed<>(List.of())
+ : new ListIndexed<>(baseSchema.getDimensionNames());
+
return new SimpleQueryableIndex(
Intervals.fromString(metadata.getInterval()),
- new ListIndexed<>(baseSchema.getDimensionNames()),
+ dimensionsIndex,
metadata.getBitmapEncoding().getBitmapFactory(),
baseColumns,
fileMapper,
reconstructedMetadata,
- projectionsColumns
+ projectionsColumns,
+ clusteredBaseSummary,
+ clusterGroupColumnsList
)
{
@Override
@@ -1080,6 +1126,63 @@ private Map> readProjectionColumns(
return projectionColumns;
}
+ /**
+ * Read the per-column data for cluster group {@code groupIndex}. Mirrors {@link #readProjectionColumns} but
+ * with the dictionary-id-tuple smoosh prefix {@code __base$_.../ }; the column set
+ * excludes clustering columns (constants, injected at query time).
+ */
+ private Map<String, Supplier<ColumnHolder>> readClusterGroupColumns(
+ SegmentFileMetadata metadata,
+ ClusteredValueGroupsBaseTableSchema summary,
+ int groupIndex,
+ SegmentFileMapper segmentFileMapper,
+ long intervalStartMillis,
+ boolean lazy,
+ SegmentLazyLoadFailCallback loadFailed
+ ) throws IOException
+ {
+ final TableClusterGroupSpec spec = summary.getClusterGroups().get(groupIndex);
+ final List clusteringValueIds = spec.getClusteringValueIds();
+ final String timeColumnName = summary.getTimeColumnName();
+ final boolean renameTime = !ColumnHolder.TIME_COLUMN_NAME.equals(timeColumnName);
+ final Map<String, Supplier<ColumnHolder>> groupColumns = new LinkedHashMap<>();
+
+ for (String column : summary.getGroupColumnNames()) {
+ final String smooshName = Projections.getClusterGroupSegmentInternalFileName(clusteringValueIds, column);
+ final ByteBuffer colBuffer = segmentFileMapper.mapFile(smooshName);
+ final ColumnDescriptor columnDescriptor = metadata.getColumnDescriptors().get(smooshName);
+ if (columnDescriptor == null) {
+ continue;
+ }
+
+ final String internedColumnName = SmooshedFileMapper.STRING_INTERNER.intern(column);
+ groupColumns.put(
+ internedColumnName,
+ makeColumnHolderSupplier(
+ internedColumnName,
+ columnDescriptor,
+ colBuffer,
+ segmentFileMapper,
+ null,
+ lazy,
+ loadFailed
+ )
+ );
+
+ if (column.equals(timeColumnName) && renameTime) {
+ groupColumns.put(ColumnHolder.TIME_COLUMN_NAME, groupColumns.get(column));
+ groupColumns.remove(column);
+ }
+ }
+ if (timeColumnName == null) {
+ groupColumns.put(
+ ColumnHolder.TIME_COLUMN_NAME,
+ ConstantTimeColumn.makeConstantTimeSupplier(spec.getNumRows(), intervalStartMillis)
+ );
+ }
+ return groupColumns;
+ }
+
private Supplier<ColumnHolder> makeColumnHolderSupplier(
String columnName,
ColumnDescriptor columnDescriptor,
diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndex.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndex.java
index ce9bee38ac19..6f604fbb18cc 100644
--- a/processing/src/main/java/org/apache/druid/segment/QueryableIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndex.java
@@ -25,12 +25,15 @@
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
import org.apache.druid.segment.projections.QueryableProjection;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -108,4 +111,37 @@ default QueryableIndex getProjectionQueryableIndex(String name)
{
return null;
}
+
+ /**
+ * Returns the {@link ClusteredValueGroupsBaseTableSchema} summary if this index represents a clustered base table, or
+ * {@code null} for a non-clustered segment. Default returns {@code null}; only V10-loaded clustered segments
+ * override.
+ */
+ @Nullable
+ default ClusteredValueGroupsBaseTableSchema getClusteredBaseSummary()
+ {
+ return null;
+ }
+
+ /**
+ * Returns the list of {@link TableClusterGroupSpec} entries on this index, one per cluster group. Empty for a
+ * non-clustered segment. For a clustered segment, this is the same list returned by
+ * {@code getClusteredBaseSummary().getClusterGroups()}, surfaced here so query-time dispatch can enumerate cluster
+ * groups (e.g. via {@code Projections.pruneClusterGroups}).
+ */
+ default List<TableClusterGroupSpec> getClusterGroupSchemas()
+ {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Returns a {@link QueryableIndex} sub-view scoped to a single cluster group's column data. Mirrors
+ * {@link #getProjectionQueryableIndex(String)} but for cluster groups, addressed by reference rather than name.
+ * Default returns {@code null}; only clustered segments override.
+ */
+ @Nullable
+ default QueryableIndex getClusterGroupQueryableIndex(TableClusterGroupSpec groupSpec)
+ {
+ return null;
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorFactory.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorFactory.java
index f1aa2bb7bf57..982c681cbafe 100644
--- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorFactory.java
+++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorFactory.java
@@ -19,18 +19,41 @@
package org.apache.druid.segment;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.OrderBy;
import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Offset;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
+import org.apache.druid.segment.projections.ClusteringColumnSelectorFactory;
+import org.apache.druid.segment.projections.ClusteringVectorColumnSelectorFactory;
+import org.apache.druid.segment.projections.Projections;
import org.apache.druid.segment.projections.QueryableProjection;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
+import org.apache.druid.segment.vector.ConcatenatingVectorCursor;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorCursor;
+import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorOffset;
+import org.apache.druid.segment.vector.VectorValueSelector;
import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
@@ -61,12 +84,22 @@ public QueryableIndexCursorFactory(QueryableIndex index)
public CursorHolder makeCursorHolder(CursorBuildSpec spec)
{
QueryableProjection projection = index.getProjection(spec);
- if (projection == null) {
- // no projections, create regular cursor holder
- return new QueryableIndexCursorHolder(index, spec, timeBoundaryInspector);
+ if (projection != null) {
+ return makeAggregateProjectionCursorHolder(projection);
}
- // create projection cursor holder
+ // Cluster-group dispatch runs after aggregate-projection match, before the regular base-table fallback
+ final ClusteredValueGroupsBaseTableSchema clusterSummary = index.getClusteredBaseSummary();
+ if (clusterSummary != null) {
+ return makeClusteredCursorHolder(spec, clusterSummary);
+ }
+
+ // No projections, no clustering, regular full-segment cursor.
+ return new QueryableIndexCursorHolder(index, spec, timeBoundaryInspector);
+ }
+
+ private CursorHolder makeAggregateProjectionCursorHolder(QueryableProjection projection)
+ {
return new QueryableIndexCursorHolder(
projection.getRowSelector(),
projection.getCursorBuildSpec(),
@@ -110,9 +143,328 @@ public List getAggregatorsForPreAggregated()
};
}
+ private CursorHolder makeClusteredCursorHolder(CursorBuildSpec spec, ClusteredValueGroupsBaseTableSchema clusterSummary)
+ {
+ final List<TableClusterGroupSpec> matching = Projections.pruneClusterGroups(
+ new ArrayList<>(index.getClusterGroupSchemas()),
+ spec.getFilter(),
+ spec.getVirtualColumns()
+ );
+
+ if (matching.isEmpty()) {
+ return EmptyClusteredCursorHolder.INSTANCE;
+ }
+
+ if (matching.size() == 1) {
+ return makeSingleGroupClusteredCursorHolder(spec, matching.get(0));
+ }
+ return makeMultiGroupClusteredCursorHolder(spec, matching);
+ }
+
+ private CursorHolder makeSingleGroupClusteredCursorHolder(
+ CursorBuildSpec spec,
+ TableClusterGroupSpec valueGroup
+ )
+ {
+ final QueryableIndex groupIndex = index.getClusterGroupQueryableIndex(valueGroup);
+ if (groupIndex == null) {
+ throw DruidException.defensive(
+ "No cluster-group sub-index resolvable for clustering values "
+ + Arrays.toString(valueGroup.lookupClusteringValues())
+ );
+ }
+
+ return new QueryableIndexCursorHolder(
+ groupIndex,
+ spec,
+ QueryableIndexTimeBoundaryInspector.create(groupIndex)
+ )
+ {
+ @Override
+ protected ColumnSelectorFactory makeColumnSelectorFactoryForOffset(
+ ColumnCache columnCache,
+ Offset baseOffset
+ )
+ {
+ return new ClusteringColumnSelectorFactory(
+ super.makeColumnSelectorFactoryForOffset(columnCache, baseOffset),
+ valueGroup.getSummary().getClusteringColumns(),
+ valueGroup.lookupClusteringValues()
+ );
+ }
+
+ @Override
+ protected VectorColumnSelectorFactory makeVectorColumnSelectorFactoryForOffset(
+ ColumnCache columnCache,
+ VectorOffset baseOffset
+ )
+ {
+ return new ClusteringVectorColumnSelectorFactory(
+ super.makeVectorColumnSelectorFactoryForOffset(columnCache, baseOffset),
+ valueGroup.getSummary().getClusteringColumns(),
+ valueGroup.lookupClusteringValues()
+ );
+ }
+ };
+ }
+
+ /**
+ * Build a cursor holder that walks multiple matching cluster groups back-to-back via
+ * {@link ConcatenatingCursor}. Each per-group {@link CursorHolder} is built lazily inside the cursor's group
+ * transition, so a query that finishes early (e.g., LIMIT-bounded) doesn't open every group's offset.
+ */
+ private CursorHolder makeMultiGroupClusteredCursorHolder(
+ CursorBuildSpec spec,
+ List<TableClusterGroupSpec> matching
+ )
+ {
+ // All matching specs share the same parent summary (they came out of one segment); grab a reference for
+ // getOrdering() and clusteringColumns below.
+ final ClusteredValueGroupsBaseTableSchema clusterSummary = matching.get(0).getSummary();
+ final RowSignature clusteringColumns = clusterSummary.getClusteringColumns();
+ final List<Object[]> clusteringValuesByGroup = new ArrayList<>(matching.size());
+ final List<Supplier<CursorHolder>> holderSuppliers = new ArrayList<>(matching.size());
+ // lifecycle management closer for per-group CursorHolders
+ final Closer closer = Closer.create();
+ for (TableClusterGroupSpec valueGroup : matching) {
+ clusteringValuesByGroup.add(valueGroup.lookupClusteringValues());
+ final QueryableIndex groupIndex = index.getClusterGroupQueryableIndex(valueGroup);
+ if (groupIndex == null) {
+ throw DruidException.defensive(
+ "No cluster-group sub-index resolvable for clustering values "
+ + Arrays.toString(valueGroup.lookupClusteringValues())
+ );
+ }
+ holderSuppliers.add(
+ Suppliers.memoize(
+ () -> closer.register(
+ new QueryableIndexCursorHolder(
+ groupIndex,
+ spec,
+ QueryableIndexTimeBoundaryInspector.create(groupIndex)
+ )
+ )
+ )
+ );
+ }
+
+ // Initial wrapper state uses the first group's clustering values + a throwing placeholder delegate. The
+ // ConcatenatingCursor immediately calls setDelegate on init (before any selector is exposed). The vector
+ // wrapper carries the query-level max vector size from the build spec, the placeholder delegate can't be
+ // queried for sizing, and the value is constant across groups anyway.
+ final int vectorSize = spec.getQueryContext().getVectorSize();
+ final ClusteringColumnSelectorFactory wrapperFactory = new ClusteringColumnSelectorFactory(
+ UNINITIALIZED_DELEGATE,
+ clusteringColumns,
+ clusteringValuesByGroup.get(0)
+ );
+ final ClusteringVectorColumnSelectorFactory vectorWrapperFactory = new ClusteringVectorColumnSelectorFactory(
+ UNINITIALIZED_VECTOR_DELEGATE,
+ clusteringColumns,
+ clusteringValuesByGroup.get(0),
+ vectorSize
+ );
+
+ final ConcatenatingCursor cursor = new ConcatenatingCursor(
+ holderSuppliers,
+ clusteringValuesByGroup,
+ wrapperFactory
+ );
+ final ConcatenatingVectorCursor vectorCursor = new ConcatenatingVectorCursor(
+ holderSuppliers,
+ clusteringValuesByGroup,
+ vectorWrapperFactory
+ );
+
+ // canVectorize() is determined by the per-group holders. Probe the first one (lazily, `Suppliers.memoize`
+ // means this opens it once and is reused by ConcatenatingVectorCursor).
+ final boolean canVectorize = holderSuppliers.get(0).get().canVectorize();
+
+ return new CursorHolder()
+ {
+ @Override
+ public Cursor asCursor()
+ {
+ return cursor;
+ }
+
+ @Override
+ public VectorCursor asVectorCursor()
+ {
+ return vectorCursor;
+ }
+
+ @Override
+ public boolean canVectorize()
+ {
+ return canVectorize;
+ }
+
+ @Override
+ public List<OrderBy> getOrdering()
+ {
+ // Cluster groups are written in clustering-value order (writer-enforced; see ClusteredValueGroupsBaseTableSchema),
+ // and within each group rows are sorted by the segment ordering's tail (clustering prefix dropped). So
+ // back-to-back walking yields rows in the full segment ordering; the writer-side contract makes the
+ // concatenation order-preserving without any merge work at read time.
+ return clusterSummary.getOrdering();
+ }
+
+ @Override
+ public void close()
+ {
+ try {
+ closer.close();
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ /**
+ * Placeholder delegate for the {@link ClusteringColumnSelectorFactory} constructed by
+ * {@link #makeMultiGroupClusteredCursorHolder}. Throws on any access; replaced by the concatenating cursor's
+ * lazy init before the wrapper is exposed to the caller.
+ */
+ private static final ColumnSelectorFactory UNINITIALIZED_DELEGATE = new ColumnSelectorFactory()
+ {
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ throw DruidException.defensive("ConcatenatingCursor delegate accessed before initialization");
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ throw DruidException.defensive("ConcatenatingCursor delegate accessed before initialization");
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return null;
+ }
+ };
+
+ /**
+ * Vector counterpart of {@link #UNINITIALIZED_DELEGATE}. Replaced by
+ * {@link ConcatenatingVectorCursor}'s lazy init before the wrapper is exposed.
+ */
+ private static final VectorColumnSelectorFactory UNINITIALIZED_VECTOR_DELEGATE = new VectorColumnSelectorFactory()
+ {
+ @Override
+ public ReadableVectorInspector getReadableVectorInspector()
+ {
+ throw DruidException.defensive("ConcatenatingVectorCursor delegate accessed before initialization");
+ }
+
+ @Override
+ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ throw DruidException.defensive("ConcatenatingVectorCursor delegate accessed before initialization");
+ }
+
+ @Override
+ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ throw DruidException.defensive("ConcatenatingVectorCursor delegate accessed before initialization");
+ }
+
+ @Override
+ public VectorValueSelector makeValueSelector(String column)
+ {
+ throw DruidException.defensive("ConcatenatingVectorCursor delegate accessed before initialization");
+ }
+
+ @Override
+ public VectorObjectSelector makeObjectSelector(String column)
+ {
+ throw DruidException.defensive("ConcatenatingVectorCursor delegate accessed before initialization");
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return null;
+ }
+ };
+
+ /**
+ * CursorHolder that yields no rows. Used when {@link Projections#pruneClusterGroups} excludes every cluster
+ * group, so the filter is provably unsatisfiable on this segment.
+ */
+ private static final class EmptyClusteredCursorHolder implements CursorHolder
+ {
+ static final EmptyClusteredCursorHolder INSTANCE = new EmptyClusteredCursorHolder();
+
+ @Override
+ public Cursor asCursor()
+ {
+ return new Cursor()
+ {
+ @Override
+ public ColumnSelectorFactory getColumnSelectorFactory()
+ {
+ throw DruidException.defensive(
+ "No column selector factory available on an empty cluster-group cursor"
+ );
+ }
+
+ @Override
+ public void advance()
+ {
+ }
+
+ @Override
+ public void advanceUninterruptibly()
+ {
+ }
+
+ @Override
+ public boolean isDone()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean isDoneOrInterrupted()
+ {
+ return true;
+ }
+
+ @Override
+ public void reset()
+ {
+ }
+ };
+ }
+
+ @Override
+ public boolean canVectorize()
+ {
+ return false;
+ }
+
+ @Override
+ public List<OrderBy> getOrdering()
+ {
+ return Collections.emptyList();
+ }
+ }
+
@Override
public RowSignature getRowSignature()
{
+ final ClusteredValueGroupsBaseTableSchema clusterSummary = index.getClusteredBaseSummary();
+ if (clusterSummary != null) {
+ return getClusteredRowSignature(clusterSummary);
+ }
+
final LinkedHashSet<String> columns = new LinkedHashSet<>();
for (final OrderBy orderBy : index.getOrdering()) {
@@ -137,10 +489,82 @@ public RowSignature getRowSignature()
return builder.build();
}
+ /**
+ * Build the row signature for a clustered segment. Top-level columns are empty, so column types are sourced from:
+ * - the summary's clustering {@link RowSignature} for clustering columns;
+ * - the first cluster group's sub-index for everything else (all groups share the same data-column shape).
+ */
+ private RowSignature getClusteredRowSignature(ClusteredValueGroupsBaseTableSchema clusterSummary)
+ {
+ final LinkedHashSet<String> columns = new LinkedHashSet<>();
+
+ for (final OrderBy orderBy : clusterSummary.getOrdering()) {
+ columns.add(orderBy.getColumnName());
+ }
+ columns.add(ColumnHolder.TIME_COLUMN_NAME);
+ columns.addAll(clusterSummary.getColumnNames());
+
+ final RowSignature.Builder builder = RowSignature.builder();
+ for (final String column : columns) {
+ final ColumnType columnType = resolveClusteredColumnType(column, clusterSummary);
+ if (columnType != null) {
+ builder.add(column, columnType);
+ }
+ }
+ return builder.build();
+ }
+
+ @Nullable
+ private ColumnType resolveClusteredColumnType(String column, ClusteredValueGroupsBaseTableSchema clusterSummary)
+ {
+ // 1. Clustering columns: typed RowSignature on the summary is authoritative.
+ final ColumnType clusteringType = clusterSummary.getClusteringColumns().getColumnType(column).orElse(null);
+ if (clusteringType != null) {
+ return clusteringType;
+ }
+ // 2. Data columns: ask the first cluster group's sub-index. All groups share the same column shape.
+ final List<TableClusterGroupSpec> groups = index.getClusterGroupSchemas();
+ if (groups.isEmpty()) {
+ return null;
+ }
+ final QueryableIndex firstGroupIndex = index.getClusterGroupQueryableIndex(groups.get(0));
+ if (firstGroupIndex == null) {
+ return null;
+ }
+ return ColumnType.fromCapabilities(firstGroupIndex.getColumnCapabilities(column));
+ }
+
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
+ final ClusteredValueGroupsBaseTableSchema clusterSummary = index.getClusteredBaseSummary();
+ if (clusterSummary != null) {
+ return getClusteredColumnCapabilities(column, clusterSummary);
+ }
return index.getColumnCapabilities(column);
}
+
+ @Nullable
+ private ColumnCapabilities getClusteredColumnCapabilities(String column, ClusteredValueGroupsBaseTableSchema clusterSummary)
+ {
+ // synthesize capabilities from the typed RowSignature.
+ final ColumnType clusteringType = clusterSummary.getClusteringColumns().getColumnType(column).orElse(null);
+ if (clusteringType != null) {
+ if (clusteringType.is(ValueType.STRING)) {
+ return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities();
+ }
+ return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(clusteringType);
+ }
+ // ask the first cluster group's sub-index.
+ final List<TableClusterGroupSpec> groups = index.getClusterGroupSchemas();
+ if (groups.isEmpty()) {
+ return null;
+ }
+ final QueryableIndex firstGroupIndex = index.getClusterGroupQueryableIndex(groups.get(0));
+ if (firstGroupIndex == null) {
+ return null;
+ }
+ return firstGroupIndex.getColumnCapabilities(column);
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java
index e6b1ab0c29c1..33386c77eb90 100644
--- a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java
@@ -29,14 +29,17 @@
import com.google.common.collect.Maps;
import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet;
import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.error.DruidException;
import org.apache.druid.query.OrderBy;
import org.apache.druid.segment.column.BaseColumnHolder;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.segment.file.SegmentFileMapper;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
import org.apache.druid.segment.projections.Projections;
import org.apache.druid.segment.projections.QueryableProjection;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
import org.joda.time.Interval;
import javax.annotation.Nullable;
@@ -63,6 +66,9 @@ public abstract class SimpleQueryableIndex implements QueryableIndex
private final Map projectionsMap;
private final SortedSet projections;
private final Map>> projectionColumns;
+ @Nullable
+ private final ClusteredValueGroupsBaseTableSchema clusteredBaseSummary;
+ private final List<Map<String, Supplier<BaseColumnHolder>>> clusterGroupColumns;
private final SegmentFileMapper fileMapper;
private final Supplier> dimensionHandlers;
@@ -74,7 +80,7 @@ public SimpleQueryableIndex(
SegmentFileMapper fileMapper
)
{
- this(dataInterval, dimNames, bitmapFactory, columns, fileMapper, null, null);
+ this(dataInterval, dimNames, bitmapFactory, columns, fileMapper, null, null, null, null);
}
public SimpleQueryableIndex(
@@ -87,7 +93,26 @@ public SimpleQueryableIndex(
@Nullable Map>> projectionColumns
)
{
- Preconditions.checkNotNull(columns.get(ColumnHolder.TIME_COLUMN_NAME));
+ this(dataInterval, dimNames, bitmapFactory, columns, fileMapper, metadata, projectionColumns, null, null);
+ }
+
+ public SimpleQueryableIndex(
+ Interval dataInterval,
+ Indexed<String> dimNames,
+ BitmapFactory bitmapFactory,
+ Map<String, Supplier<BaseColumnHolder>> columns,
+ SegmentFileMapper fileMapper,
+ @Nullable Metadata metadata,
+ @Nullable Map<String, Map<String, Supplier<BaseColumnHolder>>> projectionColumns,
+ @Nullable ClusteredValueGroupsBaseTableSchema clusteredBaseSummary,
+ @Nullable List<Map<String, Supplier<BaseColumnHolder>>> clusterGroupColumns
+ )
+ {
+ // For clustered base tables, the top-level columns map is empty; all column data lives under per-cluster-group
+ // entries in clusterGroupColumns. For all other schema shapes, __time must be present in the top-level columns map
+ if (!columns.isEmpty()) {
+ Preconditions.checkNotNull(columns.get(ColumnHolder.TIME_COLUMN_NAME));
+ }
this.dataInterval = Preconditions.checkNotNull(dataInterval, "dataInterval");
ImmutableList.Builder columnNamesBuilder = ImmutableList.builder();
LinkedHashSet dimsFirst = new LinkedHashSet<>();
@@ -107,6 +132,10 @@ public SimpleQueryableIndex(
this.fileMapper = fileMapper;
this.projectionColumns = projectionColumns == null ? Collections.emptyMap() : projectionColumns;
+ this.clusteredBaseSummary = clusteredBaseSummary;
+ this.clusterGroupColumns = clusterGroupColumns == null
+ ? Collections.emptyList()
+ : List.copyOf(clusterGroupColumns);
this.dimensionHandlers = Suppliers.memoize(() -> initDimensionHandlers(availableDimensions));
if (metadata != null) {
@@ -127,7 +156,7 @@ public SimpleQueryableIndex(
this.projections = Collections.emptySortedSet();
}
} else {
- // When sort order isn't set in metadata.drd, assume the segment is sorted by __time.
+ // When sort order isn't available from metadata, assume the segment is sorted by __time.
this.ordering = Cursors.ascendingTimeOrder();
this.projections = Collections.emptySortedSet();
this.projectionsMap = Collections.emptyMap();
@@ -184,6 +213,73 @@ public Map> getColumns()
return columns;
}
+ /**
+ * Returns the {@link ClusteredValueGroupsBaseTableSchema} summary entry if this index is for a clustered segment, or null for
+ * a non-clustered segment. The summary owns segment-wide clustering config (clustering column signature, shared-
+ * column markers, naming-scheme version)
+ */
+ @Override
+ @Nullable
+ public ClusteredValueGroupsBaseTableSchema getClusteredBaseSummary()
+ {
+ return clusteredBaseSummary;
+ }
+
+ /**
+ * Returns the cluster groups nested in this index's summary, in their original order. Empty for a non-clustered
+ * segment. Used by query-time dispatch to enumerate groups and feed {@link Projections#pruneClusterGroups}.
+ */
+ @Override
+ public List<TableClusterGroupSpec> getClusterGroupSchemas()
+ {
+ return clusteredBaseSummary == null ? Collections.emptyList() : clusteredBaseSummary.getClusterGroups();
+ }
+
+ /**
+ * Returns a {@link QueryableIndex} sub-view scoped to a single cluster group's column data. Mirrors
+ * {@link #getProjectionQueryableIndex(String)} but for cluster groups: addressed by reference to the spec, not by
+ * name. The returned index's columns are the group's per-group columns; clustering columns are NOT present in
+ * the returned index, they're injected at the cursor-factory level via {@code ClusteringColumnSelectorFactory}.
+ */
+ @Override
+ public QueryableIndex getClusterGroupQueryableIndex(TableClusterGroupSpec groupSpec)
+ {
+ if (clusteredBaseSummary == null) {
+ throw DruidException.defensive("getClusterGroupQueryableIndex called on a non-clustered segment");
+ }
+ final List<TableClusterGroupSpec> groups = clusteredBaseSummary.getClusterGroups();
+ final int index = groups.indexOf(groupSpec);
+ if (index < 0) {
+ throw DruidException.defensive("Cluster group spec is not part of this segment");
+ }
+ final Map<String, Supplier<BaseColumnHolder>> groupColumns = clusterGroupColumns.get(index);
+ final Metadata groupMetadata = new Metadata(
+ null,
+ clusteredBaseSummary.getAggregators(),
+ null,
+ clusteredBaseSummary.getEffectiveGranularity(),
+ false,
+ clusteredBaseSummary.getGroupOrdering(),
+ null
+ );
+ return new SimpleQueryableIndex(
+ dataInterval,
+ new ListIndexed<>(clusteredBaseSummary.getGroupDimensionNames()),
+ bitmapFactory,
+ groupColumns,
+ fileMapper,
+ groupMetadata,
+ null
+ )
+ {
+ @Override
+ public Metadata getMetadata()
+ {
+ return groupMetadata;
+ }
+ };
+ }
+
@VisibleForTesting
public SegmentFileMapper getFileMapper()
{
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchema.java b/processing/src/main/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchema.java
new file mode 100644
index 000000000000..e90502edc3f4
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchema.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Lists;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.segment.AggregateProjectionMetadata;
+import org.apache.druid.segment.Metadata;
+import org.apache.druid.segment.VirtualColumn;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.utils.CollectionUtils;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Top-level summary for a clustered base table whose groups are identified by discrete clustering-value tuples. Each
+ * tuple group is internally stored as a separate table without storing the cluster columns, which are pulled into this
+ * metadata. This is optimizing for use cases which typically only need to read from a single group via filters present
+ * on a query. Cluster groups nest inside as {@link #getClusterGroups()}; their column data live in the V10 segment
+ * file under dictionary-id-tuple prefixes ({@code __base$_.../ }), where the ids index into
+ * {@link #getClusteringDictionaries()}.
+ */
+public class ClusteredValueGroupsBaseTableSchema implements BaseTableProjectionSchema
+{
+ public static final String TYPE_NAME = "clustered-value-groups-base-table";
+
+ private final VirtualColumns virtualColumns;
+ private final List<String> columnNames;
+ private final AggregatorFactory[] aggregators;
+ private final List<OrderBy> ordering;
+ private final RowSignature clusteringColumns;
+ private final List<String> sharedColumns;
+ private final ClusteringDictionaries clusteringDictionaries;
+ private final List<TableClusterGroupSpec> clusterGroups;
+
+ // computed
+ private final int timeColumnPosition;
+ private final Granularity effectiveGranularity;
+
+ @JsonCreator
+ public ClusteredValueGroupsBaseTableSchema(
+ @JsonProperty("virtualColumns") VirtualColumns virtualColumns,
+ @JsonProperty("columns") List<String> columns,
+ @JsonProperty("aggregators") @Nullable AggregatorFactory[] aggregators,
+ @JsonProperty("ordering") List<OrderBy> ordering,
+ @JsonProperty("clusteringColumns") RowSignature clusteringColumns,
+ @JsonProperty("sharedColumns") @Nullable List<String> sharedColumns,
+ @JsonProperty("clusteringDictionaries") @Nullable ClusteringDictionaries clusteringDictionaries,
+ @JsonProperty("clusterGroups") @Nullable List<TableClusterGroupSpec> clusterGroups
+ )
+ {
+ if (CollectionUtils.isNullOrEmpty(columns)) {
+ throw DruidException.defensive("clustered base table schema columns must not be null or empty");
+ }
+ if (ordering == null) {
+ throw DruidException.defensive("clustered base table schema ordering must not be null");
+ }
+ if (clusteringColumns == null || clusteringColumns.size() == 0) {
+ throw DruidException.defensive(
+ "clustered base table schema clusteringColumns must not be null or empty"
+ );
+ }
+ if (ordering.size() < clusteringColumns.size()) {
+ throw DruidException.defensive(
+ "ordering size [%s] must be at least clusteringColumns size [%s] (clustering columns must form a prefix"
+ + " of the segment ordering)",
+ ordering.size(),
+ clusteringColumns.size()
+ );
+ }
+ for (int i = 0; i < clusteringColumns.size(); i++) {
+ final String clusteringColumn = clusteringColumns.getColumnName(i);
+ if (!columns.contains(clusteringColumn)) {
+ throw DruidException.defensive(
+ "clusteringColumn [%s] must appear in columns of the clustered base table summary",
+ clusteringColumn
+ );
+ }
+ final ColumnType type = clusteringColumns.getColumnType(i).orElse(null);
+ if (!Projections.isAllowedClusteringType(type)) {
+ throw DruidException.defensive(
+ "clustering column [%s] has unsupported type [%s]; allowed types are STRING, LONG, DOUBLE, FLOAT",
+ clusteringColumn,
+ type
+ );
+ }
+ // Per-group ordering is derived by dropping this prefix; pruning + cursor concatenation rely on it.
+ final String orderingColumn = ordering.get(i).getColumnName();
+ if (!clusteringColumn.equals(orderingColumn)) {
+ throw DruidException.defensive(
+ "clustering column at position [%s] is [%s] but the segment ordering at the same position is [%s];"
+ + " clustering columns must form a prefix of the segment ordering",
+ i,
+ clusteringColumn,
+ orderingColumn
+ );
+ }
+ }
+ final List<String> resolvedSharedColumns = sharedColumns == null ? List.of() : sharedColumns;
+ for (String shared : resolvedSharedColumns) {
+ if (!columns.contains(shared)) {
+ throw DruidException.defensive(
+ "sharedColumn [%s] must appear in columns of the clustered base table summary",
+ shared
+ );
+ }
+ }
+ this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns;
+ this.columnNames = columns;
+ this.aggregators = aggregators == null ? new AggregatorFactory[0] : aggregators;
+ this.ordering = ordering;
+ this.clusteringColumns = clusteringColumns;
+ this.sharedColumns = resolvedSharedColumns;
+ this.clusterGroups = clusterGroups == null ? List.of() : List.copyOf(clusterGroups);
+ this.clusteringDictionaries = clusteringDictionaries == null
+ ? ClusteringDictionaries.EMPTY
+ : clusteringDictionaries;
+
+ int foundTimePosition = -1;
+ Granularity granularity = null;
+ for (int i = 0; i < ordering.size(); i++) {
+ OrderBy orderBy = ordering.get(i);
+ if (orderBy.getColumnName().equals(ColumnHolder.TIME_COLUMN_NAME)) {
+ foundTimePosition = i;
+ final VirtualColumn vc = this.virtualColumns.getVirtualColumn(Granularities.GRANULARITY_VIRTUAL_COLUMN_NAME);
+ if (vc != null) {
+ granularity = Granularities.fromVirtualColumn(vc);
+ } else {
+ granularity = Granularities.NONE;
+ }
+ }
+ }
+ if (granularity == null) {
+ throw DruidException.defensive(
+ "clustered base table doesn't have a [%s] column?",
+ ColumnHolder.TIME_COLUMN_NAME
+ );
+ }
+ this.timeColumnPosition = foundTimePosition;
+ this.effectiveGranularity = granularity;
+
+ // Specs always start unwired: there's a chicken-and-egg between the summary and its specs, resolved by
+ // deferring all summary-dependent state on the spec to setSummary, which we invoke here once the summary's
+ // own state is populated.
+ for (TableClusterGroupSpec spec : this.clusterGroups) {
+ spec.setSummary(this);
+ }
+ }
+
+ @JsonIgnore
+ @Override
+ public List<String> getColumnNames()
+ {
+ List<String> columns = new ArrayList<>(columnNames.size() + aggregators.length);
+ columns.addAll(columnNames);
+ for (AggregatorFactory aggregator : aggregators) {
+ columns.add(aggregator.getName());
+ }
+ return columns;
+ }
+
+ @JsonProperty
+ @Override
+ public VirtualColumns getVirtualColumns()
+ {
+ return virtualColumns;
+ }
+
+ @JsonProperty
+ public List<String> getColumns()
+ {
+ return columnNames;
+ }
+
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ public AggregatorFactory[] getAggregators()
+ {
+ return aggregators;
+ }
+
+ @JsonProperty
+ @Override
+ public List<OrderBy> getOrdering()
+ {
+ return ordering;
+ }
+
+ @JsonProperty
+ public RowSignature getClusteringColumns()
+ {
+ return clusteringColumns;
+ }
+
+ /**
+ * Columns which have common data stored once under {@link Projections#BASE_TABLE_PROJECTION_NAME} and shared by
+ * all cluster-group entries
+ */
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_EMPTY)
+ public List<String> getSharedColumns()
+ {
+ return sharedColumns;
+ }
+
+ /**
+ * Per-type clustering value dictionaries; see {@link ClusteringDictionaries} for routing semantics.
+ */
+ @JsonProperty
+ public ClusteringDictionaries getClusteringDictionaries()
+ {
+ return clusteringDictionaries;
+ }
+
+ /**
+ * Materialize the typed value at clustering position {@code clusteringColumnIndex} with dictionary
+ * {@code dictionaryId}.
+ */
+ Object lookupClusteringValue(int clusteringColumnIndex, int dictionaryId)
+ {
+ return clusteringDictionaries.lookupValue(
+ clusteringColumns.getColumnType(clusteringColumnIndex).orElseThrow(),
+ dictionaryId
+ );
+ }
+
+ /**
+ * The cluster groups nested in this summary, in clustering-value sort order. Walking groups back-to-back in
+ * this order yields rows in the segment's full declared ordering (clustering values monotonically advance
+ * across groups; within each group, rows follow the segment ordering with the clustering prefix dropped).
+ */
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_EMPTY)
+ public List<TableClusterGroupSpec> getClusterGroups()
+ {
+ return clusterGroups;
+ }
+
+ /**
+ * Per-group sort order: the segment ordering with the clustering-column prefix dropped.
+ */
+ @JsonIgnore
+ public List<OrderBy> getGroupOrdering()
+ {
+ return ordering.subList(clusteringColumns.size(), ordering.size());
+ }
+
+ /**
+ * Per-group column names: this summary's full column list (including aggregator names) minus the clustering columns.
+ */
+ @JsonIgnore
+ public List<String> getGroupColumnNames()
+ {
+ final Set<String> clusteringNames = new HashSet<>(clusteringColumns.getColumnNames());
+ final List<String> all = getColumnNames();
+ final List<String> result = new ArrayList<>(all.size());
+ for (String c : all) {
+ if (!clusteringNames.contains(c)) {
+ result.add(c);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Per-group dimension names: {@link #getDimensionNames()} minus the clustering columns.
+ */
+ @JsonIgnore
+ public List<String> getGroupDimensionNames()
+ {
+ final Set<String> clusteringNames = new HashSet<>(clusteringColumns.getColumnNames());
+ final List<String> dims = getDimensionNames();
+ final List<String> result = new ArrayList<>(dims.size());
+ for (String d : dims) {
+ if (!clusteringNames.contains(d)) {
+ result.add(d);
+ }
+ }
+ return result;
+ }
+
+ @JsonIgnore
+ @Override
+ public int getTimeColumnPosition()
+ {
+ return timeColumnPosition;
+ }
+
+ @JsonIgnore
+ @Override
+ public Granularity getEffectiveGranularity()
+ {
+ return effectiveGranularity;
+ }
+
+ @JsonIgnore
+ @Override
+ public List<String> getDimensionNames()
+ {
+ // NOTE(review): timeColumnPosition is a position in `ordering`; the fast path below assumes columnNames[0]
+ // is __time whenever that position is 0 — confirm the segment writer guarantees this alignment.
+ if (timeColumnPosition == 0) {
+ return columnNames.subList(1, columnNames.size());
+ }
+ final List<String> dimsWithoutTime = Lists.newArrayListWithCapacity(columnNames.size() - 1);
+ for (String column : columnNames) {
+ if (ColumnHolder.TIME_COLUMN_NAME.equals(column)) {
+ continue;
+ }
+ dimsWithoutTime.add(column);
+ }
+ return dimsWithoutTime;
+ }
+
+ @Override
+ public Metadata asMetadata(@Nullable List<AggregateProjectionMetadata> projections)
+ {
+ return new Metadata(
+ null,
+ aggregators,
+ null,
+ effectiveGranularity,
+ false,
+ ordering,
+ projections
+ );
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ ClusteredValueGroupsBaseTableSchema that = (ClusteredValueGroupsBaseTableSchema) o;
+ return Objects.equals(virtualColumns, that.virtualColumns)
+ && Objects.equals(columnNames, that.columnNames)
+ && Objects.deepEquals(aggregators, that.aggregators)
+ && Objects.equals(ordering, that.ordering)
+ && Objects.equals(clusteringColumns, that.clusteringColumns)
+ && Objects.equals(sharedColumns, that.sharedColumns)
+ && Objects.equals(clusteringDictionaries, that.clusteringDictionaries)
+ && Objects.equals(clusterGroups, that.clusterGroups);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(
+ virtualColumns,
+ columnNames,
+ Arrays.hashCode(aggregators),
+ ordering,
+ clusteringColumns,
+ sharedColumns,
+ clusteringDictionaries,
+ clusterGroups
+ );
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ClusteredValueGroupsBaseTableSchema{" +
+ "virtualColumns=" + virtualColumns +
+ ", columnNames=" + columnNames +
+ ", aggregators=" + Arrays.toString(aggregators) +
+ ", ordering=" + ordering +
+ ", clusteringColumns=" + clusteringColumns +
+ ", sharedColumns=" + sharedColumns +
+ ", clusteringDictionaries=" + clusteringDictionaries +
+ ", clusterGroups=" + clusterGroups +
+ '}';
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactory.java
new file mode 100644
index 000000000000..a48610278db1
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactory.java
@@ -0,0 +1,566 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.ConstantExprEvalSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.RowIdSupplier;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.IndexedInts;
+
+import javax.annotation.Nullable;
+import java.util.function.Supplier;
+
+/**
+ * {@link ColumnSelectorFactory} wrapper that intercepts requests for clustering columns and returns selectors
+ * carrying the group's constant value, while delegating all other column lookups to a wrapped factory. This is the
+ * mechanism by which a cluster group's clustering columns, which are NOT stored in the per-group column data since
+ * they're constant across the group, are made visible to query engines as if they were ordinary columns.
+ */
+public class ClusteringColumnSelectorFactory implements ColumnSelectorFactory
+{
+ private final RowSignature clusteringColumns;
+ private ColumnSelectorFactory delegate;
+ private Object[] clusteringValues;
+ // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their
+ // cached inner state
+ private long generation;
+
+ public ClusteringColumnSelectorFactory(
+ ColumnSelectorFactory delegate,
+ RowSignature clusteringColumns,
+ Object[] clusteringValues
+ )
+ {
+ this.clusteringColumns = clusteringColumns;
+ setDelegate(delegate, clusteringValues); // validates the value tuple and initializes delegate + generation
+ }
+
+ /**
+ * Update the underlying factory and the constant values for the current cluster group. Called by a multi-group
+ * concatenating cursor on each group transition. Selectors previously returned by this factory will, on their next
+ * invocation, observe the updated state; see the per-call indirection in the inner selector classes.
+ */
+ public void setDelegate(ColumnSelectorFactory delegate, Object[] clusteringValues)
+ {
+ if (clusteringValues == null || clusteringValues.length != clusteringColumns.size()) {
+ throw DruidException.defensive(
+ "clusteringValues length [%s] must match clusteringColumns size [%s]",
+ clusteringValues == null ? "null" : clusteringValues.length,
+ clusteringColumns.size()
+ );
+ }
+ this.delegate = delegate;
+ this.clusteringValues = clusteringValues;
+ this.generation++; // signals previously-issued selectors/matchers to rebuild their cached state
+ }
+
+ ColumnSelectorFactory getDelegate()
+ {
+ return delegate; // package-private: read by the per-call indirection in the inner selector wrappers
+ }
+
+ long getGeneration()
+ {
+ return generation; // monotonically increasing; bumped once per setDelegate (i.e. per group transition)
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); // < 0 when not a clustering column
+ if (idx < 0) {
+ return new DelegatingDimensionSelector(this, dimensionSpec); // ordinary column: resolve against the current delegate
+ }
+ return new ClusteringDimensionSelector(this, idx, dimensionSpec); // clustering column: per-group constant selector
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ final int idx = clusteringColumns.indexOf(columnName); // < 0 when not a clustering column
+ if (idx < 0) {
+ return new DelegatingColumnValueSelector(this, columnName); // ordinary column: resolve against the current delegate
+ }
+ return new ClusteringColumnValueSelector(this, idx, clusteringColumns.getColumnType(idx).orElseThrow()); // typed per-group constant
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ final int idx = clusteringColumns.indexOf(column);
+ if (idx < 0) {
+ return delegate.getColumnCapabilities(column); // non-clustering column: whatever the current group reports
+ }
+ final ColumnType type = clusteringColumns.getColumnType(idx).orElseThrow();
+ if (type.is(ValueType.STRING)) {
+ return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(); // synthesized: single-value string constant
+ }
+ return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); // synthesized: numeric constant
+ }
+
+ @Nullable
+ @Override
+ public RowIdSupplier getRowIdSupplier()
+ {
+ return delegate.getRowIdSupplier(); // NOTE(review): not generation-aware; stale if cached across a group transition — confirm callers re-fetch
+ }
+
+ Object currentValue(int idx)
+ {
+ return clusteringValues[idx]; // constant for clustering column [idx] in the current group
+ }
+
+ /**
+ * Dimension selector for a clustering column. Delegates the value lookup back to the parent factory each call so
+ * that group transitions (which mutate the parent's clustering values) are observed immediately. Internally
+ * decorates a {@link DimensionSelector#constant(String)} re-built when the underlying value changes.
+ */
+ private static final class ClusteringDimensionSelector implements DimensionSelector
+ {
+ private final ClusteringColumnSelectorFactory parent;
+ private final int idx;
+ private final DimensionSpec spec;
+ private DimensionSelector cachedSelector;
+ private long cachedGeneration = -1;
+
+ private ClusteringDimensionSelector(ClusteringColumnSelectorFactory parent, int idx, DimensionSpec spec)
+ {
+ this.parent = parent;
+ this.idx = idx;
+ this.spec = spec;
+ }
+
+ private DimensionSelector currentSelector()
+ {
+ final long currentGeneration = parent.getGeneration();
+ if (cachedGeneration == currentGeneration) {
+ return cachedSelector;
+ }
+ final Object raw = parent.currentValue(idx);
+ final String stringValue = raw == null ? null : String.valueOf(raw);
+ cachedSelector = DimensionSelector.constant(stringValue, spec.getExtractionFn());
+ cachedGeneration = currentGeneration;
+ return cachedSelector;
+ }
+
+ @Override
+ public IndexedInts getRow()
+ {
+ return currentSelector().getRow();
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(@Nullable String value)
+ {
+ // Generation-aware: rebuild the matcher when the clustering value itself changes (group transition).
+ return new ClusteringValueMatcher(() -> currentSelector().makeValueMatcher(value));
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(
+ DruidPredicateFactory predicateFactory
+ )
+ {
+ return new ClusteringValueMatcher(() -> currentSelector().makeValueMatcher(predicateFactory));
+ }
+
+ /**
+ * Generation-aware matcher for the clustering-column path. The constant value itself changes between groups,
+ * so a held matcher must re-resolve from the current per-generation constant selector.
+ */
+ private final class ClusteringValueMatcher implements ValueMatcher
+ {
+ private final Supplier<ValueMatcher> factory;
+ private long cachedGeneration = -1;
+ private ValueMatcher cachedMatcher;
+
+ private ClusteringValueMatcher(Supplier<ValueMatcher> factory)
+ {
+ this.factory = factory;
+ }
+
+ private ValueMatcher current()
+ {
+ final long gen = parent.getGeneration();
+ if (cachedGeneration != gen) {
+ cachedMatcher = factory.get();
+ cachedGeneration = gen;
+ }
+ return cachedMatcher;
+ }
+
+ @Override
+ public boolean matches(boolean includeUnknown)
+ {
+ return current().matches(includeUnknown);
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ inspector.visit("clustering-matcher", idx);
+ }
+ }
+
+ @Override
+ public int getValueCardinality()
+ {
+ return currentSelector().getValueCardinality();
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ return currentSelector().lookupName(id);
+ }
+
+ @Override
+ public boolean nameLookupPossibleInAdvance()
+ {
+ return currentSelector().nameLookupPossibleInAdvance();
+ }
+
+ @Nullable
+ @Override
+ public IdLookup idLookup()
+ {
+ return currentSelector().idLookup();
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return currentSelector().getObject();
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return currentSelector().classOfObject();
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ inspector.visit("clusteringIndex", idx);
+ }
+ }
+
+ /**
+ * Value selector for a clustering column. Caches a {@link ConstantExprEvalSelector} built from the group's typed
+ * value; on group transition, the cache rebuilds against the new value. Mirrors
+ * {@code EvalUnwrappingColumnValueSelector} in {@code ExpressionSelectors}.
+ */
+ private static final class ClusteringColumnValueSelector implements ColumnValueSelector<Object>
+ {
+ private final ClusteringColumnSelectorFactory parent;
+ private final int idx;
+ private final ExpressionType expressionType;
+ private long cachedGeneration = -1;
+ private ConstantExprEvalSelector cachedSelector;
+
+ private ClusteringColumnValueSelector(ClusteringColumnSelectorFactory parent, int idx, ColumnType columnType)
+ {
+ this.parent = parent;
+ this.idx = idx;
+ this.expressionType = ExpressionType.fromColumnTypeStrict(columnType);
+ }
+
+ private ConstantExprEvalSelector currentSelector()
+ {
+ final long currentGeneration = parent.getGeneration();
+ if (cachedGeneration == currentGeneration) {
+ return cachedSelector;
+ }
+ cachedSelector = new ConstantExprEvalSelector(ExprEval.ofType(expressionType, parent.currentValue(idx)));
+ cachedGeneration = currentGeneration;
+ return cachedSelector;
+ }
+
+ @Override
+ public double getDouble()
+ {
+ return currentSelector().getDouble();
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return currentSelector().getFloat();
+ }
+
+ @Override
+ public long getLong()
+ {
+ return currentSelector().getLong();
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return currentSelector().isNull();
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return currentSelector().getObject().value(); // unwrap the ExprEval into the plain typed value
+ }
+
+ @Override
+ public Class<Object> classOfObject()
+ {
+ return Object.class;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ inspector.visit("clusteringIndex", idx);
+ }
+ }
+
+  /**
+   * Stable {@link DimensionSelector} for a non-clustering column. Each call delegates to the parent factory's
+   * current underlying delegate; on group transition, the cached inner selector is rebuilt against the new delegate.
+   * For the single-group case, the cache fills once and never invalidates.
+   *
+   * Value matchers returned by {@link #makeValueMatcher(String)} /
+   * {@link #makeValueMatcher(DruidPredicateFactory)} are also generation-aware; they re-resolve their inner matcher
+   * from the current delegate on group transition, so callers that hold a matcher across transitions observe the
+   * new group's data the same way the selector itself does.
+   */
+  private static final class DelegatingDimensionSelector implements DimensionSelector
+  {
+    private final ClusteringColumnSelectorFactory parent;
+    private final DimensionSpec spec;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private DimensionSelector cachedInner;
+
+    private DelegatingDimensionSelector(ClusteringColumnSelectorFactory parent, DimensionSpec spec)
+    {
+      this.parent = parent;
+      this.spec = spec;
+    }
+
+    /** Returns the delegate's selector for the current group, rebuilding it on group transition. */
+    private DimensionSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration != currentGeneration) {
+        cachedInner = parent.getDelegate().makeDimensionSelector(spec);
+        cachedGeneration = currentGeneration;
+      }
+      return cachedInner;
+    }
+
+    @Override
+    public IndexedInts getRow()
+    {
+      return currentInner().getRow();
+    }
+
+    @Override
+    public ValueMatcher makeValueMatcher(@Nullable String value)
+    {
+      // The supplier re-binds the matcher to whatever selector is current when a rebuild is needed.
+      return new DelegatingValueMatcher(() -> currentInner().makeValueMatcher(value));
+    }
+
+    @Override
+    public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
+    {
+      return new DelegatingValueMatcher(() -> currentInner().makeValueMatcher(predicateFactory));
+    }
+
+    /**
+     * Generation-aware {@link ValueMatcher}: re-resolves its inner matcher from the current
+     * {@link DimensionSelector} on each group transition. Non-static inner class so it can read the outer
+     * selector's {@code parent.getGeneration()} and trigger a rebuild via the supplier.
+     */
+    private final class DelegatingValueMatcher implements ValueMatcher
+    {
+      // Builds a matcher against the current inner selector; invoked again after each group transition.
+      // (Type parameter restored; the raw Supplier lost it to extraction.)
+      private final Supplier<ValueMatcher> factory;
+      private long cachedGeneration = -1;
+      private ValueMatcher cachedMatcher;
+
+      private DelegatingValueMatcher(Supplier<ValueMatcher> factory)
+      {
+        this.factory = factory;
+      }
+
+      private ValueMatcher current()
+      {
+        final long gen = parent.getGeneration();
+        if (cachedGeneration != gen) {
+          cachedMatcher = factory.get();
+          cachedGeneration = gen;
+        }
+        return cachedMatcher;
+      }
+
+      @Override
+      public boolean matches(boolean includeUnknown)
+      {
+        return current().matches(includeUnknown);
+      }
+
+      @Override
+      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+      {
+        inspector.visit("delegating-matcher", spec.getDimension());
+      }
+    }
+
+    @Override
+    public int getValueCardinality()
+    {
+      return currentInner().getValueCardinality();
+    }
+
+    @Nullable
+    @Override
+    public String lookupName(int id)
+    {
+      return currentInner().lookupName(id);
+    }
+
+    @Override
+    public boolean nameLookupPossibleInAdvance()
+    {
+      return currentInner().nameLookupPossibleInAdvance();
+    }
+
+    @Nullable
+    @Override
+    public IdLookup idLookup()
+    {
+      return currentInner().idLookup();
+    }
+
+    @Nullable
+    @Override
+    public Object getObject()
+    {
+      return currentInner().getObject();
+    }
+
+    @Override
+    public Class<?> classOfObject()
+    {
+      return currentInner().classOfObject();
+    }
+
+    @Override
+    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+    {
+      inspector.visit("delegating", spec.getDimension());
+    }
+  }
+
+  /**
+   * Stable {@link ColumnValueSelector} for a non-clustering column. Same delegating-with-generation-cache pattern as
+   * {@link DelegatingDimensionSelector}.
+   */
+  private static final class DelegatingColumnValueSelector implements ColumnValueSelector
+  {
+    private final ClusteringColumnSelectorFactory parent;
+    private final String columnName;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private ColumnValueSelector cachedInner;
+
+    private DelegatingColumnValueSelector(ClusteringColumnSelectorFactory parent, String columnName)
+    {
+      this.parent = parent;
+      this.columnName = columnName;
+    }
+
+    /** Returns the delegate's selector for the current group, rebuilding it on group transition. */
+    @SuppressWarnings("unchecked")
+    private ColumnValueSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration != currentGeneration) {
+        cachedInner = parent.getDelegate().makeColumnValueSelector(columnName);
+        cachedGeneration = currentGeneration;
+      }
+      return (ColumnValueSelector) cachedInner;
+    }
+
+    @Override
+    public double getDouble()
+    {
+      return currentInner().getDouble();
+    }
+
+    @Override
+    public float getFloat()
+    {
+      return currentInner().getFloat();
+    }
+
+    @Override
+    public long getLong()
+    {
+      return currentInner().getLong();
+    }
+
+    @Override
+    public boolean isNull()
+    {
+      return currentInner().isNull();
+    }
+
+    @Nullable
+    @Override
+    public Object getObject()
+    {
+      return currentInner().getObject();
+    }
+
+    @Override
+    public Class<?> classOfObject()
+    {
+      // Wildcard restored; extraction had mangled the return type to "Class>".
+      return currentInner().classOfObject();
+    }
+
+    @Override
+    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+    {
+      inspector.visit("delegating", columnName);
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/ClusteringDictionaries.java b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringDictionaries.java
new file mode 100644
index 000000000000..4f94ee45793f
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringDictionaries.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Interner;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.timeline.DataSegment;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Per-type sorted-nulls-first dictionaries for a clustered base table's clustering values. Each
+ * {@link TableClusterGroupSpec#getClusteringValueIds()} entry at position {@code i} indexes into the dictionary
+ * for the column type at position {@code i}; columns of the same type share a dictionary.
+ *
+ * String entries are interned via {@link DataSegment#stringInterner()} on construction since clustering
+ * strings repeat heavily across cached segments. Numeric types aren't interned.
+ *
+ * Generic type parameters on the dictionary lists were restored (they had been stripped to raw {@code List}).
+ */
+public class ClusteringDictionaries
+{
+  public static final ClusteringDictionaries EMPTY = new ClusteringDictionaries(null, null, null, null);
+
+  private final List<String> stringDictionary;
+  private final List<Long> longDictionary;
+  private final List<Double> doubleDictionary;
+  private final List<Float> floatDictionary;
+
+  @JsonCreator
+  public ClusteringDictionaries(
+      @JsonProperty("string") @Nullable List<String> stringDictionary,
+      @JsonProperty("long") @Nullable List<Long> longDictionary,
+      @JsonProperty("double") @Nullable List<Double> doubleDictionary,
+      @JsonProperty("float") @Nullable List<Float> floatDictionary
+  )
+  {
+    // Null inputs normalize to immutable empty lists so lookups never NPE and JSON serialization omits them.
+    // Collections.unmodifiableList (not List.copyOf) is used because dictionaries are nulls-first and may
+    // legitimately contain null entries, which List.copyOf rejects.
+    this.stringDictionary = internStringDictionary(stringDictionary);
+    this.longDictionary = longDictionary == null ? List.of() : Collections.unmodifiableList(longDictionary);
+    this.doubleDictionary = doubleDictionary == null ? List.of() : Collections.unmodifiableList(doubleDictionary);
+    this.floatDictionary = floatDictionary == null ? List.of() : Collections.unmodifiableList(floatDictionary);
+  }
+
+  @JsonProperty("string")
+  @JsonInclude(JsonInclude.Include.NON_EMPTY)
+  public List<String> getStringDictionary()
+  {
+    return stringDictionary;
+  }
+
+  @JsonProperty("long")
+  @JsonInclude(JsonInclude.Include.NON_EMPTY)
+  public List<Long> getLongDictionary()
+  {
+    return longDictionary;
+  }
+
+  @JsonProperty("double")
+  @JsonInclude(JsonInclude.Include.NON_EMPTY)
+  public List<Double> getDoubleDictionary()
+  {
+    return doubleDictionary;
+  }
+
+  @JsonProperty("float")
+  @JsonInclude(JsonInclude.Include.NON_EMPTY)
+  public List<Float> getFloatDictionary()
+  {
+    return floatDictionary;
+  }
+
+  /**
+   * Look up the typed value at position {@code id} in the dictionary for {@code type}. Throws on out-of-range
+   * {@code id} or unsupported {@code type}.
+   */
+  @Nullable
+  public Object lookupValue(ColumnType type, int id)
+  {
+    final List<?> dict = dictionaryForType(type);
+    if (id < 0 || id >= dict.size()) {
+      throw DruidException.defensive(
+          "dictionary id [%s] is out of range for clustering type [%s] (size [%s])",
+          id,
+          type,
+          dict.size()
+      );
+    }
+    return dict.get(id);
+  }
+
+  /** Dictionary backing the given clustering type; throws on null or unsupported types. */
+  public List<?> dictionaryForType(ColumnType type)
+  {
+    if (type == null) {
+      throw DruidException.defensive("clustering type must not be null");
+    }
+    if (type.is(ValueType.STRING)) {
+      return stringDictionary;
+    }
+    if (type.is(ValueType.LONG)) {
+      return longDictionary;
+    }
+    if (type.is(ValueType.DOUBLE)) {
+      return doubleDictionary;
+    }
+    if (type.is(ValueType.FLOAT)) {
+      return floatDictionary;
+    }
+    throw DruidException.defensive("unsupported clustering type [%s]", type);
+  }
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    ClusteringDictionaries that = (ClusteringDictionaries) o;
+    return Objects.equals(stringDictionary, that.stringDictionary)
+           && Objects.equals(longDictionary, that.longDictionary)
+           && Objects.equals(doubleDictionary, that.doubleDictionary)
+           && Objects.equals(floatDictionary, that.floatDictionary);
+  }
+
+  @Override
+  public int hashCode()
+  {
+    return Objects.hash(stringDictionary, longDictionary, doubleDictionary, floatDictionary);
+  }
+
+  @Override
+  public String toString()
+  {
+    return "ClusteringDictionaries{" +
+           "string=" + stringDictionary +
+           ", long=" + longDictionary +
+           ", double=" + doubleDictionary +
+           ", float=" + floatDictionary +
+           '}';
+  }
+
+  /** Interns string entries (nulls preserved) into an unmodifiable copy; null/empty input becomes List.of(). */
+  private static List<String> internStringDictionary(@Nullable List<String> dict)
+  {
+    if (dict == null || dict.isEmpty()) {
+      return List.of();
+    }
+    final Interner<String> interner = DataSegment.stringInterner();
+    final List<String> out = new ArrayList<>(dict.size());
+    for (String s : dict) {
+      out.add(s == null ? null : interner.intern(s));
+    }
+    return Collections.unmodifiableList(out);
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactory.java
new file mode 100644
index 000000000000..450bd859815d
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactory.java
@@ -0,0 +1,631 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+
+/**
+ * Vectorized counterpart of {@link ClusteringColumnSelectorFactory}. Wraps a delegate
+ * {@link VectorColumnSelectorFactory} and intercepts requests for clustering columns, returning constant-typed
+ * vector selectors (via {@link ConstantVectorSelectors}). Other column requests pass through to delegating wrappers.
+ *
+ * The factory is mutable via {@link #setDelegate(VectorColumnSelectorFactory, Object[])}: a multi-group
+ * {@code ConcatenatingVectorCursor} swaps the underlying delegate + clustering values on each group transition.
+ * Selectors previously returned by this factory observe the new state on their next call thanks to a generation
+ * counter cache invalidation.
+ *
+ * For single-group dispatch, the factory is constructed once and {@code setDelegate} is never called; selectors'
+ * caches fill on first access and never invalidate.
+ *
+ * NOTE(review): nothing here is synchronized — presumably single-threaded cursor use; confirm callers never
+ * share an instance across threads.
+ */
+public class ClusteringVectorColumnSelectorFactory implements VectorColumnSelectorFactory
+{
+  // Signature of the clustering columns; positions align with the clusteringValues array.
+  private final RowSignature clusteringColumns;
+  private final int maxVectorSize;
+  private VectorColumnSelectorFactory delegate;
+  // Constant values for the current cluster group, one per clustering column.
+  private Object[] clusteringValues;
+  // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their
+  // cached inner state.
+  private long generation;
+
+  /**
+   * Convenience overload that derives {@code maxVectorSize} from the supplied delegate. Used by single-group
+   * dispatch where the delegate is the per-group {@link VectorColumnSelectorFactory} from the cursor itself.
+   */
+  public ClusteringVectorColumnSelectorFactory(
+      VectorColumnSelectorFactory delegate,
+      RowSignature clusteringColumns,
+      Object[] clusteringValues
+  )
+  {
+    this(delegate, clusteringColumns, clusteringValues, delegate.getMaxVectorSize());
+  }
+
+  public ClusteringVectorColumnSelectorFactory(
+      VectorColumnSelectorFactory delegate,
+      RowSignature clusteringColumns,
+      Object[] clusteringValues,
+      int maxVectorSize
+  )
+  {
+    this.clusteringColumns = clusteringColumns;
+    this.maxVectorSize = maxVectorSize;
+    // Validation of delegate/values is centralized in setDelegate.
+    // NOTE(review): setDelegate is a public overridable method invoked from the constructor — safe only while this
+    // class is not subclassed; consider making the class or method final.
+    setDelegate(delegate, clusteringValues);
+  }
+
+  /**
+   * Update the underlying delegate and the constant clustering values for the current cluster group. Called by a
+   * multi-group {@code ConcatenatingVectorCursor} on each group transition.
+   */
+  public void setDelegate(VectorColumnSelectorFactory delegate, Object[] clusteringValues)
+  {
+    if (clusteringValues == null || clusteringValues.length != clusteringColumns.size()) {
+      throw DruidException.defensive(
+          "clusteringValues length [%s] must match clusteringColumns size [%s]",
+          clusteringValues == null ? "null" : clusteringValues.length,
+          clusteringColumns.size()
+      );
+    }
+    this.delegate = delegate;
+    this.clusteringValues = clusteringValues;
+    this.generation++;
+  }
+
+  // Package-private hooks used by the selector wrappers defined below.
+
+  VectorColumnSelectorFactory getDelegate()
+  {
+    return delegate;
+  }
+
+  long getGeneration()
+  {
+    return generation;
+  }
+
+  /** Constant value of the {@code idx}-th clustering column for the current group. */
+  Object currentValue(int idx)
+  {
+    return clusteringValues[idx];
+  }
+
+  @Override
+  public ReadableVectorInspector getReadableVectorInspector()
+  {
+    // Vector id/size info always reflects the current delegate cursor.
+    return delegate.getReadableVectorInspector();
+  }
+
+  @Override
+  public int getMaxVectorSize()
+  {
+    // Fixed at construction; deliberately not re-read from the delegate after group transitions.
+    return maxVectorSize;
+  }
+
+  @Override
+  public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec)
+  {
+    // Clustering columns resolve to constant selectors; everything else delegates (generation-aware).
+    final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension());
+    if (idx < 0) {
+      return new DelegatingSingleValueDimensionVectorSelector(this, dimensionSpec);
+    }
+    return new ClusteringSingleValueDimensionVectorSelector(this, idx, dimensionSpec);
+  }
+
+  @Override
+  public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec)
+  {
+    final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension());
+    if (idx < 0) {
+      return new DelegatingMultiValueDimensionVectorSelector(this, dimensionSpec);
+    }
+    // Clustering values are single-typed primitives. Multi-value requests on a clustering column shouldn't happen
+    // in practice; throw to surface caller bugs rather than silently misbehave.
+    // Pass the dimension as a format arg (not string concatenation): DruidException.defensive treats its first
+    // argument as a format string, so a literal '%' in a dimension name would otherwise corrupt the message. This
+    // also matches the defensive(...) convention used elsewhere in this class.
+    throw DruidException.defensive(
+        "multi-value vector selector not supported for clustering column [%s]",
+        dimensionSpec.getDimension()
+    );
+  }
+
+  @Override
+  public VectorValueSelector makeValueSelector(String column)
+  {
+    // Clustering columns are served as constants; all other columns pass through to the delegate.
+    final int clusteringIndex = clusteringColumns.indexOf(column);
+    return clusteringIndex < 0
+           ? new DelegatingVectorValueSelector(this, column)
+           : new ClusteringVectorValueSelector(this, clusteringIndex);
+  }
+
+  @Override
+  public VectorObjectSelector makeObjectSelector(String column)
+  {
+    final int clusteringIndex = clusteringColumns.indexOf(column);
+    return clusteringIndex < 0
+           ? new DelegatingVectorObjectSelector(this, column)
+           : new ClusteringVectorObjectSelector(this, clusteringIndex);
+  }
+
+  @Nullable
+  @Override
+  public ColumnCapabilities getColumnCapabilities(String column)
+  {
+    final int idx = clusteringColumns.indexOf(column);
+    if (idx < 0) {
+      return delegate.getColumnCapabilities(column);
+    }
+    // Clustering columns are constant within a group: single-value strings or simple numerics.
+    final ColumnType type = clusteringColumns.getColumnType(idx).orElseThrow();
+    if (type.is(ValueType.STRING)) {
+      return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities();
+    }
+    return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type);
+  }
+
+  /**
+   * Constant-per-group single-value dimension selector for a clustering column. Its constant inner selector
+   * (including extraction-fn application) is rebuilt whenever the parent factory moves to a new cluster group.
+   */
+  private static final class ClusteringSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final int idx;
+    private final DimensionSpec spec;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private SingleValueDimensionVectorSelector cachedInner;
+
+    private ClusteringSingleValueDimensionVectorSelector(
+        ClusteringVectorColumnSelectorFactory parent,
+        int idx,
+        DimensionSpec spec
+    )
+    {
+      this.parent = parent;
+      this.idx = idx;
+      this.spec = spec;
+    }
+
+    private SingleValueDimensionVectorSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration == currentGeneration) {
+        return cachedInner;
+      }
+      // Group changed (or first use): stringify the group's constant value and apply the extraction fn once.
+      final Object raw = parent.currentValue(idx);
+      final String stringValue = raw == null ? null : String.valueOf(raw);
+      final String afterExtraction =
+          spec.getExtractionFn() == null ? stringValue : spec.getExtractionFn().apply(stringValue);
+      cachedInner = ConstantVectorSelectors.singleValueDimensionVectorSelector(
+          parent.getReadableVectorInspector(),
+          afterExtraction
+      );
+      cachedGeneration = currentGeneration;
+      return cachedInner;
+    }
+
+    @Override
+    public int[] getRowVector()
+    {
+      return currentInner().getRowVector();
+    }
+
+    @Override
+    public int getValueCardinality()
+    {
+      return currentInner().getValueCardinality();
+    }
+
+    @Nullable
+    @Override
+    public String lookupName(int id)
+    {
+      return currentInner().lookupName(id);
+    }
+
+    @Override
+    public boolean nameLookupPossibleInAdvance()
+    {
+      return currentInner().nameLookupPossibleInAdvance();
+    }
+
+    @Nullable
+    @Override
+    public IdLookup idLookup()
+    {
+      return currentInner().idLookup();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return parent.getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      // Tracks the live vector size of the parent's current delegate cursor.
+      return parent.getReadableVectorInspector().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Constant-per-group numeric value selector for a clustering column; the constant inner selector is rebuilt
+   * on each group transition.
+   */
+  private static final class ClusteringVectorValueSelector implements VectorValueSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final int idx;
+    private long cachedGeneration = -1;
+    private VectorValueSelector cachedInner;
+
+    private ClusteringVectorValueSelector(ClusteringVectorColumnSelectorFactory parent, int idx)
+    {
+      this.parent = parent;
+      this.idx = idx;
+    }
+
+    private VectorValueSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration == currentGeneration) {
+        return cachedInner;
+      }
+      final Object raw = parent.currentValue(idx);
+      // VectorValueSelector is meant for DOUBLE/FLOAT/LONG. STRING clustering shouldn't reach this method per the
+      // VectorColumnSelectorFactory contract; pass null (treated as null-numeric) to be conservative.
+      final Number number = (raw instanceof Number) ? (Number) raw : null;
+      cachedInner = ConstantVectorSelectors.vectorValueSelector(parent.getReadableVectorInspector(), number);
+      cachedGeneration = currentGeneration;
+      return cachedInner;
+    }
+
+    @Override
+    public long[] getLongVector()
+    {
+      return currentInner().getLongVector();
+    }
+
+    @Override
+    public float[] getFloatVector()
+    {
+      return currentInner().getFloatVector();
+    }
+
+    @Override
+    public double[] getDoubleVector()
+    {
+      return currentInner().getDoubleVector();
+    }
+
+    @Nullable
+    @Override
+    public boolean[] getNullVector()
+    {
+      return currentInner().getNullVector();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return parent.getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return parent.getReadableVectorInspector().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Serves a clustering column as a constant object vector; the constant selector is rebuilt whenever the parent
+   * factory moves to a new cluster group.
+   */
+  private static final class ClusteringVectorObjectSelector implements VectorObjectSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final int valueIndex;
+    private long builtAtGeneration = -1;
+    private VectorObjectSelector constantSelector;
+
+    private ClusteringVectorObjectSelector(ClusteringVectorColumnSelectorFactory parent, int valueIndex)
+    {
+      this.parent = parent;
+      this.valueIndex = valueIndex;
+    }
+
+    private VectorObjectSelector currentInner()
+    {
+      final long generation = parent.getGeneration();
+      if (builtAtGeneration != generation) {
+        // First use or group transition: rebuild the constant selector from the group's current value.
+        constantSelector = ConstantVectorSelectors.vectorObjectSelector(
+            parent.getReadableVectorInspector(),
+            parent.currentValue(valueIndex)
+        );
+        builtAtGeneration = generation;
+      }
+      return constantSelector;
+    }
+
+    @Override
+    public Object[] getObjectVector()
+    {
+      return currentInner().getObjectVector();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return parent.getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return parent.getReadableVectorInspector().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Pass-through single-value dimension selector for a non-clustering column; re-resolves its inner selector from
+   * the parent's current delegate after each group transition.
+   */
+  private static final class DelegatingSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final DimensionSpec spec;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private SingleValueDimensionVectorSelector cachedInner;
+
+    private DelegatingSingleValueDimensionVectorSelector(
+        ClusteringVectorColumnSelectorFactory parent,
+        DimensionSpec spec
+    )
+    {
+      this.parent = parent;
+      this.spec = spec;
+    }
+
+    private SingleValueDimensionVectorSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration != currentGeneration) {
+        cachedInner = parent.getDelegate().makeSingleValueDimensionSelector(spec);
+        cachedGeneration = currentGeneration;
+      }
+      return cachedInner;
+    }
+
+    @Override
+    public int[] getRowVector()
+    {
+      return currentInner().getRowVector();
+    }
+
+    @Override
+    public int getValueCardinality()
+    {
+      return currentInner().getValueCardinality();
+    }
+
+    @Nullable
+    @Override
+    public String lookupName(int id)
+    {
+      return currentInner().lookupName(id);
+    }
+
+    @Override
+    public boolean nameLookupPossibleInAdvance()
+    {
+      return currentInner().nameLookupPossibleInAdvance();
+    }
+
+    @Nullable
+    @Override
+    public IdLookup idLookup()
+    {
+      return currentInner().idLookup();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return currentInner().getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return currentInner().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Pass-through multi-value dimension selector for a non-clustering column; re-resolves its inner selector from
+   * the parent's current delegate after each group transition.
+   */
+  private static final class DelegatingMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final DimensionSpec spec;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private MultiValueDimensionVectorSelector cachedInner;
+
+    private DelegatingMultiValueDimensionVectorSelector(
+        ClusteringVectorColumnSelectorFactory parent,
+        DimensionSpec spec
+    )
+    {
+      this.parent = parent;
+      this.spec = spec;
+    }
+
+    private MultiValueDimensionVectorSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration != currentGeneration) {
+        cachedInner = parent.getDelegate().makeMultiValueDimensionSelector(spec);
+        cachedGeneration = currentGeneration;
+      }
+      return cachedInner;
+    }
+
+    @Override
+    public IndexedInts[] getRowVector()
+    {
+      return currentInner().getRowVector();
+    }
+
+    @Override
+    public int getValueCardinality()
+    {
+      return currentInner().getValueCardinality();
+    }
+
+    @Nullable
+    @Override
+    public String lookupName(int id)
+    {
+      return currentInner().lookupName(id);
+    }
+
+    @Override
+    public boolean nameLookupPossibleInAdvance()
+    {
+      return currentInner().nameLookupPossibleInAdvance();
+    }
+
+    @Nullable
+    @Override
+    public IdLookup idLookup()
+    {
+      return currentInner().idLookup();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return currentInner().getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return currentInner().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Pass-through numeric vector selector for a non-clustering column; re-resolves its inner selector from the
+   * parent's current delegate after each group transition.
+   */
+  private static final class DelegatingVectorValueSelector implements VectorValueSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final String column;
+    private long builtAtGeneration = -1;
+    private VectorValueSelector current;
+
+    private DelegatingVectorValueSelector(ClusteringVectorColumnSelectorFactory parent, String column)
+    {
+      this.parent = parent;
+      this.column = column;
+    }
+
+    private VectorValueSelector currentInner()
+    {
+      final long generation = parent.getGeneration();
+      if (builtAtGeneration == generation) {
+        return current;
+      }
+      // First use or group transition: resolve against the delegate that is current right now.
+      current = parent.getDelegate().makeValueSelector(column);
+      builtAtGeneration = generation;
+      return current;
+    }
+
+    @Override
+    public long[] getLongVector()
+    {
+      return currentInner().getLongVector();
+    }
+
+    @Override
+    public float[] getFloatVector()
+    {
+      return currentInner().getFloatVector();
+    }
+
+    @Override
+    public double[] getDoubleVector()
+    {
+      return currentInner().getDoubleVector();
+    }
+
+    @Nullable
+    @Override
+    public boolean[] getNullVector()
+    {
+      return currentInner().getNullVector();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return currentInner().getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return currentInner().getCurrentVectorSize();
+    }
+  }
+
+  /**
+   * Pass-through object vector selector for a non-clustering column; re-resolves its inner selector from the
+   * parent's current delegate after each group transition.
+   */
+  private static final class DelegatingVectorObjectSelector implements VectorObjectSelector
+  {
+    private final ClusteringVectorColumnSelectorFactory parent;
+    private final String column;
+    // Generation at which cachedInner was built; -1 forces a build on first use.
+    private long cachedGeneration = -1;
+    private VectorObjectSelector cachedInner;
+
+    private DelegatingVectorObjectSelector(ClusteringVectorColumnSelectorFactory parent, String column)
+    {
+      this.parent = parent;
+      this.column = column;
+    }
+
+    private VectorObjectSelector currentInner()
+    {
+      final long currentGeneration = parent.getGeneration();
+      if (cachedGeneration != currentGeneration) {
+        cachedInner = parent.getDelegate().makeObjectSelector(column);
+        cachedGeneration = currentGeneration;
+      }
+      return cachedInner;
+    }
+
+    @Override
+    public Object[] getObjectVector()
+    {
+      return currentInner().getObjectVector();
+    }
+
+    @Override
+    public int getMaxVectorSize()
+    {
+      return currentInner().getMaxVectorSize();
+    }
+
+    @Override
+    public int getCurrentVectorSize()
+    {
+      return currentInner().getCurrentVectorSize();
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/ProjectionSchema.java b/processing/src/main/java/org/apache/druid/segment/projections/ProjectionSchema.java
index e8c6b249de6c..2d9c28dd1be9 100644
--- a/processing/src/main/java/org/apache/druid/segment/projections/ProjectionSchema.java
+++ b/processing/src/main/java/org/apache/druid/segment/projections/ProjectionSchema.java
@@ -43,7 +43,8 @@
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = AggregateProjectionSpec.TYPE_NAME, value = AggregateProjectionSchema.class),
@JsonSubTypes.Type(name = TableProjectionSchema.TYPE_NAME, value = TableProjectionSchema.class),
- @JsonSubTypes.Type(name = RollupTableProjectionSchema.TYPE_NAME, value = RollupTableProjectionSchema.class)
+ @JsonSubTypes.Type(name = RollupTableProjectionSchema.TYPE_NAME, value = RollupTableProjectionSchema.class),
+ @JsonSubTypes.Type(name = ClusteredValueGroupsBaseTableSchema.TYPE_NAME, value = ClusteredValueGroupsBaseTableSchema.class)
})
public interface ProjectionSchema
{
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/Projections.java b/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
index cc037d730dae..19ce75b8cb2b 100644
--- a/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
+++ b/processing/src/main/java/org/apache/druid/segment/projections/Projections.java
@@ -19,8 +19,11 @@
package org.apache.druid.segment.projections;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
import com.google.common.collect.RangeSet;
import org.apache.druid.data.input.impl.AggregateProjectionSpec;
+import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
@@ -30,18 +33,30 @@
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.filter.DimFilter;
+import org.apache.druid.query.filter.DruidPredicateMatch;
+import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.NullFilter;
+import org.apache.druid.query.filter.TypedInFilter;
import org.apache.druid.segment.AggregateProjectionMetadata;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.filter.AndFilter;
+import org.apache.druid.segment.filter.NotFilter;
+import org.apache.druid.segment.filter.OrFilter;
import org.apache.druid.utils.CollectionUtils;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -55,8 +70,11 @@ public class Projections
{
public static final String BASE_TABLE_PROJECTION_NAME = "__base";
+ private static final String CLUSTER_GROUP_PREFIX = BASE_TABLE_PROJECTION_NAME + "$";
+
private static final ConcurrentHashMap PERIOD_GRAN_CACHE = new ConcurrentHashMap<>();
+
public static String validateProjectionName(@Nullable String name)
{
if (name == null || name.isEmpty()) {
@@ -530,6 +548,241 @@ public static String getProjectionSegmentInternalFilePrefix(ProjectionSchema pro
return projectionSchema.getName() + "/";
}
+ /**
+ * Check whether {@code type} is an allowed cluster group clustering-column type. Clustering is restricted to the
+ * primitive scalar types: {@link ValueType#STRING}, {@link ValueType#LONG}, {@link ValueType#DOUBLE},
+ * {@link ValueType#FLOAT}. Complex and array types are rejected.
+ */
+ public static boolean isAllowedClusteringType(@Nullable ColumnType type)
+ {
+ return type != null && type.anyOf(ValueType.STRING, ValueType.LONG, ValueType.DOUBLE, ValueType.FLOAT);
+ }
+
+ /**
+ * Segment internal file prefix + column for a cluster group's per-group column data:
+ * {@code __base$<id0>_<id1>.../<column>}
+ */
+ public static String getClusterGroupSegmentInternalFileName(List clusteringValueIds, String column)
+ {
+ return getClusterGroupSegmentInternalFilePrefix(clusteringValueIds) + column;
+ }
+
+ public static String getClusterGroupSegmentInternalFilePrefix(List clusteringValueIds)
+ {
+ if (clusteringValueIds == null || clusteringValueIds.isEmpty()) {
+ throw DruidException.defensive("clusteringValueIds must not be null or empty");
+ }
+ final StringBuilder sb = new StringBuilder(CLUSTER_GROUP_PREFIX);
+ for (int i = 0; i < clusteringValueIds.size(); i++) {
+ if (i > 0) {
+ sb.append('_');
+ }
+ sb.append(clusteringValueIds.get(i));
+ }
+ sb.append('/');
+ return sb.toString();
+ }
+
+ /**
+ * Returns the subset of {@code groups} that a query filter can't rule out from clustering values alone.
+ * Filters not referencing any clustering column are conservatively retained for every group.
+ */
+ public static List pruneClusterGroups(
+ List groups,
+ @Nullable Filter filter,
+ @Nullable VirtualColumns queryVirtualColumns
+ )
+ {
+ if (filter == null || groups.isEmpty()) {
+ return groups;
+ }
+ final VirtualColumns queryVcs = queryVirtualColumns == null ? VirtualColumns.EMPTY : queryVirtualColumns;
+ final List kept = new ArrayList<>(groups.size());
+ for (TableClusterGroupSpec group : groups) {
+ if (matchesClusterGroup(filter, group, queryVcs)) {
+ kept.add(group);
+ }
+ }
+ return kept;
+ }
+
+ private static boolean matchesClusterGroup(
+ Filter filter,
+ TableClusterGroupSpec group,
+ VirtualColumns queryVcs
+ )
+ {
+ final ClusteredValueGroupsBaseTableSchema summary = group.getSummary();
+ final RowSignature clusteringColumns = summary.getClusteringColumns();
+ final VirtualColumns groupVcs = summary.getVirtualColumns();
+
+ // remap query-side column names that are virtual-column-equivalent to a clustering column
+ final Map remap = buildClusterGroupRemap(
+ filter.getRequiredColumns(),
+ clusteringColumns,
+ queryVcs,
+ groupVcs
+ );
+ final Filter rewritten = remap.isEmpty() ? filter : filter.rewriteRequiredColumns(remap);
+
+ // keep unless provably FALSE; UNKNOWN (filter references non-clustering data) keeps conservatively
+ return matchesClusterGroupFilter(rewritten, clusteringColumns, group.lookupClusteringValues())
+ != DruidPredicateMatch.FALSE;
+ }
+
+ /**
+ * Build a name-rewrite map so the pruner can walk the filter against a group's clustering tuple. Three cases per
+ * filter column:
+ *
+ * <ul><li>Query virtual column by that name (shadows any physical of the same name): prunable iff the group has an
+ * equivalent VC whose output is a clustering column. Same-name equivalence is an identity entry; different-name
+ * equivalence remaps. Otherwise, the column is remapped to a non-clustering sentinel so the pruner returns
+ * UNKNOWN; without that, a query virtual column sharing a clustering column's name would be mis-matched against
+ * the clustering value.</li>
+ * <li>No query virtual column, but the name is a clustering column: identity entry, filter walks it directly.</li>
+ * <li>Neither query virtual column nor clustering column: identity entry, pruner returns UNKNOWN at that leaf.</li></ul>
+ *
+ * The query virtual column check must come first because query VC names are allowed to shadow physical/clustering
+ * column names.
+ *
+ * If no column needs a non-identity rewrite the result is empty and the caller skips the rewrite call. When any
+ * non-identity rewrite is present, identity entries are populated for all remaining required columns because
+ * {@link Filter#rewriteRequiredColumns} requires an entry for every referenced column (missing entries throw).
+ */
+ private static Map buildClusterGroupRemap(
+ Set requiredColumns,
+ RowSignature clusteringColumns,
+ VirtualColumns queryVcs,
+ VirtualColumns groupVcs
+ )
+ {
+ // If a query virtual column isn't equivalent to a clustering column, we remap to a sentinel name that is guaranteed
+ // not to be a clustering column so the pruner returns UNKNOWN for any leaf referencing it; compute it lazily
+ final Supplier sentinel = Suppliers.memoize(() -> {
+ String candidate = "__nonClusteringRef";
+ while (clusteringColumns.indexOf(candidate) >= 0) {
+ candidate = "_" + candidate;
+ }
+ return candidate;
+ });
+
+ Map remap = null;
+ for (String col : requiredColumns) {
+ final VirtualColumns.Node queryNode = queryVcs.getNode(col);
+ if (queryNode == null) {
+ continue;
+ }
+ // query treats `col` as a virtual column, shadowing any physical/clustering column of the same name
+ final VirtualColumn equivalent = groupVcs.findEquivalent(queryNode);
+ final String target = equivalent != null && clusteringColumns.contains(equivalent.getOutputName())
+ ? equivalent.getOutputName()
+ : null;
+ if (target == null) {
+ // query virtual column has no group-side equivalent whose output is a clustering column (regardless of whether
+ // it shares a clustering column's name); remap to the sentinel so the pruner treats it as non-clustering
+ if (remap == null) {
+ remap = new HashMap<>();
+ }
+ remap.put(col, sentinel.get());
+ } else if (!col.equals(target)) {
+ if (remap == null) {
+ remap = new HashMap<>();
+ }
+ remap.put(col, target);
+ }
+ // else: same name, handled below if remap ends up non-empty
+ }
+ if (remap == null) {
+ return Collections.emptyMap();
+ }
+ // fill identity entries for the remaining required columns; rewriteRequiredColumns rejects partial maps
+ for (String col : requiredColumns) {
+ remap.putIfAbsent(col, col);
+ }
+ return remap;
+ }
+
+ /**
+ * Walk a (remapped) filter against a group's constant clustering values. Returns a {@link DruidPredicateMatch}
+ * 3VL result: TRUE = filter holds for every row in the group, FALSE = filter holds for no row (group can be
+ * pruned), UNKNOWN = can't decide from clustering values alone (filter references non-clustering data, or
+ * unrecognized filter type). UNKNOWN is distinct from FALSE so that {@code NOT(can't-tell)} stays
+ * {@code can't-tell} rather than flipping to "definitely false" and silently pruning live groups.
+ */
+ private static DruidPredicateMatch matchesClusterGroupFilter(
+ Filter filter,
+ RowSignature clusteringColumns,
+ Object[] clusteringValues
+ )
+ {
+ if (filter instanceof AndFilter andFilter) {
+ DruidPredicateMatch result = DruidPredicateMatch.TRUE; // identity for AND
+ for (Filter sub : andFilter.getFilters()) {
+ result = DruidPredicateMatch.and(result, matchesClusterGroupFilter(sub, clusteringColumns, clusteringValues));
+ if (result == DruidPredicateMatch.FALSE) {
+ return result; // short-circuit: AND with false stays false
+ }
+ }
+ return result;
+ }
+
+ if (filter instanceof OrFilter orFilter) {
+ DruidPredicateMatch result = DruidPredicateMatch.FALSE; // identity for OR
+ for (Filter sub : orFilter.getFilters()) {
+ result = DruidPredicateMatch.or(result, matchesClusterGroupFilter(sub, clusteringColumns, clusteringValues));
+ if (result == DruidPredicateMatch.TRUE) {
+ return result; // short-circuit: OR with true stays true
+ }
+ }
+ return result;
+ }
+
+ if (filter instanceof NotFilter notFilter) {
+ return DruidPredicateMatch.not(
+ matchesClusterGroupFilter(notFilter.getBaseFilter(), clusteringColumns, clusteringValues)
+ );
+ }
+
+ if (filter instanceof NullFilter isNull) {
+ final int idx = clusteringColumns.indexOf(isNull.getColumn());
+ if (idx < 0) {
+ return DruidPredicateMatch.UNKNOWN;
+ }
+ return DruidPredicateMatch.of(clusteringValues[idx] == null);
+ }
+
+ if (filter instanceof EqualityFilter eq) {
+ final int idx = clusteringColumns.indexOf(eq.getColumn());
+ if (idx < 0) {
+ return DruidPredicateMatch.UNKNOWN;
+ }
+ // EqualityFilter doesn't match nulls; constructor also rejects null match values.
+ if (clusteringValues[idx] == null) {
+ return DruidPredicateMatch.FALSE;
+ }
+ return DruidPredicateMatch.of(Objects.equals(clusteringValues[idx], eq.getMatchValue()));
+ }
+
+ if (filter instanceof TypedInFilter in) {
+ final int idx = clusteringColumns.indexOf(in.getColumn());
+ if (idx < 0) {
+ return DruidPredicateMatch.UNKNOWN;
+ }
+ // TypedInFilter matches nulls if present in the values list. Iterate explicitly — immutable List impls
+ // (List.of, ImmutableList) NPE on contains(null).
+ final Object val = clusteringValues[idx];
+ for (Object v : in.getSortedValues()) {
+ if (Objects.equals(v, val)) {
+ return DruidPredicateMatch.TRUE;
+ }
+ }
+ return DruidPredicateMatch.FALSE;
+ }
+
+ // anything else, fall through as UNKNOWN (this could be improved in the future)
+ return DruidPredicateMatch.UNKNOWN;
+ }
+
/**
* Check that the query {@link CursorBuildSpec} either contains the entire data interval, or that the query interval
* is aligned with {@link AggregateProjectionSchema#getEffectiveGranularity()}
diff --git a/processing/src/main/java/org/apache/druid/segment/projections/TableClusterGroupSpec.java b/processing/src/main/java/org/apache/druid/segment/projections/TableClusterGroupSpec.java
new file mode 100644
index 000000000000..96d1196cab76
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/projections/TableClusterGroupSpec.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.segment.column.RowSignature;
+
+import javax.annotation.Nullable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A single cluster group within a clustered base table, with all rows sharing the clustering-column tuple given by
+ * {@link #lookupClusteringValues()}.
+ *
+ * Shape is very minimal, containing only clustering value ids and row count; other things such as columns, ordering,
+ * dictionaries, etc. live on the parent {@link ClusteredValueGroupsBaseTableSchema} as they are identical from group
+ * to group within a segment. The parent summary is attached after construction via {@link #setSummary} and reached
+ * via {@link #getSummary}.
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
+public class TableClusterGroupSpec
+{
+ public static final String TYPE_NAME = "cluster-base-table";
+
+ /**
+ * Clustering value id tuple (positions in the parent summary's per-column dictionaries).
+ */
+ private final List clusteringValueIds;
+
+ /**
+ * Number of rows in this cluster group's per-group column data.
+ */
+ private final int numRows;
+
+ /**
+ * Parent back-reference, set once via {@link #setSummary} during the schema constructor. Readers observe the
+ * spec only after the holding {@code SimpleQueryableIndex}'s final field publishes it, so non-final is safe.
+ */
+ @Nullable
+ private ClusteredValueGroupsBaseTableSchema summary;
+
+ @JsonCreator
+ public TableClusterGroupSpec(
+ @JsonProperty("clusteringValueIds") List clusteringValueIds,
+ @JsonProperty("numRows") @Nullable Integer numRows
+ )
+ {
+ if (clusteringValueIds == null) {
+ throw DruidException.defensive("clusteringValueIds must not be null");
+ }
+ this.clusteringValueIds = List.copyOf(clusteringValueIds);
+ this.numRows = numRows == null ? 0 : numRows;
+ this.summary = null;
+ }
+
+ /**
+ * Wire up the parent summary. Called once per spec by {@link ClusteredValueGroupsBaseTableSchema}'s constructor.
+ */
+ public void setSummary(ClusteredValueGroupsBaseTableSchema summary)
+ {
+ DruidException.conditionalDefensive(summary != null, "summary must not be null");
+ DruidException.conditionalDefensive(this.summary == null, "summary already set");
+
+ final RowSignature clusteringColumns = summary.getClusteringColumns();
+ final int numCols = clusteringColumns.size();
+
+ DruidException.conditionalDefensive(
+ clusteringValueIds.size() == numCols,
+ "clusteringValueIds size [%s] does not match summary clusteringColumns size [%s]",
+ clusteringValueIds.size(),
+ numCols
+ );
+
+ this.summary = summary;
+ }
+
+ /**
+ * Returns the parent summary. Throws if {@link #setSummary} hasn't been called yet.
+ */
+ public ClusteredValueGroupsBaseTableSchema getSummary()
+ {
+ DruidException.conditionalDefensive(
+ this.summary != null,
+ "TableClusterGroupSpec.setSummary must be called before this method"
+ );
+ return summary;
+ }
+
+ /**
+ * Typed clustering values for this group, materialized fresh from the summary's dictionaries on each call.
+ * Length and order match {@link ClusteredValueGroupsBaseTableSchema#getClusteringColumns()}. Callers that walk
+ * the array repeatedly should cache the returned reference.
+ */
+ public Object[] lookupClusteringValues()
+ {
+ final ClusteredValueGroupsBaseTableSchema s = getSummary();
+ final int numCols = clusteringValueIds.size();
+ final Object[] out = new Object[numCols];
+ for (int i = 0; i < numCols; i++) {
+ out[i] = s.lookupClusteringValue(i, clusteringValueIds.get(i));
+ }
+ return out;
+ }
+
+ /**
+ * Dictionary ids identifying this group, one per clustering column.
+ */
+ @JsonProperty
+ public List getClusteringValueIds()
+ {
+ return clusteringValueIds;
+ }
+
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ public int getNumRows()
+ {
+ return numRows;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ TableClusterGroupSpec that = (TableClusterGroupSpec) o;
+ // Specs are only compared within a single segment context, where segment-local IDs uniquely identify a tuple.
+ return Objects.equals(clusteringValueIds, that.clusteringValueIds);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hashCode(clusteringValueIds);
+ }
+
+ @Override
+ public String toString()
+ {
+ if (summary == null) {
+ return "TableClusterGroupSpec{" +
+ "clusteringValues=" + "<summary not set>" +
+ '}';
+ }
+ return "TableClusterGroupSpec{" +
+ "clusteringValues=" + Arrays.toString(lookupClusteringValues()) +
+ '}';
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ConcatenatingVectorCursor.java b/processing/src/main/java/org/apache/druid/segment/vector/ConcatenatingVectorCursor.java
new file mode 100644
index 000000000000..55f374cd4a58
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/vector/ConcatenatingVectorCursor.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.vector;
+
+import com.google.common.base.Supplier;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.segment.CursorHolder;
+import org.apache.druid.segment.projections.ClusteringVectorColumnSelectorFactory;
+
+import java.util.List;
+
+/**
+ * Vector-cursor counterpart of {@link org.apache.druid.segment.ConcatenatingCursor}. Walks a sequence of per-group
+ * {@link VectorCursor}s back-to-back, presenting them to the caller as a single vector cursor over a clustered base
+ * table. Vector boundaries do not align with cluster-group boundaries; the last vector of each group is naturally
+ * partial (its {@link #getCurrentVectorSize()} returns less than max), and the next {@link #advance()} swaps in the
+ * next group's vector cursor without trying to merge across groups.
+ * On each group transition the wrapper {@link ClusteringVectorColumnSelectorFactory} updates its underlying
+ * delegate and clustering values, so previously-acquired delegating vector selectors observe the new group's data via
+ * generation-counter cache invalidation.
+ *
+ * The outer {@link CursorHolder} owns the lifetime of the per-group holders (typically by registering each one
+ * with a {@link org.apache.druid.java.util.common.io.Closer} as part of the supplier itself)
+ */
+public final class ConcatenatingVectorCursor implements VectorCursor
+{
+ private final List> holderSuppliers;
+ private final List clusteringValuesByGroup;
+ private final ClusteringVectorColumnSelectorFactory wrapperFactory;
+
+ private int currentIdx;
+ private VectorCursor currentCursor;
+ private boolean initialized;
+
+ public ConcatenatingVectorCursor(
+ List> holderSuppliers,
+ List clusteringValuesByGroup,
+ ClusteringVectorColumnSelectorFactory wrapperFactory
+ )
+ {
+ if (holderSuppliers.size() != clusteringValuesByGroup.size()) {
+ throw DruidException.defensive(
+ "holderSuppliers size [%s] must equal clusteringValuesByGroup size [%s]",
+ holderSuppliers.size(),
+ clusteringValuesByGroup.size()
+ );
+ }
+ if (holderSuppliers.isEmpty()) {
+ throw DruidException.defensive("ConcatenatingVectorCursor requires at least one cluster group");
+ }
+ this.holderSuppliers = holderSuppliers;
+ this.clusteringValuesByGroup = clusteringValuesByGroup;
+ this.wrapperFactory = wrapperFactory;
+ this.currentIdx = -1;
+ }
+
+ private void initializeIfNeeded()
+ {
+ if (initialized) {
+ return;
+ }
+ initialized = true;
+ advanceToNextNonEmptyGroup();
+ }
+
+ /**
+ * Open the next group whose vector cursor has at least one row. Skips empty groups. Sets {@code currentCursor =
+ * null} when all groups are exhausted.
+ */
+ private void advanceToNextNonEmptyGroup()
+ {
+ while (++currentIdx < holderSuppliers.size()) {
+ final CursorHolder holder = holderSuppliers.get(currentIdx).get();
+ final VectorCursor cursor = holder.asVectorCursor();
+ if (cursor != null && !cursor.isDone()) {
+ currentCursor = cursor;
+ wrapperFactory.setDelegate(cursor.getColumnSelectorFactory(), clusteringValuesByGroup.get(currentIdx));
+ return;
+ }
+ // Group has no rows after filter application; try the next.
+ }
+ currentCursor = null;
+ }
+
+ @Override
+ public VectorColumnSelectorFactory getColumnSelectorFactory()
+ {
+ initializeIfNeeded();
+ return wrapperFactory;
+ }
+
+ @Override
+ public void advance()
+ {
+ initializeIfNeeded();
+ if (currentCursor == null) {
+ return;
+ }
+ currentCursor.advance();
+ if (currentCursor.isDone()) {
+ advanceToNextNonEmptyGroup();
+ }
+ }
+
+ @Override
+ public boolean isDone()
+ {
+ initializeIfNeeded();
+ return currentCursor == null;
+ }
+
+ /**
+ * Rewind to before the first group. Does not close any per-group holders; those are owned by the outer
+ * {@link CursorHolder}. Subsequent {@link #advance()} / {@link #isDone()} re-fetch each group's vector cursor via
+ * {@link CursorHolder#asVectorCursor}.
+ */
+ @Override
+ public void reset()
+ {
+ currentIdx = -1;
+ currentCursor = null;
+ initialized = false;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return wrapperFactory.getMaxVectorSize();
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ initializeIfNeeded();
+ if (currentCursor == null) {
+ return 0;
+ }
+ return currentCursor.getCurrentVectorSize();
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/ConcatenatingCursorTest.java b/processing/src/test/java/org/apache/druid/segment/ConcatenatingCursorTest.java
new file mode 100644
index 000000000000..36d2f8c547fc
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/ConcatenatingCursorTest.java
@@ -0,0 +1,478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment;
+
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.projections.ClusteringColumnSelectorFactory;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
+
+class ConcatenatingCursorTest
+{
+ private static final RowSignature CLUSTER_SIGNATURE = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+
+ private final Closer closer = Closer.create();
+
+ @Test
+ void testWalksTwoNonEmptyGroupsBackToBack()
+ {
+ FakeCursorHolder a = new FakeCursorHolder(List.of("a1", "a2"));
+ FakeCursorHolder b = new FakeCursorHolder(List.of("b1"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"acme"}, new Object[]{"globex"}),
+ wrapper
+ );
+
+ ColumnValueSelector tenant = c.getColumnSelectorFactory().makeColumnValueSelector("tenant");
+ ColumnValueSelector metric = c.getColumnSelectorFactory().makeColumnValueSelector("metric");
+
+ // Group 1: tenant=acme, metric=a1
+ Assertions.assertEquals("acme", tenant.getObject());
+ Assertions.assertEquals("a1", metric.getObject());
+ c.advance();
+
+ // Group 1: tenant=acme, metric=a2
+ Assertions.assertEquals("acme", tenant.getObject());
+ Assertions.assertEquals("a2", metric.getObject());
+ c.advance();
+
+ // Group transition → tenant=globex, metric=b1
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals("globex", tenant.getObject());
+ Assertions.assertEquals("b1", metric.getObject());
+ c.advance();
+
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(a.closed);
+ Assertions.assertTrue(b.closed);
+ }
+
+ @Test
+ void testSkipsLeadingEmptyGroup()
+ {
+ FakeCursorHolder empty = new FakeCursorHolder(List.of());
+ FakeCursorHolder full = new FakeCursorHolder(List.of("x"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"placeholder"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(empty), holderSupplier(full)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ ColumnValueSelector tenant = c.getColumnSelectorFactory().makeColumnValueSelector("tenant");
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals("b", tenant.getObject());
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(empty.closed);
+ Assertions.assertTrue(full.closed);
+ }
+
+ @Test
+ void testSkipsTrailingEmptyGroup()
+ {
+ FakeCursorHolder full = new FakeCursorHolder(List.of("x"));
+ FakeCursorHolder empty = new FakeCursorHolder(List.of());
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(full), holderSupplier(empty)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ ColumnValueSelector tenant = c.getColumnSelectorFactory().makeColumnValueSelector("tenant");
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals("a", tenant.getObject());
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ }
+
+ @Test
+ void testAllEmptyGroups()
+ {
+ FakeCursorHolder e1 = new FakeCursorHolder(List.of());
+ FakeCursorHolder e2 = new FakeCursorHolder(List.of());
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"placeholder"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(e1), holderSupplier(e2)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ Assertions.assertTrue(c.isDone());
+ }
+
+ @Test
+ void testSingleGroupDegenerateCase()
+ {
+ FakeCursorHolder only = new FakeCursorHolder(List.of("x", "y"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(only)),
+ List.of(new Object[]{"a"}),
+ wrapper
+ );
+
+ ColumnValueSelector tenant = c.getColumnSelectorFactory().makeColumnValueSelector("tenant");
+ ColumnValueSelector metric = c.getColumnSelectorFactory().makeColumnValueSelector("metric");
+
+ Assertions.assertEquals("a", tenant.getObject());
+ Assertions.assertEquals("x", metric.getObject());
+ c.advance();
+ Assertions.assertEquals("a", tenant.getObject());
+ Assertions.assertEquals("y", metric.getObject());
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ }
+
+ @Test
+ void testCloseClosesAllOpenedHolders()
+ {
+ FakeCursorHolder a = new FakeCursorHolder(List.of("a1"));
+ FakeCursorHolder b = new FakeCursorHolder(List.of("b1"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"x"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"x"}, new Object[]{"y"}),
+ wrapper
+ );
+
+ // Walk through, opens both holders.
+ c.getColumnSelectorFactory();
+ c.advance();
+ c.advance(); // exhausts
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(a.closed);
+ Assertions.assertTrue(b.closed);
+ }
+
+ @Test
+ void testGroupsAreOpenedLazilyOnTransitionNotEagerly()
+ {
+ final boolean[] secondOpened = {false};
+ FakeCursorHolder first = new FakeCursorHolder(List.of("x"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ List> suppliers = new ArrayList<>();
+ suppliers.add(holderSupplier(first));
+ suppliers.add(() -> {
+ secondOpened[0] = true;
+ return new FakeCursorHolder(List.of("y"));
+ });
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ suppliers,
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ // Init opens first group only. Second is still untouched.
+ c.getColumnSelectorFactory();
+ Assertions.assertFalse(secondOpened[0]);
+
+ c.advance(); // exhausts first → opens second
+ Assertions.assertTrue(secondOpened[0]);
+ }
+
+ @Test
+ void testResetReIteratesWithoutClosingHolders()
+ {
+ // reset() must not close per-group holders; those are owned by the outer Closer. After reset, the cursor
+ // should re-iterate from the start; the suppliers are memoized so they return the same holders, and
+ // CursorHolder.asCursor() returns a fresh cursor each call.
+ FakeCursorHolder a = new FakeCursorHolder(List.of("a1", "a2"));
+ FakeCursorHolder b = new FakeCursorHolder(List.of("b1"));
+
+ ClusteringColumnSelectorFactory wrapper = new ClusteringColumnSelectorFactory(
+ new FakeFactory(List.of(), new int[]{0}),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ConcatenatingCursor c = new ConcatenatingCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"acme"}, new Object[]{"globex"}),
+ wrapper
+ );
+
+ ColumnValueSelector tenant = c.getColumnSelectorFactory().makeColumnValueSelector("tenant");
+ ColumnValueSelector metric = c.getColumnSelectorFactory().makeColumnValueSelector("metric");
+
+ // Walk to exhaustion.
+ Assertions.assertEquals("a1", metric.getObject());
+ c.advance();
+ Assertions.assertEquals("a2", metric.getObject());
+ c.advance();
+ Assertions.assertEquals("b1", metric.getObject());
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+
+ // Holders MUST remain open across reset; only the outer Closer closes them.
+ Assertions.assertFalse(a.closed);
+ Assertions.assertFalse(b.closed);
+
+ c.reset();
+
+ // Re-walk from the start, exercising both groups again.
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals("acme", tenant.getObject());
+ Assertions.assertEquals("a1", metric.getObject());
+ c.advance();
+ Assertions.assertEquals("a2", metric.getObject());
+ c.advance();
+ Assertions.assertEquals("globex", tenant.getObject());
+ Assertions.assertEquals("b1", metric.getObject());
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ }
+
+ private Supplier holderSupplier(FakeCursorHolder h)
+ {
+ return Suppliers.memoize(() -> closer.register(h))::get;
+ }
+
+ private static class FakeStringValueSelector implements ColumnValueSelector
+ {
+ private final List rows;
+ private final int[] offset;
+
+ FakeStringValueSelector(List rows, int[] offset)
+ {
+ this.rows = rows;
+ this.offset = offset;
+ }
+
+ @Override
+ public double getDouble()
+ {
+ return 0;
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return 0;
+ }
+
+ @Override
+ public long getLong()
+ {
+ return 0;
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return rows.get(offset[0]) == null;
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return rows.get(offset[0]);
+ }
+
+ @Override
+ public Class<?> classOfObject()
+ {
+ return String.class;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+ }
+ }
+
+ private static class FakeFactory implements ColumnSelectorFactory
+ {
+ private final List metricRows;
+ private final int[] offset;
+
+ FakeFactory(List metricRows, int[] offset)
+ {
+ this.metricRows = metricRows;
+ this.offset = offset;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ throw new UnsupportedOperationException("not used in these tests");
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ if ("metric".equals(columnName)) {
+ return new FakeStringValueSelector(metricRows, offset);
+ }
+ return NilColumnValueSelector.instance();
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return null;
+ }
+ }
+
+ private static class FakeCursor implements Cursor
+ {
+ private final List rows;
+ private final int[] offset = new int[]{0};
+ private final FakeFactory factory;
+
+ FakeCursor(List rows)
+ {
+ this.rows = rows;
+ this.factory = new FakeFactory(rows, offset);
+ }
+
+ @Override
+ public ColumnSelectorFactory getColumnSelectorFactory()
+ {
+ return factory;
+ }
+
+ @Override
+ public void advance()
+ {
+ offset[0]++;
+ }
+
+ @Override
+ public void advanceUninterruptibly()
+ {
+ offset[0]++;
+ }
+
+ @Override
+ public boolean isDone()
+ {
+ return offset[0] >= rows.size();
+ }
+
+ @Override
+ public boolean isDoneOrInterrupted()
+ {
+ return isDone();
+ }
+
+ @Override
+ public void reset()
+ {
+ offset[0] = 0;
+ }
+ }
+
+ private static class FakeCursorHolder implements CursorHolder
+ {
+ private final List rows;
+ private boolean closed;
+
+ FakeCursorHolder(List rows)
+ {
+ this.rows = rows;
+ }
+
+ @Override
+ public Cursor asCursor()
+ {
+ return new FakeCursor(rows);
+ }
+
+ @Override
+ public boolean canVectorize()
+ {
+ return false;
+ }
+
+ @Override
+ public List getOrdering()
+ {
+ return List.of();
+ }
+
+ @Override
+ public void close()
+ {
+ closed = true;
+ }
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorFactoryClusteredTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorFactoryClusteredTest.java
new file mode 100644
index 000000000000..881ddae87720
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorFactoryClusteredTest.java
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment;
+
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.segment.column.BaseColumnHolder;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.Indexed;
+import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.projections.ClusterGroupSchemaTestHelpers;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+class QueryableIndexCursorFactoryClusteredTest
+{
+ /**
+ * Build a clustered summary from typed tenant values. The helper derives dictionaries and dictionary-IDs for each
+ * group; the resulting specs are nested in the summary and wired via setSummary inside the schema constructor.
+ */
+ private static ClusteredValueGroupsBaseTableSchema summary(String... tenants)
+ {
+ final RowSignature clustering = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+ final List> tuples = new java.util.ArrayList<>(tenants.length);
+ for (String t : tenants) {
+ // One single-element clustering tuple per tenant (clustering is the single STRING column "tenant").
+ tuples.add(java.util.Arrays.asList(t));
+ }
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ tuples
+ );
+ return new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("tenant", ColumnHolder.TIME_COLUMN_NAME, "metric"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(OrderBy.ascending("tenant"), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ clustering,
+ null, // no shared columns — presumably the sharedColumns arg, matching other call sites; TODO confirm
+ built.dictionaries(),
+ built.specs()
+ );
+ }
+
+ /**
+ * Bare-bones {@link QueryableIndex} that exposes the clustered cluster summary + groups via the interface
+ * defaults, plus a stubbed {@link #getClusterGroupQueryableIndex} that returns a fake group sub-index whose
+ * column capabilities are programmable. Used to drive {@link QueryableIndexCursorFactory#getRowSignature} and
+ * {@link QueryableIndexCursorFactory#getColumnCapabilities} for clustered segments without standing up a real
+ * V10 segment file.
+ */
+ private static class FakeClusteredIndex implements QueryableIndex
+ {
+ private final ClusteredValueGroupsBaseTableSchema summary;
+ // Per-column capabilities the fake group sub-index reports; null means "column unknown".
+ private final Function dataColumnCaps;
+
+ FakeClusteredIndex(
+ ClusteredValueGroupsBaseTableSchema summary,
+ Function dataColumnCaps
+ )
+ {
+ this.summary = summary;
+ this.dataColumnCaps = dataColumnCaps;
+ }
+
+ @Override
+ public org.joda.time.Interval getDataInterval()
+ {
+ return Intervals.of("2025-01-01/2025-01-02");
+ }
+
+ @Override
+ public int getNumRows()
+ {
+ return 0;
+ }
+
+ @Override
+ public Indexed getAvailableDimensions()
+ {
+ return new ListIndexed<>(List.of());
+ }
+
+ @Override
+ public BitmapFactory getBitmapFactoryForDimensions()
+ {
+ return new RoaringBitmapFactory();
+ }
+
+ @Nullable
+ @Override
+ public Metadata getMetadata()
+ {
+ return null;
+ }
+
+ @Override
+ public Map getDimensionHandlers()
+ {
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public List getColumnNames()
+ {
+ return Collections.emptyList();
+ }
+
+ @Nullable
+ @Override
+ public BaseColumnHolder getColumnHolder(String columnName)
+ {
+ // Clustered segment: no top-level physical columns, so every lookup misses.
+ return null;
+ }
+
+ @Override
+ public List getOrdering()
+ {
+ return summary.getOrdering();
+ }
+
+ @Override
+ public void close()
+ {
+ }
+
+ @Nullable
+ @Override
+ public ClusteredValueGroupsBaseTableSchema getClusteredBaseSummary()
+ {
+ return summary;
+ }
+
+ @Override
+ public List getClusterGroupSchemas()
+ {
+ return summary.getClusterGroups();
+ }
+
+ @Nullable
+ @Override
+ public QueryableIndex getClusterGroupQueryableIndex(TableClusterGroupSpec groupSpec)
+ {
+ // Return a fake sub-index that answers getColumnCapabilities from the supplied function. Other methods
+ // throw if reached so we'd notice if the cursor factory's clustered paths start needing more.
+ return new QueryableIndex()
+ {
+ @Override
+ public org.joda.time.Interval getDataInterval()
+ {
+ return Intervals.of("2025-01-01/2025-01-02");
+ }
+
+ @Override
+ public int getNumRows()
+ {
+ return 0;
+ }
+
+ @Override
+ public Indexed getAvailableDimensions()
+ {
+ return new ListIndexed<>(List.of());
+ }
+
+ @Override
+ public BitmapFactory getBitmapFactoryForDimensions()
+ {
+ return new RoaringBitmapFactory();
+ }
+
+ @Nullable
+ @Override
+ public Metadata getMetadata()
+ {
+ return null;
+ }
+
+ @Override
+ public Map getDimensionHandlers()
+ {
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public List getColumnNames()
+ {
+ return Collections.emptyList();
+ }
+
+ @Nullable
+ @Override
+ public BaseColumnHolder getColumnHolder(String columnName)
+ {
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ // The programmable part: the enclosing test decides what each data column reports.
+ return dataColumnCaps.apply(column);
+ }
+
+ @Override
+ public List getOrdering()
+ {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void close()
+ {
+ }
+ };
+ }
+ }
+
+ /** Simple numeric LONG capabilities, used for the fake "metric" data column. */
+ private static ColumnCapabilities longCaps()
+ {
+ return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.LONG);
+ }
+
+ /** Identical to {@link #longCaps()}; kept as a separate method to document that __time is LONG. */
+ private static ColumnCapabilities timeCaps()
+ {
+ return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.LONG);
+ }
+
+ @Test
+ void testGetRowSignatureSourcesClusteringColumnFromSummaryAndDataColumnFromGroup()
+ {
+ final FakeClusteredIndex index = new FakeClusteredIndex(
+ summary("acme", "globex"),
+ col -> {
+ if (ColumnHolder.TIME_COLUMN_NAME.equals(col)) {
+ return timeCaps();
+ }
+ if ("metric".equals(col)) {
+ return longCaps();
+ }
+ return null;
+ }
+ );
+
+ QueryableIndexCursorFactory factory = new QueryableIndexCursorFactory(
+ index,
+ QueryableIndexTimeBoundaryInspector.create(index)
+ );
+
+ // "tenant" comes from the summary (STRING); __time and "metric" come from the group sub-index (LONG).
+ RowSignature sig = factory.getRowSignature();
+ Assertions.assertEquals(ColumnType.STRING, sig.getColumnType("tenant").orElseThrow());
+ Assertions.assertEquals(ColumnType.LONG, sig.getColumnType(ColumnHolder.TIME_COLUMN_NAME).orElseThrow());
+ Assertions.assertEquals(ColumnType.LONG, sig.getColumnType("metric").orElseThrow());
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForClusteringColumnFromSummary()
+ {
+ // The group sub-index knows nothing (col -> null); "tenant" must still resolve via the summary.
+ final FakeClusteredIndex index = new FakeClusteredIndex(
+ summary("acme"),
+ col -> null
+ );
+
+ QueryableIndexCursorFactory factory = new QueryableIndexCursorFactory(
+ index,
+ QueryableIndexTimeBoundaryInspector.create(index)
+ );
+
+ ColumnCapabilities tenantCaps = factory.getColumnCapabilities("tenant");
+ Assertions.assertNotNull(tenantCaps);
+ Assertions.assertTrue(tenantCaps.is(ValueType.STRING));
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForDataColumnFromFirstGroup()
+ {
+ // Two groups exist; capabilities for a data column should come from a group sub-index lookup.
+ final FakeClusteredIndex index = new FakeClusteredIndex(
+ summary("acme", "globex"),
+ col -> "metric".equals(col) ? longCaps() : null
+ );
+
+ QueryableIndexCursorFactory factory = new QueryableIndexCursorFactory(
+ index,
+ QueryableIndexTimeBoundaryInspector.create(index)
+ );
+
+ ColumnCapabilities metricCaps = factory.getColumnCapabilities("metric");
+ Assertions.assertNotNull(metricCaps);
+ Assertions.assertTrue(metricCaps.is(ValueType.LONG));
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForUnknownColumnIsNull()
+ {
+ final FakeClusteredIndex index = new FakeClusteredIndex(
+ summary("acme"),
+ col -> null
+ );
+
+ QueryableIndexCursorFactory factory = new QueryableIndexCursorFactory(
+ index,
+ QueryableIndexTimeBoundaryInspector.create(index)
+ );
+
+ // Neither the summary nor any group resolves "unknown" -> null, not an exception.
+ Assertions.assertNull(factory.getColumnCapabilities("unknown"));
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/SimpleQueryableIndexClusteredTest.java b/processing/src/test/java/org/apache/druid/segment/SimpleQueryableIndexClusteredTest.java
new file mode 100644
index 000000000000..8860c08e2686
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/SimpleQueryableIndexClusteredTest.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment;
+
+import com.google.common.base.Supplier;
+import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.segment.column.BaseColumnHolder;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.data.ListIndexed;
+import org.apache.druid.segment.projections.ClusterGroupSchemaTestHelpers;
+import org.apache.druid.segment.projections.ClusteredValueGroupsBaseTableSchema;
+import org.apache.druid.segment.projections.TableClusterGroupSpec;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Coverage for {@link SimpleQueryableIndex}'s clustered base table awareness: the constructor accepts an empty
+ * columns map for clustered segments, and the cluster-aware helpers surface the summary and per-group schemas that
+ * {@link org.apache.druid.segment.IndexIO}'s V10 loader collects from a clustered segment.
+ */
+class SimpleQueryableIndexClusteredTest
+{
+ /**
+ * Build a one-column STRING-clustered summary plus its specs from typed tenant values. Returns the helper's
+ * {@link ClusterGroupSchemaTestHelpers.Built} bundle plus the constructed schema (whose {@code clusterGroups} are
+ * the same spec instances as {@code built.specs()}, wired via setSummary by the schema constructor).
+ */
+ private static ClusterGroupSchemaTestHelpers.Built summary(String... tenants)
+ {
+ final RowSignature clustering = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+ final List> tuples = new java.util.ArrayList<>(tenants.length);
+ for (String t : tenants) {
+ tuples.add(java.util.Arrays.asList(t));
+ }
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ tuples
+ );
+ // NOTE(review): constructor result intentionally discarded — per the javadoc above, its side effect is
+ // wiring itself into built.specs() via setSummary. Confirm that remains true if the constructor changes.
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("tenant", ColumnHolder.TIME_COLUMN_NAME, "region", "metric"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(
+ OrderBy.ascending("tenant"),
+ OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME),
+ OrderBy.ascending("region")
+ ),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built;
+ }
+
+ /** Convenience: build the fixture and pull the summary back out through the first spec's back-reference. */
+ private static ClusteredValueGroupsBaseTableSchema summarySchema(String... tenants)
+ {
+ return summary(tenants).specs().get(0).getSummary();
+ }
+
+ /**
+ * Construct a clustered {@link SimpleQueryableIndex} with an empty top-level columns map, overriding
+ * getMetadata/getNumRows so no segment file access is needed.
+ */
+ private static SimpleQueryableIndex buildClusteredIndex(
+ ClusteredValueGroupsBaseTableSchema summary,
+ List>> clusterGroupColumns,
+ Map>> projectionColumns
+ )
+ {
+ final Metadata reconstructed = summary.asMetadata(null);
+ return new SimpleQueryableIndex(
+ Intervals.of("2025-01-01/2025-01-02"),
+ new ListIndexed<>(List.of()),
+ new RoaringBitmapFactory(),
+ Map.of(), // clustered summary has no top-level columns
+ null, // no SegmentFileMapper for in-memory test
+ reconstructed,
+ projectionColumns,
+ summary,
+ clusterGroupColumns
+ )
+ {
+ @Override
+ public Metadata getMetadata()
+ {
+ return reconstructed;
+ }
+
+ @Override
+ public int getNumRows()
+ {
+ return 0;
+ }
+ };
+ }
+
+ @Test
+ void testConstructorAcceptsEmptyColumnsForClusteredSegment()
+ {
+ // Pre-Phase-2 the constructor would NPE here because __time isn't in the columns map.
+ Assertions.assertDoesNotThrow(
+ () -> buildClusteredIndex(summarySchema("acme"), List.of(Map.of()), Map.of())
+ );
+ }
+
+ @Test
+ void testGetClusteredBaseSummaryReturnsSummary()
+ {
+ final ClusteredValueGroupsBaseTableSchema s = summarySchema("acme");
+ SimpleQueryableIndex index = buildClusteredIndex(s, List.of(Map.of()), Map.of());
+ // Same instance, not merely equal: the index must hand back exactly what it was built with.
+ Assertions.assertSame(s, index.getClusteredBaseSummary());
+ }
+
+ @Test
+ void testGetClusterGroupSchemasReturnsGroupsInOrder()
+ {
+ final ClusterGroupSchemaTestHelpers.Built built = summary("acme", "globex");
+ final ClusteredValueGroupsBaseTableSchema sum = built.specs().get(0).getSummary();
+ SimpleQueryableIndex index = buildClusteredIndex(sum, List.of(Map.of(), Map.of()), Map.of());
+ List result = index.getClusterGroupSchemas();
+ Assertions.assertEquals(2, result.size());
+ Assertions.assertSame(built.specs().get(0), result.get(0));
+ Assertions.assertSame(built.specs().get(1), result.get(1));
+ }
+
+ @Test
+ void testNonClusteredIndexHasNullSummaryAndEmptyGroups()
+ {
+ // Use the no-cluster-args constructor variant directly via the overload that takes no cluster params.
+ SimpleQueryableIndex index = new SimpleQueryableIndex(
+ Intervals.of("2025-01-01/2025-01-02"),
+ new ListIndexed<>(List.of()),
+ new RoaringBitmapFactory(),
+ Map.of(),
+ null,
+ null,
+ null
+ )
+ {
+ @Override
+ public Metadata getMetadata()
+ {
+ return null;
+ }
+
+ @Override
+ public int getNumRows()
+ {
+ return 0;
+ }
+ };
+
+ Assertions.assertNull(index.getClusteredBaseSummary());
+ Assertions.assertTrue(index.getClusterGroupSchemas().isEmpty());
+ }
+
+ @Test
+ void testGetProjectionQueryableIndexResolvesAggregatesNotClusterGroups()
+ {
+ // Sanity: cluster groups don't pollute the aggregate-projection map. getProjection(spec) is the
+ // aggregate-only path; cluster-group dispatch is QueryableIndexCursorFactory's job.
+ SimpleQueryableIndex index = buildClusteredIndex(
+ summarySchema("acme"),
+ List.of(Map.of()),
+ Map.of()
+ );
+ // No aggregate projections were added, so no aggregate by name "tenant=acme" exists either.
+ // NOTE(review): asserting NPE pins current (arguably unfriendly) behavior — consider a DruidException here.
+ Assertions.assertThrows(
+ NullPointerException.class,
+ () -> index.getProjectionQueryableIndex("does-not-exist")
+ );
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ClusterGroupSchemaTestHelpers.java b/processing/src/test/java/org/apache/druid/segment/projections/ClusterGroupSchemaTestHelpers.java
new file mode 100644
index 000000000000..73e921f19181
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ClusterGroupSchemaTestHelpers.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.NullableTypeStrategy;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+
+/**
+ * Test-only utility: build {@link ClusteredValueGroupsBaseTableSchema} inputs from typed clustering tuples.
+ * Derives the per-type dictionaries + per-spec dictionary IDs that the writer will emit at ingest time.
+ */
+public final class ClusterGroupSchemaTestHelpers
+{
+ private ClusterGroupSchemaTestHelpers()
+ {
+ }
+
+ /** Bundle to pass into the {@link ClusteredValueGroupsBaseTableSchema} constructor's last two args. */
+ public record Built(ClusteringDictionaries dictionaries, List specs)
+ {
+ }
+
+ /**
+ * From typed clustering tuples (one per cluster group), build the per-type dictionaries and the matching specs.
+ *
+ * @param clusteringColumns signature of the clustering columns; each tuple position is coerced to its type
+ * @param tuples one value-tuple per cluster group; every tuple must match clusteringColumns in size
+ */
+ public static Built buildClusterGroups(
+ RowSignature clusteringColumns,
+ List extends List>> tuples
+ )
+ {
+ final int numCols = clusteringColumns.size();
+
+ // 1. Coerce each tuple's raw values to the typed primitive expected per column.
+ final List coercedTuples = new ArrayList<>(tuples.size());
+ for (List> tuple : tuples) {
+ if (tuple == null) {
+ throw DruidException.defensive("tuple must not be null");
+ }
+ if (tuple.size() != numCols) {
+ throw DruidException.defensive(
+ "tuple size [%s] must match clusteringColumns size [%s]",
+ tuple.size(),
+ numCols
+ );
+ }
+ final Object[] coerced = new Object[numCols];
+ for (int i = 0; i < numCols; i++) {
+ coerced[i] = coerceClusterGroupValue(
+ clusteringColumns.getColumnName(i),
+ clusteringColumns.getColumnType(i).orElseThrow(),
+ tuple.get(i)
+ );
+ }
+ coercedTuples.add(coerced);
+ }
+
+ // 2. Build per-type dictionaries; TreeSet on the type's strategy gives sort + dedupe in one shot.
+ final Map> perType = new EnumMap<>(ValueType.class);
+ for (int i = 0; i < numCols; i++) {
+ final ColumnType colType = clusteringColumns.getColumnType(i).orElseThrow();
+ perType.computeIfAbsent(typeKey(colType), k -> newTypedTreeSet(colType));
+ }
+ for (Object[] coerced : coercedTuples) {
+ for (int i = 0; i < numCols; i++) {
+ final ColumnType colType = clusteringColumns.getColumnType(i).orElseThrow();
+ perType.get(typeKey(colType)).add(coerced[i]);
+ }
+ }
+ final List stringDict = materialize(perType.get(ValueType.STRING));
+ final List longDict = materialize(perType.get(ValueType.LONG));
+ final List doubleDict = materialize(perType.get(ValueType.DOUBLE));
+ final List floatDict = materialize(perType.get(ValueType.FLOAT));
+ final ClusteringDictionaries dictionaries =
+ new ClusteringDictionaries(stringDict, longDict, doubleDict, floatDict);
+
+ // 3. For each tuple, look each value up in its column type's dictionary to derive the ID list.
+ final List specs = new ArrayList<>(coercedTuples.size());
+ for (Object[] coerced : coercedTuples) {
+ final List ids = new ArrayList<>(numCols);
+ for (int i = 0; i < numCols; i++) {
+ final ColumnType colType = clusteringColumns.getColumnType(i).orElseThrow();
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ final int id = Collections.binarySearch(
+ (List) dictionaries.dictionaryForType(colType),
+ coerced[i],
+ (NullableTypeStrategy) colType.getNullableStrategy()
+ );
+ if (id < 0) {
+ // binarySearch is valid because materialize() preserved the TreeSet's strategy ordering.
+ throw DruidException.defensive(
+ "value [%s] not found in dictionary for clustering column [%s]; this is a bug in the helper",
+ coerced[i],
+ clusteringColumns.getColumnName(i)
+ );
+ }
+ ids.add(id);
+ }
+ specs.add(new TableClusterGroupSpec(ids, null));
+ }
+
+ return new Built(dictionaries, Collections.unmodifiableList(specs));
+ }
+
+ /** Dictionary bucket key: groups column types by their base {@link ValueType}. */
+ private static ValueType typeKey(ColumnType columnType)
+ {
+ return columnType.getType();
+ }
+
+ /** TreeSet ordered by the column type's null-aware strategy, so dictionaries sort exactly like the writer's. */
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private static TreeSet newTypedTreeSet(ColumnType columnType)
+ {
+ return new TreeSet<>((NullableTypeStrategy) columnType.getNullableStrategy());
+ }
+
+ /** Snapshot a (possibly null/empty) sorted set into an unmodifiable list, preserving iteration order. */
+ @SuppressWarnings("unchecked")
+ private static List materialize(TreeSet set)
+ {
+ if (set == null || set.isEmpty()) {
+ return List.of();
+ }
+ final List out = new ArrayList<>(set.size());
+ for (Object v : set) {
+ out.add((T) v);
+ }
+ return Collections.unmodifiableList(out);
+ }
+
+ /**
+ * Coerce a raw value (from JSON or test code) to the native Java type for {@code columnType}: STRING → String,
+ * LONG → Long, DOUBLE → Double, FLOAT → Float. Null passes through; unsupported column types are rejected.
+ */
+ @Nullable
+ public static Object coerceClusterGroupValue(
+ String columnName,
+ @Nullable ColumnType columnType,
+ @Nullable Object raw
+ )
+ {
+ if (!Projections.isAllowedClusteringType(columnType)) {
+ throw DruidException.defensive(
+ "clustering column [%s] has unsupported type [%s]; allowed types are STRING, LONG, DOUBLE, FLOAT",
+ columnName,
+ columnType
+ );
+ }
+ if (raw == null) {
+ return null;
+ }
+ if (columnType.is(ValueType.STRING)) {
+ return DimensionHandlerUtils.convertObjectToString(raw);
+ } else if (columnType.is(ValueType.LONG)) {
+ return DimensionHandlerUtils.convertObjectToLong(raw);
+ } else if (columnType.is(ValueType.DOUBLE)) {
+ return DimensionHandlerUtils.convertObjectToDouble(raw);
+ } else {
+ // FLOAT is the only remaining allowed type after the guard above.
+ return DimensionHandlerUtils.convertObjectToFloat(raw);
+ }
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchemaTest.java b/processing/src/test/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchemaTest.java
new file mode 100644
index 000000000000..67d664e3622c
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchemaTest.java
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Unit tests for {@code ClusteredValueGroupsBaseTableSchema}: JSON serde, constructor validation
+ * (columns/ordering/clustering invariants), and the group-view accessors that strip the clustering prefix.
+ */
+class ClusteredValueGroupsBaseTableSchemaTest
+{
+ private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper();
+ private static final String TENANT_COL = "tenant";
+ private static final String REGION_COL = "region";
+ private static final String PRIORITY_COL = "priority";
+ private static final String METRIC_COL = "metric";
+ // Single STRING clustering column used by most fixtures.
+ private static final RowSignature TENANT_SIGNATURE = RowSignature.builder()
+ .add(TENANT_COL, ColumnType.STRING)
+ .build();
+
+ @Test
+ void testSerdeMultiColumnClustering() throws JsonProcessingException
+ {
+ ClusteredValueGroupsBaseTableSchema schema = newSchema(
+ RowSignature.builder()
+ .add(TENANT_COL, ColumnType.STRING)
+ .add(PRIORITY_COL, ColumnType.LONG)
+ .build()
+ );
+
+ // Round-trips through the polymorphic ProjectionSchema type, not the concrete class.
+ Assertions.assertEquals(
+ schema,
+ JSON_MAPPER.readValue(JSON_MAPPER.writeValueAsString(schema), ProjectionSchema.class)
+ );
+ }
+
+ @Test
+ void testColumnNamesIncludeClusteringColumnsAndAggregators()
+ {
+ // Summary carries the full logical signature, including clustering columns.
+ Assertions.assertEquals(
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL, PRIORITY_COL, REGION_COL, METRIC_COL, "count", "c"),
+ newSchema(TENANT_SIGNATURE).getColumnNames()
+ );
+ }
+
+ @Test
+ void testDimensionNamesIncludeClusteringColumnsExceptTime()
+ {
+ // Summary's dimension names exclude __time but keep clustering columns (they're logical dims).
+ Assertions.assertEquals(
+ List.of(TENANT_COL, PRIORITY_COL, REGION_COL, METRIC_COL),
+ newSchema(TENANT_SIGNATURE).getDimensionNames()
+ );
+ }
+
+ @Test
+ void testNullColumnsRejected()
+ {
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ null,
+ null,
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ TENANT_SIGNATURE,
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "clustered base table schema columns must not be null or empty",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testNullOrderingRejected()
+ {
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ null,
+ TENANT_SIGNATURE,
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "clustered base table schema ordering must not be null",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testEmptyClusteringColumnsRejected()
+ {
+ // An empty clustering signature is meaningless for a clustered schema.
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ RowSignature.empty(),
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "clustered base table schema clusteringColumns must not be null or empty",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testClusteringColumnMustAppearInColumns()
+ {
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ RowSignature.builder().add("unknown", ColumnType.STRING).build(),
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "clusteringColumn [unknown] must appear in columns of the clustered base table summary",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testUnsupportedClusteringTypeRejected()
+ {
+ // Arrays (and other complex types) can't be clustering keys.
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, "tenants"),
+ null,
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ RowSignature.builder().add("tenants", ColumnType.STRING_ARRAY).build(),
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "clustering column [tenants] has unsupported type [ARRAY]; "
+ + "allowed types are STRING, LONG, DOUBLE, FLOAT",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testSharedColumnsRoundTrip() throws JsonProcessingException
+ {
+ ClusteredValueGroupsBaseTableSchema schema = new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL, REGION_COL),
+ null,
+ List.of(OrderBy.ascending(TENANT_COL), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ TENANT_SIGNATURE,
+ List.of(REGION_COL),
+ ClusteringDictionaries.EMPTY,
+ null
+ );
+
+ ClusteredValueGroupsBaseTableSchema roundTripped = (ClusteredValueGroupsBaseTableSchema) JSON_MAPPER.readValue(
+ JSON_MAPPER.writeValueAsString(schema),
+ ProjectionSchema.class
+ );
+ Assertions.assertEquals(schema, roundTripped);
+ Assertions.assertEquals(List.of(REGION_COL), roundTripped.getSharedColumns());
+ }
+
+ @Test
+ void testSharedColumnMustAppearInColumns()
+ {
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ List.of(OrderBy.ascending(TENANT_COL), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ TENANT_SIGNATURE,
+ List.of("unknown"),
+ null,
+ null
+ )
+ );
+ Assertions.assertEquals(
+ "sharedColumn [unknown] must appear in columns of the clustered base table summary",
+ t.getMessage()
+ );
+ }
+
+ @Test
+ void testClusteringPrefixOfOrderingRequired()
+ {
+ // The first ordering position is __time, but the clustering column is tenant, must reject.
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME), OrderBy.ascending(TENANT_COL)),
+ TENANT_SIGNATURE,
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertTrue(t.getMessage().contains("clustering columns must form a prefix of the segment ordering"));
+ }
+
+ @Test
+ void testClusteringRequiresOrderingAtLeastAsLongAsClusteringSize()
+ {
+ // Two clustering columns but ordering only has one entry, must reject.
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL, REGION_COL),
+ null,
+ List.of(OrderBy.ascending(TENANT_COL)),
+ RowSignature.builder()
+ .add(TENANT_COL, ColumnType.STRING)
+ .add(REGION_COL, ColumnType.STRING)
+ .build(),
+ null,
+ null,
+ null
+ )
+ );
+ Assertions.assertTrue(
+ t.getMessage().contains("ordering size [1] must be at least clusteringColumns size [2]")
+ );
+ }
+
+ @Test
+ void testNullDictionariesDefaultsToEmpty()
+ {
+ // A null clusteringDictionaries is interpreted as "no values yet" (empty per-type dicts); valid for a
+ // freshly-constructed clustered schema that doesn't have any groups defined.
+ final ClusteredValueGroupsBaseTableSchema schema = new ClusteredValueGroupsBaseTableSchema(
+ null,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL),
+ null,
+ List.of(OrderBy.ascending(TENANT_COL), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ TENANT_SIGNATURE,
+ null,
+ null,
+ null
+ );
+ Assertions.assertEquals(ClusteringDictionaries.EMPTY, schema.getClusteringDictionaries());
+ }
+
+ @Test
+ void testJsonCreatorAcceptsUnsortedDictionary()
+ {
+ // The trusting JsonCreator path does NOT validate; segments produced by a well-behaved writer have already
+ // passed validation, so re-checking on every load would just burn CPU.
+ final ClusteringDictionaries dicts = new ClusteringDictionaries(List.of("b", "a"), null, null, null);
+ Assertions.assertEquals(List.of("b", "a"), dicts.getStringDictionary());
+ }
+
+ @Test
+ void testGetGroupOrderingDropsClusteringPrefix()
+ {
+ // Single clustering column → group ordering is the rest of the segment ordering.
+ Assertions.assertEquals(
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME), OrderBy.ascending(REGION_COL)),
+ newSchema(TENANT_SIGNATURE).getGroupOrdering()
+ );
+
+ // Two clustering columns → both prefix entries are dropped.
+ final RowSignature multi = RowSignature.builder()
+ .add(TENANT_COL, ColumnType.STRING)
+ .add(PRIORITY_COL, ColumnType.LONG)
+ .build();
+ Assertions.assertEquals(
+ List.of(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME), OrderBy.ascending(REGION_COL)),
+ newSchema(multi).getGroupOrdering()
+ );
+ }
+
+ @Test
+ void testGetGroupColumnNamesExcludesClusteringColumns()
+ {
+ // Summary's columns are (__time, tenant, priority, region, metric); clustering=tenant → group view drops tenant
+ // (and adds aggregator names: count, c).
+ Assertions.assertEquals(
+ List.of(ColumnHolder.TIME_COLUMN_NAME, PRIORITY_COL, REGION_COL, METRIC_COL, "count", "c"),
+ newSchema(TENANT_SIGNATURE).getGroupColumnNames()
+ );
+ }
+
+ @Test
+ void testGetGroupDimensionNamesExcludesClusteringColumnsAndTime()
+ {
+ // Summary's dimension names are (__time excluded) (tenant, priority, region, metric);
+ // clustering=tenant → drops tenant.
+ Assertions.assertEquals(
+ List.of(PRIORITY_COL, REGION_COL, METRIC_COL),
+ newSchema(TENANT_SIGNATURE).getGroupDimensionNames()
+ );
+ }
+
+ @Test
+ void testEqualsAndHashcode()
+ {
+ // Cached/derived fields are excluded from the contract; getClass (not instanceof) matches the impl.
+ EqualsVerifier.forClass(ClusteredValueGroupsBaseTableSchema.class)
+ .withIgnoredFields("timeColumnPosition", "effectiveGranularity")
+ .usingGetClass()
+ .verify();
+ }
+
+ /**
+ * Build a valid fixture schema for the given clustering signature: clustering columns first in the ordering,
+ * then __time and region, with two aggregators (count, c).
+ */
+ private static ClusteredValueGroupsBaseTableSchema newSchema(RowSignature clusteringColumns)
+ {
+ // clustering columns must be a prefix of the segment ordering. Build the ordering by prefixing every clustering
+ // column in order, then __time and the remaining data columns.
+ final ArrayList ordering = new ArrayList<>(clusteringColumns.size() + 2);
+ for (String col : clusteringColumns.getColumnNames()) {
+ ordering.add(OrderBy.ascending(col));
+ }
+ ordering.add(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME));
+ ordering.add(OrderBy.ascending(REGION_COL));
+ // No cluster groups in this fixture; dictionaries are empty (null is also acceptable, defaults to EMPTY).
+ return new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of(ColumnHolder.TIME_COLUMN_NAME, TENANT_COL, PRIORITY_COL, REGION_COL, METRIC_COL),
+ new AggregatorFactory[]{
+ new CountAggregatorFactory("count"),
+ new LongSumAggregatorFactory("c", "c")
+ },
+ ordering,
+ clusteringColumns,
+ null,
+ ClusteringDictionaries.EMPTY,
+ null
+ );
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactoryTest.java
new file mode 100644
index 000000000000..03f0b9fc3953
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactoryTest.java
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.RowIdSupplier;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+
+class ClusteringColumnSelectorFactoryTest
+{
+ private static final RowSignature SIGNATURE = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+
+ @Test
+ void testStringClusteringColumnDimensionSelector()
+ {
+ RecordingDelegate delegate = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ delegate,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ Assertions.assertEquals("acme", sel.lookupName(0));
+ Assertions.assertNull(delegate.lastDimSelectorName, "delegate must not be hit for clustering columns");
+ }
+
+ @Test
+ void testStringClusteringColumnValueSelector()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("tenant");
+ Assertions.assertEquals("acme", sel.getObject());
+ // Non-numeric string: ExprEval reports it as numerically null (matches a real string column read via getLong).
+ Assertions.assertTrue(sel.isNull());
+ }
+
+ @Test
+ void testLongClusteringColumnValueSelectorTypedMethods()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ RowSignature.builder().add("priority", ColumnType.LONG).build(),
+ new Object[]{42L}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("priority");
+ Assertions.assertEquals(42L, sel.getLong());
+ Assertions.assertEquals(42.0, sel.getDouble());
+ Assertions.assertEquals(42.0f, sel.getFloat());
+ Assertions.assertEquals(42L, sel.getObject());
+ Assertions.assertFalse(sel.isNull());
+ }
+
+ @Test
+ void testDoubleClusteringColumnValueSelector()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ RowSignature.builder().add("price", ColumnType.DOUBLE).build(),
+ new Object[]{3.14}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("price");
+ Assertions.assertEquals(3.14, sel.getDouble());
+ Assertions.assertEquals(3.14, sel.getObject());
+ }
+
+ @Test
+ void testFloatClusteringColumnValueSelector()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ RowSignature.builder().add("ratio", ColumnType.FLOAT).build(),
+ new Object[]{0.5f}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("ratio");
+ Assertions.assertEquals(0.5f, sel.getFloat());
+ // ExpressionType collapses FLOAT to DOUBLE, so the unwrapped value is a Double.
+ Assertions.assertEquals(0.5d, sel.getObject());
+ }
+
+ @Test
+ void testNullClusteringValueDimensionSelectorIsNil()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{null}
+ );
+
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ Assertions.assertNull(sel.lookupName(sel.getRow().get(0)));
+ }
+
+ @Test
+ void testNullClusteringValueValueSelectorIsNull()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ RowSignature.builder().add("priority", ColumnType.LONG).build(),
+ new Object[]{null}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("priority");
+ Assertions.assertTrue(sel.isNull());
+ Assertions.assertNull(sel.getObject());
+ }
+
+ @Test
+ void testNonClusteringDimensionSelectorDelegated()
+ {
+ RecordingDelegate delegate = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ delegate,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ // Non-clustering selectors are wrapped in a lazy delegating wrapper; the delegate is only consulted on first
+ // use (this enables ConcatenatingCursor to swap delegates between cluster groups without recreating selectors).
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("region"));
+ Assertions.assertNull(delegate.lastDimSelectorName, "delegate must not be hit until selector is used");
+
+ Assertions.assertEquals("delegated:region", sel.lookupName(0));
+ Assertions.assertEquals("region", delegate.lastDimSelectorName);
+ }
+
+ @Test
+ void testNonClusteringValueSelectorDelegated()
+ {
+ RecordingDelegate delegate = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ delegate,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ColumnValueSelector sel = f.makeColumnValueSelector("region");
+ Assertions.assertNull(delegate.lastValueSelectorName, "delegate must not be hit until selector is used");
+
+ sel.getObject();
+ Assertions.assertEquals("region", delegate.lastValueSelectorName);
+ }
+
+ @Test
+ void testNonClusteringSelectorObservesNewDelegateAfterSetDelegate()
+ {
+ RecordingDelegate first = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ first,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("region"));
+ Assertions.assertEquals("delegated:region", sel.lookupName(0)); // routed to first delegate
+
+ RecordingDelegate second = new RecordingDelegate();
+ f.setDelegate(second, new Object[]{"globex"});
+
+ // Same selector instance, now backed by the new delegate's makeDimensionSelector result.
+ Assertions.assertEquals("delegated:region", sel.lookupName(0));
+ Assertions.assertEquals("region", second.lastDimSelectorName);
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForClusteringColumns()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ RowSignature.builder()
+ .add("tenant", ColumnType.STRING)
+ .add("priority", ColumnType.LONG)
+ .build(),
+ new Object[]{"acme", 5L}
+ );
+
+ ColumnCapabilities tenantCaps = f.getColumnCapabilities("tenant");
+ Assertions.assertEquals(ColumnType.STRING, ColumnType.fromCapabilities(tenantCaps));
+
+ ColumnCapabilities priorityCaps = f.getColumnCapabilities("priority");
+ Assertions.assertEquals(ColumnType.LONG, ColumnType.fromCapabilities(priorityCaps));
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForNonClusteringDelegated()
+ {
+ RecordingDelegate delegate = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ delegate,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ f.getColumnCapabilities("metric");
+ Assertions.assertEquals("metric", delegate.lastCapabilitiesColumn);
+ }
+
+ @Test
+ void testSetDelegateUpdatesClusteringValues()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ // Acquire a selector tied to the first group's value.
+ ColumnValueSelector sel = f.makeColumnValueSelector("tenant");
+ Assertions.assertEquals("acme", sel.getObject());
+
+ // Simulate group transition.
+ f.setDelegate(new RecordingDelegate(), new Object[]{"globex"});
+
+ // The same selector should observe the new value via the generation-counter cache invalidation.
+ Assertions.assertEquals("globex", sel.getObject());
+ }
+
+ @Test
+ void testSetDelegateUpdatesDimensionSelector()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ Assertions.assertEquals("acme", sel.lookupName(sel.getRow().get(0)));
+
+ f.setDelegate(new RecordingDelegate(), new Object[]{"globex"});
+
+ Assertions.assertEquals("globex", sel.lookupName(sel.getRow().get(0)));
+ }
+
+ @Test
+ void testClusteringMatcherObservesGroupTransition()
+ {
+ // The clustering-column path: a matcher built before the group transition still gives the right verdict
+ // afterwards because it re-resolves through the generation-aware wrapper.
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ org.apache.druid.query.filter.ValueMatcher acmeMatcher = sel.makeValueMatcher("acme");
+
+ // Pre-transition: the constant clustering value is "acme"; matcher returns true.
+ Assertions.assertTrue(acmeMatcher.matches(false));
+
+ f.setDelegate(new RecordingDelegate(), new Object[]{"globex"});
+
+ // Post-transition: the constant is now "globex"; the same matcher (held across the transition) returns false.
+ Assertions.assertFalse(acmeMatcher.matches(false));
+ }
+
+ @Test
+ void testNonClusteringDelegatingMatcherObservesGroupTransition()
+ {
+ // The non-clustering "delegating" path: a matcher built before the transition still routes through the
+ // current delegate's matcher afterwards. We use two RecordingDelegates whose makeDimensionSelector returns
+ // selectors with predictable lookupName, then drive the matcher across a delegate swap.
+ final RecordingDelegate first = new RecordingDelegate();
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ first,
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+ DimensionSelector sel = f.makeDimensionSelector(DefaultDimensionSpec.of("region"));
+ // Force the inner selector to materialize so RecordingDelegate.lastDimSelectorName is "region".
+ sel.lookupName(0);
+ org.apache.druid.query.filter.ValueMatcher matcher = sel.makeValueMatcher("delegated:region");
+
+ // Pre-transition: RecordingDelegate's stub selector returns "delegated:region" for any id, so the matcher
+ // is true.
+ Assertions.assertTrue(matcher.matches(false));
+
+ final RecordingDelegate second = new RecordingDelegate();
+ f.setDelegate(second, new Object[]{"acme"});
+
+ // Post-transition: still true (RecordingDelegate's stub is identical), but the important thing is that
+ // calling matches() now invokes the second delegate's selector (verified by lastDimSelectorName getting set).
+ Assertions.assertTrue(matcher.matches(false));
+ Assertions.assertEquals("region", second.lastDimSelectorName);
+ }
+
+ @Test
+ void testStringDimensionCapabilitiesFlavorIsSingleValue()
+ {
+ ClusteringColumnSelectorFactory f = new ClusteringColumnSelectorFactory(
+ new RecordingDelegate(),
+ SIGNATURE,
+ new Object[]{"acme"}
+ );
+ ColumnCapabilities caps = f.getColumnCapabilities("tenant");
+ // Sanity: the simple-string-capabilities helper returns a STRING type with no multi-value flag set.
+ Assertions.assertNotNull(caps);
+ Assertions.assertTrue(caps.is(ValueType.STRING));
+ Assertions.assertEquals(
+ ColumnCapabilities.Capable.FALSE,
+ caps.hasMultipleValues()
+ );
+ Assertions.assertEquals(
+ ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities().getType(),
+ caps.getType()
+ );
+ }
+
+  /**
+   * Recording delegate: verifies that clustering-column calls are intercepted before ever reaching it. For
+   * non-clustering columns, methods record the requested name and return constant/Nil stub selectors (null caps).
+   */
+ private static class RecordingDelegate implements ColumnSelectorFactory
+ {
+ String lastDimSelectorName;
+ String lastValueSelectorName;
+ String lastCapabilitiesColumn;
+
+ @Override
+ public DimensionSelector makeDimensionSelector(org.apache.druid.query.dimension.DimensionSpec dimensionSpec)
+ {
+ lastDimSelectorName = dimensionSpec.getDimension();
+ return DimensionSelector.constant("delegated:" + dimensionSpec.getDimension());
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ lastValueSelectorName = columnName;
+ return NilColumnValueSelector.instance();
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ lastCapabilitiesColumn = column;
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public RowIdSupplier getRowIdSupplier()
+ {
+ return null;
+ }
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactoryTest.java
new file mode 100644
index 000000000000..9a76566b3e1b
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactoryTest.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.NoFilterVectorOffset;
+import org.apache.druid.segment.vector.ReadableVectorInspector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+
+class ClusteringVectorColumnSelectorFactoryTest
+{
+ private static final RowSignature CLUSTER_SIGNATURE = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+
+ @Test
+ void testStringClusteringSingleValueDimensionSelector()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(8));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ SingleValueDimensionVectorSelector sel = f.makeSingleValueDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ Assertions.assertNull(delegate.lastSingleValDimRequest, "delegate must not be hit for clustering column");
+ Assertions.assertEquals("acme", sel.lookupName(0));
+ Assertions.assertEquals(1, sel.getValueCardinality());
+ }
+
+ @Test
+ void testLongClusteringValueSelector()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ RowSignature.builder().add("priority", ColumnType.LONG).build(),
+ new Object[]{42L}
+ );
+
+ VectorValueSelector sel = f.makeValueSelector("priority");
+ Assertions.assertNull(delegate.lastValueRequest);
+ long[] vec = sel.getLongVector();
+ for (long v : vec) {
+ Assertions.assertEquals(42L, v);
+ }
+ }
+
+ @Test
+ void testDoubleClusteringObjectSelector()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ RowSignature.builder().add("price", ColumnType.DOUBLE).build(),
+ new Object[]{3.14}
+ );
+
+ VectorObjectSelector sel = f.makeObjectSelector("price");
+ Object[] vec = sel.getObjectVector();
+ for (Object v : vec) {
+ Assertions.assertEquals(3.14, v);
+ }
+ }
+
+ @Test
+ void testFloatClusteringValueSelector()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ RowSignature.builder().add("ratio", ColumnType.FLOAT).build(),
+ new Object[]{0.5f}
+ );
+
+ VectorValueSelector sel = f.makeValueSelector("ratio");
+ float[] vec = sel.getFloatVector();
+ for (float v : vec) {
+ Assertions.assertEquals(0.5f, v);
+ }
+ }
+
+ @Test
+ void testNullStringClusteringValueDimensionSelectorIsNil()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ CLUSTER_SIGNATURE,
+ new Object[]{null}
+ );
+
+ SingleValueDimensionVectorSelector sel = f.makeSingleValueDimensionSelector(DefaultDimensionSpec.of("tenant"));
+ // Nil vector selector returns null on lookupName(0) regardless of id.
+ Assertions.assertNull(sel.lookupName(0));
+ }
+
+ @Test
+ void testNonClusteringColumnDelegated()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ // Non-clustering selectors are wrapped in lazy delegating wrappers; the delegate is only consulted on first
+ // use, so a multi-group ConcatenatingVectorCursor can swap delegates between groups without recreating the
+ // selector instance.
+ SingleValueDimensionVectorSelector svdSel =
+ f.makeSingleValueDimensionSelector(DefaultDimensionSpec.of("region"));
+ Assertions.assertNull(delegate.lastSingleValDimRequest, "delegate must not be hit until selector is used");
+ try {
+ svdSel.getRowVector();
+ }
+ catch (NullPointerException expected) {
+ // StubDelegate returns null for the inner selector; the wrapper forwards to it. We just want to confirm
+ // the delegate's makeSingleValueDimensionSelector was invoked.
+ }
+ Assertions.assertEquals("region", delegate.lastSingleValDimRequest);
+
+ VectorValueSelector vvSel = f.makeValueSelector("metric");
+ Assertions.assertNull(delegate.lastValueRequest);
+ try {
+ vvSel.getLongVector();
+ }
+ catch (NullPointerException expected) {
+ // same ^, confirming the delegate was reached
+ }
+ Assertions.assertEquals("metric", delegate.lastValueRequest);
+
+ VectorObjectSelector voSel = f.makeObjectSelector("region");
+ try {
+ voSel.getObjectVector();
+ }
+ catch (NullPointerException expected) {
+ // same
+ }
+ Assertions.assertEquals("region", delegate.lastObjectRequest);
+
+ // getColumnCapabilities is NOT lazy; it returns the result directly.
+ f.getColumnCapabilities("metric");
+ Assertions.assertEquals("metric", delegate.lastCapsRequest);
+ }
+
+ @Test
+ void testSetDelegateUpdatesClusteringValueOnExistingSelector()
+ {
+ StubDelegate firstDelegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ firstDelegate,
+ RowSignature.builder().add("priority", ColumnType.LONG).build(),
+ new Object[]{5L}
+ );
+
+ VectorValueSelector sel = f.makeValueSelector("priority");
+ long[] firstVec = sel.getLongVector();
+ for (long v : firstVec) {
+ Assertions.assertEquals(5L, v);
+ }
+
+ // Simulate group transition.
+ StubDelegate secondDelegate = new StubDelegate(inspectorFor(4));
+ f.setDelegate(secondDelegate, new Object[]{42L});
+
+ long[] secondVec = sel.getLongVector();
+ for (long v : secondVec) {
+ Assertions.assertEquals(42L, v);
+ }
+ }
+
+ @Test
+ void testSetDelegateUpdatesNonClusteringSelector()
+ {
+ StubDelegate first = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ first,
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ SingleValueDimensionVectorSelector sel =
+ f.makeSingleValueDimensionSelector(DefaultDimensionSpec.of("region"));
+ try {
+ sel.getRowVector(); // warms the cache against the first delegate
+ }
+ catch (NullPointerException expected) {
+ // expected; just confirming the route
+ }
+ Assertions.assertEquals("region", first.lastSingleValDimRequest);
+
+ StubDelegate second = new StubDelegate(inspectorFor(4));
+ f.setDelegate(second, new Object[]{"globex"});
+
+ try {
+ sel.getRowVector(); // generation bumped → re-fetches against second delegate
+ }
+ catch (NullPointerException expected) {
+ // expected
+ }
+ Assertions.assertEquals("region", second.lastSingleValDimRequest);
+ }
+
+ @Test
+ void testGetColumnCapabilitiesForClusteringColumns()
+ {
+ StubDelegate delegate = new StubDelegate(inspectorFor(4));
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ delegate,
+ RowSignature.builder()
+ .add("tenant", ColumnType.STRING)
+ .add("priority", ColumnType.LONG)
+ .build(),
+ new Object[]{"acme", 5L}
+ );
+
+ ColumnCapabilities tenantCaps = f.getColumnCapabilities("tenant");
+ Assertions.assertTrue(tenantCaps.is(ValueType.STRING));
+
+ ColumnCapabilities priorityCaps = f.getColumnCapabilities("priority");
+ Assertions.assertTrue(priorityCaps.is(ValueType.LONG));
+
+ Assertions.assertNull(delegate.lastCapsRequest, "delegate must not be hit for clustering capabilities");
+ }
+
+ @Test
+ void testMultiValueDimensionSelectorOnClusteringRejected()
+ {
+ ClusteringVectorColumnSelectorFactory f = new ClusteringVectorColumnSelectorFactory(
+ new StubDelegate(inspectorFor(4)),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+ Assertions.assertThrows(
+ DruidException.class,
+ () -> f.makeMultiValueDimensionSelector(DefaultDimensionSpec.of("tenant"))
+ );
+ }
+
+ private static ReadableVectorInspector inspectorFor(int size)
+ {
+ return new NoFilterVectorOffset(size, 0, size);
+ }
+
+ private static class StubDelegate implements VectorColumnSelectorFactory
+ {
+ final ReadableVectorInspector inspector;
+ String lastSingleValDimRequest;
+ String lastValueRequest;
+ String lastObjectRequest;
+ String lastCapsRequest;
+
+ StubDelegate(ReadableVectorInspector inspector)
+ {
+ this.inspector = inspector;
+ }
+
+ @Override
+ public ReadableVectorInspector getReadableVectorInspector()
+ {
+ return inspector;
+ }
+
+ @Override
+ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(
+ org.apache.druid.query.dimension.DimensionSpec dimensionSpec
+ )
+ {
+ lastSingleValDimRequest = dimensionSpec.getDimension();
+ return null;
+ }
+
+ @Override
+ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(
+ org.apache.druid.query.dimension.DimensionSpec dimensionSpec
+ )
+ {
+ throw new UnsupportedOperationException("not used");
+ }
+
+ @Override
+ public VectorValueSelector makeValueSelector(String column)
+ {
+ lastValueRequest = column;
+ return null;
+ }
+
+ @Override
+ public VectorObjectSelector makeObjectSelector(String column)
+ {
+ lastObjectRequest = column;
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ lastCapsRequest = column;
+ return null;
+ }
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsPruneClusterGroupsTest.java b/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsPruneClusterGroupsTest.java
new file mode 100644
index 000000000000..624f00a0e08d
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsPruneClusterGroupsTest.java
@@ -0,0 +1,721 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.filter.EqualityFilter;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.LikeDimFilter;
+import org.apache.druid.query.filter.NullFilter;
+import org.apache.druid.query.filter.TypedInFilter;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.filter.AndFilter;
+import org.apache.druid.segment.filter.NotFilter;
+import org.apache.druid.segment.filter.OrFilter;
+import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+
+class ProjectionsPruneClusterGroupsTest
+{
+ /** Build a fresh single-spec summary for a STRING-clustered ``tenant`` group; each call gets its own summary. */
+ private static TableClusterGroupSpec stringGroup(String tenant)
+ {
+ final RowSignature clustering = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ List.of(Arrays.asList(tenant)) // Arrays.asList allows tenant == null
+ );
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("tenant", ColumnHolder.TIME_COLUMN_NAME, "metric"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(OrderBy.ascending("tenant"), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built.specs().get(0);
+ }
+
+ private static TableClusterGroupSpec longGroup(long priority)
+ {
+ final RowSignature clustering = RowSignature.builder().add("priority", ColumnType.LONG).build();
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ List.of(List.of(priority))
+ );
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("priority", ColumnHolder.TIME_COLUMN_NAME, "metric"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(OrderBy.ascending("priority"), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built.specs().get(0);
+ }
+
+ private static TableClusterGroupSpec multiGroup(String tenant, String region)
+ {
+ final RowSignature clustering = RowSignature.builder()
+ .add("tenant", ColumnType.STRING)
+ .add("region", ColumnType.STRING)
+ .build();
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ List.of(Arrays.asList(tenant, region))
+ );
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("tenant", "region", ColumnHolder.TIME_COLUMN_NAME, "metric"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(
+ OrderBy.ascending("tenant"),
+ OrderBy.ascending("region"),
+ OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)
+ ),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built.specs().get(0);
+ }
+
+ /** IoT-style mixed-type clustering: {@code (device_id LONG, region STRING)}. */
+ private static TableClusterGroupSpec deviceRegionGroup(long deviceId, String region)
+ {
+ final RowSignature clustering = RowSignature.builder()
+ .add("device_id", ColumnType.LONG)
+ .add("region", ColumnType.STRING)
+ .build();
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ List.of(Arrays.asList(deviceId, region))
+ );
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("device_id", "region", ColumnHolder.TIME_COLUMN_NAME, "temperature"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(
+ OrderBy.ascending("device_id"),
+ OrderBy.ascending("region"),
+ OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)
+ ),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built.specs().get(0);
+ }
+
+ /** Generic single-STRING-column clustering with a domain-agnostic column name. */
+ private static TableClusterGroupSpec partitionGroup(String key)
+ {
+ final RowSignature clustering = RowSignature.builder().add("partition", ColumnType.STRING).build();
+ final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+ clustering,
+ List.of(Arrays.asList(key))
+ );
+ new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of("partition", ColumnHolder.TIME_COLUMN_NAME, "payload"),
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ List.of(OrderBy.ascending("partition"), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ clustering,
+ null,
+ built.dictionaries(),
+ built.specs()
+ );
+ return built.specs().get(0);
+ }
+
+  /** Fixed-order immutable view of the given cluster-group specs; avoids the raw {@code List} type. */
+  private static List<TableClusterGroupSpec> groups(TableClusterGroupSpec... gs)
+  {
+    return ImmutableList.copyOf(gs);
+  }
+
+  /** Test convenience: prune without query-side virtual columns. Typed to avoid raw {@code List} warnings. */
+  private static List<TableClusterGroupSpec> pruneClusterGroups(
+      List<TableClusterGroupSpec> groups,
+      @Nullable Filter filter
+  )
+  {
+    return Projections.pruneClusterGroups(groups, filter, VirtualColumns.EMPTY);
+  }
+
+ private static LinkedHashSet filters(Filter... fs)
+ {
+ LinkedHashSet out = new LinkedHashSet<>();
+ for (Filter f : fs) {
+ out.add(f);
+ }
+ return out;
+ }
+
+ @Test
+ void testNullFilterReturnsAllGroups()
+ {
+ List all = groups(stringGroup("acme"), stringGroup("globex"));
+ Assertions.assertSame(all, pruneClusterGroups(all, null));
+ }
+
+  @Test
+  void testEmptyGroupsReturnsAllGroups()
+  {
+    // Empty group list short-circuits: same (empty) instance comes back even with a real filter.
+    List<TableClusterGroupSpec> empty = List.of();
+    Assertions.assertSame(empty, pruneClusterGroups(empty, new EqualityFilter("tenant", ColumnType.STRING, "acme", null)));
+  }
+
+  @Test
+  void testEqualityFilterOnStringClusteringColumnSelectsOneGroup()
+  {
+    // Equality on a STRING clustering column keeps only the matching group, by identity.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new EqualityFilter("tenant", ColumnType.STRING, "acme", null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testEqualityFilterOnLongClusteringColumn()
+  {
+    // Equality on a LONG clustering column routes through the long dictionary, not the string one.
+    List<TableClusterGroupSpec> all = groups(longGroup(5L), longGroup(10L), longGroup(20L));
+    Filter f = new EqualityFilter("priority", ColumnType.LONG, 10L, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(1), kept.get(0));
+  }
+
+  @Test
+  void testTypedInFilterOnLongClusteringColumn()
+  {
+    // IN(5, 20) keeps the two matching groups in original order.
+    List<TableClusterGroupSpec> all = groups(longGroup(5L), longGroup(10L), longGroup(20L));
+    Filter f = new TypedInFilter("priority", ColumnType.LONG, List.of(5L, 20L), null, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+    Assertions.assertSame(all.get(2), kept.get(1));
+  }
+
+  @Test
+  void testFilterOnNonClusteringColumnKeepsAllGroups()
+  {
+    // Filter doesn't reference a clustering column → can't prune anything.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new EqualityFilter("metric", ColumnType.LONG, 42L, null);
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testAndFilterIntersectsSubFilterResults()
+  {
+    // AND keeps only groups matched by every sub-filter (tenant AND region).
+    List<TableClusterGroupSpec> all = groups(
+        multiGroup("acme", "us-east-1"),
+        multiGroup("acme", "us-west-2"),
+        multiGroup("globex", "us-east-1")
+    );
+    Filter f = new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null),
+        new EqualityFilter("region", ColumnType.STRING, "us-west-2", null)
+    ));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(1), kept.get(0));
+  }
+
+  @Test
+  void testOrFilterUnionsSubFilterResults()
+  {
+    // OR keeps any group matched by at least one sub-filter, preserving input order.
+    List<TableClusterGroupSpec> all = groups(
+        stringGroup("acme"),
+        stringGroup("globex"),
+        stringGroup("oscorp")
+    );
+    Filter f = new OrFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null),
+        new EqualityFilter("tenant", ColumnType.STRING, "oscorp", null)
+    ));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+    Assertions.assertSame(all.get(2), kept.get(1));
+  }
+
+  @Test
+  void testNotFilterInvertsMatch()
+  {
+    // NOT over a provable clustering match keeps only the non-matching group.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new NotFilter(new EqualityFilter("tenant", ColumnType.STRING, "acme", null));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(1), kept.get(0));
+  }
+
+  /** Regression: a boolean (kept/pruned) matcher would flip UNKNOWN to "pruned" under NOT, dropping live data. */
+  @Test
+  void testNotOverNonClusteringColumnKeepsAllGroups()
+  {
+    // NOT(UNKNOWN) must stay UNKNOWN → keep conservatively.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new NotFilter(new EqualityFilter("metric", ColumnType.LONG, 42L, null));
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testDoubleNotPreservesPositive()
+  {
+    // NOT(NOT(p)) must behave like p under three-valued negation.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new NotFilter(new NotFilter(new EqualityFilter("tenant", ColumnType.STRING, "acme", null)));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testAndOfMustNotMatchAndUnknownPrunes()
+  {
+    // tenant=globex is MUST_NOT_MATCH for tenant=acme group; AND with anything stays MUST_NOT_MATCH → prune.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "globex", null), // MUST_NOT_MATCH for acme
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)         // UNKNOWN
+    ));
+    Assertions.assertTrue(pruneClusterGroups(all, f).isEmpty());
+  }
+
+  @Test
+  void testOrOfMustMatchAndUnknownKeeps()
+  {
+    // tenant=acme is MUST_MATCH for tenant=acme group; OR short-circuits to MUST_MATCH regardless of the other.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = new OrFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null), // MUST_MATCH
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)      // UNKNOWN
+    ));
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testOrOfMustNotMatchAndUnknownKeeps()
+  {
+    // tenant=globex is MUST_NOT_MATCH for tenant=acme group; metric=42 is UNKNOWN.
+    // OR(MUST_NOT_MATCH, UNKNOWN) = UNKNOWN → keep (rows with metric=42 might exist).
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = new OrFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "globex", null),
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)
+    ));
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testNotOfAndWithMustNotMatchKeeps()
+  {
+    // inner: AND(tenant=globex on tenant=acme group, metric=42)
+    // AND(MUST_NOT_MATCH, UNKNOWN) = MUST_NOT_MATCH (no row has tenant=globex, so AND is always false)
+    // NOT(MUST_NOT_MATCH) = MUST_MATCH → keep (NOT(false) is true for every row).
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = new NotFilter(new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "globex", null),
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)
+    )));
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testNotOfAndWithUnknownKeeps()
+  {
+    // inner: AND(tenant=acme, metric=42) on tenant=acme group
+    // AND(MUST_MATCH, UNKNOWN) = UNKNOWN
+    // NOT(UNKNOWN) = UNKNOWN → keep.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = new NotFilter(new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null),
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)
+    )));
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testMixedClusteringAndNonClusteringWithAndFilter()
+  {
+    // AND combines a clustering filter with a non-clustering filter; the latter contributes no info → result is
+    // determined purely by the clustering filter.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null),
+        new EqualityFilter("metric", ColumnType.LONG, 42L, null)
+    ));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testEmptyResultWhenNoGroupMatches()
+  {
+    // Every group is MUST_NOT_MATCH → everything is pruned.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new EqualityFilter("tenant", ColumnType.STRING, "unknown", null);
+    Assertions.assertTrue(pruneClusterGroups(all, f).isEmpty());
+  }
+
+  @Test
+  void testUnsupportedFilterTypeIsConservative()
+  {
+    // LikeDimFilter isn't recognized by the pruner → keep all groups (no pruning info).
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    Filter f = new LikeDimFilter("tenant", "acm%", null, null).toFilter();
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  /** Group whose clustering column is itself a virtual column (e.g. {@code lower(tenant)}). */
+  private static TableClusterGroupSpec virtualClusteringGroup(String loweredTenant)
+  {
+    final RowSignature clustering = RowSignature.builder().add("tenant_lower", ColumnType.STRING).build();
+    final ClusterGroupSchemaTestHelpers.Built built = ClusterGroupSchemaTestHelpers.buildClusterGroups(
+        clustering,
+        List.of(Arrays.asList(loweredTenant))
+    );
+    // NOTE(review): schema constructed but discarded — same pattern as the other fixtures; presumably
+    // the constructor validates the wiring as a side effect. Confirm intentional.
+    new ClusteredValueGroupsBaseTableSchema(
+        VirtualColumns.create(
+            new ExpressionVirtualColumn(
+                "tenant_lower",
+                "lower(tenant)",
+                ColumnType.STRING,
+                TestExprMacroTable.INSTANCE
+            )
+        ),
+        List.of("tenant_lower", ColumnHolder.TIME_COLUMN_NAME, "metric"),
+        new AggregatorFactory[]{new CountAggregatorFactory("count")},
+        List.of(OrderBy.ascending("tenant_lower"), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+        clustering,
+        null,
+        built.dictionaries(),
+        built.specs()
+    );
+    return built.specs().get(0);
+  }
+
+  @Test
+  void testVirtualClusteringMatchesQueryVirtualColumnByEquivalence()
+  {
+    List<TableClusterGroupSpec> all = groups(
+        virtualClusteringGroup("acme"),
+        virtualClusteringGroup("globex")
+    );
+    // Query has its own virtual column with a different output name but equivalent expression.
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "query_lower",
+            "lower(tenant)",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new EqualityFilter("query_lower", ColumnType.STRING, "acme", null);
+    List<TableClusterGroupSpec> kept = Projections.pruneClusterGroups(all, f, queryVcs);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testVirtualClusteringRejectsNonEquivalentQueryVirtualColumn()
+  {
+    List<TableClusterGroupSpec> all = groups(virtualClusteringGroup("acme"));
+    // Query VC is upper(tenant), not lower(tenant); not equivalent.
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "query_upper",
+            "upper(tenant)",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new EqualityFilter("query_upper", ColumnType.STRING, "ACME", null);
+    // No equivalence found → can't prune; keep group conservatively.
+    Assertions.assertEquals(all, Projections.pruneClusterGroups(all, f, queryVcs));
+  }
+
+  @Test
+  void testVirtualClusteringWithoutQueryVirtualColumnsIsConservative()
+  {
+    // If queryVirtualColumns is empty (or not provided), the pruner can't resolve the filter column to a
+    // clustering column via virtual-column equivalence → keep all groups.
+    List<TableClusterGroupSpec> all = groups(virtualClusteringGroup("acme"), virtualClusteringGroup("globex"));
+    Filter f = new EqualityFilter("query_lower", ColumnType.STRING, "acme", null);
+    Assertions.assertEquals(all, Projections.pruneClusterGroups(all, f, VirtualColumns.EMPTY));
+    // Same with the no-virtual-columns convenience overload.
+    Assertions.assertEquals(all, pruneClusterGroups(all, f));
+  }
+
+  @Test
+  void testDirectMatchWorksWithUnrelatedQueryVirtualColumns()
+  {
+    // Query VC by a name unrelated to any clustering column → query-VC check finds nothing for "tenant" → falls
+    // through to the direct-name path against the clustering signature. Unrelated VCs don't perturb the match.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "ts_floor",
+            "timestamp_floor(__time, 'P1D', null, null)",
+            ColumnType.LONG,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new EqualityFilter("tenant", ColumnType.STRING, "acme", null);
+    List<TableClusterGroupSpec> kept = Projections.pruneClusterGroups(all, f, queryVcs);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testQueryVirtualColumnShadowingClusteringNameWithoutEquivalenceIsConservative()
+  {
+    // Query has a VC named "tenant" that is NOT equivalent to the clustering column "tenant" (different
+    // expression, totally different semantics). Once the query defines a VC by that name, the filter's "tenant"
+    // resolves to the VC's value, NOT the clustering column. The pruner must not mistake the VC reference for a
+    // clustering-column reference and prune on the clustering tuple — that would silently prune live rows whose
+    // VC value happens to equal "acme" even though the clustering tenant is "globex". Without the query-VC-first
+    // check this regressed and pruned the globex group.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"), stringGroup("globex"));
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "tenant",        // shadows the clustering column name
+            "lower(other)",  // unrelated expression
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new EqualityFilter("tenant", ColumnType.STRING, "acme", null);
+    Assertions.assertEquals(all, Projections.pruneClusterGroups(all, f, queryVcs));
+  }
+
+  @Test
+  void testQueryVirtualColumnShadowingClusteringNameWithSameNameEquivalencePrunes()
+  {
+    // Group clusters on "tenant_lower" via a group VC of the same name (lower(tenant)). The query also defines
+    // a VC named "tenant_lower" with the identical expression — names collide AND the VCs are equivalent. The
+    // pruner should treat the filter as a clustering-column reference and prune normally (same-name target →
+    // no remap entry needed; the rewritten filter is identical to the original).
+    List<TableClusterGroupSpec> all = groups(
+        virtualClusteringGroup("acme"),
+        virtualClusteringGroup("globex")
+    );
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "tenant_lower", // SAME name as the group's clustering column / group VC
+            "lower(tenant)",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new EqualityFilter("tenant_lower", ColumnType.STRING, "acme", null);
+    List<TableClusterGroupSpec> kept = Projections.pruneClusterGroups(all, f, queryVcs);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+  }
+
+  @Test
+  void testQueryVirtualColumnShadowsOneOfMultipleClusteringColumnsStillPrunesOthers()
+  {
+    // Two-column clustering (tenant, region). Query VC named "tenant" shadows that clustering column with a
+    // non-equivalent expression, but "region" is not shadowed. The AND filter on both columns should still let
+    // the region leaf prune groups where region doesn't match (Kleene: UNKNOWN AND FALSE = FALSE), while groups
+    // whose region matches stay (UNKNOWN AND TRUE = UNKNOWN → keep conservatively).
+    List<TableClusterGroupSpec> all = groups(
+        multiGroup("acme", "us-east-1"),
+        multiGroup("acme", "us-west-2"),
+        multiGroup("globex", "us-east-1")
+    );
+    final VirtualColumns queryVcs = VirtualColumns.create(
+        new ExpressionVirtualColumn(
+            "tenant",
+            "lower(other)",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        )
+    );
+    Filter f = new AndFilter(filters(
+        new EqualityFilter("tenant", ColumnType.STRING, "acme", null),
+        new EqualityFilter("region", ColumnType.STRING, "us-east-1", null)
+    ));
+    List<TableClusterGroupSpec> kept = Projections.pruneClusterGroups(all, f, queryVcs);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0)); // (acme, us-east-1)
+    Assertions.assertSame(all.get(2), kept.get(1)); // (globex, us-east-1)
+  }
+
+ // --- Null clustering value handling ---
+ //
+ // Null clustering values are allowed; these tests pin the pruner's behavior across the supported filter types:
+ // - NullFilter: matches iff the column value is null.
+ // - EqualityFilter: does NOT match nulls by design — returns MUST_NOT_MATCH for a null clustering value.
+ // - TypedInFilter: matches nulls iff null is in the values list.
+
+  @Test
+  void testNullFilterMatchesNullClusteringValue()
+  {
+    // IS NULL keeps exactly the group whose clustering value is null.
+    final TableClusterGroupSpec nullGroup = stringGroup(null);
+    final TableClusterGroupSpec acmeGroup = stringGroup("acme");
+    List<TableClusterGroupSpec> all = groups(nullGroup, acmeGroup);
+    Filter f = NullFilter.forColumn("tenant");
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(nullGroup, kept.get(0));
+  }
+
+  @Test
+  void testNullFilterPrunesNonNullClusteringValue()
+  {
+    // IS NULL on a non-null clustering value is MUST_NOT_MATCH → prune.
+    List<TableClusterGroupSpec> all = groups(stringGroup("acme"));
+    Filter f = NullFilter.forColumn("tenant");
+    Assertions.assertTrue(pruneClusterGroups(all, f).isEmpty());
+  }
+
+  @Test
+  void testEqualityFilterPrunesNullClusteringValue()
+  {
+    // EqualityFilter does not match nulls (filter constructor rejects null match values; null group's rows never
+    // match a non-null literal). A group whose clustering value is null is pruned regardless of the literal.
+    List<TableClusterGroupSpec> all = groups(stringGroup(null), stringGroup("acme"));
+    Filter f = new EqualityFilter("tenant", ColumnType.STRING, "acme", null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(all.get(1), kept.get(0));
+  }
+
+  @Test
+  void testTypedInFilterIncludingNullMatchesNullClusteringValue()
+  {
+    // IN(null, 'globex') matches both the null group and the globex group; Arrays.asList permits the null entry.
+    final TableClusterGroupSpec nullGroup = stringGroup(null);
+    final TableClusterGroupSpec acmeGroup = stringGroup("acme");
+    final TableClusterGroupSpec globexGroup = stringGroup("globex");
+    List<TableClusterGroupSpec> all = groups(nullGroup, acmeGroup, globexGroup);
+    Filter f = new TypedInFilter("tenant", ColumnType.STRING, Arrays.asList(null, "globex"), null, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(nullGroup, kept.get(0));
+    Assertions.assertSame(globexGroup, kept.get(1));
+  }
+
+  @Test
+  void testTypedInFilterWithoutNullPrunesNullClusteringValue()
+  {
+    // IN without a null entry never matches a null clustering value → null group is pruned.
+    final TableClusterGroupSpec nullGroup = stringGroup(null);
+    final TableClusterGroupSpec acmeGroup = stringGroup("acme");
+    List<TableClusterGroupSpec> all = groups(nullGroup, acmeGroup);
+    Filter f = new TypedInFilter("tenant", ColumnType.STRING, List.of("acme", "globex"), null, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(acmeGroup, kept.get(0));
+  }
+
+  @Test
+  void testNotNullFilterOverClusteringColumnIsTriState()
+  {
+    // NOT IS NULL: must keep groups with non-null clustering values, prune groups with null clustering values.
+    final TableClusterGroupSpec nullGroup = stringGroup(null);
+    final TableClusterGroupSpec acmeGroup = stringGroup("acme");
+    List<TableClusterGroupSpec> all = groups(nullGroup, acmeGroup);
+    Filter f = new NotFilter(NullFilter.forColumn("tenant"));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(acmeGroup, kept.get(0));
+  }
+
+ // --- Non-tenant domains: IoT-style mixed-type and generic-partition fixtures. ---
+
+  @Test
+  void testIoTEqualityOnLongClusteringColumn()
+  {
+    // device_id=101 keeps both regions of that device, in input order.
+    final TableClusterGroupSpec d101East = deviceRegionGroup(101L, "us-east-1");
+    final TableClusterGroupSpec d202East = deviceRegionGroup(202L, "us-east-1");
+    final TableClusterGroupSpec d101West = deviceRegionGroup(101L, "us-west-2");
+    List<TableClusterGroupSpec> all = groups(d101East, d202East, d101West);
+    Filter f = new EqualityFilter("device_id", ColumnType.LONG, 101L, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(d101East, kept.get(0));
+    Assertions.assertSame(d101West, kept.get(1));
+  }
+
+  @Test
+  void testIoTAndAcrossMixedTypeClusteringColumns()
+  {
+    // AND across a LONG and a STRING clustering column; exercises per-type dictionary routing.
+    final TableClusterGroupSpec d101East = deviceRegionGroup(101L, "us-east-1");
+    final TableClusterGroupSpec d202East = deviceRegionGroup(202L, "us-east-1");
+    final TableClusterGroupSpec d101West = deviceRegionGroup(101L, "us-west-2");
+    List<TableClusterGroupSpec> all = groups(d101East, d202East, d101West);
+    Filter f = new AndFilter(filters(
+        new EqualityFilter("device_id", ColumnType.LONG, 101L, null),
+        new EqualityFilter("region", ColumnType.STRING, "us-east-1", null)
+    ));
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(d101East, kept.get(0));
+  }
+
+  @Test
+  void testIoTTypedInFilterOnLongDeviceId()
+  {
+    // Typed IN on the LONG column; pruner must route via the column's type, not the STRING dict.
+    List<TableClusterGroupSpec> all = groups(
+        deviceRegionGroup(101L, "us-east-1"),
+        deviceRegionGroup(202L, "us-east-1"),
+        deviceRegionGroup(303L, "us-east-1")
+    );
+    Filter f = new TypedInFilter("device_id", ColumnType.LONG, List.of(101L, 303L), null, null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(2, kept.size());
+    Assertions.assertSame(all.get(0), kept.get(0));
+    Assertions.assertSame(all.get(2), kept.get(1));
+  }
+
+  @Test
+  void testGenericPartitionClusteringPrunesOnEqualityFilter()
+  {
+    // Domain-agnostic single-column clustering prunes like any other STRING clustering column.
+    final TableClusterGroupSpec pA = partitionGroup("A");
+    final TableClusterGroupSpec pB = partitionGroup("B");
+    final TableClusterGroupSpec pC = partitionGroup("C");
+    List<TableClusterGroupSpec> all = groups(pA, pB, pC);
+    Filter f = new EqualityFilter("partition", ColumnType.STRING, "B", null);
+    List<TableClusterGroupSpec> kept = pruneClusterGroups(all, f);
+    Assertions.assertEquals(1, kept.size());
+    Assertions.assertSame(pB, kept.get(0));
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsTest.java b/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsTest.java
index 46fb8912c078..01b391001b77 100644
--- a/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/projections/ProjectionsTest.java
@@ -547,6 +547,44 @@ public void testSchemaMatchIntervalProjectionGranularity()
Assertions.assertEquals(expected, projectionMatch);
}
+ /** Shorthand for a single-column {@link RowSignature}. */
+ private static RowSignature sig(String name, ColumnType type)
+ {
+   return RowSignature.builder().add(name, type).build();
+ }
+
+ @Test
+ void testGetClusterGroupSegmentInternalFileName()
+ {
+   // Smoosh layout: __base$<ids joined by '_'>/<column name>. The IDs encode the group's clustering
+   // identity via the summary's per-column dictionaries.
+   Assertions.assertEquals(
+       "__base$0/tenant",
+       Projections.getClusterGroupSegmentInternalFileName(List.of(0), "tenant")
+   );
+   Assertions.assertEquals(
+       "__base$5/__time",
+       Projections.getClusterGroupSegmentInternalFileName(List.of(5), "__time")
+   );
+   Assertions.assertEquals(
+       "__base$0_1_3/__time",
+       Projections.getClusterGroupSegmentInternalFileName(List.of(0, 1, 3), "__time")
+   );
+   // Prefix form ends at the '/' so callers can enumerate all files of one group.
+   Assertions.assertEquals("__base$42/", Projections.getClusterGroupSegmentInternalFilePrefix(List.of(42)));
+   Assertions.assertEquals("__base$1_2/", Projections.getClusterGroupSegmentInternalFilePrefix(List.of(1, 2)));
+ }
+
+ @Test
+ void testIsAllowedClusteringType()
+ {
+   // Scalar primitives are clusterable; null, arrays, and complex types are rejected.
+   Assertions.assertTrue(Projections.isAllowedClusteringType(ColumnType.STRING));
+   Assertions.assertTrue(Projections.isAllowedClusteringType(ColumnType.LONG));
+   Assertions.assertTrue(Projections.isAllowedClusteringType(ColumnType.DOUBLE));
+   Assertions.assertTrue(Projections.isAllowedClusteringType(ColumnType.FLOAT));
+   Assertions.assertFalse(Projections.isAllowedClusteringType(null));
+   Assertions.assertFalse(Projections.isAllowedClusteringType(ColumnType.STRING_ARRAY));
+   Assertions.assertFalse(Projections.isAllowedClusteringType(ColumnType.UNKNOWN_COMPLEX));
+ }
+
private static class RowSignatureChecker implements Projections.PhysicalColumnChecker
{
private final RowSignature rowSignature;
diff --git a/processing/src/test/java/org/apache/druid/segment/projections/TableClusterGroupSpecTest.java b/processing/src/test/java/org/apache/druid/segment/projections/TableClusterGroupSpecTest.java
new file mode 100644
index 000000000000..7026276108d3
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/projections/TableClusterGroupSpecTest.java
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.projections;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import nl.jqno.equalsverifier.Warning;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+class TableClusterGroupSpecTest
+{
+ private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper();
+
+ // Column names that appear in more than one test fixture.
+ private static final String COL_TENANT = "tenant";
+ private static final String COL_REGION = "region";
+ private static final String COL_PRIORITY = "priority";
+
+ // Clustering-value literals used by more than one test.
+ private static final String VAL_ACME = "acme";
+ private static final String VAL_GLOBEX = "globex";
+ private static final String VAL_US_EAST_1 = "us-east-1";
+ private static final String VAL_US_WEST_2 = "us-west-2";
+
+ // Common clustering signatures reused across tests.
+ private static final RowSignature TENANT_CLUSTER_SIGNATURE =
+ RowSignature.builder().add(COL_TENANT, ColumnType.STRING).build();
+ private static final RowSignature TENANT_REGION_CLUSTER_SIGNATURE =
+ RowSignature.builder()
+ .add(COL_TENANT, ColumnType.STRING)
+ .add(COL_REGION, ColumnType.STRING)
+ .build();
+
+  /** Schema + the specs it wraps, returned together so tests can assert on both. (Fixed raw List.) */
+  private record Built(ClusteredValueGroupsBaseTableSchema schema, List<TableClusterGroupSpec> specs)
+  {
+  }
+
+  /**
+   * Build a schema with one cluster group per supplied typed tuple.
+   * (Restored generics mangled in transit: the tuples parameter and the local collections were raw.)
+   */
+  private static Built buildSummary(RowSignature clusteringColumns, List<? extends List<?>> tuples)
+  {
+    final ClusterGroupSchemaTestHelpers.Built built =
+        ClusterGroupSchemaTestHelpers.buildClusterGroups(clusteringColumns, tuples);
+    final List<String> dataColumns = List.of(ColumnHolder.TIME_COLUMN_NAME, COL_REGION, "metric");
+    final List<String> allColumns = new ArrayList<>(clusteringColumns.getColumnNames());
+    allColumns.addAll(dataColumns);
+    // Ordering: all clustering columns first (ascending), then time, then region.
+    final List<OrderBy> ordering = new ArrayList<>();
+    for (String col : clusteringColumns.getColumnNames()) {
+      ordering.add(OrderBy.ascending(col));
+    }
+    ordering.add(OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME));
+    ordering.add(OrderBy.ascending(COL_REGION));
+    final ClusteredValueGroupsBaseTableSchema schema = new ClusteredValueGroupsBaseTableSchema(
+        VirtualColumns.EMPTY,
+        allColumns,
+        new AggregatorFactory[]{new CountAggregatorFactory("count")},
+        ordering,
+        clusteringColumns,
+        null,
+        built.dictionaries(),
+        built.specs()
+    );
+    return new Built(schema, built.specs());
+  }
+
+  /** Typed-tuple convenience: {@link Arrays#asList} allows null entries (unlike {@link List#of}). */
+  private static List<Object> tuple(Object... values)
+  {
+    return Arrays.asList(values);
+  }
+
+  /** Round-trip via the enclosing summary (specs aren't ProjectionSchema subtypes); returns specs[0]. */
+  private static TableClusterGroupSpec roundTrip(ClusteredValueGroupsBaseTableSchema sum) throws JsonProcessingException
+  {
+    final String json = JSON_MAPPER.writeValueAsString(sum);
+    // Deserialize through the polymorphic base type so the type tag is exercised too.
+    final ClusteredValueGroupsBaseTableSchema deserialized =
+        (ClusteredValueGroupsBaseTableSchema) JSON_MAPPER.readValue(json, ProjectionSchema.class);
+    return deserialized.getClusterGroups().get(0);
+  }
+
+  @Test
+  void testDictionariesAndIdsResolveFromTuples()
+  {
+    // Both columns are STRING, so their values share one merged dict.
+    final Built b = buildSummary(
+        TENANT_REGION_CLUSTER_SIGNATURE,
+        List.of(
+            tuple(VAL_GLOBEX, VAL_US_EAST_1),
+            tuple(VAL_ACME, VAL_US_WEST_2),
+            tuple(VAL_ACME, VAL_US_EAST_1)
+        )
+    );
+    // Merged STRING dict is sorted ascending; LONG dict is unused here.
+    Assertions.assertEquals(
+        List.of(VAL_ACME, VAL_GLOBEX, VAL_US_EAST_1, VAL_US_WEST_2),
+        b.schema().getClusteringDictionaries().getStringDictionary()
+    );
+    Assertions.assertEquals(List.of(), b.schema().getClusteringDictionaries().getLongDictionary());
+    // Each spec's IDs are positions into the merged dict, one per clustering column.
+    Assertions.assertEquals(List.of(1, 2), b.specs().get(0).getClusteringValueIds());
+    Assertions.assertEquals(List.of(0, 3), b.specs().get(1).getClusteringValueIds());
+    Assertions.assertEquals(List.of(0, 2), b.specs().get(2).getClusteringValueIds());
+    // lookupClusteringValues resolves IDs back to the original typed tuples.
+    Assertions.assertArrayEquals(new Object[]{VAL_GLOBEX, VAL_US_EAST_1}, b.specs().get(0).lookupClusteringValues());
+    Assertions.assertArrayEquals(new Object[]{VAL_ACME, VAL_US_WEST_2}, b.specs().get(1).lookupClusteringValues());
+    Assertions.assertArrayEquals(new Object[]{VAL_ACME, VAL_US_EAST_1}, b.specs().get(2).lookupClusteringValues());
+  }
+
+  @Test
+  void testDictionarySortsNullsFirst()
+  {
+    // Null is placed at position 0 in the STRING dictionary, with non-nulls following in ascending order.
+    final Built b = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple((Object) null), tuple(VAL_ACME)));
+    final List<String> stringDict = b.schema().getClusteringDictionaries().getStringDictionary();
+    Assertions.assertEquals(2, stringDict.size());
+    Assertions.assertNull(stringDict.get(0));
+    Assertions.assertEquals(VAL_ACME, stringDict.get(1));
+    Assertions.assertEquals(List.of(0), b.specs().get(0).getClusteringValueIds());
+    Assertions.assertEquals(List.of(1), b.specs().get(1).getClusteringValueIds());
+  }
+
+  @Test
+  void testSerdeStringValue()
+  {
+    // A single STRING clustering value survives the summary JSON round-trip intact.
+    final Built b = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple(VAL_ACME)));
+    Assertions.assertEquals(b.specs().get(0), roundTrip(b.schema()));
+  }
+
+  @Test
+  void testSerdeLongValueCoercedFromJsonNumber()
+  {
+    // JSON numbers could deserialize as Integer; the spec must coerce back to Long per the column type.
+    final Built b = buildSummary(
+        RowSignature.builder().add(COL_PRIORITY, ColumnType.LONG).build(),
+        List.of(tuple(5L))
+    );
+    final TableClusterGroupSpec roundTripped = roundTrip(b.schema());
+    Assertions.assertEquals(b.specs().get(0), roundTripped);
+    Assertions.assertEquals(Long.class, roundTripped.lookupClusteringValues()[0].getClass());
+    Assertions.assertEquals(5L, roundTripped.lookupClusteringValues()[0]);
+  }
+
+  @Test
+  void testSerdeFloatValue()
+  {
+    // FLOAT clustering values must come back as Float (not Double) after the JSON round-trip.
+    final Built b = buildSummary(
+        RowSignature.builder().add("ratio", ColumnType.FLOAT).build(),
+        List.of(tuple(0.5f))
+    );
+    final TableClusterGroupSpec roundTripped = roundTrip(b.schema());
+    Assertions.assertEquals(b.specs().get(0), roundTripped);
+    Assertions.assertEquals(Float.class, roundTripped.lookupClusteringValues()[0].getClass());
+    Assertions.assertEquals(0.5f, roundTripped.lookupClusteringValues()[0]);
+  }
+
+  @Test
+  void testSerdeMultiColumnWithSpecialCharsAndMixedTypes()
+  {
+    // '/' in a value must not be confused with any path/name separator used by the serialized form.
+    final Built b = buildSummary(
+        RowSignature.builder()
+                    .add(COL_TENANT, ColumnType.STRING)
+                    .add(COL_PRIORITY, ColumnType.LONG)
+                    .build(),
+        List.of(tuple("A/B", 5L))
+    );
+    Assertions.assertEquals(b.specs().get(0), roundTrip(b.schema()));
+  }
+
+  @Test
+  void testMixedTypeMultiGroupDictionaryRouting()
+  {
+    // LONG and STRING values land in separate typed dictionaries; each sorted ascending independently.
+    final Built b = buildSummary(
+        RowSignature.builder()
+                    .add("device_id", ColumnType.LONG)
+                    .add("region", ColumnType.STRING)
+                    .build(),
+        List.of(
+            tuple(202L, "us-east-1"),
+            tuple(101L, "us-west-2"),
+            tuple(101L, "us-east-1")
+        )
+    );
+    Assertions.assertEquals(List.of(101L, 202L), b.schema().getClusteringDictionaries().getLongDictionary());
+    Assertions.assertEquals(
+        List.of("us-east-1", "us-west-2"),
+        b.schema().getClusteringDictionaries().getStringDictionary()
+    );
+    // IDs are positions in each column's typed dict; LONG for position 0, STRING for position 1.
+    Assertions.assertEquals(List.of(1, 0), b.specs().get(0).getClusteringValueIds());
+    Assertions.assertEquals(List.of(0, 1), b.specs().get(1).getClusteringValueIds());
+    Assertions.assertEquals(List.of(0, 0), b.specs().get(2).getClusteringValueIds());
+    Assertions.assertArrayEquals(new Object[]{202L, "us-east-1"}, b.specs().get(0).lookupClusteringValues());
+    Assertions.assertArrayEquals(new Object[]{101L, "us-west-2"}, b.specs().get(1).lookupClusteringValues());
+    Assertions.assertArrayEquals(new Object[]{101L, "us-east-1"}, b.specs().get(2).lookupClusteringValues());
+  }
+
+ @Test
+ void testJsonShapeIsOnlyClusteringValueIds() throws JsonProcessingException
+ {
+ // Spec carries only clusteringValueIds (+ type tag); summary-owned fields must not leak in.
+ final Built b = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple(VAL_ACME)));
+ final String json = JSON_MAPPER.writeValueAsString(b.schema());
+ Assertions.assertTrue(json.contains("\"clusterGroups\""), "summary serializes clusterGroups");
+ Assertions.assertTrue(json.contains("\"clusteringDictionaries\""), "summary serializes clusteringDictionaries");
+ Assertions.assertTrue(json.contains("\"clusteringValueIds\":[0]"), "spec carries clusteringValueIds");
+ // Any of the summary-only fields appearing inside the spec object would mean we duplicated them.
+ // Find the matching ']' of the clusterGroups array by bracket depth. The previous
+ // indexOf("]", specStart) found the FIRST ']', which closes the nested
+ // clusteringValueIds":[0] array and truncated specJson, letting the assertFalse checks
+ // below pass vacuously for any field serialized after clusteringValueIds.
+ final int specStart = json.indexOf("\"clusterGroups\":[");
+ final int arrayOpen = json.indexOf('[', specStart);
+ int depth = 0;
+ int specEnd = -1;
+ for (int i = arrayOpen; i < json.length() && specEnd < 0; i++) {
+ final char ch = json.charAt(i);
+ if (ch == '[') {
+ depth++;
+ } else if (ch == ']') {
+ depth--;
+ if (depth == 0) {
+ specEnd = i;
+ }
+ }
+ }
+ // NOTE(review): the scan does not skip brackets inside JSON string literals; that is fine for
+ // this fixture because no serialized value contains '[' or ']'.
+ final String specJson = json.substring(specStart, specEnd + 1);
+ Assertions.assertFalse(specJson.contains("\"columns\""), "spec must not carry columns");
+ Assertions.assertFalse(specJson.contains("\"ordering\""), "spec must not carry ordering");
+ Assertions.assertFalse(specJson.contains("\"clusteringColumns\""), "spec must not carry clusteringColumns");
+ Assertions.assertFalse(specJson.contains("\"aggregators\""), "spec must not carry aggregators");
+ Assertions.assertFalse(specJson.contains("\"virtualColumns\""), "spec must not carry virtualColumns");
+ // serialized form is dictionary IDs, not typed values: spec must not contain the literal value.
+ Assertions.assertFalse(specJson.contains("\"" + VAL_ACME + "\""), "spec must not carry typed values inline");
+ }
+
+ @Test
+ void testNullClusteringValueRoundTrips() throws JsonProcessingException
+ {
+ // A null clustering value must be representable in the dictionary encoding and decode back to null.
+ final Built b = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple((Object) null)));
+ final TableClusterGroupSpec roundTripped = roundTrip(b.schema());
+ Assertions.assertEquals(b.specs().get(0), roundTripped);
+ Assertions.assertNull(roundTripped.lookupClusteringValues()[0]);
+ }
+
+ @Test
+ void testMixedNullAndValueClusteringRoundTrips() throws JsonProcessingException
+ {
+ // A single tuple mixing a non-null and a null clustering value round-trips intact.
+ final Built b = buildSummary(TENANT_REGION_CLUSTER_SIGNATURE, List.of(tuple(VAL_ACME, null)));
+ Assertions.assertEquals(b.specs().get(0), roundTrip(b.schema()));
+ }
+
+ @Test
+ void testMismatchedTupleSizeRejectedByHelper()
+ {
+ // A 1-element tuple against a 2-column clustering signature must be rejected up front.
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> buildSummary(TENANT_REGION_CLUSTER_SIGNATURE, List.of(tuple(VAL_ACME)))
+ );
+ Assertions.assertTrue(t.getMessage().contains("must match clusteringColumns size"));
+ }
+
+ @Test
+ void testGettersBeforeSetSummaryThrow()
+ {
+ // Spec without a summary back-reference: summary-dependent getters throw.
+ // Both fail fast with DruidException rather than an opaque NullPointerException.
+ final TableClusterGroupSpec spec = new TableClusterGroupSpec(List.of(0), null);
+ Assertions.assertThrows(DruidException.class, spec::lookupClusteringValues);
+ Assertions.assertThrows(DruidException.class, spec::getSummary);
+ }
+
+ @Test
+ void testSetSummaryTwiceRejected()
+ {
+ // setSummary is write-once: buildSummary already wired a summary, so a second set —
+ // even with a different summary instance — must be rejected.
+ final Built b = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple(VAL_ACME)));
+ final Built other = buildSummary(TENANT_CLUSTER_SIGNATURE, List.of(tuple(VAL_GLOBEX)));
+ Throwable t = Assertions.assertThrows(
+ DruidException.class,
+ () -> b.specs().get(0).setSummary(other.schema())
+ );
+ Assertions.assertTrue(t.getMessage().contains("summary already set"));
+ }
+
+ @Test
+ void testEqualsAndHashcode()
+ {
+ // Two distinguishable summary instances so EqualsVerifier can exercise the (ignored)
+ // 'summary' back-reference field with prefab values.
+ final ClusteredValueGroupsBaseTableSchema redSummary = new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of(COL_TENANT, ColumnHolder.TIME_COLUMN_NAME),
+ null,
+ List.of(OrderBy.ascending(COL_TENANT), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ TENANT_CLUSTER_SIGNATURE,
+ null,
+ ClusteringDictionaries.EMPTY,
+ null
+ );
+ final ClusteredValueGroupsBaseTableSchema blueSummary = new ClusteredValueGroupsBaseTableSchema(
+ VirtualColumns.EMPTY,
+ List.of(COL_PRIORITY, ColumnHolder.TIME_COLUMN_NAME),
+ null,
+ List.of(OrderBy.ascending(COL_PRIORITY), OrderBy.ascending(ColumnHolder.TIME_COLUMN_NAME)),
+ RowSignature.builder().add(COL_PRIORITY, ColumnType.LONG).build(),
+ null,
+ ClusteringDictionaries.EMPTY,
+ null
+ );
+ // summary and numRows are excluded from equals/hashCode; NONFINAL_FIELDS is suppressed
+ // because summary is assigned after construction via setSummary.
+ EqualsVerifier.forClass(TableClusterGroupSpec.class)
+ .withIgnoredFields("summary", "numRows")
+ .withPrefabValues(ClusteredValueGroupsBaseTableSchema.class, redSummary, blueSummary)
+ .suppress(Warning.NONFINAL_FIELDS)
+ .usingGetClass()
+ .verify();
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/vector/ConcatenatingVectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/vector/ConcatenatingVectorCursorTest.java
new file mode 100644
index 000000000000..5a9ebf1ba2c2
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/vector/ConcatenatingVectorCursorTest.java
@@ -0,0 +1,540 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.vector;
+
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.Cursor;
+import org.apache.druid.segment.CursorHolder;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.projections.ClusteringVectorColumnSelectorFactory;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
+
+class ConcatenatingVectorCursorTest
+{
+ private static final RowSignature CLUSTER_SIGNATURE = RowSignature.builder().add("tenant", ColumnType.STRING).build();
+
+ private final Closer closer = Closer.create();
+
+ @Test
+ void testWalksTwoNonEmptyGroupsBackToBack()
+ {
+ // Two non-empty groups concatenate back-to-back: each group's metric rows surface with that
+ // group's constant clustering value, and every opened holder is closed via the test Closer.
+ FakeVectorCursorHolder a = new FakeVectorCursorHolder(List.of("a1", "a2"), 4);
+ FakeVectorCursorHolder b = new FakeVectorCursorHolder(List.of("b1"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"acme"}, new Object[]{"globex"}),
+ wrapper
+ );
+
+ // Selectors are created once and must remain valid across the group transition.
+ VectorObjectSelector tenant = c.getColumnSelectorFactory().makeObjectSelector("tenant");
+ VectorObjectSelector metric = c.getColumnSelectorFactory().makeObjectSelector("metric");
+
+ // Group 1 vector: ["a1", "a2"]
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals(2, c.getCurrentVectorSize());
+ Object[] tenantVec1 = tenant.getObjectVector();
+ Object[] metricVec1 = metric.getObjectVector();
+ Assertions.assertEquals("acme", tenantVec1[0]);
+ Assertions.assertEquals("acme", tenantVec1[1]);
+ Assertions.assertEquals("a1", metricVec1[0]);
+ Assertions.assertEquals("a2", metricVec1[1]);
+ c.advance();
+
+ // Group 2 vector: ["b1"]
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals(1, c.getCurrentVectorSize());
+ Object[] tenantVec2 = tenant.getObjectVector();
+ Object[] metricVec2 = metric.getObjectVector();
+ Assertions.assertEquals("globex", tenantVec2[0]);
+ Assertions.assertEquals("b1", metricVec2[0]);
+ c.advance();
+
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(a.closed);
+ Assertions.assertTrue(b.closed);
+ }
+
+ @Test
+ void testSkipsLeadingEmptyGroup()
+ {
+ // A leading empty group contributes no vectors: the cursor starts positioned on the first
+ // non-empty group, yet the skipped holder is still closed on Closer.close().
+ FakeVectorCursorHolder empty = new FakeVectorCursorHolder(List.of(), 4);
+ FakeVectorCursorHolder full = new FakeVectorCursorHolder(List.of("x"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"placeholder"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(empty), holderSupplier(full)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ VectorObjectSelector tenant = c.getColumnSelectorFactory().makeObjectSelector("tenant");
+ Assertions.assertFalse(c.isDone());
+ // First visible tenant value belongs to the second (non-empty) group.
+ Assertions.assertEquals("b", tenant.getObjectVector()[0]);
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(empty.closed);
+ Assertions.assertTrue(full.closed);
+ }
+
+ @Test
+ void testSkipsTrailingEmptyGroup()
+ {
+ // Mirror of testSkipsLeadingEmptyGroup with the empty group last: the cursor is exhausted
+ // right after the non-empty group, and the trailing empty group produces no extra vector.
+ FakeVectorCursorHolder full = new FakeVectorCursorHolder(List.of("x"), 4);
+ FakeVectorCursorHolder empty = new FakeVectorCursorHolder(List.of(), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(full), holderSupplier(empty)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ VectorObjectSelector tenant = c.getColumnSelectorFactory().makeObjectSelector("tenant");
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals("a", tenant.getObjectVector()[0]);
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ // Consistency with the leading-empty twin: both holders must be closed once the Closer
+ // closes — the trailing empty one was necessarily opened to determine exhaustion.
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(full.closed);
+ Assertions.assertTrue(empty.closed);
+ }
+
+ @Test
+ void testAllEmptyGroups()
+ {
+ // With only empty groups the cursor is done from the start and reports a zero-sized vector.
+ FakeVectorCursorHolder e1 = new FakeVectorCursorHolder(List.of(), 4);
+ FakeVectorCursorHolder e2 = new FakeVectorCursorHolder(List.of(), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"placeholder"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(e1), holderSupplier(e2)),
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertEquals(0, c.getCurrentVectorSize());
+ }
+
+ @Test
+ void testSingleGroupDegenerateCase()
+ {
+ // Degenerate single-group case: behaves like a plain cursor over that group's rows, with the
+ // group's clustering value repeated across the whole vector.
+ FakeVectorCursorHolder only = new FakeVectorCursorHolder(List.of("x", "y"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(only)),
+ List.of(new Object[]{"a"}),
+ wrapper
+ );
+
+ VectorObjectSelector tenant = c.getColumnSelectorFactory().makeObjectSelector("tenant");
+ VectorObjectSelector metric = c.getColumnSelectorFactory().makeObjectSelector("metric");
+
+ Assertions.assertEquals(2, c.getCurrentVectorSize());
+ Assertions.assertEquals("a", tenant.getObjectVector()[0]);
+ Assertions.assertEquals("a", tenant.getObjectVector()[1]);
+ Assertions.assertEquals("x", metric.getObjectVector()[0]);
+ Assertions.assertEquals("y", metric.getObjectVector()[1]);
+ c.advance();
+ Assertions.assertTrue(c.isDone());
+ }
+
+ @Test
+ void testPartialVectorAtGroupBoundary()
+ {
+ // Vectors never span a group boundary: group A ends with a partial vector and group B
+ // starts a fresh one, even though A's last vector had room left.
+ // Group A has 3 rows but max vector size is 4; its vector is partial (size = 3, not 4).
+ FakeVectorCursorHolder a = new FakeVectorCursorHolder(List.of("a1", "a2", "a3"), 4);
+ FakeVectorCursorHolder b = new FakeVectorCursorHolder(List.of("b1", "b2"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"acme"}, new Object[]{"globex"}),
+ wrapper
+ );
+
+ VectorObjectSelector tenant = c.getColumnSelectorFactory().makeObjectSelector("tenant");
+
+ // Group A's vector is partial; 3 of max 4.
+ Assertions.assertEquals(3, c.getCurrentVectorSize());
+ Assertions.assertEquals("acme", tenant.getObjectVector()[0]);
+ c.advance();
+
+ // Group B's vector starts fresh; 2 rows.
+ Assertions.assertFalse(c.isDone());
+ Assertions.assertEquals(2, c.getCurrentVectorSize());
+ Assertions.assertEquals("globex", tenant.getObjectVector()[0]);
+ Assertions.assertEquals("globex", tenant.getObjectVector()[1]);
+ c.advance();
+
+ Assertions.assertTrue(c.isDone());
+ }
+
+ @Test
+ void testCloserClosesAllOpenedHolders()
+ {
+ // Every holder the cursor opened during its lifetime is registered with (and closed by)
+ // the owning Closer once the cursor is exhausted.
+ FakeVectorCursorHolder a = new FakeVectorCursorHolder(List.of("a1"), 4);
+ FakeVectorCursorHolder b = new FakeVectorCursorHolder(List.of("b1"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"x"}
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(a), holderSupplier(b)),
+ List.of(new Object[]{"x"}, new Object[]{"y"}),
+ wrapper
+ );
+
+ c.getColumnSelectorFactory();
+ c.advance();
+ c.advance(); // exhausts
+ // Outer holder owns the closer; ConcatenatingVectorCursor itself is not Closeable.
+ Assertions.assertDoesNotThrow(closer::close);
+ Assertions.assertTrue(a.closed);
+ Assertions.assertTrue(b.closed);
+ }
+
+ @Test
+ void testGroupsAreOpenedLazilyOnTransitionNotEagerly()
+ {
+ // The second group's supplier must not be invoked until the first group is exhausted;
+ // a one-element boolean array gives the lambda a mutable capture to record the open.
+ final boolean[] secondOpened = {false};
+ FakeVectorCursorHolder first = new FakeVectorCursorHolder(List.of("x"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"a"}
+ );
+
+ // Generic type restored; the previous "List>" was invalid Java (stripped type parameters).
+ // NOTE(review): element type assumed to be Supplier<CursorHolder> per holderSupplier and the
+ // imports — confirm against ConcatenatingVectorCursor's constructor signature.
+ List<Supplier<CursorHolder>> suppliers = new ArrayList<>();
+ suppliers.add(holderSupplier(first));
+ suppliers.add(() -> {
+ secondOpened[0] = true;
+ return new FakeVectorCursorHolder(List.of("y"), 4);
+ });
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ suppliers,
+ List.of(new Object[]{"a"}, new Object[]{"b"}),
+ wrapper
+ );
+
+ c.getColumnSelectorFactory();
+ Assertions.assertFalse(secondOpened[0]);
+
+ c.advance(); // exhausts first → opens second
+ Assertions.assertTrue(secondOpened[0]);
+ }
+
+ @Test
+ void testMaxVectorSizeIsConfiguredValueAcrossAllStates()
+ {
+ // The configured maxVectorSize is a query-level constant; it must not be derived from the current group's
+ // cursor (which may not exist before init or after exhaustion).
+ final int configuredMaxVectorSize = 17;
+ // Deliberately different from the fake's internal vector size (4) so a wrong source is visible.
+ FakeVectorCursorHolder a = new FakeVectorCursorHolder(List.of("a1"), 4);
+
+ ClusteringVectorColumnSelectorFactory wrapper = new ClusteringVectorColumnSelectorFactory(
+ new FakeVectorFactory(List.of(), new int[]{0}, 4),
+ CLUSTER_SIGNATURE,
+ new Object[]{"acme"},
+ configuredMaxVectorSize
+ );
+
+ ConcatenatingVectorCursor c = new ConcatenatingVectorCursor(
+ List.of(holderSupplier(a)),
+ List.of(new Object[]{"acme"}),
+ wrapper
+ );
+
+ // Pre-init.
+ Assertions.assertEquals(configuredMaxVectorSize, c.getMaxVectorSize());
+ // Wrapper factory reports the same.
+ Assertions.assertEquals(configuredMaxVectorSize, c.getColumnSelectorFactory().getMaxVectorSize());
+ // After init.
+ Assertions.assertEquals(configuredMaxVectorSize, c.getMaxVectorSize());
+ c.advance();
+ // Post-exhaustion.
+ Assertions.assertTrue(c.isDone());
+ Assertions.assertEquals(configuredMaxVectorSize, c.getMaxVectorSize());
+ }
+
+ // Memoizes the holder and registers it with the test Closer on first open only, so the
+ // close-tracking assertions observe exactly the holders the cursor actually opened.
+ // NOTE(review): generic type restored from the raw "Supplier" declaration (stripped type
+ // parameter); assumes FakeVectorCursorHolder implements CursorHolder — confirm against
+ // ConcatenatingVectorCursor's expected supplier type.
+ private Supplier<CursorHolder> holderSupplier(FakeVectorCursorHolder h)
+ {
+ return Suppliers.memoize(() -> closer.register(h))::get;
+ }
+
+ // Minimal VectorObjectSelector backed by an in-memory row list; vectorizes rows starting at a
+ // shared mutable offset, mimicking how real selectors reuse one output buffer per batch.
+ private static class FakeVectorObjectSelector implements VectorObjectSelector
+ {
+ // Generic parameters restored (the raw "List" declarations looked like stripped generics);
+ // a wildcard keeps every caller compatible since rows are only read here.
+ private final List<?> rows;
+ // One-element array used as a mutable cell: offset[0] is the current row position.
+ // NOTE(review): presumably advanced by the driving cursor/factory — confirm in FakeVectorFactory.
+ private final int[] offset;
+ private final int maxVectorSize;
+ // Reused output buffer, sized to the maximum vector, as real vector selectors do.
+ private final Object[] buffer;
+
+ FakeVectorObjectSelector(List<?> rows, int[] offset, int maxVectorSize)
+ {
+ this.rows = rows;
+ this.offset = offset;
+ this.maxVectorSize = maxVectorSize;
+ this.buffer = new Object[maxVectorSize];
+ }
+
+ @Override
+ public Object[] getObjectVector()
+ {
+ // Copy up to maxVectorSize remaining rows into the shared buffer; stale entries past
+ // 'size' may remain from a previous call, matching real selector semantics.
+ final int size = Math.min(maxVectorSize, rows.size() - offset[0]);
+ for (int i = 0; i < size; i++) {
+ buffer[i] = rows.get(offset[0] + i);
+ }
+ return buffer;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return maxVectorSize;
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return Math.min(maxVectorSize, rows.size() - offset[0]);
+ }
+ }
+
+ private static class FakeVectorFactory implements VectorColumnSelectorFactory
+ {
+ private final List metricRows;
+ private final int[] offset;
+ private final int maxVectorSize;
+
+ FakeVectorFactory(List