Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,8 @@ private void processDimensionsSpec(final QueryableIndex index)
schema = new StringDimensionSchema(
schema.getName(),
DimensionSchema.MultiValueHandling.ARRAY,
schema.hasBitmapIndex()
schema.hasBitmapIndex(),
((StringDimensionSchema) schema).getColumnFormatSpec()
);
}
dimensionSchemaMap.put(
Expand Down Expand Up @@ -1258,7 +1259,8 @@ private void processProjections(final QueryableIndex index)
new StringDimensionSchema(
columnSchema.getName(),
DimensionSchema.MultiValueHandling.ARRAY,
columnSchema.hasBitmapIndex()
columnSchema.hasBitmapIndex(),
((StringDimensionSchema) columnSchema).getColumnFormatSpec()
)
);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public DimensionHandler getDimensionHandler()
}
maxStringLength = columnFormatSpec.getMaxStringLength();
}
return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength);
return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength, columnFormatSpec);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ private static IndexedInts getRow(ColumnValueSelector s)
private final boolean hasSpatialIndexes;
@Nullable
private final Integer maxStringLength;
@Nullable
private final StringColumnFormatSpec columnFormatSpec;

public StringDimensionHandler(
String dimensionName,
Expand All @@ -126,12 +128,25 @@ public StringDimensionHandler(
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength
)
{
this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, null);
}

public StringDimensionHandler(
String dimensionName,
MultiValueHandling multiValueHandling,
boolean hasBitmapIndexes,
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength,
@Nullable StringColumnFormatSpec columnFormatSpec
)
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
this.maxStringLength = maxStringLength;
this.columnFormatSpec = columnFormatSpec;
}

@Override
Expand All @@ -146,6 +161,9 @@ public DimensionSchema getDimensionSchema(ColumnCapabilities capabilities)
if (hasSpatialIndexes) {
return new NewSpatialDimensionSchema(dimensionName, Collections.singletonList(dimensionName));
}
if (columnFormatSpec != null) {
return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes, columnFormatSpec);
}
return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes);
}

Expand Down Expand Up @@ -176,7 +194,7 @@ public SettableColumnValueSelector makeNewSettableEncodedValueSelector()
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength);
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, columnFormatSpec);
}

@Override
Expand Down Expand Up @@ -207,7 +225,8 @@ public DimensionMergerV9 makeMerger(
capabilities,
progress,
segmentBaseDir,
closer
closer,
columnFormatSpec
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@
import org.apache.druid.query.filter.StringPredicateDruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.CapabilitiesBasedFormat;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnFormat;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndex;
Expand All @@ -60,6 +63,8 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer<int[]
private final boolean hasSpatialIndexes;
@Nullable
private final Integer maxStringLength;
@Nullable
private final StringColumnFormatSpec columnFormatSpec;
private volatile boolean hasMultipleValues = false;

public StringDimensionIndexer(
Expand All @@ -77,12 +82,39 @@ public StringDimensionIndexer(
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength
)
{
this(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, null);
}

public StringDimensionIndexer(
@Nullable MultiValueHandling multiValueHandling,
boolean hasBitmapIndexes,
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength,
@Nullable StringColumnFormatSpec columnFormatSpec
)
{
super(new StringDimensionDictionary());
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
this.maxStringLength = maxStringLength;
this.columnFormatSpec = columnFormatSpec;
}

@Override
public ColumnFormat getFormat()
{
if (columnFormatSpec != null) {
return new StringDictionaryEncodedColumnFormat(
hasMultipleValues,
false,
hasBitmapIndexes,
hasSpatialIndexes,
columnFormatSpec
);
}
return CapabilitiesBasedFormat.forColumnIndexer(getColumnCapabilities());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin

@Nullable
private ByteBufferWriter<ImmutableRTree> spatialWriter;
@Nullable
private final StringColumnFormatSpec columnFormatSpec;

/**
* @param dimensionName column name
Expand All @@ -76,6 +78,7 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin
* @param progress hook to update status of what this merger is doing during segment persist and merging
* @param closer resource closer if this merger needs to attach any closables that should be cleaned up
* when the segment is finished writing
* @param columnFormatSpec string column format spec to persist in segment metadata
*/
public StringDimensionMergerV9(
String dimensionName,
Expand All @@ -85,10 +88,12 @@ public StringDimensionMergerV9(
ColumnCapabilities capabilities,
ProgressIndicator progress,
File segmentBaseDir,
Closer closer
Closer closer,
@Nullable StringColumnFormatSpec columnFormatSpec
)
{
super(dimensionName, outputName, indexSpec, segmentWriteOutMedium, capabilities, progress, segmentBaseDir, closer);
this.columnFormatSpec = columnFormatSpec;
}

@Override
Expand Down Expand Up @@ -156,7 +161,8 @@ public ColumnDescriptor makeColumnDescriptor()
.withBitmapSerdeFactory(bitmapSerdeFactory)
.withBitmapIndex(bitmapWriter)
.withSpatialIndex(spatialWriter)
.withByteOrder(IndexIO.BYTE_ORDER);
.withByteOrder(IndexIO.BYTE_ORDER)
.withColumnFormatSpec(columnFormatSpec);

if (writeDictionary) {
partBuilder = partBuilder.withDictionary(dictionaryWriter);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,17 @@ public ColumnFormat merge(@Nullable ColumnFormat otherFormat)
return this;
}

if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] String format merge bypasses type compatibility checks

This delegation happens before CapabilitiesBasedFormat's existing type checks, and StringDictionaryEncodedColumnFormat.merge accepts any CapabilitiesBasedFormat without validating its logical type. If one segment has a string-specific format for a column and another legacy segment reports the same column as LONG, DOUBLE, COMPLEX, etc., the merge now produces a STRING format instead of rejecting the incompatible schemas. Restrict delegation to compatible string capabilities or perform the same logical/element/complex type validation in StringDictionaryEncodedColumnFormat.merge before merging.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added type validation.

if (!this.capabilities.is(ValueType.STRING)) {
throw new ISE(
"Cannot merge columns of type[%s] and [%s]",
this.capabilities.asTypeString(),
otherFormat.getLogicalType()
);
}
return otherFormat.merge(this);
}

ColumnCapabilitiesImpl merged = ColumnCapabilitiesImpl.copyOf(this.toColumnCapabilities());
ColumnCapabilitiesImpl otherSnapshot = ColumnCapabilitiesImpl.copyOf(otherFormat.toColumnCapabilities());
final String mergedType = merged.getType() == null ? null : merged.asTypeString();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.segment.column;

import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import org.apache.druid.data.input.impl.NewSpatialDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.StringColumnFormatSpec;
import org.apache.druid.segment.StringDimensionHandler;

import javax.annotation.Nullable;
import java.util.Collections;

public class StringDictionaryEncodedColumnFormat implements ColumnFormat
{
private final boolean hasMultipleValues;
private final boolean hasNulls;
private final boolean hasBitmapIndexes;
private final boolean hasSpatialIndexes;
@Nullable
private final StringColumnFormatSpec columnFormatSpec;

public StringDictionaryEncodedColumnFormat(
boolean hasMultipleValues,
boolean hasNulls,
boolean hasBitmapIndexes,
boolean hasSpatialIndexes,
@Nullable StringColumnFormatSpec columnFormatSpec
)
{
this.hasMultipleValues = hasMultipleValues;
this.hasNulls = hasNulls;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
this.columnFormatSpec = columnFormatSpec;
}

@Override
public ColumnType getLogicalType()
{
return ColumnType.STRING;
}

@Override
public ColumnCapabilities toColumnCapabilities()
{
return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.STRING)
.setDictionaryEncoded(true)
.setDictionaryValuesSorted(true)
.setDictionaryValuesUnique(true)
.setHasMultipleValues(hasMultipleValues)
.setHasNulls(hasNulls)
.setHasBitmapIndexes(hasBitmapIndexes)
.setHasSpatialIndexes(hasSpatialIndexes);
}

@Override
public DimensionHandler getColumnHandler(String columnName)
{
Integer maxStringLength = columnFormatSpec != null ? columnFormatSpec.getMaxStringLength() : null;
MultiValueHandling mvh = (columnFormatSpec != null && columnFormatSpec.getMultiValueHandling() != null)
? columnFormatSpec.getMultiValueHandling()
: MultiValueHandling.ofDefault();
return new StringDimensionHandler(
columnName,
mvh,
hasBitmapIndexes,
hasSpatialIndexes,
maxStringLength,
columnFormatSpec
);
}

@Override
public DimensionSchema getColumnSchema(String columnName)
{
if (hasSpatialIndexes) {
return new NewSpatialDimensionSchema(columnName, Collections.singletonList(columnName));
}
return new StringDimensionSchema(columnName, null, hasBitmapIndexes, columnFormatSpec);
}

@Override
public ColumnFormat merge(@Nullable ColumnFormat otherFormat)
{
if (otherFormat == null) {
return this;
}

if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
final StringDictionaryEncodedColumnFormat other = (StringDictionaryEncodedColumnFormat) otherFormat;
return new StringDictionaryEncodedColumnFormat(
hasMultipleValues || other.hasMultipleValues,
hasNulls || other.hasNulls,
hasBitmapIndexes && other.hasBitmapIndexes,
hasSpatialIndexes || other.hasSpatialIndexes,
columnFormatSpec != null ? columnFormatSpec : other.columnFormatSpec
);
}

if (otherFormat instanceof CapabilitiesBasedFormat) {
final ColumnCapabilities otherCaps = otherFormat.toColumnCapabilities();
if (!otherCaps.is(ValueType.STRING)) {
throw new ISE(
"Cannot merge columns of type[%s] and format[%s] with type[%s] and format[%s]",
ColumnType.STRING,
this.getClass().getName(),
otherFormat.getLogicalType(),
otherFormat.getClass().getName()
);
}
return new StringDictionaryEncodedColumnFormat(
hasMultipleValues || otherCaps.hasMultipleValues().isMaybeTrue(),
hasNulls || otherCaps.hasNulls().isMaybeTrue(),
hasBitmapIndexes && otherCaps.hasBitmapIndexes(),
hasSpatialIndexes || otherCaps.hasSpatialIndexes(),
columnFormatSpec
);
}

throw new ISE(
"Cannot merge columns of type[%s] and format[%s] and with [%s] and [%s]",
ColumnType.STRING,
this.getClass().getName(),
otherFormat.getLogicalType(),
otherFormat.getClass().getName()
);
}
}
Loading
Loading