diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index 82310272b369..5804507f5d5e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -1180,7 +1180,8 @@ private void processDimensionsSpec(final QueryableIndex index) schema = new StringDimensionSchema( schema.getName(), DimensionSchema.MultiValueHandling.ARRAY, - schema.hasBitmapIndex() + schema.hasBitmapIndex(), + ((StringDimensionSchema) schema).getColumnFormatSpec() ); } dimensionSchemaMap.put( @@ -1258,7 +1259,8 @@ private void processProjections(final QueryableIndex index) new StringDimensionSchema( columnSchema.getName(), DimensionSchema.MultiValueHandling.ARRAY, - columnSchema.hasBitmapIndex() + columnSchema.hasBitmapIndex(), + ((StringDimensionSchema) columnSchema).getColumnFormatSpec() ) ); } else { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index 20daa347664b..860f5c59354c 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -130,7 +130,7 @@ public DimensionHandler getDimensionHandler() } maxStringLength = columnFormatSpec.getMaxStringLength(); } - return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength); + return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength, columnFormatSpec); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index 8deb4aca0ed1..ee39aca79d8e 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -108,6 +108,8 @@ private static IndexedInts getRow(ColumnValueSelector s) private final boolean hasSpatialIndexes; @Nullable private final Integer maxStringLength; + @Nullable + private final StringColumnFormatSpec columnFormatSpec; public StringDimensionHandler( String dimensionName, @@ -126,12 +128,25 @@ public StringDimensionHandler( boolean hasSpatialIndexes, @Nullable Integer maxStringLength ) + { + this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, null); + } + + public StringDimensionHandler( + String dimensionName, + MultiValueHandling multiValueHandling, + boolean hasBitmapIndexes, + boolean hasSpatialIndexes, + @Nullable Integer maxStringLength, + @Nullable StringColumnFormatSpec columnFormatSpec + ) { this.dimensionName = dimensionName; this.multiValueHandling = multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; this.hasSpatialIndexes = hasSpatialIndexes; this.maxStringLength = maxStringLength; + this.columnFormatSpec = columnFormatSpec; } @Override @@ -146,6 +161,9 @@ public DimensionSchema getDimensionSchema(ColumnCapabilities capabilities) if (hasSpatialIndexes) { return new NewSpatialDimensionSchema(dimensionName, Collections.singletonList(dimensionName)); } + if (columnFormatSpec != null) { + return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes, columnFormatSpec); + } return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes); } @@ -176,7 +194,7 @@ public SettableColumnValueSelector makeNewSettableEncodedValueSelector() @Override public DimensionIndexer makeIndexer() { - return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength); + return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, columnFormatSpec); } @Override @@ -207,7 +225,8 @@ public DimensionMergerV9 makeMerger( capabilities, progress, segmentBaseDir, - closer + closer, + columnFormatSpec ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 8c7a59e2c242..79d2baf05de5 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -38,9 +38,12 @@ import org.apache.druid.query.filter.StringPredicateDruidPredicateFactory; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.CapabilitiesBasedFormat; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnFormat; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat; import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndex; @@ -60,6 +63,8 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer spatialWriter; + @Nullable + private final StringColumnFormatSpec columnFormatSpec; /** * @param dimensionName column name @@ -76,6 +78,7 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger spatialIndexWriter = null; @Nullable private ByteOrder byteOrder = null; + @Nullable + private StringColumnFormatSpec columnFormatSpec = null; public SerializerBuilder withDictionary(DictionaryWriter dictionaryWriter) { @@ -203,6 +222,12 @@ public SerializerBuilder withByteOrder(ByteOrder byteOrder) return this; } + public SerializerBuilder withColumnFormatSpec(@Nullable StringColumnFormatSpec columnFormatSpec) + { + this.columnFormatSpec = columnFormatSpec; + return this; + } + public SerializerBuilder withValue(ColumnarIntsSerializer valueWriter, boolean hasMultiValue, boolean compressed) { this.valueWriter = valueWriter; @@ -234,6 +259,7 @@ public DictionaryEncodedColumnPartSerde build() return new DictionaryEncodedColumnPartSerde( byteOrder, bitmapSerdeFactory, + columnFormatSpec, new Serializer() { @Override @@ -380,6 +406,14 @@ public void read( rSpatialIndex != null ); } + + builder.setColumnFormat(new StringDictionaryEncodedColumnFormat( + hasMultipleValues, + hasNulls, + rBitmaps != null, + rSpatialIndex != null, + columnFormatSpec + )); } private WritableSupplier readSingleValuedColumn( diff --git a/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java new file mode 100644 index 000000000000..3fbb3c4711f3 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.column; + +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.segment.StringColumnFormatSpec; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class StringDictionaryEncodedColumnFormatTest +{ + private static final StringColumnFormatSpec SPEC = StringColumnFormatSpec.builder() + .setMaxStringLength(50) + .build(); + + @Test + public void testGetColumnSchemaWithSpec() + { + StringDictionaryEncodedColumnFormat format = new StringDictionaryEncodedColumnFormat( + false, + false, + true, + false, + SPEC + ); + DimensionSchema schema = format.getColumnSchema("city"); + StringDimensionSchema stringSchema = (StringDimensionSchema) schema; + Assertions.assertNotNull(stringSchema.getColumnFormatSpec()); + Assertions.assertEquals(Integer.valueOf(50), stringSchema.getColumnFormatSpec().getMaxStringLength()); + } + + @Test + public void testMergeTwoFormatsKeepsSpec() + { + StringDictionaryEncodedColumnFormat formatWithSpec = new StringDictionaryEncodedColumnFormat( + false, + false, + true, + false, + SPEC + ); + StringDictionaryEncodedColumnFormat formatWithoutSpec = new StringDictionaryEncodedColumnFormat( + false, + true, + true, + false, + null + ); + ColumnFormat merged = formatWithSpec.merge(formatWithoutSpec); + + DimensionSchema schema = merged.getColumnSchema("city"); + Assertions.assertEquals(Integer.valueOf(50), ((StringDimensionSchema) schema).getColumnFormatSpec().getMaxStringLength()); + } + + @Test + public void testCapabilitiesBasedFormatMergesDelegatesToStringFormat() + { + StringDictionaryEncodedColumnFormat formatWithSpec = new StringDictionaryEncodedColumnFormat( + false, + false, + true, + false, + SPEC + ); + ColumnCapabilities caps = ColumnCapabilitiesImpl.createDefault() + .setType(ColumnType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesSorted(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(false) + .setHasNulls(false) + .setHasBitmapIndexes(true); + CapabilitiesBasedFormat capFormat = new CapabilitiesBasedFormat(caps); + + ColumnFormat merged = capFormat.merge(formatWithSpec); + Assertions.assertInstanceOf(StringDictionaryEncodedColumnFormat.class, merged); + DimensionSchema schema = merged.getColumnSchema("city"); + Assertions.assertNotNull(((StringDimensionSchema) schema).getColumnFormatSpec()); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java index b276d56b0867..81b4386c4807 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java @@ -70,4 +70,30 @@ public void testSerde() throws Exception Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder()); Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof RoaringBitmapSerdeFactory); } + + @Test + public void testSerdeWithColumnFormatSpec() throws Exception + { + String json = "{\n" + + "\"type\": \"stringDictionary\",\n" + + "\"byteOrder\": \"LITTLE_ENDIAN\",\n" + + "\"bitmapSerdeFactory\": { \"type\": \"roaring\" },\n" + + "\"columnFormatSpec\": { \"maxStringLength\": 100 }\n" + + "}"; + + ObjectMapper mapper = TestHelper.makeJsonMapper(); + + DictionaryEncodedColumnPartSerde serde = (DictionaryEncodedColumnPartSerde) mapper.readValue( + mapper.writeValueAsString( + mapper.readValue(json, ColumnPartSerde.class) + ), + ColumnPartSerde.class + ); + + Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder()); + Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof RoaringBitmapSerdeFactory); + Assertions.assertNotNull(serde.getColumnFormatSpec()); + Assertions.assertEquals(Integer.valueOf(100), serde.getColumnFormatSpec().getMaxStringLength()); + } + }