From e85330fa47ab91fac9d5e1d156af78086f79f5a4 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Fri, 17 Feb 2017 17:40:52 -0800 Subject: [PATCH 01/13] Support ingestion for long/float dimensions --- .../druid/query/scan/ScanQueryRunnerTest.java | 7 +- .../io/druid/segment/DimensionHandler.java | 2 - .../druid/segment/DimensionHandlerUtils.java | 8 + .../druid/segment/FloatDimensionHandler.java | 117 ++++++++++++++ .../druid/segment/FloatDimensionIndexer.java | 143 ++++++++++++++++++ .../segment/FloatDimensionMergerLegacy.java | 89 +++++++++++ .../druid/segment/FloatDimensionMergerV9.java | 120 +++++++++++++++ .../segment/FloatMetricColumnSerializer.java | 5 + .../druid/segment/LongDimensionHandler.java | 117 ++++++++++++++ .../druid/segment/LongDimensionIndexer.java | 143 ++++++++++++++++++ .../segment/LongDimensionMergerLegacy.java | 91 +++++++++++ .../druid/segment/LongDimensionMergerV9.java | 121 +++++++++++++++ .../segment/LongMetricColumnSerializer.java | 5 + .../druid/segment/MetricColumnSerializer.java | 2 + .../io/druid/segment/column/FloatColumn.java | 2 + .../io/druid/segment/column/LongColumn.java | 2 + .../serde/ComplexMetricColumnSerializer.java | 6 + .../query/metadata/SegmentAnalyzerTest.java | 36 +++-- .../query/select/SelectQueryRunnerTest.java | 12 +- .../test/java/io/druid/segment/TestIndex.java | 38 ++++- 20 files changed, 1037 insertions(+), 29 deletions(-) create mode 100644 processing/src/main/java/io/druid/segment/FloatDimensionHandler.java create mode 100644 processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java create mode 100644 processing/src/main/java/io/druid/segment/FloatDimensionMergerLegacy.java create mode 100644 processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java create mode 100644 processing/src/main/java/io/druid/segment/LongDimensionHandler.java create mode 100644 processing/src/main/java/io/druid/segment/LongDimensionIndexer.java create mode 100644 processing/src/main/java/io/druid/segment/LongDimensionMergerLegacy.java create mode 100644 processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java diff --git a/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java b/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java index 94fc1bc5af9f..951c3e2c94a1 100644 --- a/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java +++ b/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java @@ -133,6 +133,8 @@ public void testFullOnSelect() ScanResultValue.timestampKey, "market", "quality", + "qualityLong", + "qualityFloat", "qualityNumericString", "placement", "placementish", @@ -141,9 +143,8 @@ public void testFullOnSelect() "index", "indexMin", "indexMaxPlusTen", - "quality_uniques", - "qualityLong", - "qualityFloat" + "quality_uniques" + ); ScanQuery query = newTestQuery() .intervals(I_0112_0114) diff --git a/processing/src/main/java/io/druid/segment/DimensionHandler.java b/processing/src/main/java/io/druid/segment/DimensionHandler.java index 8f2e791cf8ee..12f739d9526a 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandler.java @@ -161,8 +161,6 @@ DimensionMergerLegacy makeLegacyMerger( * @param rhs array of row values * @param lhsEncodings encoding lookup from lhs's segment, null if not applicable for this dimension's type * @param rhsEncodings encoding lookup from rhs's segment, null if not applicable for this dimension's type - * - * @return integer indicating comparison result of arrays */ void validateSortedEncodedArrays( EncodedTypeArray lhs, diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index 4e2087efe2a0..ebb7c6722078 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -64,6 +64,14 @@ public static DimensionHandler getHandlerFromCapabilities( return new StringDimensionHandler(dimensionName, multiValueHandling); } + if (capabilities.getType() == ValueType.LONG) { + return new LongDimensionHandler(dimensionName); + } + + if (capabilities.getType() == ValueType.FLOAT) { + return new FloatDimensionHandler(dimensionName); + } + // Return a StringDimensionHandler by default (null columns will be treated as String typed) return new StringDimensionHandler(dimensionName, multiValueHandling); } diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java b/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java new file mode 100644 index 000000000000..b9ba853c7a42 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java @@ -0,0 +1,117 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import io.druid.segment.column.Column; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.GenericColumn; +import io.druid.segment.column.FloatColumn; +import io.druid.segment.data.IOPeon; +import io.druid.segment.data.Indexed; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + +public class FloatDimensionHandler implements DimensionHandler +{ + private final String dimensionName; + + public FloatDimensionHandler(String dimensionName) + { + this.dimensionName = dimensionName; + } + + @Override + public String getDimensionName() + { + return dimensionName; + } + + @Override + public DimensionIndexer makeIndexer() + { + return new FloatDimensionIndexer(); + } + + @Override + public DimensionMergerV9 makeMerger( + IndexSpec indexSpec, File outDir, IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress + ) throws IOException + { + return new FloatDimensionMergerV9( + dimensionName, + indexSpec, + outDir, + ioPeon, + capabilities, + progress + ); + } + + @Override + public DimensionMergerLegacy makeLegacyMerger( + IndexSpec indexSpec, File outDir, IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress + ) throws IOException + { + return new FloatDimensionMergerLegacy( + dimensionName, + indexSpec, + outDir, + ioPeon, + capabilities, + progress + ); + } + + @Override + public int getLengthFromEncodedArray(Float dimVals) + { + return FloatColumn.ROW_SIZE; + } + + @Override + public int compareSortedEncodedArrays(Float lhs, Float rhs) + { + return lhs.compareTo(rhs); + } + + @Override + public void validateSortedEncodedArrays( + Float lhs, Float rhs, Indexed lhsEncodings, Indexed rhsEncodings + ) throws SegmentValidationException + { + if (!lhs.equals(rhs)) { + throw new SegmentValidationException("Dim [%s] value not equal. Expected [%s] found [%s]", lhs, rhs); + } + } + + @Override + public Closeable getSubColumn(Column column) + { + return column.getGenericColumn(); + } + + @Override + public Object getRowValueArrayFromColumn(Closeable column, int currRow) + { + return ((GenericColumn) column).getFloatSingleValueRow(currRow); + } +} diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java new file mode 100644 index 000000000000..8c2a8bb44886 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -0,0 +1,143 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.collect.Lists; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.data.Indexed; +import io.druid.segment.incremental.IncrementalIndex; +import io.druid.segment.incremental.IncrementalIndexStorageAdapter; + +import java.util.List; + +public class FloatDimensionIndexer implements DimensionIndexer +{ + @Override + public Float processRowValsToUnsortedEncodedArray(Object dimValues) + { + if (dimValues instanceof List) { + throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); + } + + return DimensionHandlerUtils.convertObjectToFloat(dimValues); + } + + @Override + public Float getSortedEncodedValueFromUnsorted(Float unsortedIntermediateValue) + { + return unsortedIntermediateValue; + } + + @Override + public Float getUnsortedEncodedValueFromSorted(Float sortedIntermediateValue) + { + return sortedIntermediateValue; + } + + @Override + public Indexed getSortedIndexedValues() + { + return null; + } + + @Override + public Float getMinValue() + { + return 0.0f; + } + + @Override + public Float getMaxValue() + { + return 0.0f; + } + + @Override + public int getCardinality() + { + return DimensionSelector.CARDINALITY_UNKNOWN; + } + + @Override + public ColumnValueSelector makeColumnValueSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerFloatColumnSelector implements FloatColumnSelector + { + @Override + public float get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0L; + } + + return (Float) dims[dimIndex]; + } + } + + return new IndexerFloatColumnSelector(); + } + + @Override + public int compareUnsortedEncodedArrays(Float lhs, Float rhs) + { + return lhs.compareTo(rhs); + } + + @Override + public boolean checkUnsortedEncodedArraysEqual(Float lhs, Float rhs) + { + return lhs.equals(rhs); + } + + @Override + public int getUnsortedEncodedArrayHashCode(Float key) + { + return key.hashCode(); + } + + @Override + public Object convertUnsortedEncodedArrayToActualArrayOrList(Float key, boolean asList) + { + return Lists.newArrayList(key); + } + + @Override + public Float convertUnsortedEncodedArrayToSortedEncodedArray(Float key) + { + return key; + } + + @Override + public void fillBitmapsFromUnsortedEncodedArray( + Float key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory + ) + { + // floats don't have bitmaps + } +} diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/FloatDimensionMergerLegacy.java new file mode 100644 index 000000000000..02ff4cd8ef98 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/FloatDimensionMergerLegacy.java @@ -0,0 +1,89 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.io.ByteSink; +import com.google.common.io.OutputSupplier; +import io.druid.common.guava.FileOutputSupplier; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.data.CompressedObjectStrategy; +import io.druid.segment.data.IOPeon; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +public class FloatDimensionMergerLegacy extends FloatDimensionMergerV9 implements DimensionMergerLegacy +{ + private FloatMetricColumnSerializer serializerV8; + + public FloatDimensionMergerLegacy( + String dimensionName, + IndexSpec indexSpec, + File outDir, + IOPeon ioPeon, + ColumnCapabilities capabilities, + ProgressIndicator progress + ) + { + super(dimensionName, indexSpec, outDir, ioPeon, capabilities, progress); + } + + @Override + protected void setupEncodedValueWriter() throws IOException + { + final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); + serializerV8 = new FloatMetricColumnSerializer(dimensionName, outDir, ioPeon, metCompression); + serializerV8.open(); + } + + @Override + public void processMergedRow(Float rowValues) throws IOException + { + serializerV8.serialize(rowValues); + } + + @Override + public void writeValueMetadataToFile(FileOutputSupplier valueEncodingFile) throws IOException + { + // floats have no metadata to write + } + + @Override + public void writeRowValuesToFile(FileOutputSupplier rowValueOut) throws IOException + { + // closing the serializer writes its data to the file + serializerV8.closeFile(rowValueOut.getFile()); + } + + @Override + public void writeIndexesToFiles( + ByteSink invertedOut, OutputSupplier spatialOut + ) throws IOException + { + // floats have no indices to write + } + + @Override + public File makeDimFile() throws IOException + { + return IndexIO.makeNumericDimFile(outDir, dimensionName, IndexIO.BYTE_ORDER); + } +} diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java b/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java new file mode 100644 index 000000000000..8e84c78fd9df --- /dev/null +++ b/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java @@ -0,0 +1,120 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.base.Throwables; +import com.google.common.io.Closer; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ColumnDescriptor; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.CompressedObjectStrategy; +import io.druid.segment.data.IOPeon; +import io.druid.segment.serde.FloatGenericColumnPartSerde; + +import java.io.File; +import java.io.IOException; +import java.nio.IntBuffer; +import java.util.List; + +public class FloatDimensionMergerV9 implements DimensionMergerV9 +{ + protected String dimensionName; + protected ProgressIndicator progress; + protected final IndexSpec indexSpec; + protected ColumnCapabilities capabilities; + protected final File outDir; + protected IOPeon ioPeon; + + private FloatColumnSerializer serializer; + + public FloatDimensionMergerV9( + String dimensionName, + IndexSpec indexSpec, + File outDir, + IOPeon ioPeon, + ColumnCapabilities capabilities, + ProgressIndicator progress + ) + { + this.dimensionName = dimensionName; + this.indexSpec = indexSpec; + this.capabilities = capabilities; + this.outDir = outDir; + this.ioPeon = ioPeon; + this.progress = progress; + + try { + setupEncodedValueWriter(); + } catch (IOException ioe) { + Throwables.propagate(ioe); + } + } + + protected void setupEncodedValueWriter() throws IOException + { + final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); + this.serializer = FloatColumnSerializer.create(ioPeon, dimensionName, metCompression); + } + + @Override + public void writeMergedValueMetadata(List adapters) throws IOException + { + // floats have no additional metadata + } + + @Override + public Float convertSegmentRowValuesToMergedRowValues(Float segmentRow, int segmentIndexNumber) + { + return segmentRow; + } + + @Override + public void processMergedRow(Float rowValues) throws IOException + { + serializer.serialize(rowValues); + } + + @Override + public void writeIndexes(List segmentRowNumConversions, Closer closer) throws IOException + { + // floats have no indices to write + } + + @Override + public boolean canSkip() + { + // a float column can never be all null + return false; + } + + @Override + public ColumnDescriptor makeColumnDescriptor() throws IOException + { + final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); + builder.setValueType(ValueType.FLOAT); + builder.addSerde( + FloatGenericColumnPartSerde.serializerBuilder() + .withByteOrder(IndexIO.BYTE_ORDER) + .withDelegate(serializer) + .build() + ); + return builder.build(); + } +} diff --git a/processing/src/main/java/io/druid/segment/FloatMetricColumnSerializer.java b/processing/src/main/java/io/druid/segment/FloatMetricColumnSerializer.java index 43bd4dd6a01c..7f4923630732 100644 --- a/processing/src/main/java/io/druid/segment/FloatMetricColumnSerializer.java +++ b/processing/src/main/java/io/druid/segment/FloatMetricColumnSerializer.java @@ -74,6 +74,11 @@ public void serialize(Object obj) throws IOException public void close() throws IOException { final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + closeFile(outFile); + } + + public void closeFile(final File outFile) throws IOException + { outFile.delete(); MetricHolder.writeFloatMetric( Files.asByteSink(outFile, FileWriteMode.APPEND), metricName, writer diff --git a/processing/src/main/java/io/druid/segment/LongDimensionHandler.java b/processing/src/main/java/io/druid/segment/LongDimensionHandler.java new file mode 100644 index 000000000000..3030ff87fe47 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/LongDimensionHandler.java @@ -0,0 +1,117 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import io.druid.segment.column.Column; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.GenericColumn; +import io.druid.segment.column.LongColumn; +import io.druid.segment.data.IOPeon; +import io.druid.segment.data.Indexed; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + +public class LongDimensionHandler implements DimensionHandler +{ + private final String dimensionName; + + public LongDimensionHandler(String dimensionName) + { + this.dimensionName = dimensionName; + } + + @Override + public String getDimensionName() + { + return dimensionName; + } + + @Override + public DimensionIndexer makeIndexer() + { + return new LongDimensionIndexer(); + } + + @Override + public DimensionMergerV9 makeMerger( + IndexSpec indexSpec, File outDir, IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress + ) throws IOException + { + return new LongDimensionMergerV9( + dimensionName, + indexSpec, + outDir, + ioPeon, + capabilities, + progress + ); + } + + @Override + public DimensionMergerLegacy makeLegacyMerger( + IndexSpec indexSpec, File outDir, IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress + ) throws IOException + { + return new LongDimensionMergerLegacy( + dimensionName, + indexSpec, + outDir, + ioPeon, + capabilities, + progress + ); + } + + @Override + public int getLengthFromEncodedArray(Long dimVals) + { + return LongColumn.ROW_SIZE; + } + + @Override + public int compareSortedEncodedArrays(Long lhs, Long rhs) + { + return lhs.compareTo(rhs); + } + + @Override + public void validateSortedEncodedArrays( + Long lhs, Long rhs, Indexed lhsEncodings, Indexed rhsEncodings + ) throws SegmentValidationException + { + if (!lhs.equals(rhs)) { + throw new SegmentValidationException("Dim [%s] value not equal. Expected [%s] found [%s]", lhs, rhs); + } + } + + @Override + public Closeable getSubColumn(Column column) + { + return column.getGenericColumn(); + } + + @Override + public Object getRowValueArrayFromColumn(Closeable column, int currRow) + { + return ((GenericColumn) column).getLongSingleValueRow(currRow); + } +} diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java new file mode 100644 index 000000000000..88da17ab7cd0 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -0,0 +1,143 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.collect.Lists; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.data.Indexed; +import io.druid.segment.incremental.IncrementalIndex; +import io.druid.segment.incremental.IncrementalIndexStorageAdapter; + +import java.util.List; + +public class LongDimensionIndexer implements DimensionIndexer +{ + @Override + public Long processRowValsToUnsortedEncodedArray(Object dimValues) + { + if (dimValues instanceof List) { + throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); + } + + return DimensionHandlerUtils.convertObjectToLong(dimValues); + } + + @Override + public Long getSortedEncodedValueFromUnsorted(Long unsortedIntermediateValue) + { + return unsortedIntermediateValue; + } + + @Override + public Long getUnsortedEncodedValueFromSorted(Long sortedIntermediateValue) + { + return sortedIntermediateValue; + } + + @Override + public Indexed getSortedIndexedValues() + { + return null; + } + + @Override + public Long getMinValue() + { + return 0L; + } + + @Override + public Long getMaxValue() + { + return 0L; + } + + @Override + public int getCardinality() + { + return DimensionSelector.CARDINALITY_UNKNOWN; + } + + @Override + public ColumnValueSelector makeColumnValueSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerLongColumnSelector implements LongColumnSelector + { + @Override + public long get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0L; + } + + return (Long) dims[dimIndex]; + } + } + + return new IndexerLongColumnSelector(); + } + + @Override + public int compareUnsortedEncodedArrays(Long lhs, Long rhs) + { + return lhs.compareTo(rhs); + } + + @Override + public boolean checkUnsortedEncodedArraysEqual(Long lhs, Long rhs) + { + return lhs.equals(rhs); + } + + @Override + public int getUnsortedEncodedArrayHashCode(Long key) + { + return key.hashCode(); + } + + @Override + public Object convertUnsortedEncodedArrayToActualArrayOrList(Long key, boolean asList) + { + return Lists.newArrayList(key); + } + + @Override + public Long convertUnsortedEncodedArrayToSortedEncodedArray(Long key) + { + return key; + } + + @Override + public void fillBitmapsFromUnsortedEncodedArray( + Long key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory + ) + { + // longs don't have bitmaps + } +} diff --git a/processing/src/main/java/io/druid/segment/LongDimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/LongDimensionMergerLegacy.java new file mode 100644 index 000000000000..046d774c5f75 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/LongDimensionMergerLegacy.java @@ -0,0 +1,91 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.io.ByteSink; +import com.google.common.io.OutputSupplier; +import io.druid.common.guava.FileOutputSupplier; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.data.CompressedObjectStrategy; +import io.druid.segment.data.CompressionFactory; +import io.druid.segment.data.IOPeon; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +public class LongDimensionMergerLegacy extends LongDimensionMergerV9 implements DimensionMergerLegacy +{ + private LongMetricColumnSerializer serializerV8; + + public LongDimensionMergerLegacy( + String dimensionName, + IndexSpec indexSpec, + File outDir, + IOPeon ioPeon, + ColumnCapabilities capabilities, + ProgressIndicator progress + ) + { + super(dimensionName, indexSpec, outDir, ioPeon, capabilities, progress); + } + + @Override + protected void setupEncodedValueWriter() throws IOException + { + final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); + final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding(); + serializerV8 = new LongMetricColumnSerializer(dimensionName, outDir, ioPeon, metCompression, longEncoding); + serializerV8.open(); + } + + @Override + public void processMergedRow(Long rowValues) throws IOException + { + serializerV8.serialize(rowValues); + } + + @Override + public void writeValueMetadataToFile(FileOutputSupplier valueEncodingFile) throws IOException + { + // longs have no metadata to write + } + + @Override + public void writeRowValuesToFile(FileOutputSupplier rowValueOut) throws IOException + { + // closing the serializer writes its data to the file + serializerV8.closeFile(rowValueOut.getFile()); + } + + @Override + public void writeIndexesToFiles( + ByteSink invertedOut, OutputSupplier spatialOut + ) throws IOException + { + // longs have no indices to write + } + + @Override + public File makeDimFile() throws IOException + { + return IndexIO.makeNumericDimFile(outDir, dimensionName, IndexIO.BYTE_ORDER); + } +} diff --git a/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java b/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java new file mode 100644 index 000000000000..3166cc1abda1 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java @@ -0,0 +1,121 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.base.Throwables; +import com.google.common.io.Closer; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ColumnDescriptor; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.CompressedObjectStrategy; +import io.druid.segment.data.CompressionFactory; +import io.druid.segment.data.IOPeon; +import io.druid.segment.serde.LongGenericColumnPartSerde; + +import java.io.File; +import java.io.IOException; +import java.nio.IntBuffer; +import java.util.List; + +public class LongDimensionMergerV9 implements DimensionMergerV9 +{ + protected String dimensionName; + protected ProgressIndicator progress; + protected final IndexSpec indexSpec; + protected ColumnCapabilities capabilities; + protected final File outDir; + protected IOPeon ioPeon; + protected LongColumnSerializer serializer; + + public LongDimensionMergerV9( + String dimensionName, + IndexSpec indexSpec, + File outDir, + IOPeon ioPeon, + ColumnCapabilities capabilities, + ProgressIndicator progress + ) + { + this.dimensionName = dimensionName; + this.indexSpec = indexSpec; + this.capabilities = capabilities; + this.outDir = outDir; + this.ioPeon = ioPeon; + this.progress = progress; + + try { + setupEncodedValueWriter(); + } catch (IOException ioe) { + Throwables.propagate(ioe); + } + } + + protected void setupEncodedValueWriter() throws IOException + { + final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); + final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding(); + this.serializer = LongColumnSerializer.create(ioPeon, dimensionName, metCompression, longEncoding); + } + + @Override + public void writeMergedValueMetadata(List adapters) throws IOException + { + // longs have no additional metadata + } + + @Override + public Long convertSegmentRowValuesToMergedRowValues(Long segmentRow, int segmentIndexNumber) + { + return segmentRow; + } + + @Override + public void processMergedRow(Long rowValues) throws IOException + { + serializer.serialize(rowValues); + } + + @Override + public void writeIndexes(List segmentRowNumConversions, Closer closer) throws IOException + { + // longs have no indices to write + } + + @Override + public boolean canSkip() + { + // a long column can never be all null + return false; + } + + @Override + public ColumnDescriptor makeColumnDescriptor() throws IOException + { + final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); + builder.setValueType(ValueType.LONG); + builder.addSerde( + LongGenericColumnPartSerde.serializerBuilder() + .withByteOrder(IndexIO.BYTE_ORDER) + .withDelegate(serializer) + .build() + ); + return builder.build(); + } +} diff --git a/processing/src/main/java/io/druid/segment/LongMetricColumnSerializer.java b/processing/src/main/java/io/druid/segment/LongMetricColumnSerializer.java index 24e71f6c3d8a..12edc7f28e54 100644 --- a/processing/src/main/java/io/druid/segment/LongMetricColumnSerializer.java +++ b/processing/src/main/java/io/druid/segment/LongMetricColumnSerializer.java @@ -77,6 +77,11 @@ public void serialize(Object obj) throws IOException public void close() throws IOException { final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + closeFile(outFile); + } + + public void closeFile(final File outFile) throws IOException + { outFile.delete(); MetricHolder.writeLongMetric( Files.asByteSink(outFile, FileWriteMode.APPEND), metricName, writer diff --git a/processing/src/main/java/io/druid/segment/MetricColumnSerializer.java b/processing/src/main/java/io/druid/segment/MetricColumnSerializer.java index c3841960be6f..6433b342ca15 100644 --- a/processing/src/main/java/io/druid/segment/MetricColumnSerializer.java +++ b/processing/src/main/java/io/druid/segment/MetricColumnSerializer.java @@ -19,6 +19,7 @@ package io.druid.segment; +import java.io.File; import java.io.IOException; /** @@ -28,4 +29,5 @@ public interface MetricColumnSerializer public void open() throws IOException; public void serialize(Object aggs) throws IOException; public void close() throws IOException; + public void closeFile(File outFile) throws IOException; } diff --git a/processing/src/main/java/io/druid/segment/column/FloatColumn.java b/processing/src/main/java/io/druid/segment/column/FloatColumn.java index b97c3b33d98a..ad130807b2fb 100644 --- a/processing/src/main/java/io/druid/segment/column/FloatColumn.java +++ b/processing/src/main/java/io/druid/segment/column/FloatColumn.java @@ -25,6 +25,8 @@ */ public class FloatColumn extends AbstractColumn { + public static final int ROW_SIZE = 1; + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() .setType(ValueType.FLOAT); diff --git a/processing/src/main/java/io/druid/segment/column/LongColumn.java b/processing/src/main/java/io/druid/segment/column/LongColumn.java index 3e06ea03970e..bad4736d4e7f 100644 --- a/processing/src/main/java/io/druid/segment/column/LongColumn.java +++ b/processing/src/main/java/io/druid/segment/column/LongColumn.java @@ -25,6 +25,8 @@ */ public class LongColumn extends AbstractColumn { + public static final int ROW_SIZE = 1; + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() .setType(ValueType.LONG); diff --git a/processing/src/main/java/io/druid/segment/serde/ComplexMetricColumnSerializer.java b/processing/src/main/java/io/druid/segment/serde/ComplexMetricColumnSerializer.java index d9fe6678d575..f67ead1e79d9 100644 --- a/processing/src/main/java/io/druid/segment/serde/ComplexMetricColumnSerializer.java +++ b/processing/src/main/java/io/druid/segment/serde/ComplexMetricColumnSerializer.java @@ -76,6 +76,12 @@ public void close() throws IOException writer.close(); final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + closeFile(outFile); + } + + @Override + public void closeFile(final File outFile) throws IOException + { outFile.delete(); MetricHolder.writeComplexMetric( Files.newOutputStreamSupplier(outFile, true), metricName, serde.getTypeName(), writer diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java index 5c927be6b2f0..987c9fba7cf4 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java @@ -20,6 +20,7 @@ package io.druid.query.metadata; import com.google.common.collect.Lists; +import io.druid.data.input.impl.DimensionSchema; import io.druid.java.util.common.guava.Sequences; import io.druid.query.LegacyDataSource; import io.druid.query.QueryRunner; @@ -75,15 +76,21 @@ private void testIncrementalWorksHelper(EnumSet 0); + Assert.assertEquals(dimension, schema.getValueType().name(), columnAnalysis.getType()); + Assert.assertEquals(dimension, 0, columnAnalysis.getSize()); + if (isString) { + if (analyses == null) { + Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0); + } else { + Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue()); + } } else { - Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue()); - Assert.assertEquals(dimension, 0, columnAnalysis.getSize()); + Assert.assertNull(dimension, columnAnalysis.getCardinality()); } } @@ -121,17 +128,24 @@ private void testMappedWorksHelper(EnumSet an columns.size() ); // All columns including time and excluding empty/null column - for (String dimension : TestIndex.DIMENSIONS) { + for (DimensionSchema schema : TestIndex.DIMENSION_SCHEMAS) { + final String dimension = schema.getName(); final ColumnAnalysis columnAnalysis = columns.get(dimension); if (dimension.equals("null_column")) { Assert.assertNull(columnAnalysis); } else { - Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType()); + final boolean isString = schema.getValueType().name().equals(ValueType.STRING.name()); + Assert.assertEquals(dimension, schema.getValueType().name(), columnAnalysis.getType()); Assert.assertEquals(dimension, 0, columnAnalysis.getSize()); - if (analyses == null) { - Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0); + + if (isString) { + if (analyses == null) { + Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0); + } else { + Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue()); + } } else { - Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue()); + Assert.assertNull(dimension, columnAnalysis.getCardinality()); } } } diff --git a/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java b/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java index 3342fd5a4868..60ea00bcd311 100644 --- a/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java @@ -162,8 +162,8 @@ public void testFullOnSelect() PagingOffset offset = query.getPagingOffset(QueryRunnerTestHelper.segmentId); List> expectedResults = toExpected( toFullEvents(V_0112_0114), - Lists.newArrayList("market", "quality", "qualityNumericString", "placement", "placementish", "partial_null_column", "null_column"), - Lists.newArrayList("index", "quality_uniques", "qualityLong", "qualityFloat", "indexMin", "indexMaxPlusTen"), + Lists.newArrayList("market", "quality", "qualityLong", "qualityFloat", "qualityNumericString", "placement", "placementish", "partial_null_column", "null_column"), + Lists.newArrayList("index", "quality_uniques", "indexMin", "indexMaxPlusTen"), offset.startOffset(), offset.threshold() ); @@ -252,7 +252,7 @@ public void testFullOnSelectWithDimensionSpec() new SelectResultValue( ImmutableMap.of(QueryRunnerTestHelper.segmentId, 2), Sets.newHashSet("mar", "qual", "place"), - Sets.newHashSet("index", "quality_uniques", "qualityLong", "qualityFloat", "indexMin", "indexMaxPlusTen"), + Sets.newHashSet("index", "quality_uniques", "indexMin", "indexMaxPlusTen"), Arrays.asList( new EventHolder( QueryRunnerTestHelper.segmentId, @@ -298,7 +298,7 @@ public void testFullOnSelectWithDimensionSpec() new SelectResultValue( ImmutableMap.of(QueryRunnerTestHelper.segmentId, -3), Sets.newHashSet("mar", "qual", "place"), - Sets.newHashSet("index", "qualityLong", "qualityFloat", "quality_uniques", "indexMin", "indexMaxPlusTen"), + Sets.newHashSet("index", "quality_uniques", "indexMin", "indexMaxPlusTen"), Arrays.asList( new EventHolder( QueryRunnerTestHelper.segmentId, @@ -561,8 +561,8 @@ public void testFullSelectNoResults() new DateTime("2011-01-12T00:00:00.000Z"), new SelectResultValue( ImmutableMap.of(), - Sets.newHashSet("market", "quality", "qualityNumericString", "placement", "placementish", "partial_null_column", "null_column"), - Sets.newHashSet("index", "quality_uniques", "qualityLong", "qualityFloat", "indexMin", "indexMaxPlusTen"), + Sets.newHashSet("market", "quality", "qualityLong", "qualityFloat", "qualityNumericString", "placement", "placementish", "partial_null_column", "null_column"), + Sets.newHashSet("index", "quality_uniques", "indexMin", "indexMaxPlusTen"), Lists.newArrayList() ) ) diff --git a/processing/src/test/java/io/druid/segment/TestIndex.java b/processing/src/test/java/io/druid/segment/TestIndex.java index e5e3fd667688..afec4ba39669 100644 --- a/processing/src/test/java/io/druid/segment/TestIndex.java +++ b/processing/src/test/java/io/druid/segment/TestIndex.java @@ -25,7 +25,11 @@ import com.google.common.io.LineProcessor; import com.google.common.io.Resources; import io.druid.data.input.impl.DelimitedParseSpec; +import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.LongDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.data.input.impl.StringInputRowParser; import io.druid.data.input.impl.TimestampSpec; import io.druid.granularity.QueryGranularities; @@ -35,7 +39,6 @@ import io.druid.query.aggregation.DoubleMaxAggregatorFactory; import io.druid.query.aggregation.DoubleMinAggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory; -import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; import io.druid.segment.incremental.IncrementalIndex; @@ -50,6 +53,7 @@ import java.io.IOException; import java.net.URL; import java.util.Arrays; +import java.util.List; import java.util.concurrent.atomic.AtomicLong; /** @@ -75,12 +79,33 @@ public class TestIndex public static final String[] DIMENSIONS = new String[]{ "market", "quality", + "qualityLong", + "qualityFloat", "qualityNumericString", "placement", "placementish", "partial_null_column", - "null_column", - }; + "null_column" + }; + + public static final List DIMENSION_SCHEMAS = Arrays.asList( + new StringDimensionSchema("market"), + new StringDimensionSchema("quality"), + new LongDimensionSchema("qualityLong"), + new FloatDimensionSchema("qualityFloat"), + new StringDimensionSchema("qualityNumericString"), + new StringDimensionSchema("placement"), + new StringDimensionSchema("placementish"), + new StringDimensionSchema("partial_null_column"), + new StringDimensionSchema("null_column") + ); + + public static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec( + DIMENSION_SCHEMAS, + null, + null + ); + public static final String[] METRICS = new String[]{"index", "indexMin", "indexMaxPlusTen"}; private static final Logger log = new Logger(TestIndex.class); private static final Interval DATA_INTERVAL = new Interval("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z"); @@ -93,9 +118,7 @@ public class TestIndex new DoubleSumAggregatorFactory(METRICS[0], METRICS[0]), new DoubleMinAggregatorFactory(METRICS[1], METRICS[0]), new DoubleMaxAggregatorFactory(METRICS[2], VIRTUAL_COLUMNS.getVirtualColumns()[0].getOutputName()), - new HyperUniquesAggregatorFactory("quality_uniques", "quality"), - new LongSumAggregatorFactory("qualityLong", "qualityLong"), - new DoubleSumAggregatorFactory("qualityFloat", "qualityFloat") + new HyperUniquesAggregatorFactory("quality_uniques", "quality") }; private static final IndexSpec indexSpec = new IndexSpec(); @@ -237,6 +260,7 @@ public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolea .withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis()) .withTimestampSpec(new TimestampSpec("ds", "auto", null)) .withQueryGranularity(QueryGranularities.NONE) + .withDimensionsSpec(DIMENSIONS_SPEC) .withVirtualColumns(VIRTUAL_COLUMNS) .withMetrics(METRIC_AGGS) .withRollup(rollup) @@ -264,7 +288,7 @@ public static IncrementalIndex loadIncrementalIndex( final StringInputRowParser parser = new StringInputRowParser( new DelimitedParseSpec( new TimestampSpec("ts", "iso", null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(DIMENSIONS)), null, null), + new DimensionsSpec(DIMENSION_SCHEMAS, null, null), "\t", "\u0001", Arrays.asList(COLUMNS) From 1a0ba2d8f02ab19c11e19ff7f6ec3f7e0ebaaaa9 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 22 Feb 2017 16:25:21 -0800 Subject: [PATCH 02/13] Allow non-arrays for key components in indexing type strategy interfaces --- .../benchmark/BitmapIterationBenchmark.java | 2 +- .../io/druid/segment/DimensionHandler.java | 80 +++++++++++-------- .../io/druid/segment/DimensionIndexer.java | 52 +++++++----- .../io/druid/segment/DimensionMerger.java | 25 ++++-- .../druid/segment/DimensionMergerLegacy.java | 2 +- .../io/druid/segment/DimensionMergerV9.java | 2 +- .../druid/segment/FloatDimensionHandler.java | 8 +- .../druid/segment/FloatDimensionIndexer.java | 14 ++-- .../main/java/io/druid/segment/IndexIO.java | 2 +- .../druid/segment/LongDimensionHandler.java | 8 +- .../druid/segment/LongDimensionIndexer.java | 14 ++-- .../QueryableIndexIndexableAdapter.java | 2 +- .../main/java/io/druid/segment/Rowboat.java | 2 +- .../druid/segment/StringDimensionHandler.java | 8 +- .../druid/segment/StringDimensionIndexer.java | 14 ++-- .../segment/incremental/IncrementalIndex.java | 14 ++-- .../incremental/IncrementalIndexAdapter.java | 6 +- .../IncrementalIndexStorageAdapter.java | 2 +- .../segment/StringDimensionHandlerTest.java | 2 +- 19 files changed, 146 insertions(+), 113 deletions(-) diff --git a/benchmarks/src/main/java/io/druid/benchmark/BitmapIterationBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/BitmapIterationBenchmark.java index e32e2398327c..f68f025764b4 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/BitmapIterationBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/BitmapIterationBenchmark.java @@ -165,7 +165,7 @@ public void setup(BitmapIterationBenchmark state) /** * Benchmark of cumulative cost of construction of an immutable bitmap and then iterating over it. This is a pattern - * from realtime nodes, see {@link io.druid.segment.StringDimensionIndexer#fillBitmapsFromUnsortedEncodedArray}. + * from realtime nodes, see {@link io.druid.segment.StringDimensionIndexer#fillBitmapsFromUnsortedEncodedKeyComponent}. * However this benchmark is yet approximate and to be improved to better reflect actual workloads of realtime nodes. */ @Benchmark diff --git a/processing/src/main/java/io/druid/segment/DimensionHandler.java b/processing/src/main/java/io/druid/segment/DimensionHandler.java index 12f739d9526a..893779211080 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandler.java @@ -50,10 +50,15 @@ * * The EncodedType and ActualType are Comparable because columns used as dimensions must have sortable values. * - * @param class of the encoded values - * @param class of the actual values + * @param class of a single encoded value + * @param A row key contains a component for each dimension, this param specifies the + * class of this dimension's key component. A column type that supports multivalue rows + * should use an array type (Strings would use int[]). Column types without multivalue + * row support should use single objects (e.g., Long, Float). + * @param class of a single actual value */ -public interface DimensionHandler, EncodedTypeArray, ActualType extends Comparable> +public interface DimensionHandler + , EncodedKeyComponentType, ActualType extends Comparable> { /** * Get the name of the column associated with this handler. @@ -66,12 +71,12 @@ public interface DimensionHandler, E /** - * Creates a new DimensionIndexer, a per-dimension object responsible for processing ingested rows in-memory, used by the - * IncrementalIndex. See {@link DimensionIndexer} interface for more information. + * Creates a new DimensionIndexer, a per-dimension object responsible for processing ingested rows in-memory, used + * by the IncrementalIndex. See {@link DimensionIndexer} interface for more information. * * @return A new DimensionIndexer object. */ - DimensionIndexer makeIndexer(); + DimensionIndexer makeIndexer(); /** @@ -88,7 +93,7 @@ public interface DimensionHandler, E * @return A new DimensionMergerV9 object. */ - DimensionMergerV9 makeMerger( + DimensionMergerV9 makeMerger( IndexSpec indexSpec, File outDir, IOPeon ioPeon, @@ -98,8 +103,8 @@ DimensionMergerV9 makeMerger( /** - * Creates a new DimensionMergerLegacy, a per-dimension object responsible for merging indexes/row data across segments - * and building the on-disk representation of a dimension. For use with IndexMerger only. + * Creates a new DimensionMergerLegacy, a per-dimension object responsible for merging indexes/row data across + * segments and building the on-disk representation of a dimension. For use with IndexMerger only. * * See {@link DimensionMergerLegacy} interface for more information. * @@ -111,7 +116,7 @@ DimensionMergerV9 makeMerger( * @return A new DimensionMergerLegacy object. */ - DimensionMergerLegacy makeLegacyMerger( + DimensionMergerLegacy makeLegacyMerger( IndexSpec indexSpec, File outDir, IOPeon ioPeon, @@ -120,51 +125,55 @@ DimensionMergerLegacy makeLegacyMerger( ) throws IOException; /** - * Given an array representing a single set of row value(s) for this dimension as an Object, - * return the length of the array after appropriate type-casting. + * Given an key component representing a single set of row value(s) for this dimension as an Object, + * return the length of the key component after appropriate type-casting. * - * For example, a dictionary encoded String dimension would receive an int[] as an Object. + * For example, a dictionary encoded String dimension would receive an int[] as input to this method, + * while a Long numeric dimension would receive a single Long object (no multivalue support) * - * @param dimVals Array of row values + * @param dimVals Values for this dimension from a row * @return Size of dimVals */ - int getLengthFromEncodedArray(EncodedTypeArray dimVals); + int getLengthOfEncodedKeyComponent(EncodedKeyComponentType dimVals); /** - * Given two arrays representing sorted encoded row value(s), return the result of their comparison. + * Given two key components representing sorted encoded row value(s), return the result of their comparison. * - * If the two arrays have different lengths, the shorter array should be ordered first in the comparison. + * If the two key components have different lengths, the shorter component should be ordered first in the comparison. * - * Otherwise, this function should iterate through the array values and return the comparison of the first difference. + * Otherwise, this function should iterate through the key components and return the comparison of the + * first difference. * - * @param lhs array of row values - * @param rhs array of row values + * For dimensions that do not support multivalue rows, lhs and rhs can be compared directly. * - * @return integer indicating comparison result of arrays + * @param lhs key component from a row + * @param rhs key component from a row + * + * @return integer indicating comparison result of key components */ - int compareSortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); + int compareSortedEncodedKeyComponents(EncodedKeyComponentType lhs, EncodedKeyComponentType rhs); /** - * Given two arrays representing sorted encoded row value(s), check that the two arrays have the same encoded values, - * or if the encoded values differ, that they translate into the same actual values, using the mappings - * provided by lhsEncodings and rhsEncodings (if applicable). + * Given two key components representing sorted encoded row value(s), check that the two key components + * have the same encoded values, or if the encoded values differ, that they translate into the same actual values, + * using the mappings provided by lhsEncodings and rhsEncodings (if applicable). * * If validation fails, this method should throw a SegmentValidationException. * * Used by IndexIO for validating segments. * - * See StringDimensionHandler.validateSortedEncodedArrays() for a reference implementation. + * See StringDimensionHandler.validateSortedEncodedKeyComponents() for a reference implementation. * - * @param lhs array of row values - * @param rhs array of row values + * @param lhs key component from a row + * @param rhs key component from a row * @param lhsEncodings encoding lookup from lhs's segment, null if not applicable for this dimension's type * @param rhsEncodings encoding lookup from rhs's segment, null if not applicable for this dimension's type */ - void validateSortedEncodedArrays( - EncodedTypeArray lhs, - EncodedTypeArray rhs, + void validateSortedEncodedKeyComponents( + EncodedKeyComponentType lhs, + EncodedKeyComponentType rhs, Indexed lhsEncodings, Indexed rhsEncodings ) throws SegmentValidationException; @@ -184,15 +193,16 @@ void validateSortedEncodedArrays( /** - * Given a subcolumn from getSubColumn, and the index of the current row, retrieve a row as an array of values. + * Given a subcolumn from getSubColumn, and the index of the current row, retrieve a dimension's values + * from a row as an EncodedKeyComponentType. * * For example: * - A String-typed implementation would read the current row from a DictionaryEncodedColumn as an int[]. - * - A long-typed implemention would read the current row from a GenericColumn return the current row as a long[]. + * - A long-typed implemention would read the current row from a GenericColumn and return a Long. * * @param column Column for this dimension from a QueryableIndex * @param currRow The index of the row to retrieve - * @return The row from "column" specified by "currRow", as an array of values + * @return The key component for this dimension from the current row of the column. */ - Object getRowValueArrayFromColumn(Closeable column, int currRow); + EncodedKeyComponentType getEncodedKeyComponentFromColumn(Closeable column, int currRow); } diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 32d8153312c1..c4615a1f522b 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -68,9 +68,9 @@ * - getSortedEncodedValueFromUnsorted() * - getUnsortedEncodedValueFromSorted() * - getSortedIndexedValues() - * - convertUnsortedEncodedArrayToSortedEncodedArray() + * - convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent() * - * calling processRowValsToUnsortedEncodedArray() afterwards can invalidate previously read sorted encoding values + * calling processRowValsToUnsortedEncodedKeyComponent() afterwards can invalidate previously read sorted encoding values * (i.e., new values could be added that are inserted between existing values in the ordering). * * @@ -78,7 +78,7 @@ * -------------------- * Each DimensionIndexer exists within the context of a single IncrementalIndex. Before IndexMerger.persist() is * called on an IncrementalIndex, any associated DimensionIndexers should allow multiple threads to add data to the - * indexer via processRowValsToUnsortedEncodedArray() and allow multiple threads to read data via methods that only + * indexer via processRowValsToUnsortedEncodedKeyComponent() and allow multiple threads to read data via methods that only * deal with unsorted encodings. * * As mentioned in the "Sorting and Ordering" section, writes and calls to the sorted encoding @@ -97,11 +97,16 @@ * For example, in the RealtimePlumber and IndexGeneratorJob, the thread that performs index persist is started * by the same thread that handles the row adds on an index, ensuring the adds are visible to the persist thread. * - * @param class of the encoded values - * @param class of the actual values + * @param class of a single encoded value + * @param A row key contains a component for each dimension, this param specifies the + * class of this dimension's key component. A column type that supports multivalue rows + * should use an array type (e.g., Strings would use int[]). Column types without + * multivalue row support should use single objects (e.g., Long, Float). + * @param class of a single actual value * */ -public interface DimensionIndexer, EncodedTypeArray, ActualType extends Comparable> +public interface DimensionIndexer + , EncodedKeyComponentType, ActualType extends Comparable> { /** * Given a single row value or list of row values (for multi-valued dimensions), update any internal data structures @@ -117,7 +122,7 @@ public interface DimensionIndexer, E * * @return An array containing an encoded representation of the input row value. */ - EncodedTypeArray processRowValsToUnsortedEncodedArray(Object dimValues); + EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(Object dimValues); /** @@ -231,13 +236,13 @@ ColumnValueSelector makeColumnValueSelector( * them to their actual type (e.g., performing a dictionary lookup for a dict-encoded String dimension), * and comparing the actual values until a difference is found. * - * Refer to StringDimensionIndexer.compareUnsortedEncodedArrays() for a reference implementation. + * Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation. * * @param lhs dimension value array from a TimeAndDims key * @param rhs dimension value array from a TimeAndDims key * @return comparison of the two arrays */ - int compareUnsortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); + int compareUnsortedEncodedKeyComponents(EncodedKeyComponentType lhs, EncodedKeyComponentType rhs); /** @@ -247,7 +252,7 @@ ColumnValueSelector makeColumnValueSelector( * @param rhs dimension value array from a TimeAndDims key * @return true if the two arrays are equal */ - boolean checkUnsortedEncodedArraysEqual(EncodedTypeArray lhs, EncodedTypeArray rhs); + boolean checkUnsortedEncodedKeyComponentsEqual(EncodedKeyComponentType lhs, EncodedKeyComponentType rhs); /** @@ -255,33 +260,35 @@ ColumnValueSelector makeColumnValueSelector( * @param key dimension value array from a TimeAndDims key * @return hashcode of the array */ - int getUnsortedEncodedArrayHashCode(EncodedTypeArray key); + int getUnsortedEncodedKeyComponentHashCode(EncodedKeyComponentType key); boolean LIST = true; boolean ARRAY = false; /** - * Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(), - * convert the unsorted encoded values to a list or array of actual values. + * Given a row value array from a TimeAndDims key, as described in the documentation for + * compareUnsortedEncodedKeyComponents(), convert the unsorted encoded values to a list or array of actual values. * * If the key has one element, this method should return a single Object instead of an array or list, ignoring * the asList parameter. * * @param key dimension value array from a TimeAndDims key * @param asList if true, return an array; if false, return a list - * @return single value, array, or list containing the actual values corresponding to the encoded values in the input array + * @return single value, array, or list containing the actual values corresponding to the encoded values + * in the input array */ - Object convertUnsortedEncodedArrayToActualArrayOrList(EncodedTypeArray key, boolean asList); + Object convertUnsortedEncodedKeyComponentToActualArrayOrList(EncodedKeyComponentType key, boolean asList); /** - * Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(), - * convert the unsorted encoded values to an array of sorted encoded values (i.e., sorted by their corresponding actual values) + * Given a row value array from a TimeAndDims key, as described in the documentation for + * compareUnsortedEncodedKeyComponents(), convert the unsorted encoded values to an array of sorted encoded values + * (i.e., sorted by their corresponding actual values) * * @param key dimension value array from a TimeAndDims key * @return array containing the sorted encoded values corresponding to the unsorted encoded values in the input array */ - EncodedTypeArray convertUnsortedEncodedArrayToSortedEncodedArray(EncodedTypeArray key); + EncodedKeyComponentType convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(EncodedKeyComponentType key); /** @@ -295,7 +302,7 @@ ColumnValueSelector makeColumnValueSelector( * For example, if key is an int[] array with values [1,3,4] for a dictionary-encoded String dimension, * and rowNum is 27, this function would set bit 27 in bitmapIndexes[1], bitmapIndexes[3], and bitmapIndexes[4] * - * See StringDimensionIndexer.fillBitmapsFromUnsortedEncodedArray() for a reference implementation. + * See StringDimensionIndexer.fillBitmapsFromUnsortedEncodedKeyComponent() for a reference implementation. * * If a dimension type does not support bitmap indexes, this function will not be called * and can be left unimplemented. @@ -305,5 +312,10 @@ ColumnValueSelector makeColumnValueSelector( * @param bitmapIndexes array of bitmaps, indexed by integer dimension value * @param factory bitmap factory */ - void fillBitmapsFromUnsortedEncodedArray(EncodedTypeArray key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory); + void fillBitmapsFromUnsortedEncodedKeyComponent( + EncodedKeyComponentType key, + int rowNum, + MutableBitmap[] bitmapIndexes, + BitmapFactory factory + ); } diff --git a/processing/src/main/java/io/druid/segment/DimensionMerger.java b/processing/src/main/java/io/druid/segment/DimensionMerger.java index d22cc5c0a05a..cc215cde8308 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMerger.java +++ b/processing/src/main/java/io/druid/segment/DimensionMerger.java @@ -49,8 +49,13 @@ * * A class implementing this interface is expected to be highly stateful, updating its internal state as these * functions are called. + * + * @param A row key contains a component for each dimension, this param specifies the + * class of this dimension's key component. A column type that supports multivalue rows + * should use an array type (Strings would use int[]). Column types without multivalue + * row support should use single objects (e.g., Long, Float). */ -public interface DimensionMerger +public interface DimensionMerger { /** * Given a list of segment adapters: @@ -72,7 +77,8 @@ public interface DimensionMerger /** - * Convert a row from a single segment to its equivalent representation in the merged set of rows. + * Convert a row's key component with per-segment encoding to its equivalent representation + * in the merged set of rows. * * This function is used by the index merging process to build the merged sequence of rows. * @@ -83,17 +89,22 @@ public interface DimensionMerger * segment-specific dictionary values within the row to the common merged dictionary values * determined during writeMergedValueMetadata(). * - * @param segmentRow A row from a segment to be converted to its representation within the merged sequence of rows. + * @param segmentRow A row's key component for this dimension. The encoding of the key component's + * values will be converted from per-segment encodings to the combined encodings from + * the merged sequence of rows. * @param segmentIndexNumber Integer indicating which segment the row originated from. */ - EncodedTypedArray convertSegmentRowValuesToMergedRowValues(EncodedTypedArray segmentRow, int segmentIndexNumber); + EncodedKeyComponentType convertSegmentRowValuesToMergedRowValues( + EncodedKeyComponentType segmentRow, + int segmentIndexNumber + ); /** - * Process a row from the merged sequence of rows and update the DimensionMerger's internal state. + * Process a key component from the merged sequence of rows and update the DimensionMerger's internal state. * * After constructing a merged sequence of rows across segments, the index merging process will - * iterate through these rows and pass row values from each dimension to their correspodning DimensionMergers. + * iterate through these rows and pass row key components from each dimension to their corresponding DimensionMergers. * * This allows each DimensionMerger to build its internal view of the sequence of merged rows, to be * written out to a segment later. @@ -101,7 +112,7 @@ public interface DimensionMerger * @param rowValues The row values to be added. * @throws IOException */ - void processMergedRow(EncodedTypedArray rowValues) throws IOException; + void processMergedRow(EncodedKeyComponentType rowValues) throws IOException; /** diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java index 0da997642fef..590f270e87e5 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java @@ -34,7 +34,7 @@ * * NOTE: Remove this class when the legacy IndexMerger is deprecated and removed. */ -public interface DimensionMergerLegacy extends DimensionMergerV9 +public interface DimensionMergerLegacy extends DimensionMergerV9 { /** * Write this dimension's value metadata to a file. diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java index c95a757ee517..51411cf6a393 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java @@ -28,7 +28,7 @@ * * DimensionMerger subclass to be used with IndexMergerV9. */ -public interface DimensionMergerV9 extends DimensionMerger +public interface DimensionMergerV9 extends DimensionMerger { /** * Return a ColumnDescriptor containing ColumnPartSerde objects appropriate for diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java b/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java index b9ba853c7a42..e683bf251461 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionHandler.java @@ -82,19 +82,19 @@ public DimensionMergerLegacy makeLegacyMerger( } @Override - public int getLengthFromEncodedArray(Float dimVals) + public int getLengthOfEncodedKeyComponent(Float dimVals) { return FloatColumn.ROW_SIZE; } @Override - public int compareSortedEncodedArrays(Float lhs, Float rhs) + public int compareSortedEncodedKeyComponents(Float lhs, Float rhs) { return lhs.compareTo(rhs); } @Override - public void validateSortedEncodedArrays( + public void validateSortedEncodedKeyComponents( Float lhs, Float rhs, Indexed lhsEncodings, Indexed rhsEncodings ) throws SegmentValidationException { @@ -110,7 +110,7 @@ public Closeable getSubColumn(Column column) } @Override - public Object getRowValueArrayFromColumn(Closeable column, int currRow) + public Float getEncodedKeyComponentFromColumn(Closeable column, int currRow) { return ((GenericColumn) column).getFloatSingleValueRow(currRow); } diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index 8c2a8bb44886..e1ceba3fe090 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -32,7 +32,7 @@ public class FloatDimensionIndexer implements DimensionIndexer { @Override - public Float processRowValsToUnsortedEncodedArray(Object dimValues) + public Float processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { if (dimValues instanceof List) { throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); @@ -104,37 +104,37 @@ public float get() } @Override - public int compareUnsortedEncodedArrays(Float lhs, Float rhs) + public int compareUnsortedEncodedKeyComponents(Float lhs, Float rhs) { return lhs.compareTo(rhs); } @Override - public boolean checkUnsortedEncodedArraysEqual(Float lhs, Float rhs) + public boolean checkUnsortedEncodedKeyComponentsEqual(Float lhs, Float rhs) { return lhs.equals(rhs); } @Override - public int getUnsortedEncodedArrayHashCode(Float key) + public int getUnsortedEncodedKeyComponentHashCode(Float key) { return key.hashCode(); } @Override - public Object convertUnsortedEncodedArrayToActualArrayOrList(Float key, boolean asList) + public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(Float key, boolean asList) { return Lists.newArrayList(key); } @Override - public Float convertUnsortedEncodedArrayToSortedEncodedArray(Float key) + public Float convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(Float key) { return key; } @Override - public void fillBitmapsFromUnsortedEncodedArray( + public void fillBitmapsFromUnsortedEncodedKeyComponent( Float key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory ) { diff --git a/processing/src/main/java/io/druid/segment/IndexIO.java b/processing/src/main/java/io/druid/segment/IndexIO.java index 169ccb3c149d..87cd8b172a52 100644 --- a/processing/src/main/java/io/druid/segment/IndexIO.java +++ b/processing/src/main/java/io/druid/segment/IndexIO.java @@ -354,7 +354,7 @@ public static void validateRowValues( } DimensionHandler dimHandler = dimHandlers.get(dim1Name); - dimHandler.validateSortedEncodedArrays( + dimHandler.validateSortedEncodedKeyComponents( dim1Vals, dim2Vals, adapter1.getDimValueLookup(dim1Name), diff --git a/processing/src/main/java/io/druid/segment/LongDimensionHandler.java b/processing/src/main/java/io/druid/segment/LongDimensionHandler.java index 3030ff87fe47..7b3ff7685a10 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionHandler.java @@ -82,19 +82,19 @@ public DimensionMergerLegacy makeLegacyMerger( } @Override - public int getLengthFromEncodedArray(Long dimVals) + public int getLengthOfEncodedKeyComponent(Long dimVals) { return LongColumn.ROW_SIZE; } @Override - public int compareSortedEncodedArrays(Long lhs, Long rhs) + public int compareSortedEncodedKeyComponents(Long lhs, Long rhs) { return lhs.compareTo(rhs); } @Override - public void validateSortedEncodedArrays( + public void validateSortedEncodedKeyComponents( Long lhs, Long rhs, Indexed lhsEncodings, Indexed rhsEncodings ) throws SegmentValidationException { @@ -110,7 +110,7 @@ public Closeable getSubColumn(Column column) } @Override - public Object getRowValueArrayFromColumn(Closeable column, int currRow) + public Long getEncodedKeyComponentFromColumn(Closeable column, int currRow) { return ((GenericColumn) column).getLongSingleValueRow(currRow); } diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 88da17ab7cd0..0f028c0c1ea1 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -32,7 +32,7 @@ public class LongDimensionIndexer implements DimensionIndexer { @Override - public Long processRowValsToUnsortedEncodedArray(Object dimValues) + public Long processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { if (dimValues instanceof List) { throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); @@ -104,37 +104,37 @@ public long get() } @Override - public int compareUnsortedEncodedArrays(Long lhs, Long rhs) + public int compareUnsortedEncodedKeyComponents(Long lhs, Long rhs) { return lhs.compareTo(rhs); } @Override - public boolean checkUnsortedEncodedArraysEqual(Long lhs, Long rhs) + public boolean checkUnsortedEncodedKeyComponentsEqual(Long lhs, Long rhs) { return lhs.equals(rhs); } @Override - public int getUnsortedEncodedArrayHashCode(Long key) + public int getUnsortedEncodedKeyComponentHashCode(Long key) { return key.hashCode(); } @Override - public Object convertUnsortedEncodedArrayToActualArrayOrList(Long key, boolean asList) + public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(Long key, boolean asList) { return Lists.newArrayList(key); } @Override - public Long convertUnsortedEncodedArrayToSortedEncodedArray(Long key) + public Long convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(Long key) { return key; } @Override - public void fillBitmapsFromUnsortedEncodedArray( + public void fillBitmapsFromUnsortedEncodedKeyComponent( Long key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory ) { diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java index 8318e6de81f1..6532b695a42a 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexIndexableAdapter.java @@ -255,7 +255,7 @@ public Rowboat next() final Object[] dims = new Object[columns.length]; int dimIndex = 0; for (final Closeable column : columns) { - dims[dimIndex] = handlers[dimIndex].getRowValueArrayFromColumn(column, currRow); + dims[dimIndex] = handlers[dimIndex].getEncodedKeyComponentFromColumn(column, currRow); dimIndex++; } diff --git a/processing/src/main/java/io/druid/segment/Rowboat.java b/processing/src/main/java/io/druid/segment/Rowboat.java index cded51f5f028..a8ea20b7aac3 100644 --- a/processing/src/main/java/io/druid/segment/Rowboat.java +++ b/processing/src/main/java/io/druid/segment/Rowboat.java @@ -122,7 +122,7 @@ public int compareTo(Rowboat rhs) } DimensionHandler handler = handlers[index]; - retVal = handler.compareSortedEncodedArrays(lhsVals, rhsVals); + retVal = handler.compareSortedEncodedKeyComponents(lhsVals, rhsVals); ++index; } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index b45f95edec27..c017841f2e86 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -51,13 +51,13 @@ public String getDimensionName() } @Override - public int getLengthFromEncodedArray(int[] dimVals) + public int getLengthOfEncodedKeyComponent(int[] dimVals) { return dimVals.length; } @Override - public int compareSortedEncodedArrays(int[] lhs, int[] rhs) + public int compareSortedEncodedKeyComponents(int[] lhs, int[] rhs) { int lhsLen = lhs.length; int rhsLen = rhs.length; @@ -73,7 +73,7 @@ public int compareSortedEncodedArrays(int[] lhs, int[] rhs) } @Override - public void validateSortedEncodedArrays( + public void validateSortedEncodedKeyComponents( int[] lhs, int[] rhs, Indexed lhsEncodings, @@ -166,7 +166,7 @@ public Closeable getSubColumn(Column column) } @Override - public Object getRowValueArrayFromColumn(Closeable column, int currRow) + public int[] getEncodedKeyComponentFromColumn(Closeable column, int currRow) { DictionaryEncodedColumn dict = (DictionaryEncodedColumn) column; int[] theVals; diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 0d3aeedd5e96..829ea60c31f0 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -220,7 +220,7 @@ public StringDimensionIndexer(MultiValueHandling multiValueHandling) } @Override - public int[] processRowValsToUnsortedEncodedArray(Object dimValues) + public int[] processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { final int[] encodedDimensionValues; final int oldDictSize = dimLookup.size(); @@ -340,7 +340,7 @@ public int getCardinality() } @Override - public int compareUnsortedEncodedArrays(int[] lhs, int[] rhs) + public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs) { int lhsLen = lhs.length; int rhsLen = rhs.length; @@ -365,13 +365,13 @@ public int compareUnsortedEncodedArrays(int[] lhs, int[] rhs) } @Override - public boolean checkUnsortedEncodedArraysEqual(int[] lhs, int[] rhs) + public boolean checkUnsortedEncodedKeyComponentsEqual(int[] lhs, int[] rhs) { return Arrays.equals(lhs, rhs); } @Override - public int getUnsortedEncodedArrayHashCode(int[] key) + public int getUnsortedEncodedKeyComponentHashCode(int[] key) { return Arrays.hashCode(key); } @@ -539,7 +539,7 @@ public int lookupId(String name) } @Override - public Object convertUnsortedEncodedArrayToActualArrayOrList(int[] key, boolean asList) + public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(int[] key, boolean asList) { if (key == null || key.length == 0) { return null; @@ -568,7 +568,7 @@ public Object convertUnsortedEncodedArrayToActualArrayOrList(int[] key, boolean } @Override - public int[] convertUnsortedEncodedArrayToSortedEncodedArray(int[] key) + public int[] convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(int[] key) { int[] sortedDimVals = new int[key.length]; for (int i = 0; i < key.length; ++i) { @@ -579,7 +579,7 @@ public int[] convertUnsortedEncodedArrayToSortedEncodedArray(int[] key) } @Override - public void fillBitmapsFromUnsortedEncodedArray( + public void fillBitmapsFromUnsortedEncodedKeyComponent( int[] key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory ) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 9f71550ee7c6..87b220f9b47f 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -430,10 +430,10 @@ TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException } DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); - Object dimsKey = indexer.processRowValsToUnsortedEncodedArray(row.getRaw(dimension)); + Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension)); // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthFromEncodedArray(dimsKey) > 1) { + if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { capabilities.setHasMultipleValues(true); } @@ -701,12 +701,12 @@ public Row apply(final Map.Entry input) } String dimensionName = dimensionDesc.getName(); DimensionHandler handler = dimensionDesc.getHandler(); - if (dim == null || handler.getLengthFromEncodedArray(dim) == 0) { + if (dim == null || handler.getLengthOfEncodedKeyComponent(dim) == 0) { theVals.put(dimensionName, null); continue; } final DimensionIndexer indexer = dimensionDesc.getIndexer(); - Object rowVals = indexer.convertUnsortedEncodedArrayToActualArrayOrList(dim, DimensionIndexer.LIST); + Object rowVals = indexer.convertUnsortedEncodedKeyComponentToActualArrayOrList(dim, DimensionIndexer.LIST); theVals.put(dimensionName, rowVals); } @@ -891,7 +891,7 @@ public boolean equals(Object o) } for (int i = 0; i < dims.length; i++) { final DimensionIndexer indexer = dimensionDescsList.get(i).getIndexer(); - if (!indexer.checkUnsortedEncodedArraysEqual(dims[i], that.dims[i])) { + if (!indexer.checkUnsortedEncodedKeyComponentsEqual(dims[i], that.dims[i])) { return false; } } @@ -904,7 +904,7 @@ public int hashCode() int hash = (int) timestamp; for (int i = 0; i < dims.length; i++) { final DimensionIndexer indexer = dimensionDescsList.get(i).getIndexer(); - hash = 31 * hash + indexer.getUnsortedEncodedArrayHashCode(dims[i]); + hash = 31 * hash + indexer.getUnsortedEncodedKeyComponentHashCode(dims[i]); } return hash; } @@ -958,7 +958,7 @@ public int compare(TimeAndDims lhs, TimeAndDims rhs) } final DimensionIndexer indexer = dimensionDescs.get(index).getIndexer(); - retVal = indexer.compareUnsortedEncodedArrays(lhsIdxs, rhsIdxs); + retVal = indexer.compareUnsortedEncodedKeyComponents(lhsIdxs, rhsIdxs); ++index; } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java index ebd008a75d68..72cd2707049f 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java @@ -104,7 +104,7 @@ public IncrementalIndexAdapter( // Add 'null' to the dimension's dictionary. if (dimIndex >= dims.length || dims[dimIndex] == null) { - accessor.indexer.processRowValsToUnsortedEncodedArray(null); + accessor.indexer.processRowValsToUnsortedEncodedKeyComponent(null); continue; } final ColumnCapabilities capabilities = dimension.getCapabilities(); @@ -112,7 +112,7 @@ public IncrementalIndexAdapter( if(capabilities.hasBitmapIndexes()) { final MutableBitmap[] bitmapIndexes = accessor.invertedIndexes; final DimensionIndexer indexer = accessor.indexer; - indexer.fillBitmapsFromUnsortedEncodedArray(dims[dimIndex], rowNum, bitmapIndexes, bitmapFactory); + indexer.fillBitmapsFromUnsortedEncodedKeyComponent(dims[dimIndex], rowNum, bitmapIndexes, bitmapFactory); } } ++rowNum; @@ -199,7 +199,7 @@ public Rowboat apply(Map.Entry input) } final DimensionIndexer indexer = indexers[dimIndex]; - Object sortedDimVals = indexer.convertUnsortedEncodedArrayToSortedEncodedArray(dimValues[dimIndex]); + Object sortedDimVals = indexer.convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(dimValues[dimIndex]); dims[dimIndex] = sortedDimVals; } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 503ba9f12fdd..fb65a56d5dcb 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -542,7 +542,7 @@ public Object get() return null; } - return indexer.convertUnsortedEncodedArrayToActualArrayOrList( + return indexer.convertUnsortedEncodedKeyComponentToActualArrayOrList( dims[dimensionIndex], DimensionIndexer.ARRAY ); } diff --git a/processing/src/test/java/io/druid/segment/StringDimensionHandlerTest.java b/processing/src/test/java/io/druid/segment/StringDimensionHandlerTest.java index 5bd8362478fe..929139cdece1 100644 --- a/processing/src/test/java/io/druid/segment/StringDimensionHandlerTest.java +++ b/processing/src/test/java/io/druid/segment/StringDimensionHandlerTest.java @@ -124,7 +124,7 @@ private static void validate(IncrementalIndexAdapter adapter1, IncrementalIndexA String name1 = dimNames1.get(i); String name2 = dimNames2.get(i); DimensionHandler handler = handlers.get(name1); - handler.validateSortedEncodedArrays( + handler.validateSortedEncodedKeyComponents( val1, val2, adapter1.getDimValueLookup(name1), From 9a548390f5facd1c87ac569c3e43c62222ec7b36 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 22 Feb 2017 17:18:58 -0800 Subject: [PATCH 03/13] Add numeric index merge test, fixes --- .../druid/segment/FloatDimensionMergerV9.java | 2 + .../druid/segment/LongDimensionMergerV9.java | 2 + .../io/druid/segment/IndexMergerTest.java | 125 ++++++++++++++++++ 3 files changed, 129 insertions(+) diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java b/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java index 8e84c78fd9df..591fedc67bd9 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionMergerV9.java @@ -71,6 +71,7 @@ protected void setupEncodedValueWriter() throws IOException { final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); this.serializer = FloatColumnSerializer.create(ioPeon, dimensionName, metCompression); + serializer.open(); } @Override @@ -107,6 +108,7 @@ public boolean canSkip() @Override public ColumnDescriptor makeColumnDescriptor() throws IOException { + serializer.close(); final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); builder.setValueType(ValueType.FLOAT); builder.addSerde( diff --git a/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java b/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java index 3166cc1abda1..f27d3e929ff7 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionMergerV9.java @@ -72,6 +72,7 @@ protected void setupEncodedValueWriter() throws IOException final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression(); final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding(); this.serializer = LongColumnSerializer.create(ioPeon, dimensionName, metCompression, longEncoding); + serializer.open(); } @Override @@ -108,6 +109,7 @@ public boolean canSkip() @Override public ColumnDescriptor makeColumnDescriptor() throws IOException { + serializer.close(); final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); builder.setValueType(ValueType.LONG); builder.addSerde( diff --git a/processing/src/test/java/io/druid/segment/IndexMergerTest.java b/processing/src/test/java/io/druid/segment/IndexMergerTest.java index 85aef5928e41..f45a49690d01 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerTest.java @@ -34,7 +34,11 @@ import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.LongDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.granularity.QueryGranularities; +import io.druid.granularity.QueryGranularity; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; import io.druid.java.util.common.io.smoosh.SmooshedFileMapper; @@ -2005,6 +2009,127 @@ public void testMismatchedMetricsVarying() throws IOException Assert.assertEquals(ImmutableSet.of("A", "B", "C"), ImmutableSet.copyOf(adapter.getAvailableMetrics())); } + @Test + public void testMergeNumericDims() throws Exception + { + IncrementalIndex toPersist1 = getIndexWithNumericDims(); + IncrementalIndex toPersist2 = getIndexWithNumericDims(); + + final File tmpDir = temporaryFolder.newFolder(); + final File tmpDir2 = temporaryFolder.newFolder(); + final File tmpDirMerged = temporaryFolder.newFolder(); + + QueryableIndex index1 = closer.closeLater( + INDEX_IO.loadIndex( + INDEX_MERGER.persist( + toPersist1, + tmpDir, + indexSpec + ) + ) + ); + + QueryableIndex index2 = closer.closeLater( + INDEX_IO.loadIndex( + INDEX_MERGER.persist( + toPersist2, + tmpDir2, + indexSpec + ) + ) + ); + + final QueryableIndex merged = closer.closeLater( + INDEX_IO.loadIndex( + INDEX_MERGER.mergeQueryableIndex( + Arrays.asList(index1, index2), + true, + new AggregatorFactory[]{new CountAggregatorFactory("count")}, + tmpDirMerged, + indexSpec + ) + ) + ); + + final IndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged); + Iterable boats = adapter.getRows(); + List boatList = Lists.newArrayList(boats); + + Assert.assertEquals(ImmutableList.of("dimA", "dimB", "dimC"), ImmutableList.copyOf(adapter.getDimensionNames())); + Assert.assertEquals(4, boatList.size()); + + Assert.assertArrayEquals(new Object[]{0L, 0.0f, new int[]{2}}, boatList.get(0).getDims()); + Assert.assertArrayEquals(new Object[]{2L}, boatList.get(0).getMetrics()); + + Assert.assertArrayEquals(new Object[]{72L, 60000.789f, new int[]{3}}, boatList.get(1).getDims()); + Assert.assertArrayEquals(new Object[]{2L}, boatList.get(0).getMetrics()); + + Assert.assertArrayEquals(new Object[]{100L, 4000.567f, new int[]{1}}, boatList.get(2).getDims()); + Assert.assertArrayEquals(new Object[]{2L}, boatList.get(1).getMetrics()); + + Assert.assertArrayEquals(new Object[]{3001L, 1.2345f, new int[]{0}}, boatList.get(3).getDims()); + Assert.assertArrayEquals(new Object[]{2L}, boatList.get(2).getMetrics()); + } + + private IncrementalIndex getIndexWithNumericDims() throws Exception + { + IncrementalIndex index = getIndexWithDimsFromSchemata( + Arrays.asList( + new LongDimensionSchema("dimA"), + new FloatDimensionSchema("dimB"), + new StringDimensionSchema("dimC") + ) + ); + + index.add( + new MapBasedInputRow( + 1, + Arrays.asList("dimA", "dimB", "dimC"), + ImmutableMap.of("dimA", 100L, "dimB", 4000.567, "dimC", "Hello") + ) + ); + + index.add( + new MapBasedInputRow( + 1, + Arrays.asList("dimA", "dimB", "dimC"), + ImmutableMap.of("dimA", 72L, "dimB", 60000.789, "dimC", "World") + ) + ); + + index.add( + new MapBasedInputRow( + 1, + Arrays.asList("dimA", "dimB", "dimC"), + ImmutableMap.of("dimA", 3001L, "dimB", 1.2345, "dimC", "Foobar") + ) + ); + + index.add( + new MapBasedInputRow( + 1, + Arrays.asList("dimA", "dimB", "dimC"), + ImmutableMap.of("dimC", "Nully Row") + ) + ); + + return index; + } + + private IncrementalIndex getIndexWithDimsFromSchemata(List dims) + { + IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder() + .withMinTimestamp(0L) + .withQueryGranularity(QueryGranularities.NONE) + .withDimensionsSpec(new DimensionsSpec(dims, null, null)) + .withMetrics(new AggregatorFactory[]{new CountAggregatorFactory("count")}) + .withRollup(true) + .build(); + + return new OnheapIncrementalIndex(schema, true, 1000); + } + + @Test public void testPersistNullColumnSkipping() throws Exception { From 1a69a20ac652f7e78b620bd8ee29e93c07dba445 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 22 Feb 2017 17:58:34 -0800 Subject: [PATCH 04/13] Docs for numeric dims at ingestion --- docs/content/ingestion/index.md | 46 +++++++++++++++++++++++-- docs/content/ingestion/schema-design.md | 11 +++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/docs/content/ingestion/index.md b/docs/content/ingestion/index.md index 117e54009bd8..a02465677d7d 100644 --- a/docs/content/ingestion/index.md +++ b/docs/content/ingestion/index.md @@ -36,7 +36,21 @@ An example dataSchema is shown below: "format" : "auto" }, "dimensionsSpec" : { - "dimensions": ["page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city"], + "dimensions": [ + "page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city", + { + "type": "LONG", + "name": "countryNum" + }, + { + "type": "FLOAT", + "name": "userLatitude" + }, + { + "type": "FLOAT", + "name": "userLongitude" + } + ], "dimensionExclusions" : [], "spatialDimensions" : [] } @@ -169,10 +183,38 @@ handle all formatting decisions on their own, without using the ParseSpec. | Field | Type | Description | Required | |-------|------|-------------|----------| -| dimensions | JSON String array | The names of the dimensions. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as dimension columns. | yes | +| dimensions | JSON Object array | A list of [dimension schema](#dimension-schema) objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as String-typed dimension columns. | yes | | dimensionExclusions | JSON String array | The names of dimensions to exclude from ingestion. | no (default == [] | | spatialDimensions | JSON Object array | An array of [spatial dimensions](../development/geo.html) | no (default == [] | +#### Dimension Schema +A dimension schema specifies the type and name of a dimension to be ingested. + +For example, the following `dimensionsSpec` section from a `dataSchema` ingests one column as Long (`countryNum`), two columns as Float (`userLatitude`, `userLongitude`), and the other columns as Strings: + +```json +"dimensionsSpec" : { + "dimensions": [ + "page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city", + { + "type": "LONG", + "name": "countryNum" + }, + { + "type": "FLOAT", + "name": "userLatitude" + }, + { + "type": "FLOAT", + "name": "userLongitude" + } + ], + "dimensionExclusions" : [], + "spatialDimensions" : [] +} +``` + + ## GranularitySpec The default granularity spec is `uniform`. diff --git a/docs/content/ingestion/schema-design.md b/docs/content/ingestion/schema-design.md index ecf1ec64fad7..07ea4e77fd6e 100644 --- a/docs/content/ingestion/schema-design.md +++ b/docs/content/ingestion/schema-design.md @@ -12,7 +12,7 @@ of OLAP data. For more detailed information: * Every row in Druid must have a timestamp. Data is always partitioned by time, and every query has a time filter. Query results can also be broken down by time buckets like minutes, hours, days, and so on. -* Dimensions are fields that can be filtered on or grouped by. They are always either single Strings or arrays of Strings. +* Dimensions are fields that can be filtered on or grouped by. They are always single Strings, arrays of Strings, single Longs, or single Floats. * Metrics are fields that can be aggregated. They are often stored as numbers (integers or floats) but can also be stored as complex objects like HyperLogLog sketches or approximate histogram sketches. Typical production tables (or datasources as they are known in Druid) have fewer than 100 dimensions and fewer @@ -20,6 +20,13 @@ than 100 metrics, although, based on user testimony, datasources with thousands Below, we outline some best practices with schema design: +## Numeric dimensions + +If the user wishes to ingest a column as a numeric-typed dimension (Long or Float), it is necessary to specify the type of the column in the `dimensions` section of the `dimensionsSpec`. If the type is omitted, Druid will ingest a column as the default String type. + +See [Dimension Schema](../ingestion/index.html#dimension-schema) for more information. + + ## High cardinality dimensions (e.g. unique IDs) In practice, we see that exact counts for unique IDs are often not required. Storing unique IDs as a column will kill @@ -77,6 +84,8 @@ a dimension that has been excluded, or a metric column as a dimension. It should these segments will be slightly larger than if the list of dimensions was explicitly specified in lexicographic order. This limitation does not impact query correctness- just storage requirements. +Note that when using schema-less ingestion, all dimensions will be ingested as String-typed dimensions. + ## Including the same column as a dimension and a metric One workflow with unique IDs is to be able to filter on a particular ID, while still being able to do fast unique counts on the ID column. From 4d5462532f6e9554e03b672fc95582a0fb367c67 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 22 Feb 2017 18:16:56 -0800 Subject: [PATCH 05/13] Remove unused import --- processing/src/test/java/io/druid/segment/IndexMergerTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/processing/src/test/java/io/druid/segment/IndexMergerTest.java b/processing/src/test/java/io/druid/segment/IndexMergerTest.java index f45a49690d01..f489ac377abc 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerTest.java @@ -38,7 +38,6 @@ import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.StringDimensionSchema; import io.druid.granularity.QueryGranularities; -import io.druid.granularity.QueryGranularity; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; import io.druid.java.util.common.io.smoosh.SmooshedFileMapper; From 21790e403b905114d7b5e55cbcbebd4a7aa85010 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 27 Feb 2017 14:51:31 -0800 Subject: [PATCH 06/13] Adjust docs, add aggregate on numeric dims tests --- docs/content/ingestion/index.md | 40 +++++++--- .../io/druid/segment/DimensionIndexer.java | 6 ++ .../druid/segment/FloatDimensionIndexer.java | 16 +++- .../druid/segment/LongDimensionIndexer.java | 16 +++- .../NumericColumnSelectorWrappers.java | 57 +++++++++++++ .../druid/segment/StringDimensionIndexer.java | 7 ++ .../IncrementalIndexStorageAdapter.java | 33 +++++++- .../query/groupby/GroupByQueryRunnerTest.java | 46 +++++++++++ .../druid/query/topn/TopNQueryRunnerTest.java | 80 +++++++++++++++++++ 9 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java diff --git a/docs/content/ingestion/index.md b/docs/content/ingestion/index.md index a02465677d7d..5682bc95465a 100644 --- a/docs/content/ingestion/index.md +++ b/docs/content/ingestion/index.md @@ -37,17 +37,28 @@ An example dataSchema is shown below: }, "dimensionsSpec" : { "dimensions": [ - "page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city", + "page", + "language", + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city", { - "type": "LONG", + "type": "long", "name": "countryNum" }, { - "type": "FLOAT", + "type": "float", "name": "userLatitude" }, { - "type": "FLOAT", + "type": "float", "name": "userLongitude" } ], @@ -183,7 +194,7 @@ handle all formatting decisions on their own, without using the ParseSpec. | Field | Type | Description | Required | |-------|------|-------------|----------| -| dimensions | JSON Object array | A list of [dimension schema](#dimension-schema) objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as String-typed dimension columns. | yes | +| dimensions | JSON array | A list of [dimension schema](#dimension-schema) objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as String-typed dimension columns. | yes | | dimensionExclusions | JSON String array | The names of dimensions to exclude from ingestion. | no (default == [] | | spatialDimensions | JSON Object array | An array of [spatial dimensions](../development/geo.html) | no (default == [] | @@ -195,17 +206,28 @@ For example, the following `dimensionsSpec` section from a `dataSchema` ingests ```json "dimensionsSpec" : { "dimensions": [ - "page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city", + "page", + "language", + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city", { - "type": "LONG", + "type": "long", "name": "countryNum" }, { - "type": "FLOAT", + "type": "float", "name": "userLatitude" }, { - "type": "FLOAT", + "type": "float", "name": "userLongitude" } ], diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index c4615a1f522b..6d5f6005aef0 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -22,6 +22,7 @@ import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndexStorageAdapter; @@ -108,6 +109,11 @@ public interface DimensionIndexer , EncodedKeyComponentType, ActualType extends Comparable> { + /** + * @return The ValueType corresponding to this dimension indexer's ActualType. + */ + ValueType getValueType(); + /** * Given a single row value or list of row values (for multi-valued dimensions), update any internal data structures * with the ingested values and return the row values as an array to be used within a TimeAndDims key. diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index e1ceba3fe090..28a033f06489 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -19,10 +19,12 @@ package io.druid.segment; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndexStorageAdapter; @@ -31,6 +33,12 @@ public class FloatDimensionIndexer implements DimensionIndexer { + @Override + public ValueType getValueType() + { + return ValueType.FLOAT; + } + @Override public Float processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { @@ -56,19 +64,19 @@ public Float getUnsortedEncodedValueFromSorted(Float sortedIntermediateValue) @Override public Indexed getSortedIndexedValues() { - return null; + throw new UnsupportedOperationException("Numeric columns do not support value dictionaries."); } @Override public Float getMinValue() { - return 0.0f; + return Float.MIN_VALUE; } @Override public Float getMaxValue() { - return 0.0f; + return Float.MAX_VALUE; } @Override @@ -124,7 +132,7 @@ public int getUnsortedEncodedKeyComponentHashCode(Float key) @Override public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(Float key, boolean asList) { - return Lists.newArrayList(key); + return ImmutableList.of(key); } @Override diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 0f028c0c1ea1..0b92b92a9ee9 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -19,10 +19,12 @@ package io.druid.segment; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndexStorageAdapter; @@ -31,6 +33,12 @@ public class LongDimensionIndexer implements DimensionIndexer { + @Override + public ValueType getValueType() + { + return ValueType.LONG; + } + @Override public Long processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { @@ -56,19 +64,19 @@ public Long getUnsortedEncodedValueFromSorted(Long sortedIntermediateValue) @Override public Indexed getSortedIndexedValues() { - return null; + throw new UnsupportedOperationException("Numeric columns do not support value dictionaries."); } @Override public Long getMinValue() { - return 0L; + return Long.MIN_VALUE; } @Override public Long getMaxValue() { - return 0L; + return Long.MAX_VALUE; } @Override @@ -124,7 +132,7 @@ public int getUnsortedEncodedKeyComponentHashCode(Long key) @Override public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(Long key, boolean asList) { - return Lists.newArrayList(key); + return ImmutableList.of(key); } @Override diff --git a/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java b/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java new file mode 100644 index 000000000000..903570dd5cb4 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java @@ -0,0 +1,57 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.collect.ImmutableList; +import io.druid.segment.column.ValueType; + +import java.util.List; + +public class NumericColumnSelectorWrappers +{ + public static final List WRAPPABLE_TYPES = ImmutableList.of( + ValueType.LONG, + ValueType.FLOAT + ); + + public static LongColumnSelector wrapFloatAsLong(final FloatColumnSelector selector) { + class FloatWrappingLongColumnSelector implements LongColumnSelector + { + @Override + public long get() + { + return (long) selector.get(); + } + } + return new FloatWrappingLongColumnSelector(); + }; + + public static FloatColumnSelector wrapLongAsFloat(final LongColumnSelector selector) { + class LongWrappingFloatColumnSelector implements FloatColumnSelector + { + @Override + public float get() + { + return (float) selector.get(); + } + } + return new LongWrappingFloatColumnSelector(); + }; +} diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 829ea60c31f0..057caef98fba 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -32,6 +32,7 @@ import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.column.ValueType; import io.druid.segment.data.ArrayBasedIndexedInts; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; @@ -219,6 +220,12 @@ public StringDimensionIndexer(MultiValueHandling multiValueHandling) this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; } + @Override + public ValueType getValueType() + { + return ValueType.STRING; + } + @Override public int[] processRowValsToUnsortedEncodedKeyComponent(Object dimValues) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index fb65a56d5dcb..0a7bb2e254ac 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -33,6 +33,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.segment.Capabilities; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandler; import io.druid.segment.DimensionIndexer; @@ -43,6 +44,7 @@ import io.druid.segment.LongWrappingDimensionSelector; import io.druid.segment.Metadata; import io.druid.segment.NullDimensionSelector; +import io.druid.segment.NumericColumnSelectorWrappers; import io.druid.segment.ObjectColumnSelector; import io.druid.segment.SingleScanTimeDimSelector; import io.druid.segment.StorageAdapter; @@ -393,11 +395,25 @@ public FloatColumnSelector makeFloatColumnSelector(String columnName) if (dimIndex != null) { final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); - return (FloatColumnSelector) indexer.makeColumnValueSelector( + final ColumnValueSelector selector = indexer.makeColumnValueSelector( new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); + + ValueType indexerType = indexer.getValueType(); + if (!NumericColumnSelectorWrappers.WRAPPABLE_TYPES.contains(indexerType)) { + throw new UnsupportedOperationException( + "Cannot create float selector on column with type: " + indexerType + ); + } + + if (indexer.getValueType() == ValueType.LONG) { + return NumericColumnSelectorWrappers.wrapLongAsFloat((LongColumnSelector) selector); + } + + return (FloatColumnSelector) selector; + } final Integer metricIndexInt = index.getMetricIndex(columnName); @@ -438,11 +454,24 @@ public long get() if (dimIndex != null) { final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); - return (LongColumnSelector) indexer.makeColumnValueSelector( + final ColumnValueSelector selector = indexer.makeColumnValueSelector( new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); + + ValueType indexerType = indexer.getValueType(); + if (!NumericColumnSelectorWrappers.WRAPPABLE_TYPES.contains(indexerType)) { + throw new UnsupportedOperationException( + "Cannot create float selector on column with type: " + indexerType + ); + } + + if (indexer.getValueType() == ValueType.FLOAT) { + return NumericColumnSelectorWrappers.wrapFloatAsLong((FloatColumnSelector) selector); + } + + return (LongColumnSelector) selector; } final Integer metricIndexInt = index.getMetricIndex(columnName); diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 22b231693efe..d85202a8195b 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -7861,4 +7861,50 @@ public void testGroupByStringOutputAsLong() Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); TestHelper.assertExpectedObjects(expectedResults, results, ""); } + + @Test + public void testGroupByWithAggsOnNumericDimensions() + { + GroupByQuery query = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setDimFilter(new SelectorDimFilter("quality", "technology", null)) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("qlLong", "qualityLong"), + new DoubleSumAggregatorFactory("qlFloat", "qualityLong"), + new DoubleSumAggregatorFactory("qfFloat", "qualityFloat"), + new LongSumAggregatorFactory("qfLong", "qualityFloat") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .build(); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow( + "2011-04-01", + "alias", "technology", + "rows", 1L, + "qlLong", 1700L, + "qlFloat", 1700.0, + "qfFloat", 17000.0, + "qfLong", 17000L + ), + GroupByQueryRunnerTestHelper.createExpectedRow( + "2011-04-02", + "alias", "technology", + "rows", 1L, + "qlLong", 1700L, + "qlFloat", 1700.0, + "qfFloat", 17000.0, + "qfLong", 17000L + ) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } } diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 0f8fb6dfa8e8..1425b35e263d 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -48,6 +48,7 @@ import io.druid.query.aggregation.DoubleMinAggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.query.aggregation.FilteredAggregatorFactory; +import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.aggregation.first.DoubleFirstAggregatorFactory; import io.druid.query.aggregation.first.LongFirstAggregatorFactory; import io.druid.query.aggregation.PostAggregator; @@ -4664,4 +4665,83 @@ public void testFullOnTopNDecorationOnNumeric() ); assertExpectedResults(expectedResults, query); } + + @Test + public void testFullOnTopNWithAggsOnNumericDims() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension(QueryRunnerTestHelper.marketDimension) + .metric(QueryRunnerTestHelper.indexMetric) + .threshold(4) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .aggregators( + Lists.newArrayList( + Iterables.concat( + QueryRunnerTestHelper.commonAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index"), + new LongSumAggregatorFactory("qlLong", "qualityLong"), + new DoubleSumAggregatorFactory("qlFloat", "qualityLong"), + new DoubleSumAggregatorFactory("qfFloat", "qualityFloat"), + new LongSumAggregatorFactory("qfLong", "qualityFloat") + ) + ) + ) + ) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.builder() + .put(QueryRunnerTestHelper.marketDimension, "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + .put("addRowsIndexConstant", 215866.82879638672D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .put("qlLong", 279000L) + .put("qlFloat", 279000.0) + .put("qfFloat", 2790000.0) + .put("qfLong", 2790000L) + .build(), + ImmutableMap.builder() + .put(QueryRunnerTestHelper.marketDimension, "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .put("qlLong", 279000L) + .put("qlFloat", 279000.0) + .put("qfFloat", 2790000.0) + .put("qfLong", 2790000L) + .build(), + ImmutableMap.builder() + .put(QueryRunnerTestHelper.marketDimension, "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_9) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .put("qlLong", 1171800L) + .put("qlFloat", 1171800.0) + .put("qfFloat", 11718000.0) + .put("qfLong", 11718000L) + .build() + ) + ) + ) + ); + assertExpectedResults(expectedResults, query); + } } From 3ad56db117c479460c283085c3c9b4e16bc227fd Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 27 Feb 2017 16:12:38 -0800 Subject: [PATCH 07/13] remove unused imports --- .../src/main/java/io/druid/segment/FloatDimensionIndexer.java | 1 - .../src/main/java/io/druid/segment/LongDimensionIndexer.java | 1 - 2 files changed, 2 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index 28a033f06489..2aa099ed8f37 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -20,7 +20,6 @@ package io.druid.segment; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 0b92b92a9ee9..348b656e9483 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -20,7 +20,6 @@ package io.druid.segment; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; From e782d297d34a4583260a1871ddaa16baee063c3b Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 27 Feb 2017 16:23:22 -0800 Subject: [PATCH 08/13] Throw exception for bitmap method on numerics --- .../src/main/java/io/druid/segment/FloatDimensionIndexer.java | 2 +- .../src/main/java/io/druid/segment/LongDimensionIndexer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index 2aa099ed8f37..137cd7f62e21 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -145,6 +145,6 @@ public void fillBitmapsFromUnsortedEncodedKeyComponent( Float key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory ) { - // floats don't have bitmaps + throw new UnsupportedOperationException("Numeric columns do not support bitmaps."); } } diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 348b656e9483..344f11c4386e 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -145,6 +145,6 @@ public void fillBitmapsFromUnsortedEncodedKeyComponent( Long key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory ) { - // longs don't have bitmaps + throw new UnsupportedOperationException("Numeric columns do not support bitmaps."); } } From a21ca0149b951b85c7e5aad1f8beca33e2c04048 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 28 Feb 2017 11:31:48 -0800 Subject: [PATCH 09/13] Move typed selector creation to DimensionIndexer interface --- .../io/druid/segment/DimensionIndexer.java | 51 +++++++++++-- .../druid/segment/FloatDimensionIndexer.java | 72 ++++++++++++++++++- .../druid/segment/LongDimensionIndexer.java | 72 ++++++++++++++++++- .../NumericColumnSelectorWrappers.java | 57 --------------- .../druid/segment/StringDimensionIndexer.java | 66 ++++++++++++++++- .../IncrementalIndexStorageAdapter.java | 45 +----------- 6 files changed, 255 insertions(+), 108 deletions(-) delete mode 100644 processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 6d5f6005aef0..7e33b6a65759 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -206,20 +206,59 @@ public interface DimensionIndexer /** - * Return an object used to read rows from a StorageAdapter's Cursor. + * Return an object used to read values from this indexer's column as Strings. * - * e.g. String -> DimensionSelector - * Long -> LongColumnSelector - * Float -> FloatColumnSelector + * @param spec Specifies the output name of a dimension and any extraction functions to be applied. + * @param currEntry Provides access to the current TimeAndDims object in the Cursor + * @param desc Descriptor object for this dimension within an IncrementalIndex + * @return A new object that reads rows from currEntry + */ + DimensionSelector makeDimensionSelector( + DimensionSpec spec, + IncrementalIndexStorageAdapter.EntryHolder currEntry, + IncrementalIndex.DimensionDesc desc + ); + + + /** + * Return an object used to read values from this indexer's column as Longs. * - * See StringDimensionIndexer.makeColumnValueSelector() for a reference implementation. + * @param spec Specifies the output name of a dimension and any extraction functions to be applied. + * @param currEntry Provides access to the current TimeAndDims object in the Cursor + * @param desc Descriptor object for this dimension within an IncrementalIndex + * @return A new object that reads rows from currEntry + */ + LongColumnSelector makeLongColumnSelector( + DimensionSpec spec, + IncrementalIndexStorageAdapter.EntryHolder currEntry, + IncrementalIndex.DimensionDesc desc + ); + + + /** + * Return an object used to read values from this indexer's column as Floats. + * + * @param spec Specifies the output name of a dimension and any extraction functions to be applied. + * @param currEntry Provides access to the current TimeAndDims object in the Cursor + * @param desc Descriptor object for this dimension within an IncrementalIndex + * @return A new object that reads rows from currEntry + */ + FloatColumnSelector makeFloatColumnSelector( + DimensionSpec spec, + IncrementalIndexStorageAdapter.EntryHolder currEntry, + IncrementalIndex.DimensionDesc desc + ); + + + /** + * Return an object used to read values from this indexer's column as Objects. * * @param spec Specifies the output name of a dimension and any extraction functions to be applied. * @param currEntry Provides access to the current TimeAndDims object in the Cursor * @param desc Descriptor object for this dimension within an IncrementalIndex * @return A new object that reads rows from currEntry */ - ColumnValueSelector makeColumnValueSelector( + ObjectColumnSelector makeObjectColumnSelector( DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index 137cd7f62e21..94b419106823 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -85,7 +85,45 @@ public int getCardinality() } @Override - public ColumnValueSelector makeColumnValueSelector( + public DimensionSelector makeDimensionSelector( + DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + ) + { + return new FloatWrappingDimensionSelector( + makeFloatColumnSelector(spec, currEntry, desc), + spec.getExtractionFn() + ); + } + + @Override + public LongColumnSelector makeLongColumnSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerLongColumnSelector implements LongColumnSelector + { + @Override + public long get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0L; + } + + float floatVal = (Float) dims[dimIndex]; + return (long) floatVal; + } + } + + return new IndexerLongColumnSelector(); + } + + @Override + public FloatColumnSelector makeFloatColumnSelector( final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc @@ -110,6 +148,38 @@ public float get() return new IndexerFloatColumnSelector(); } + @Override + public ObjectColumnSelector makeObjectColumnSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerObjectColumnSelector implements ObjectColumnSelector + { + @Override + public Class classOfObject() + { + return Float.class; + } + + @Override + public Object get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0L; + } + + return dims[dimIndex]; + } + } + + return new IndexerObjectColumnSelector(); + } + @Override public int compareUnsortedEncodedKeyComponents(Float lhs, Float rhs) { diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 344f11c4386e..928428fefe32 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -85,7 +85,18 @@ public int getCardinality() } @Override - public ColumnValueSelector makeColumnValueSelector( + public DimensionSelector makeDimensionSelector( + DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + ) + { + return new LongWrappingDimensionSelector( + makeLongColumnSelector(spec, currEntry, desc), + spec.getExtractionFn() + ); + } + + @Override + public LongColumnSelector makeLongColumnSelector( final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc @@ -110,6 +121,65 @@ public long get() return new IndexerLongColumnSelector(); } + @Override + public FloatColumnSelector makeFloatColumnSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerFloatColumnSelector implements FloatColumnSelector + { + @Override + public float get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0.0f; + } + + long longVal = (Long) dims[dimIndex]; + return (float) longVal; + } + } + + return new IndexerFloatColumnSelector(); + } + + @Override + public ObjectColumnSelector makeObjectColumnSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final int dimIndex = desc.getIndex(); + class IndexerObjectColumnSelector implements ObjectColumnSelector + { + @Override + public Class classOfObject() + { + return Long.class; + } + + @Override + public Object get() + { + final Object[] dims = currEntry.getKey().getDims(); + + if (dimIndex >= dims.length) { + return 0L; + } + + return dims[dimIndex]; + } + } + + return new IndexerObjectColumnSelector(); + } + @Override public int compareUnsortedEncodedKeyComponents(Long lhs, Long rhs) { diff --git a/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java b/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java deleted file mode 100644 index 903570dd5cb4..000000000000 --- a/processing/src/main/java/io/druid/segment/NumericColumnSelectorWrappers.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.segment; - -import com.google.common.collect.ImmutableList; -import io.druid.segment.column.ValueType; - -import java.util.List; - -public class NumericColumnSelectorWrappers -{ - public static final List WRAPPABLE_TYPES = ImmutableList.of( - ValueType.LONG, - ValueType.FLOAT - ); - - public static LongColumnSelector wrapFloatAsLong(final FloatColumnSelector selector) { - class FloatWrappingLongColumnSelector implements LongColumnSelector - { - @Override - public long get() - { - return (long) selector.get(); - } - } - return new FloatWrappingLongColumnSelector(); - }; - - public static FloatColumnSelector wrapLongAsFloat(final LongColumnSelector selector) { - class LongWrappingFloatColumnSelector implements FloatColumnSelector - { - @Override - public float get() - { - return (float) selector.get(); - } - } - return new LongWrappingFloatColumnSelector(); - }; -} diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 057caef98fba..2d9ef2507564 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -384,7 +384,7 @@ public int getUnsortedEncodedKeyComponentHashCode(int[] key) } @Override - public DimensionSelector makeColumnValueSelector( + public DimensionSelector makeDimensionSelector( final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc @@ -545,6 +545,70 @@ public int lookupId(String name) return new IndexerDimensionSelector(); } + @Override + public LongColumnSelector makeLongColumnSelector( + DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + ) + { + return ZeroLongColumnSelector.instance(); + } + + @Override + public FloatColumnSelector makeFloatColumnSelector( + DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + ) + { + return ZeroFloatColumnSelector.instance(); + } + + @Override + public ObjectColumnSelector makeObjectColumnSelector( + final DimensionSpec spec, + final IncrementalIndexStorageAdapter.EntryHolder currEntry, + final IncrementalIndex.DimensionDesc desc + ) + { + final ExtractionFn extractionFn = spec.getExtractionFn(); + final int dimIndex = desc.getIndex(); + + class StringIndexerObjectColumnSelector implements ObjectColumnSelector + { + + @Override + public Class classOfObject() + { + return String.class; + } + + @Override + public String get() + { + final Object[] dims = currEntry.getKey().getDims(); + + int[] indices; + if (dimIndex < dims.length) { + indices = (int[]) dims[dimIndex]; + if (indices.length > 1) { + throw new UnsupportedOperationException( + "makeObjectColumnSelector does not support multi-value columns." + ); + } + } else { + indices = null; + } + + if (indices == null || indices.length == 0) { + return extractionFn.apply(null); + } + + final String strValue = getActualValue(indices[0], false); + return extractionFn == null ? strValue : extractionFn.apply(strValue); + } + } + + return new StringIndexerObjectColumnSelector(); + } + @Override public Object convertUnsortedEncodedKeyComponentToActualArrayOrList(int[] key, boolean asList) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index ae115ce76861..4f6dbf77b306 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -33,7 +33,6 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.segment.Capabilities; -import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandler; import io.druid.segment.DimensionIndexer; @@ -44,7 +43,6 @@ import io.druid.segment.LongWrappingDimensionSelector; import io.druid.segment.Metadata; import io.druid.segment.NullDimensionSelector; -import io.druid.segment.NumericColumnSelectorWrappers; import io.druid.segment.ObjectColumnSelector; import io.druid.segment.SingleScanTimeDimSelector; import io.druid.segment.StorageAdapter; @@ -365,23 +363,13 @@ private DimensionSelector makeDimensionSelectorUndecorated( return selector; } - ColumnCapabilities capabilities = getColumnCapabilities(dimension); - if (capabilities != null) { - if (capabilities.getType() == ValueType.LONG) { - return new LongWrappingDimensionSelector(makeLongColumnSelector(dimension), extractionFn); - } - if (capabilities.getType() == ValueType.FLOAT) { - return new FloatWrappingDimensionSelector(makeFloatColumnSelector(dimension), extractionFn); - } - } - final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension()); if (dimensionDesc == null) { return NullDimensionSelector.instance(); } final DimensionIndexer indexer = dimensionDesc.getIndexer(); - return (DimensionSelector) indexer.makeColumnValueSelector(dimensionSpec, currEntry, dimensionDesc); + return indexer.makeDimensionSelector(dimensionSpec, currEntry, dimensionDesc); } @Override @@ -395,25 +383,11 @@ public FloatColumnSelector makeFloatColumnSelector(String columnName) if (dimIndex != null) { final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); - final ColumnValueSelector selector = indexer.makeColumnValueSelector( + return indexer.makeFloatColumnSelector( new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); - - ValueType indexerType = indexer.getValueType(); - if (!NumericColumnSelectorWrappers.WRAPPABLE_TYPES.contains(indexerType)) { - throw new UnsupportedOperationException( - "Cannot create float selector on column with type: " + indexerType - ); - } - - if (indexer.getValueType() == ValueType.LONG) { - return NumericColumnSelectorWrappers.wrapLongAsFloat((LongColumnSelector) selector); - } - - return (FloatColumnSelector) selector; - } final Integer metricIndexInt = index.getMetricIndex(columnName); @@ -454,24 +428,11 @@ public long get() if (dimIndex != null) { final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); - final ColumnValueSelector selector = indexer.makeColumnValueSelector( + return indexer.makeLongColumnSelector( new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); - - ValueType indexerType = indexer.getValueType(); - if (!NumericColumnSelectorWrappers.WRAPPABLE_TYPES.contains(indexerType)) { - throw new UnsupportedOperationException( - "Cannot create float selector on column with type: " + indexerType - ); - } - - if (indexer.getValueType() == ValueType.FLOAT) { - return NumericColumnSelectorWrappers.wrapFloatAsLong((FloatColumnSelector) selector); - } - - return (LongColumnSelector) selector; } final Integer metricIndexInt = index.getMetricIndex(columnName); From d225d87c3aac0e6ecfbb8614fd37bc194254b5e2 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 28 Feb 2017 11:38:00 -0800 Subject: [PATCH 10/13] unused imports --- .../segment/incremental/IncrementalIndexStorageAdapter.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 4f6dbf77b306..3bd8e6a8c278 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -38,9 +38,7 @@ import io.druid.segment.DimensionIndexer; import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; -import io.druid.segment.FloatWrappingDimensionSelector; import io.druid.segment.LongColumnSelector; -import io.druid.segment.LongWrappingDimensionSelector; import io.druid.segment.Metadata; import io.druid.segment.NullDimensionSelector; import io.druid.segment.ObjectColumnSelector; @@ -51,7 +49,6 @@ import io.druid.segment.ZeroLongColumnSelector; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; -import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.data.ListIndexed; import io.druid.segment.filter.BooleanValueMatcher; From 2f1944941dd45ed385f6797cb5464ec58e1a7be7 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 28 Feb 2017 12:07:41 -0800 Subject: [PATCH 11/13] Fix --- .../incremental/IncrementalIndexStorageAdapter.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 3bd8e6a8c278..df15ef9c6ed5 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -38,7 +38,9 @@ import io.druid.segment.DimensionIndexer; import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; +import io.druid.segment.FloatWrappingDimensionSelector; import io.druid.segment.LongColumnSelector; +import io.druid.segment.LongWrappingDimensionSelector; import io.druid.segment.Metadata; import io.druid.segment.NullDimensionSelector; import io.druid.segment.ObjectColumnSelector; @@ -49,6 +51,7 @@ import io.druid.segment.ZeroLongColumnSelector; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.data.ListIndexed; import io.druid.segment.filter.BooleanValueMatcher; @@ -360,6 +363,16 @@ private DimensionSelector makeDimensionSelectorUndecorated( return selector; } + ColumnCapabilities capabilities = getColumnCapabilities(dimension); + if (capabilities != null) { + if (capabilities.getType() == ValueType.LONG) { + return new LongWrappingDimensionSelector(makeLongColumnSelector(dimension), extractionFn); + } + if (capabilities.getType() == ValueType.FLOAT) { + return new FloatWrappingDimensionSelector(makeFloatColumnSelector(dimension), extractionFn); + } + } + final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension()); if (dimensionDesc == null) { return NullDimensionSelector.instance(); From d3b5c78e44d9cc3d997b39605871ce471d177fa5 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 28 Feb 2017 14:33:04 -0800 Subject: [PATCH 12/13] Remove unused DimensionSpec from indexer methods, check for dims first in inc index storage adapter --- .../io/druid/segment/DimensionIndexer.java | 4 ---- .../druid/segment/FloatDimensionIndexer.java | 4 +--- .../druid/segment/LongDimensionIndexer.java | 4 +--- .../druid/segment/StringDimensionIndexer.java | 4 ++-- .../IncrementalIndexStorageAdapter.java | 22 +++++++++---------- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 7e33b6a65759..44cc8e44a31a 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -223,13 +223,11 @@ DimensionSelector makeDimensionSelector( /** * Return an object used to read values from this indexer's column as Longs. * - * @param spec Specifies the output name of a dimension and any extraction functions to be applied. * @param currEntry Provides access to the current TimeAndDims object in the Cursor * @param desc Descriptor object for this dimension within an IncrementalIndex * @return A new object that reads rows from currEntry */ LongColumnSelector makeLongColumnSelector( - DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc ); @@ -238,13 +236,11 @@ LongColumnSelector makeLongColumnSelector( /** * Return an object used to read values from this indexer's column as Floats. * - * @param spec Specifies the output name of a dimension and any extraction functions to be applied. * @param currEntry Provides access to the current TimeAndDims object in the Cursor * @param desc Descriptor object for this dimension within an IncrementalIndex * @return A new object that reads rows from currEntry */ FloatColumnSelector makeFloatColumnSelector( - DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc ); diff --git a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java index 94b419106823..c0cd2a9b07d9 100644 --- a/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/FloatDimensionIndexer.java @@ -90,14 +90,13 @@ public DimensionSelector makeDimensionSelector( ) { return new FloatWrappingDimensionSelector( - makeFloatColumnSelector(spec, currEntry, desc), + makeFloatColumnSelector(currEntry, desc), spec.getExtractionFn() ); } @Override public LongColumnSelector makeLongColumnSelector( - final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc ) @@ -124,7 +123,6 @@ public long get() @Override public FloatColumnSelector makeFloatColumnSelector( - final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc ) diff --git a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java index 928428fefe32..f12faa308aa3 100644 --- a/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/LongDimensionIndexer.java @@ -90,14 +90,13 @@ public DimensionSelector makeDimensionSelector( ) { return new LongWrappingDimensionSelector( - makeLongColumnSelector(spec, currEntry, desc), + makeLongColumnSelector(currEntry, desc), spec.getExtractionFn() ); } @Override public LongColumnSelector makeLongColumnSelector( - final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc ) @@ -123,7 +122,6 @@ public long get() @Override public FloatColumnSelector makeFloatColumnSelector( - final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc ) diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 2d9ef2507564..9367bb79350f 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -547,7 +547,7 @@ public int lookupId(String name) @Override public LongColumnSelector makeLongColumnSelector( - DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc ) { return ZeroLongColumnSelector.instance(); @@ -555,7 +555,7 @@ public LongColumnSelector makeLongColumnSelector( @Override public FloatColumnSelector makeFloatColumnSelector( - DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc + IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc ) { return ZeroFloatColumnSelector.instance(); diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index df15ef9c6ed5..dbbfa14eddab 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -27,7 +27,6 @@ import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.Sequences; import io.druid.query.QueryInterruptedException; -import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.Filter; @@ -363,23 +362,26 @@ private DimensionSelector makeDimensionSelectorUndecorated( return selector; } - ColumnCapabilities capabilities = getColumnCapabilities(dimension); - if (capabilities != null) { + final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension()); + if (dimensionDesc == null) { + // not a dimension, column may be a metric + ColumnCapabilities capabilities = getColumnCapabilities(dimension); + if (capabilities == null) { + return NullDimensionSelector.instance(); + } if (capabilities.getType() == ValueType.LONG) { return new LongWrappingDimensionSelector(makeLongColumnSelector(dimension), extractionFn); } if (capabilities.getType() == ValueType.FLOAT) { return new FloatWrappingDimensionSelector(makeFloatColumnSelector(dimension), extractionFn); } - } - final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension()); - if (dimensionDesc == null) { + // if we can't wrap the base column, just return a column of all nulls return NullDimensionSelector.instance(); + } else { + final DimensionIndexer indexer = dimensionDesc.getIndexer(); + return indexer.makeDimensionSelector(dimensionSpec, currEntry, dimensionDesc); } - - final DimensionIndexer indexer = dimensionDesc.getIndexer(); - return indexer.makeDimensionSelector(dimensionSpec, currEntry, dimensionDesc); } @Override @@ -394,7 +396,6 @@ public FloatColumnSelector makeFloatColumnSelector(String columnName) final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); return indexer.makeFloatColumnSelector( - new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); @@ -439,7 +440,6 @@ public long get() final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName); final DimensionIndexer indexer = dimensionDesc.getIndexer(); return indexer.makeLongColumnSelector( - new DefaultDimensionSpec(columnName, null), currEntry, dimensionDesc ); From 2505271a995ac27687e3652e4ede260a2511d448 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 28 Feb 2017 14:39:49 -0800 Subject: [PATCH 13/13] Remove spaces --- .../src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java | 1 - .../src/main/java/io/druid/segment/StringDimensionIndexer.java | 1 - 2 files changed, 2 deletions(-) diff --git a/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java b/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java index 951c3e2c94a1..e19c94b84e48 100644 --- a/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java +++ b/extensions-contrib/scan-query/src/test/java/io/druid/query/scan/ScanQueryRunnerTest.java @@ -144,7 +144,6 @@ public void testFullOnSelect() "indexMin", "indexMaxPlusTen", "quality_uniques" - ); ScanQuery query = newTestQuery() .intervals(I_0112_0114) diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 9367bb79350f..0b12e24b6762 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -573,7 +573,6 @@ public ObjectColumnSelector makeObjectColumnSelector( class StringIndexerObjectColumnSelector implements ObjectColumnSelector { - @Override public Class classOfObject() {