Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions beeline/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
Comment thread
zratkai marked this conversation as resolved.
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ POSTHOOK: query: select summary from default.ice_meta_3.snapshots
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_meta_3
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"added-data-files":"7","added-records":"15","added-files-size":"1329","changed-partition-count":"7","total-records":"15","total-files-size":"1329","total-data-files":"7","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0"}
{"added-data-files":"7","added-records":"15","added-files-size":"1378","changed-partition-count":"7","total-records":"15","total-files-size":"1378","total-data-files":"7","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0"}
PREHOOK: query: select summary['changed-partition-count'] from default.ice_meta_2.snapshots
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_meta_2
Expand Down Expand Up @@ -551,7 +551,7 @@ POSTHOOK: query: select summary from default.ice_meta_3.snapshots
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_meta_3
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"added-data-files":"7","added-records":"15","added-files-size":"1329","changed-partition-count":"7","total-records":"15","total-files-size":"1329","total-data-files":"7","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0"}
{"added-data-files":"7","added-records":"15","added-files-size":"1378","changed-partition-count":"7","total-records":"15","total-files-size":"1378","total-data-files":"7","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0"}
PREHOOK: query: select summary['changed-partition-count'] from default.ice_meta_2.snapshots
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_meta_2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "1373", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "1396", parameters.get("totalSize"));

parameters = partitions
.stream()
Expand All @@ -400,7 +400,7 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "1453", parameters.get("totalSize"));

//Do a major compaction
CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
Expand All @@ -425,7 +425,7 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "801", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "808", parameters.get("totalSize"));

parameters = partitions
.stream()
Expand All @@ -435,7 +435,7 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "1453", parameters.get("totalSize"));
}

/**
Expand Down Expand Up @@ -478,7 +478,7 @@ public void testStatsAfterCompactionTbl() throws Exception {
Map<String, String> parameters = Hive.get().getTable(tblName).getParameters();
Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "1434", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "1446", parameters.get("totalSize"));

//Do a major compaction
CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
Expand All @@ -496,7 +496,7 @@ public void testStatsAfterCompactionTbl() throws Exception {
parameters = Hive.get().getTable(tblName).getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "776", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "783", parameters.get("totalSize"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,18 +177,18 @@ private void testRebalanceCompactionWithParallelDeleteAsSecond(boolean optimisti
"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13",
},
{
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":10}\t6\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":11}\t4\t3",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":11}\t5\t3",
},
{
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":15}\t5\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t4\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":15}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":16}\t6\t2",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":17}\t5\t2",
},
Expand Down Expand Up @@ -231,18 +231,18 @@ public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTableWithOr
"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":5}\t12\t12",
},
{
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":10}\t6\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":11}\t4\t3",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":11}\t5\t3",
},
{
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":15}\t5\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t4\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":15}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":16}\t6\t2",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":17}\t5\t2",
},
Expand Down Expand Up @@ -519,6 +519,7 @@ private TestDataProvider prepareRebalanceTestData(String tableName) throws Excep
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t5\t3",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":6}\t2\t4",
"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12",
"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13",
"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14",
Expand All @@ -527,14 +528,13 @@ private TestDataProvider prepareRebalanceTestData(String tableName) throws Excep
"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17",
},
{
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t4",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t4\t4",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":3}\t4\t3",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t3",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t4\t4",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t4\t3",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":3}\t2\t3",
},
{
"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t2\t3",
"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t3\t4",
"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t4",
},
};
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf);
Expand Down Expand Up @@ -810,7 +810,7 @@ public void testStatsAfterQueryCompactionOnTez() throws Exception {
Map<String, String> parameters = Hive.get().getTable(tblName).getParameters();
Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "1434", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "1446", parameters.get("totalSize"));

//Do a major compaction
CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
Expand All @@ -826,7 +826,7 @@ public void testStatsAfterQueryCompactionOnTez() throws Exception {
parameters = Hive.get().getTable(tblName).getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "727", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "735", parameters.get("totalSize"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,13 @@
import org.apache.orc.impl.SchemaEvolution;
import org.apache.orc.impl.TreeReaderFactory;
import org.apache.orc.impl.TreeReaderFactory.StructTreeReader;
import org.apache.orc.impl.TreeReaderFactory.TreeReader;
import org.apache.orc.impl.WriterImpl;
import org.apache.orc.OrcProto;

import org.apache.orc.impl.reader.tree.TypeReader;

public class OrcEncodedDataConsumer
extends EncodedDataConsumer<OrcBatchKey, OrcEncodedColumnBatch> {
private TreeReaderFactory.TreeReader[] columnReaders;
private TypeReader[] columnReaders;
private int previousStripeIndex = -1;
private ConsumerFileMetadata fileMetadata; // We assume one request is only for one file.
private CompressionCodec codec;
Expand Down Expand Up @@ -191,9 +190,9 @@ protected void decodeBatch(OrcEncodedColumnBatch batch,
* it doesn't get confused.
*
*/
TreeReader reader = columnReaders[idx];
TypeReader reader = columnReaders[idx];
ColumnVector cv = prepareColumnVector(cvb, idx, batchSize);
reader.nextVector(cv, null, batchSize);
reader.nextVector(cv, null, batchSize, cvb.filterContext, TypeReader.ReadPhase.ALL);
}

// we are done reading a batch, send it to consumer for processing
Expand Down Expand Up @@ -299,18 +298,18 @@ private ColumnVector createColumn(TypeDescription type, int batchSize, final boo
}
}

private void positionInStreams(TreeReaderFactory.TreeReader[] columnReaders,
private void positionInStreams(TypeReader[] columnReaders,
OrcBatchKey batchKey, ConsumerStripeMetadata stripeMetadata) throws IOException {
PositionProvider[] pps = createPositionProviders(columnReaders, batchKey, stripeMetadata);
if (pps == null) return;
for (int i = 0; i < columnReaders.length; i++) {
if (columnReaders[i] == null) continue;
// TODO: we could/should trace seek destinations; pps needs a "peek" method
columnReaders[i].seek(pps);
columnReaders[i].seek(pps, TypeReader.ReadPhase.ALL);
}
}

private void repositionInStreams(TreeReaderFactory.TreeReader[] columnReaders,
private void repositionInStreams(TypeReader[] columnReaders,
EncodedColumnBatch<OrcBatchKey> batch, boolean sameStripe,
ConsumerStripeMetadata stripeMetadata) throws IOException {
PositionProvider[] pps = createPositionProviders(
Expand All @@ -320,7 +319,7 @@ private void repositionInStreams(TreeReaderFactory.TreeReader[] columnReaders,
}
if (pps == null) return;
for (int i = 0; i < columnReaders.length; i++) {
TreeReader reader = columnReaders[i];
TypeReader reader = columnReaders[i];
if (reader == null) continue;
// Note: we assume this never happens for SerDe reader - the batch would never have vectors.
// That is always true now; but it wasn't some day, the below would throw in getColumnData.
Expand All @@ -331,7 +330,7 @@ private void repositionInStreams(TreeReaderFactory.TreeReader[] columnReaders,
((EncodedTreeReaderFactory.TimestampStreamReader) reader)
.updateTimezone(stripeMetadata.getWriterTimezone());
}
reader.seek(pps);
reader.seek(pps, TypeReader.ReadPhase.ALL);
}
}

Expand All @@ -352,7 +351,7 @@ public String toString() {
}

private PositionProvider[] createPositionProviders(
TreeReaderFactory.TreeReader[] columnReaders, OrcBatchKey batchKey,
TypeReader[] columnReaders, OrcBatchKey batchKey,
ConsumerStripeMetadata stripeMetadata) throws IOException {
if (columnReaders.length == 0) return null;
PositionProvider[] pps = null;
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
<postgres.version>42.5.1</postgres.version>
<oracle.version>21.3.0.0</oracle.version>
<opencsv.version>2.3</opencsv.version>
<orc.version>1.6.9</orc.version>
<orc.version>1.8.3</orc.version>
<mockito-core.version>3.4.4</mockito-core.version>
<powermock.version>2.0.2</powermock.version>
<mina.version>2.0.0-M5</mina.version>
Expand Down
Loading