From 9d36a65d88bf5d697fd368e99bd108ee8aa7e5dd Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Mon, 13 Mar 2023 14:44:30 -0400 Subject: [PATCH 1/6] fix r, python and c_glib --- c_glib/arrow-glib/compute.hpp | 1 + c_glib/arrow-glib/expression.hpp | 2 +- python/pyarrow/includes/libarrow.pxd | 2 +- r/src/compute-exec.cpp | 2 +- r/src/expression.cpp | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 05a1de1a5682..025db98dc6c7 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -20,6 +20,7 @@ #pragma once #include +#include #include diff --git a/c_glib/arrow-glib/expression.hpp b/c_glib/arrow-glib/expression.hpp index ea872bb53302..8d6e2f8d117a 100644 --- a/c_glib/arrow-glib/expression.hpp +++ b/c_glib/arrow-glib/expression.hpp @@ -19,7 +19,7 @@ #pragma once -#include +#include #include diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6e571740c6fa..bc8c99770e5b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2529,7 +2529,7 @@ cdef extern from * namespace "arrow::compute": cdef struct CKnownFieldValues "arrow::compute::KnownFieldValues": unordered_map[CFieldRef, CDatum, CFieldRefHash] map -cdef extern from "arrow/compute/exec/expression.h" \ +cdef extern from "arrow/compute/expression.h" \ namespace "arrow::compute" nogil: cdef cppclass CExpression "arrow::compute::Expression": diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 5dafc212734b..eddb91dc8a19 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/r/src/expression.cpp b/r/src/expression.cpp index 35ef34ee8194..e9d08e13affe 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -18,7 +18,7 @@ #include "./arrow_types.h" #include -#include +#include namespace compute = ::arrow::compute; From fb20fd13a233038642f803bf9a04c01a85a2c6cc Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Tue, 14 Mar 2023 11:09:57 -0400 Subject: [PATCH 2/6] some more cleanup and consolidation --- cpp/src/arrow/compute/exec/CMakeLists.txt | 5 - .../compute/exec/light_array_exec_test.cc | 172 ------------------ cpp/src/arrow/compute/light_array_test.cc | 151 +++++++++++++++ 3 files changed, 151 insertions(+), 177 deletions(-) delete mode 100644 cpp/src/arrow/compute/exec/light_array_exec_test.cc diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index 914840b0396c..f3f02d393c3c 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -69,11 +69,6 @@ add_arrow_compute_test(util_test SOURCES util_test.cc task_util_test.cc) -add_arrow_compute_test(light_array_exec_test - PREFIX - "arrow-compute" - SOURCES - light_array_exec_test.cc) add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/exec/light_array_exec_test.cc b/cpp/src/arrow/compute/exec/light_array_exec_test.cc deleted file mode 100644 index cbc93299a5bb..000000000000 --- a/cpp/src/arrow/compute/exec/light_array_exec_test.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/compute/light_array.h" - -#include - -#include "arrow/compute/exec/test_util.h" -#include "arrow/testing/gtest_util.h" -#include "arrow/type.h" -#include "arrow/util/checked_cast.h" - -namespace arrow { -namespace compute { - -TEST(KeyColumnArray, FromExecBatch) { - ExecBatch batch = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - std::vector arrays; - ASSERT_OK(ColumnArraysFromExecBatch(batch, &arrays)); - - ASSERT_EQ(2, arrays.size()); - ASSERT_EQ(8, arrays[0].metadata().fixed_length); - ASSERT_EQ(0, arrays[1].metadata().fixed_length); - ASSERT_EQ(3, arrays[0].length()); - ASSERT_EQ(3, arrays[1].length()); - - ASSERT_OK(ColumnArraysFromExecBatch(batch, 1, 1, &arrays)); - - ASSERT_EQ(2, arrays.size()); - ASSERT_EQ(8, arrays[0].metadata().fixed_length); - ASSERT_EQ(0, arrays[1].metadata().fixed_length); - ASSERT_EQ(1, arrays[0].length()); - ASSERT_EQ(1, arrays[1].length()); -} - -TEST(ExecBatchBuilder, AppendBatches) { - std::unique_ptr owned_pool = MemoryPool::CreateDefault(); - MemoryPool* pool = owned_pool.get(); - ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); - ExecBatch combined = ExecBatchFromJSON( - {int64(), boolean()}, - "[[1, true], [2, false], [null, null], [null, true], [5, true], [6, false]]"); - { - ExecBatchBuilder builder; - uint16_t row_ids[3] = {0, 1, 2}; - ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/2)); - ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/2)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(combined, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - ASSERT_EQ(0, pool->bytes_allocated()); -} - -TEST(ExecBatchBuilder, AppendBatchesSomeRows) { - std::unique_ptr owned_pool = MemoryPool::CreateDefault(); - MemoryPool* pool = owned_pool.get(); - ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); - ExecBatch combined = ExecBatchFromJSON( - {int64(), boolean()}, "[[1, true], [2, false], [null, true], [5, true]]"); - { - ExecBatchBuilder builder; - uint16_t row_ids[2] = {0, 1}; - ASSERT_OK(builder.AppendSelected(pool, batch_one, 2, row_ids, /*num_cols=*/2)); - ASSERT_OK(builder.AppendSelected(pool, batch_two, 2, row_ids, /*num_cols=*/2)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(combined, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - ASSERT_EQ(0, pool->bytes_allocated()); -} - -TEST(ExecBatchBuilder, AppendBatchesSomeCols) { - std::unique_ptr owned_pool = MemoryPool::CreateDefault(); - MemoryPool* pool = owned_pool.get(); - ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); - ExecBatch first_col_only = - ExecBatchFromJSON({int64()}, "[[1], [2], [null], [null], [5], [6]]"); - ExecBatch last_col_only = ExecBatchFromJSON( - {boolean()}, "[[true], [false], [null], [true], [true], [false]]"); - { - ExecBatchBuilder builder; - uint16_t row_ids[3] = {0, 1, 2}; - int first_col_ids[1] = {0}; - ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1, - first_col_ids)); - ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1, - first_col_ids)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(first_col_only, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - { - ExecBatchBuilder builder; - uint16_t row_ids[3] = {0, 1, 2}; - // If we don't specify col_ids and num_cols is 1 it is implicitly the first col - ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1)); - ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(first_col_only, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - { - ExecBatchBuilder builder; - uint16_t row_ids[3] = {0, 1, 2}; - int last_col_ids[1] = {1}; - ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1, - last_col_ids)); - ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1, - last_col_ids)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(last_col_only, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - ASSERT_EQ(0, pool->bytes_allocated()); -} - -TEST(ExecBatchBuilder, AppendNulls) { - std::unique_ptr owned_pool = MemoryPool::CreateDefault(); - MemoryPool* pool = owned_pool.get(); - ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - ExecBatch combined = ExecBatchFromJSON( - {int64(), boolean()}, - "[[1, true], [2, false], [null, null], [null, null], [null, null]]"); - ExecBatch just_nulls = - ExecBatchFromJSON({int64(), boolean()}, "[[null, null], [null, null]]"); - { - ExecBatchBuilder builder; - uint16_t row_ids[3] = {0, 1, 2}; - ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/2)); - ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(combined, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - { - ExecBatchBuilder builder; - ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2)); - ExecBatch built = builder.Flush(); - ASSERT_EQ(just_nulls, built); - ASSERT_NE(0, pool->bytes_allocated()); - } - ASSERT_EQ(0, pool->bytes_allocated()); -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index c5e83b546b64..70bcdf3bf7e2 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -24,6 +24,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/vector.h" namespace arrow { namespace compute { @@ -33,6 +34,15 @@ const std::vector> kSampleFixedDataTypes = { uint16(), uint32(), uint64(), decimal128(38, 6), decimal256(76, 6)}; const std::vector> kSampleBinaryTypes = {utf8(), binary()}; +ExecBatch ExecBatchFromJSON(const std::vector& types, std::string_view json) { + auto fields = ::arrow::internal::MapVector( + [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); + + ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; + + return batch; +} + TEST(KeyColumnMetadata, FromDataType) { KeyColumnMetadata metadata = ColumnMetadataFromDataType(boolean()).ValueOrDie(); ASSERT_EQ(0, metadata.fixed_length); @@ -336,5 +346,146 @@ TEST(ExecBatchBuilder, AppendValuesBeyondLimit) { ASSERT_EQ(0, pool->bytes_allocated()); } +TEST(KeyColumnArray, FromExecBatch) { + ExecBatch batch = + ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + std::vector arrays; + ASSERT_OK(ColumnArraysFromExecBatch(batch, &arrays)); + + ASSERT_EQ(2, arrays.size()); + ASSERT_EQ(8, arrays[0].metadata().fixed_length); + ASSERT_EQ(0, arrays[1].metadata().fixed_length); + ASSERT_EQ(3, arrays[0].length()); + ASSERT_EQ(3, arrays[1].length()); + + ASSERT_OK(ColumnArraysFromExecBatch(batch, 1, 1, &arrays)); + + ASSERT_EQ(2, arrays.size()); + ASSERT_EQ(8, arrays[0].metadata().fixed_length); + ASSERT_EQ(0, arrays[1].metadata().fixed_length); + ASSERT_EQ(1, arrays[0].length()); + ASSERT_EQ(1, arrays[1].length()); +} + +TEST(ExecBatchBuilder, AppendBatches) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + ExecBatch batch_one = + ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + ExecBatch batch_two = + ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + ExecBatch combined = ExecBatchFromJSON( + {int64(), boolean()}, + "[[1, true], [2, false], [null, null], [null, true], [5, true], [6, false]]"); + { + ExecBatchBuilder builder; + uint16_t row_ids[3] = {0, 1, 2}; + ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/2)); + ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/2)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(combined, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + +TEST(ExecBatchBuilder, AppendBatchesSomeRows) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + ExecBatch batch_one = + ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + ExecBatch batch_two = + ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + ExecBatch combined = ExecBatchFromJSON( + {int64(), boolean()}, "[[1, true], [2, false], [null, true], [5, true]]"); + { + ExecBatchBuilder builder; + uint16_t row_ids[2] = {0, 1}; + ASSERT_OK(builder.AppendSelected(pool, batch_one, 2, row_ids, /*num_cols=*/2)); + ASSERT_OK(builder.AppendSelected(pool, batch_two, 2, row_ids, /*num_cols=*/2)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(combined, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + +TEST(ExecBatchBuilder, AppendBatchesSomeCols) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + ExecBatch batch_one = + ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + ExecBatch batch_two = + ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + ExecBatch first_col_only = + ExecBatchFromJSON({int64()}, "[[1], [2], [null], [null], [5], [6]]"); + ExecBatch last_col_only = ExecBatchFromJSON( + {boolean()}, "[[true], [false], [null], [true], [true], [false]]"); + { + ExecBatchBuilder builder; + uint16_t row_ids[3] = {0, 1, 2}; + int first_col_ids[1] = {0}; + ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1, + first_col_ids)); + ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1, + first_col_ids)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(first_col_only, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + { + ExecBatchBuilder builder; + uint16_t row_ids[3] = {0, 1, 2}; + // If we don't specify col_ids and num_cols is 1 it is implicitly the first col + ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1)); + ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(first_col_only, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + { + ExecBatchBuilder builder; + uint16_t row_ids[3] = {0, 1, 2}; + int last_col_ids[1] = {1}; + ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/1, + last_col_ids)); + ASSERT_OK(builder.AppendSelected(pool, batch_two, 3, row_ids, /*num_cols=*/1, + last_col_ids)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(last_col_only, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + +TEST(ExecBatchBuilder, AppendNulls) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + ExecBatch batch_one = + ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + ExecBatch combined = ExecBatchFromJSON( + {int64(), boolean()}, + "[[1, true], [2, false], [null, null], [null, null], [null, null]]"); + ExecBatch just_nulls = + ExecBatchFromJSON({int64(), boolean()}, "[[null, null], [null, null]]"); + { + ExecBatchBuilder builder; + uint16_t row_ids[3] = {0, 1, 2}; + ASSERT_OK(builder.AppendSelected(pool, batch_one, 3, row_ids, /*num_cols=*/2)); + ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(combined, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + { + ExecBatchBuilder builder; + ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2)); + ExecBatch built = builder.Flush(); + ASSERT_EQ(just_nulls, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + } // namespace compute } // namespace arrow From 290dd9f51c0dfd640f6e14514f2773c358def70e Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Sun, 12 Mar 2023 10:25:54 -0400 Subject: [PATCH 3/6] move expression out of exec subdirectory --- .../arrow/compute_register_example.cc | 2 +- .../arrow/dataset_documentation_example.cc | 2 +- .../arrow/dataset_parquet_scan_example.cc | 2 +- .../arrow/dataset_skyhook_scan_example.cc | 2 +- cpp/examples/arrow/join_example.cc | 2 +- cpp/src/arrow/CMakeLists.txt | 2 +- cpp/src/arrow/compute/CMakeLists.txt | 3 +- cpp/src/arrow/compute/api.h | 14 ++--- cpp/src/arrow/compute/exec.h | 2 +- cpp/src/arrow/compute/exec/CMakeLists.txt | 3 +- cpp/src/arrow/compute/exec/exec_plan.cc | 2 +- cpp/src/arrow/compute/exec/exec_plan.h | 2 +- .../compute/exec/expression_benchmark.cc | 2 +- cpp/src/arrow/compute/exec/fetch_node.cc | 2 +- .../arrow/compute/exec/filter_benchmark.cc | 2 +- cpp/src/arrow/compute/exec/filter_node.cc | 2 +- cpp/src/arrow/compute/exec/map_node.cc | 2 +- cpp/src/arrow/compute/exec/options.h | 3 +- cpp/src/arrow/compute/exec/plan_test.cc | 2 +- .../arrow/compute/exec/project_benchmark.cc | 2 +- cpp/src/arrow/compute/exec/project_node.cc | 2 +- cpp/src/arrow/compute/exec/sink_node.cc | 2 +- cpp/src/arrow/compute/exec/source_node.cc | 2 +- cpp/src/arrow/compute/exec/subtree_internal.h | 2 +- cpp/src/arrow/compute/exec/tpch_node.h | 2 +- cpp/src/arrow/compute/exec/type_fwd.h | 36 +++++++++++++ cpp/src/arrow/compute/exec/util.h | 51 +----------------- .../arrow/compute/{exec => }/expression.cc | 6 +-- cpp/src/arrow/compute/{exec => }/expression.h | 0 .../compute/{exec => }/expression_internal.h | 2 +- .../compute/{exec => }/expression_test.cc | 4 +- cpp/src/arrow/compute/type_fwd.h | 16 +++--- cpp/src/arrow/compute/util.h | 52 +++++++++++++++++++ cpp/src/arrow/dataset/api.h | 2 +- cpp/src/arrow/dataset/dataset.h | 2 +- cpp/src/arrow/dataset/file_benchmark.cc | 2 +- cpp/src/arrow/dataset/file_json.cc | 2 +- cpp/src/arrow/dataset/partition.cc | 2 +- cpp/src/arrow/dataset/partition.h | 2 +- cpp/src/arrow/dataset/scan_node.cc | 4 +- cpp/src/arrow/dataset/scanner.h | 2 +- cpp/src/arrow/dataset/scanner_test.cc | 2 +- cpp/src/arrow/dataset/test_util_internal.h | 2 +- .../engine/substrait/expression_internal.cc | 4 +- .../arrow/engine/substrait/extension_set.h | 2 +- .../engine/substrait/relation_internal.cc | 2 +- cpp/src/arrow/engine/substrait/serde.cc | 2 +- cpp/src/arrow/engine/substrait/serde_test.cc | 4 +- cpp/src/skyhook/client/file_skyhook.cc | 2 +- cpp/src/skyhook/cls/cls_skyhook.cc | 2 +- cpp/src/skyhook/cls/cls_skyhook_test.cc | 2 +- cpp/src/skyhook/protocol/skyhook_protocol.h | 2 +- .../skyhook/protocol/skyhook_protocol_test.cc | 2 +- 53 files changed, 156 insertions(+), 122 deletions(-) create mode 100644 cpp/src/arrow/compute/exec/type_fwd.h rename cpp/src/arrow/compute/{exec => }/expression.cc (99%) rename cpp/src/arrow/compute/{exec => }/expression.h (100%) rename cpp/src/arrow/compute/{exec => }/expression_internal.h (99%) rename cpp/src/arrow/compute/{exec => }/expression_test.cc (99%) diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc index 1df34f174597..a97c17ac7048 100644 --- a/cpp/examples/arrow/compute_register_example.cc +++ b/cpp/examples/arrow/compute_register_example.cc @@ -18,10 +18,10 @@ #include #include #include -#include #include #include #include +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/examples/arrow/dataset_documentation_example.cc b/cpp/examples/arrow/dataset_documentation_example.cc index 56c796ee47e6..c78f6d598492 100644 --- a/cpp/examples/arrow/dataset_documentation_example.cc +++ b/cpp/examples/arrow/dataset_documentation_example.cc @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -31,6 +30,7 @@ #include #include #include +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/examples/arrow/dataset_parquet_scan_example.cc b/cpp/examples/arrow/dataset_parquet_scan_example.cc index 265b75480dfd..2a2a31245de3 100644 --- a/cpp/examples/arrow/dataset_parquet_scan_example.cc +++ b/cpp/examples/arrow/dataset_parquet_scan_example.cc @@ -16,7 +16,6 @@ // under the License. #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include #include +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/examples/arrow/dataset_skyhook_scan_example.cc b/cpp/examples/arrow/dataset_skyhook_scan_example.cc index 2d391723bd09..a32a6f5c4fe8 100644 --- a/cpp/examples/arrow/dataset_skyhook_scan_example.cc +++ b/cpp/examples/arrow/dataset_skyhook_scan_example.cc @@ -16,7 +16,6 @@ // under the License. #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include #include +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/examples/arrow/join_example.cc b/cpp/examples/arrow/join_example.cc index eb7a8678a6e9..d912399b3c8e 100644 --- a/cpp/examples/arrow/join_example.cc +++ b/cpp/examples/arrow/join_example.cc @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 4eef856cb9e5..377423403e63 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -395,7 +395,7 @@ list(APPEND compute/exec/asof_join_node.cc compute/exec/bloom_filter.cc compute/exec/exec_plan.cc - compute/exec/expression.cc + compute/expression.cc compute/exec/fetch_node.cc compute/exec/filter_node.cc compute/exec/hash_join.cc diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index 4accec15d9c5..3a7c30d68dd4 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -83,7 +83,8 @@ add_arrow_compute_test(internals_test kernel_test.cc light_array_test.cc registry_test.cc - key_hash_test.cc) + key_hash_test.cc + expression_test.cc) add_arrow_benchmark(function_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index ef593bad9377..7e7ec36c1768 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -38,13 +38,7 @@ /// @{ /// @} -#include "arrow/compute/exec/expression.h" // IWYU pragma: export - -/// \defgroup execnode-options Concrete option classes for ExecNode options -/// @{ -/// @} - -#include "arrow/compute/exec/options.h" // IWYU pragma: export +#include "arrow/compute/expression.h" // IWYU pragma: export /// \defgroup execnode-row Utilities for working with data in a row-major format /// @{ @@ -52,10 +46,8 @@ #include "arrow/compute/row/grouper.h" // IWYU pragma: export -/// \defgroup execnode-components Components associated with ExecNode +/// \defgroup execnode-components Components associated with ExecBatch /// @{ /// @} -#include "arrow/compute/exec.h" // IWYU pragma: export -#include "arrow/compute/exec/exec_plan.h" // IWYU pragma: export -#include "arrow/compute/exec/groupby.h" // IWYU pragma: export +#include "arrow/compute/exec.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h index 338740f066ee..d583f0e62c78 100644 --- a/cpp/src/arrow/compute/exec.h +++ b/cpp/src/arrow/compute/exec.h @@ -30,7 +30,7 @@ #include #include "arrow/array/data.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" #include "arrow/datum.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index f3f02d393c3c..d77333caca5f 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -17,12 +17,11 @@ arrow_install_all_headers("arrow/compute/exec") -add_arrow_compute_test(expression_test +add_arrow_compute_test(subtree_test REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES - expression_test.cc subtree_test.cc) add_arrow_compute_test(plan_test diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc index c7e745d7a809..0fd5ff231068 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.cc +++ b/cpp/src/arrow/compute/exec/exec_plan.cc @@ -24,11 +24,11 @@ #include #include "arrow/compute/exec.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/task_util.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" #include "arrow/compute/registry.h" #include "arrow/datum.h" #include "arrow/record_batch.h" diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h index 7f9c19938d20..b51f38cfa927 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.h +++ b/cpp/src/arrow/compute/exec/exec_plan.h @@ -28,8 +28,8 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" +#include "arrow/compute/exec/type_fwd.h" #include "arrow/compute/ordering.h" -#include "arrow/compute/type_fwd.h" #include "arrow/type_fwd.h" #include "arrow/util/future.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/compute/exec/expression_benchmark.cc b/cpp/src/arrow/compute/exec/expression_benchmark.cc index e431497e45b4..ca418b3eca0e 100644 --- a/cpp/src/arrow/compute/exec/expression_benchmark.cc +++ b/cpp/src/arrow/compute/exec/expression_benchmark.cc @@ -20,8 +20,8 @@ #include #include "arrow/compute/cast.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/test_util.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/partition.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" diff --git a/cpp/src/arrow/compute/exec/fetch_node.cc b/cpp/src/arrow/compute/exec/fetch_node.cc index 1c4ec0aaded8..7ab44a3391f9 100644 --- a/cpp/src/arrow/compute/exec/fetch_node.cc +++ b/cpp/src/arrow/compute/exec/fetch_node.cc @@ -21,11 +21,11 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/accumulation_queue.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/map_node.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/exec/filter_benchmark.cc b/cpp/src/arrow/compute/exec/filter_benchmark.cc index aa8e3e8b77dd..bbeb205ca777 100644 --- a/cpp/src/arrow/compute/exec/filter_benchmark.cc +++ b/cpp/src/arrow/compute/exec/filter_benchmark.cc @@ -20,8 +20,8 @@ #include "benchmark/benchmark.h" #include "arrow/compute/exec/benchmark_util.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" +#include "arrow/compute/expression.h" #include "arrow/record_batch.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/compute/exec/filter_node.cc b/cpp/src/arrow/compute/exec/filter_node.cc index b6877f106dca..4162f899fdbe 100644 --- a/cpp/src/arrow/compute/exec/filter_node.cc +++ b/cpp/src/arrow/compute/exec/filter_node.cc @@ -18,10 +18,10 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/map_node.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/query_context.h" +#include "arrow/compute/expression.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/exec/map_node.cc b/cpp/src/arrow/compute/exec/map_node.cc index 5bb1c0f36dd9..79abcb88f10e 100644 --- a/cpp/src/arrow/compute/exec/map_node.cc +++ b/cpp/src/arrow/compute/exec/map_node.cc @@ -24,7 +24,7 @@ #include #include "arrow/compute/exec.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h index b0057d73da16..2e44015de6c1 100644 --- a/cpp/src/arrow/compute/exec/options.h +++ b/cpp/src/arrow/compute/exec/options.h @@ -26,7 +26,8 @@ #include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/exec/type_fwd.h" +#include "arrow/compute/expression.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/util/async_generator.h" diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 9eed429b3dac..2dc221bb367d 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -22,11 +22,11 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/test_nodes.h" #include "arrow/compute/exec/test_util.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" #include "arrow/io/util_internal.h" #include "arrow/record_batch.h" #include "arrow/table.h" diff --git a/cpp/src/arrow/compute/exec/project_benchmark.cc b/cpp/src/arrow/compute/exec/project_benchmark.cc index 9414fa890590..d289353ca077 100644 --- a/cpp/src/arrow/compute/exec/project_benchmark.cc +++ b/cpp/src/arrow/compute/exec/project_benchmark.cc @@ -23,10 +23,10 @@ #include "arrow/compute/cast.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec/benchmark_util.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/task_util.h" #include "arrow/compute/exec/test_util.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/partition.h" #include "arrow/testing/future_util.h" #include "arrow/testing/generator.h" diff --git a/cpp/src/arrow/compute/exec/project_node.cc b/cpp/src/arrow/compute/exec/project_node.cc index 3bff2d6ee3d4..56d3474aa769 100644 --- a/cpp/src/arrow/compute/exec/project_node.cc +++ b/cpp/src/arrow/compute/exec/project_node.cc @@ -20,11 +20,11 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/map_node.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/exec/sink_node.cc b/cpp/src/arrow/compute/exec/sink_node.cc index 79c247282269..f3fe1252cec3 100644 --- a/cpp/src/arrow/compute/exec/sink_node.cc +++ b/cpp/src/arrow/compute/exec/sink_node.cc @@ -25,12 +25,12 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/accumulation_queue.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/order_by_impl.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/util.h" #include "arrow/compute/exec_internal.h" +#include "arrow/compute/expression.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/table.h" diff --git a/cpp/src/arrow/compute/exec/source_node.cc b/cpp/src/arrow/compute/exec/source_node.cc index 1730ee4e9304..310c0c647404 100644 --- a/cpp/src/arrow/compute/exec/source_node.cc +++ b/cpp/src/arrow/compute/exec/source_node.cc @@ -21,11 +21,11 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/util.h" #include "arrow/compute/exec_internal.h" +#include "arrow/compute/expression.h" #include "arrow/datum.h" #include "arrow/io/util_internal.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/exec/subtree_internal.h b/cpp/src/arrow/compute/exec/subtree_internal.h index 9e55af6068f3..ef0f19ac5cc3 100644 --- a/cpp/src/arrow/compute/exec/subtree_internal.h +++ b/cpp/src/arrow/compute/exec/subtree_internal.h @@ -24,7 +24,7 @@ #include #include -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" namespace arrow { namespace compute { diff --git a/cpp/src/arrow/compute/exec/tpch_node.h b/cpp/src/arrow/compute/exec/tpch_node.h index 061b66ca4361..c99d61d950b8 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.h +++ b/cpp/src/arrow/compute/exec/tpch_node.h @@ -22,7 +22,7 @@ #include #include -#include "arrow/compute/type_fwd.h" +#include "arrow/compute/exec/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/exec/type_fwd.h b/cpp/src/arrow/compute/exec/type_fwd.h new file mode 100644 index 000000000000..1c3d25205ede --- /dev/null +++ b/cpp/src/arrow/compute/exec/type_fwd.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/compute/type_fwd.h" + +namespace arrow { + +namespace compute { + +class ExecNode; +class ExecPlan; +class ExecNodeOptions; +class ExecFactoryRegistry; +class QueryContext; +struct QueryOptions; + +class SinkNodeConsumer; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h index 8e5001d99c12..0ef8ab264e08 100644 --- a/cpp/src/arrow/compute/exec/util.h +++ b/cpp/src/arrow/compute/exec/util.h @@ -25,9 +25,9 @@ #include #include "arrow/buffer.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" -#include "arrow/compute/type_fwd.h" +#include "arrow/compute/exec/type_fwd.h" +#include "arrow/compute/expression.h" #include "arrow/compute/util.h" #include "arrow/memory_pool.h" #include "arrow/result.h" @@ -186,53 +186,6 @@ class ARROW_EXPORT NullSinkNodeConsumer : public SinkNodeConsumer { } }; -/// Modify an Expression with pre-order and post-order visitation. -/// `pre` will be invoked on each Expression. `pre` will visit Calls before their -/// arguments, `post_call` will visit Calls (and no other Expressions) after their -/// arguments. Visitors should return the Identical expression to indicate no change; this -/// will prevent unnecessary construction in the common case where a modification is not -/// possible/necessary/... -/// -/// If an argument was modified, `post_call` visits a reconstructed Call with the modified -/// arguments but also receives a pointer to the unmodified Expression as a second -/// argument. If no arguments were modified the unmodified Expression* will be nullptr. -template -Result ModifyExpression(Expression expr, const PreVisit& pre, - const PostVisitCall& post_call) { - ARROW_ASSIGN_OR_RAISE(expr, Result(pre(std::move(expr)))); - - auto call = expr.call(); - if (!call) return expr; - - bool at_least_one_modified = false; - std::vector modified_arguments; - - for (size_t i = 0; i < call->arguments.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(auto modified_argument, - ModifyExpression(call->arguments[i], pre, post_call)); - - if (Identical(modified_argument, call->arguments[i])) { - continue; - } - - if (!at_least_one_modified) { - modified_arguments = call->arguments; - at_least_one_modified = true; - } - - modified_arguments[i] = std::move(modified_argument); - } - - if (at_least_one_modified) { - // reconstruct the call expression with the modified arguments - auto modified_call = *call; - modified_call.arguments = std::move(modified_arguments); - return post_call(Expression(std::move(modified_call)), &expr); - } - - return post_call(std::move(expr), NULLPTR); -} - /// CRTP helper for tracing helper functions class ARROW_EXPORT TracedNode { diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/expression.cc similarity index 99% rename from cpp/src/arrow/compute/exec/expression.cc rename to cpp/src/arrow/compute/expression.cc index b5e5bf60388c..5f057a4ab53a 100644 --- a/cpp/src/arrow/compute/exec/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include #include @@ -24,10 +24,10 @@ #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" -#include "arrow/compute/exec/expression_internal.h" -#include "arrow/compute/exec/util.h" #include "arrow/compute/exec_internal.h" +#include "arrow/compute/expression_internal.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/util.h" #include "arrow/io/memory.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/expression.h similarity index 100% rename from cpp/src/arrow/compute/exec/expression.h rename to cpp/src/arrow/compute/expression.h diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/expression_internal.h similarity index 99% rename from cpp/src/arrow/compute/exec/expression_internal.h rename to cpp/src/arrow/compute/expression_internal.h index 9e29b8e27f97..083756dc5fd3 100644 --- a/cpp/src/arrow/compute/exec/expression_internal.h +++ b/cpp/src/arrow/compute/expression_internal.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include #include diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc similarity index 99% rename from cpp/src/arrow/compute/exec/expression_test.cc rename to cpp/src/arrow/compute/expression_test.cc index 96069b2dd131..8c5dea40ae39 100644 --- a/cpp/src/arrow/compute/exec/expression_test.cc +++ b/cpp/src/arrow/compute/expression_test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include #include @@ -27,7 +27,7 @@ #include #include -#include "arrow/compute/exec/expression_internal.h" +#include "arrow/compute/expression_internal.h" #include "arrow/compute/function_internal.h" #include "arrow/compute/registry.h" #include "arrow/testing/gtest_util.h" diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index 220460c460f4..4e665a136e33 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -50,14 +50,14 @@ struct KernelState; struct Declaration; class Expression; -class ExecNode; -class ExecPlan; -class ExecNodeOptions; -class ExecFactoryRegistry; -class QueryContext; -struct QueryOptions; - -class SinkNodeConsumer; +// class ExecNode; +// class ExecPlan; +// class ExecNodeOptions; +// class ExecFactoryRegistry; +// class QueryContext; +// struct QueryOptions; +// +// class SinkNodeConsumer; ARROW_EXPORT ExecContext* default_exec_context(); ARROW_EXPORT ExecContext* threaded_exec_context(); diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 60c20137c8c2..94d8a4d28d8f 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -25,6 +25,7 @@ #include #include "arrow/buffer.h" +#include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" #include "arrow/memory_pool.h" #include "arrow/result.h" @@ -202,4 +203,55 @@ class bit_util { }; } // namespace util + +namespace compute { + +/// Modify an Expression with pre-order and post-order visitation. +/// `pre` will be invoked on each Expression. `pre` will visit Calls before their +/// arguments, `post_call` will visit Calls (and no other Expressions) after their +/// arguments. Visitors should return the Identical expression to indicate no change; this +/// will prevent unnecessary construction in the common case where a modification is not +/// possible/necessary/... +/// +/// If an argument was modified, `post_call` visits a reconstructed Call with the modified +/// arguments but also receives a pointer to the unmodified Expression as a second +/// argument. If no arguments were modified the unmodified Expression* will be nullptr. +template +Result ModifyExpression(Expression expr, const PreVisit& pre, + const PostVisitCall& post_call) { + ARROW_ASSIGN_OR_RAISE(expr, Result(pre(std::move(expr)))); + + auto call = expr.call(); + if (!call) return expr; + + bool at_least_one_modified = false; + std::vector modified_arguments; + + for (size_t i = 0; i < call->arguments.size(); ++i) { + ARROW_ASSIGN_OR_RAISE(auto modified_argument, + ModifyExpression(call->arguments[i], pre, post_call)); + + if (Identical(modified_argument, call->arguments[i])) { + continue; + } + + if (!at_least_one_modified) { + modified_arguments = call->arguments; + at_least_one_modified = true; + } + + modified_arguments[i] = std::move(modified_argument); + } + + if (at_least_one_modified) { + // reconstruct the call expression with the modified arguments + auto modified_call = *call; + modified_call.arguments = std::move(modified_arguments); + return post_call(Expression(std::move(modified_call)), &expr); + } + + return post_call(std::move(expr), NULLPTR); +} + +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/dataset/api.h b/cpp/src/arrow/dataset/api.h index 6e8aab5e9ea8..6554dfc8cbcb 100644 --- a/cpp/src/arrow/dataset/api.h +++ b/cpp/src/arrow/dataset/api.h @@ -19,7 +19,7 @@ #pragma once -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 0ab985e9410f..1db230b16e9c 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -26,7 +26,7 @@ #include #include -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/type_fwd.h" #include "arrow/dataset/visibility.h" #include "arrow/util/async_generator_fwd.h" diff --git a/cpp/src/arrow/dataset/file_benchmark.cc b/cpp/src/arrow/dataset/file_benchmark.cc index 5caea18511dd..8953cbd11064 100644 --- a/cpp/src/arrow/dataset/file_benchmark.cc +++ b/cpp/src/arrow/dataset/file_benchmark.cc @@ -17,7 +17,7 @@ #include "benchmark/benchmark.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" #include "arrow/dataset/file_ipc.h" diff --git a/cpp/src/arrow/dataset/file_json.cc b/cpp/src/arrow/dataset/file_json.cc index 05cbad4a5a81..6ca8405f03e2 100644 --- a/cpp/src/arrow/dataset/file_json.cc +++ b/cpp/src/arrow/dataset/file_json.cc @@ -22,7 +22,7 @@ #include #include "arrow/compute/exec.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/scanner.h" #include "arrow/dataset/type_fwd.h" diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc index 46cdf9023ce0..e3d390703f25 100644 --- a/cpp/src/arrow/dataset/partition.cc +++ b/cpp/src/arrow/dataset/partition.cc @@ -30,7 +30,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" -#include "arrow/compute/exec/expression_internal.h" +#include "arrow/compute/expression_internal.h" #include "arrow/compute/row/grouper.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/filesystem/path_util.h" diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h index faee0c676e24..7a2c0aa17451 100644 --- a/cpp/src/arrow/dataset/partition.h +++ b/cpp/src/arrow/dataset/partition.h @@ -28,7 +28,7 @@ #include #include -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/type_fwd.h" #include "arrow/dataset/visibility.h" #include "arrow/util/compare.h" diff --git a/cpp/src/arrow/dataset/scan_node.cc b/cpp/src/arrow/dataset/scan_node.cc index 5d0bc52deb03..4f574930be04 100644 --- a/cpp/src/arrow/dataset/scan_node.cc +++ b/cpp/src/arrow/dataset/scan_node.cc @@ -23,10 +23,10 @@ #include #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" -#include "arrow/compute/exec/expression_internal.h" #include "arrow/compute/exec/query_context.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" +#include "arrow/compute/expression_internal.h" #include "arrow/dataset/scanner.h" #include "arrow/record_batch.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h index 3d84e8c21e6a..89bd3c65627a 100644 --- a/cpp/src/arrow/dataset/scanner.h +++ b/cpp/src/arrow/dataset/scanner.h @@ -25,8 +25,8 @@ #include #include -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" +#include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/projector.h" diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 3dbcdaced06f..0a15eb87fd53 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -28,8 +28,8 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression_internal.h" #include "arrow/compute/exec/test_util.h" +#include "arrow/compute/expression_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/plan.h" #include "arrow/dataset/test_util_internal.h" diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index c8e2ddbfad4a..589c5e29c540 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -33,7 +33,7 @@ #include "arrow/array.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 43a9e1187fb7..5f9fec733f6a 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -41,8 +41,8 @@ #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/exec/expression.h" -#include "arrow/compute/exec/expression_internal.h" +#include "arrow/compute/expression.h" +#include "arrow/compute/expression_internal.h" #include "arrow/engine/substrait/extension_set.h" #include "arrow/engine/substrait/extension_types.h" #include "arrow/engine/substrait/options.h" diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 1f2ef09a6d54..50e0b11943f6 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -31,7 +31,7 @@ #include #include "arrow/compute/api_aggregate.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/engine/substrait/type_fwd.h" #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index b21d8c587873..eefc37607ad9 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -30,8 +30,8 @@ #include "arrow/compute/api_aggregate.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" +#include "arrow/compute/expression.h" #include "arrow/compute/kernel.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/discovery.h" diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index a6116af9592e..de4aae343ace 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -32,8 +32,8 @@ #include "arrow/buffer.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" #include "arrow/compute/exec/options.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/file_base.h" #include "arrow/engine/substrait/expression_internal.h" #include "arrow/engine/substrait/extension_set.h" diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 5b298d13f3d6..06c3cc209659 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -34,12 +34,12 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/asof_join_node.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" -#include "arrow/compute/exec/expression_internal.h" #include "arrow/compute/exec/map_node.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/test_util.h" #include "arrow/compute/exec/util.h" +#include "arrow/compute/expression.h" +#include "arrow/compute/expression_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/type_fwd.h" #include "arrow/dataset/dataset.h" diff --git a/cpp/src/skyhook/client/file_skyhook.cc b/cpp/src/skyhook/client/file_skyhook.cc index 725b04a345ba..5af1b2b6a575 100644 --- a/cpp/src/skyhook/client/file_skyhook.cc +++ b/cpp/src/skyhook/client/file_skyhook.cc @@ -19,7 +19,7 @@ #include "skyhook/protocol/rados_protocol.h" #include "skyhook/protocol/skyhook_protocol.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/file_base.h" #include "arrow/dataset/file_ipc.h" #include "arrow/dataset/file_parquet.h" diff --git a/cpp/src/skyhook/cls/cls_skyhook.cc b/cpp/src/skyhook/cls/cls_skyhook.cc index 5f50dd04607d..24f80c79d573 100644 --- a/cpp/src/skyhook/cls/cls_skyhook.cc +++ b/cpp/src/skyhook/cls/cls_skyhook.cc @@ -18,7 +18,7 @@ #include -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/file_ipc.h" #include "arrow/dataset/file_parquet.h" diff --git a/cpp/src/skyhook/cls/cls_skyhook_test.cc b/cpp/src/skyhook/cls/cls_skyhook_test.cc index ebb1f4e0b2b6..ce4250cfd357 100644 --- a/cpp/src/skyhook/cls/cls_skyhook_test.cc +++ b/cpp/src/skyhook/cls/cls_skyhook_test.cc @@ -16,7 +16,7 @@ // under the License. #include "skyhook/client/file_skyhook.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/file_base.h" #include "arrow/filesystem/api.h" diff --git a/cpp/src/skyhook/protocol/skyhook_protocol.h b/cpp/src/skyhook/protocol/skyhook_protocol.h index b4f6d6ee1b47..37dda8ad77a5 100644 --- a/cpp/src/skyhook/protocol/skyhook_protocol.h +++ b/cpp/src/skyhook/protocol/skyhook_protocol.h @@ -21,7 +21,7 @@ #include #include -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/record_batch.h" #include "arrow/table.h" #include "arrow/type.h" diff --git a/cpp/src/skyhook/protocol/skyhook_protocol_test.cc b/cpp/src/skyhook/protocol/skyhook_protocol_test.cc index 463767a07694..23c78e2a5857 100644 --- a/cpp/src/skyhook/protocol/skyhook_protocol_test.cc +++ b/cpp/src/skyhook/protocol/skyhook_protocol_test.cc @@ -16,7 +16,7 @@ // under the License. #include "skyhook/protocol/skyhook_protocol.h" -#include "arrow/compute/exec/expression.h" +#include "arrow/compute/expression.h" #include "arrow/dataset/test_util_internal.h" #include "arrow/table.h" #include "arrow/testing/gtest_util.h" From cca06d46d61de039b05ef2ecb41a9e658388ff67 Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Wed, 15 Mar 2023 09:42:22 -0400 Subject: [PATCH 4/6] remove more dependencies --- cpp/src/arrow/compute/exec/plan_test.cc | 33 +++++++++++++++++-------- cpp/src/arrow/compute/type_fwd.h | 8 ------ cpp/src/arrow/testing/generator.cc | 19 ++------------ cpp/src/arrow/testing/generator.h | 5 ---- 4 files changed, 25 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 2dc221bb367d..ff6b5d96e6eb 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -37,6 +37,7 @@ #include "arrow/testing/random.h" #include "arrow/util/async_generator.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" #include "arrow/util/thread_pool.h" #include "arrow/util/vector.h" @@ -550,10 +551,12 @@ custom_sink_label:OrderBySinkNode{by={sort_keys=[FieldRef.Name(sum(multiply(i32, } TEST(ExecPlanExecution, CustomFieldNames) { - Declaration source = gen::Gen({{"x", gen::Step()}}) - ->FailOnError() - ->SourceNode(/*rows_per_batch=*/1, /*num_batches=*/1); - + auto generator = gen::Gen({{"x", gen::Step()}})->FailOnError(); + std::vector<::arrow::compute::ExecBatch> ebatches = + generator->ExecBatches(/*rows_per_batch=*/1, /*num_batches=*/1); + Declaration source = + Declaration("exec_batch_source", ::arrow::compute::ExecBatchSourceNodeOptions( + generator->Schema(), std::move(ebatches))); QueryOptions opts; opts.field_names = {"y"}; @@ -1016,9 +1019,14 @@ TEST(ExecPlanExecution, ProjectMaintainsOrder) { constexpr int kRandomSeed = 42; constexpr int64_t kRowsPerBatch = 1; constexpr int kNumBatches = 16; - auto source_node = gen::Gen({{"x", gen::Step()}}) - ->FailOnError() - ->SourceNode(kRowsPerBatch, kNumBatches); + + auto generator = gen::Gen({{"x", gen::Step()}})->FailOnError(); + std::vector<::arrow::compute::ExecBatch> ebatches = + generator->ExecBatches(kRowsPerBatch, kNumBatches); + Declaration source_node = + Declaration("exec_batch_source", ::arrow::compute::ExecBatchSourceNodeOptions( + generator->Schema(), std::move(ebatches))); + Declaration plan = Declaration::Sequence({source_node, {"jitter", JitterNodeOptions(kRandomSeed)}, @@ -1035,9 +1043,14 @@ TEST(ExecPlanExecution, FilterMaintainsOrder) { constexpr int kRandomSeed = 42; constexpr int64_t kRowsPerBatch = 1; constexpr int kNumBatches = 16; - auto source_node = gen::Gen({{"x", gen::Step()}}) - ->FailOnError() - ->SourceNode(kRowsPerBatch, kNumBatches); + + auto generator = gen::Gen({{"x", gen::Step()}})->FailOnError(); + std::vector<::arrow::compute::ExecBatch> ebatches = + generator->ExecBatches(kRowsPerBatch, kNumBatches); + Declaration source_node = + Declaration("exec_batch_source", ::arrow::compute::ExecBatchSourceNodeOptions( + generator->Schema(), std::move(ebatches))); + Declaration plan = Declaration::Sequence( {source_node, {"jitter", JitterNodeOptions(kRandomSeed)}, diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index 4e665a136e33..f1e6354e4ee0 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -50,14 +50,6 @@ struct KernelState; struct Declaration; class Expression; -// class ExecNode; -// class ExecPlan; -// class ExecNodeOptions; -// class ExecFactoryRegistry; -// class QueryContext; -// struct QueryOptions; -// -// class SinkNodeConsumer; ARROW_EXPORT ExecContext* default_exec_context(); ARROW_EXPORT ExecContext* threaded_exec_context(); diff --git a/cpp/src/arrow/testing/generator.cc b/cpp/src/arrow/testing/generator.cc index fc90a5cd8899..18cd1fad6af8 100644 --- a/cpp/src/arrow/testing/generator.cc +++ b/cpp/src/arrow/testing/generator.cc @@ -28,7 +28,6 @@ #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/compute/exec.h" -#include "arrow/compute/exec/options.h" #include "arrow/datum.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" @@ -38,7 +37,6 @@ #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/macros.h" #include "arrow/util/string.h" @@ -309,15 +307,6 @@ class DataGeneratorImpl : public DataGenerator, return batches; } - Result<::arrow::compute::Declaration> SourceNode(int64_t rows_per_batch, - int num_batches) override { - ARROW_ASSIGN_OR_RAISE(std::vector<::arrow::compute::ExecBatch> batches, - ExecBatches(rows_per_batch, num_batches)); - return ::arrow::compute::Declaration( - "exec_batch_source", - ::arrow::compute::ExecBatchSourceNodeOptions(schema_, std::move(batches))); - } - Result> Table(int64_t rows_per_chunk, int num_chunks = 1) override { ARROW_ASSIGN_OR_RAISE(RecordBatchVector batches, @@ -373,12 +362,6 @@ class GTestDataGeneratorImpl : public GTestDataGenerator { EXPECT_OK_AND_ASSIGN(auto batches, target_->ExecBatches(rows_per_batch, num_batches)); return batches; } - ::arrow::compute::Declaration SourceNode(int64_t rows_per_batch, - int num_batches) override { - EXPECT_OK_AND_ASSIGN(auto source_node, - target_->SourceNode(rows_per_batch, num_batches)); - return source_node; - } std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk, int num_chunks) override { EXPECT_OK_AND_ASSIGN(auto table, target_->Table(rows_per_chunk, num_chunks)); @@ -386,6 +369,8 @@ class GTestDataGeneratorImpl : public GTestDataGenerator { } std::shared_ptr<::arrow::Schema> Schema() override { return target_->Schema(); } + std::shared_ptr Target() { return target_; } + private: std::shared_ptr target_; }; diff --git a/cpp/src/arrow/testing/generator.h b/cpp/src/arrow/testing/generator.h index ecfc4ee640f4..8b72f8a68aad 100644 --- a/cpp/src/arrow/testing/generator.h +++ b/cpp/src/arrow/testing/generator.h @@ -23,7 +23,6 @@ #include #include "arrow/array/array_base.h" -#include "arrow/compute/exec/exec_plan.h" #include "arrow/compute/type_fwd.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/visibility.h" @@ -256,8 +255,6 @@ class ARROW_TESTING_EXPORT GTestDataGenerator { virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0; virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t rows_per_batch, int num_batches) = 0; - virtual ::arrow::compute::Declaration SourceNode(int64_t rows_per_batch, - int num_batches) = 0; virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk, int num_chunks = 1) = 0; @@ -274,8 +271,6 @@ class ARROW_TESTING_EXPORT DataGenerator { virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0; virtual Result> ExecBatches( int64_t rows_per_batch, int num_batches) = 0; - virtual Result<::arrow::compute::Declaration> SourceNode(int64_t rows_per_batch, - int num_batches) = 0; virtual Result> Table(int64_t rows_per_chunk, int num_chunks = 1) = 0; From 6830f12941c1fc516ec7e4eeaea3a95631d12d7c Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Thu, 16 Mar 2023 13:13:29 -0400 Subject: [PATCH 5/6] PR comments --- cpp/src/arrow/compute/exec/type_fwd.h | 2 +- cpp/src/arrow/compute/light_array_test.cc | 3 ++- cpp/src/arrow/compute/type_fwd.h | 1 - cpp/src/arrow/testing/generator.cc | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/exec/type_fwd.h b/cpp/src/arrow/compute/exec/type_fwd.h index 1c3d25205ede..c40260326197 100644 --- a/cpp/src/arrow/compute/exec/type_fwd.h +++ b/cpp/src/arrow/compute/exec/type_fwd.h @@ -29,7 +29,7 @@ class ExecNodeOptions; class ExecFactoryRegistry; class QueryContext; struct QueryOptions; - +struct Declaration; class SinkNodeConsumer; } // namespace compute diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 70bcdf3bf7e2..392dc2b1c5ce 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -34,7 +34,8 @@ const std::vector> kSampleFixedDataTypes = { uint16(), uint32(), uint64(), decimal128(38, 6), decimal256(76, 6)}; const std::vector> kSampleBinaryTypes = {utf8(), binary()}; -ExecBatch ExecBatchFromJSON(const std::vector& types, std::string_view json) { +static ExecBatch ExecBatchFromJSON(const std::vector& types, + std::string_view json) { auto fields = ::arrow::internal::MapVector( [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index f1e6354e4ee0..3f990b181431 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -48,7 +48,6 @@ struct VectorKernel; struct KernelState; -struct Declaration; class Expression; ARROW_EXPORT ExecContext* default_exec_context(); diff --git a/cpp/src/arrow/testing/generator.cc b/cpp/src/arrow/testing/generator.cc index 18cd1fad6af8..45316bef8a91 100644 --- a/cpp/src/arrow/testing/generator.cc +++ b/cpp/src/arrow/testing/generator.cc @@ -369,8 +369,6 @@ class GTestDataGeneratorImpl : public GTestDataGenerator { } std::shared_ptr<::arrow::Schema> Schema() override { return target_->Schema(); } - std::shared_ptr Target() { return target_; } - private: std::shared_ptr target_; }; From a3c7b3834b6eeeff185ed3f7619b833fac826d69 Mon Sep 17 00:00:00 2001 From: Davide Pasetto Date: Thu, 16 Mar 2023 14:39:28 -0400 Subject: [PATCH 6/6] PR comments --- cpp/src/arrow/compute/light_array_test.cc | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 392dc2b1c5ce..71d228bf3f61 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -34,8 +34,8 @@ const std::vector> kSampleFixedDataTypes = { uint16(), uint32(), uint64(), decimal128(38, 6), decimal256(76, 6)}; const std::vector> kSampleBinaryTypes = {utf8(), binary()}; -static ExecBatch ExecBatchFromJSON(const std::vector& types, - std::string_view json) { +static ExecBatch JSONToExecBatch(const std::vector& types, + std::string_view json) { auto fields = ::arrow::internal::MapVector( [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); @@ -349,7 +349,7 @@ TEST(ExecBatchBuilder, AppendValuesBeyondLimit) { TEST(KeyColumnArray, FromExecBatch) { ExecBatch batch = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); std::vector arrays; ASSERT_OK(ColumnArraysFromExecBatch(batch, &arrays)); @@ -372,10 +372,10 @@ TEST(ExecBatchBuilder, AppendBatches) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); - ExecBatch combined = ExecBatchFromJSON( + JSONToExecBatch({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + ExecBatch combined = JSONToExecBatch( {int64(), boolean()}, "[[1, true], [2, false], [null, null], [null, true], [5, true], [6, false]]"); { @@ -394,10 +394,10 @@ TEST(ExecBatchBuilder, AppendBatchesSomeRows) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); - ExecBatch combined = ExecBatchFromJSON( + JSONToExecBatch({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + ExecBatch combined = JSONToExecBatch( {int64(), boolean()}, "[[1, true], [2, false], [null, true], [5, true]]"); { ExecBatchBuilder builder; @@ -415,13 +415,13 @@ TEST(ExecBatchBuilder, AppendBatchesSomeCols) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); ExecBatch batch_two = - ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); + JSONToExecBatch({int64(), boolean()}, "[[null, true], [5, true], [6, false]]"); ExecBatch first_col_only = - ExecBatchFromJSON({int64()}, "[[1], [2], [null], [null], [5], [6]]"); - ExecBatch last_col_only = ExecBatchFromJSON( - {boolean()}, "[[true], [false], [null], [true], [true], [false]]"); + JSONToExecBatch({int64()}, "[[1], [2], [null], [null], [5], [6]]"); + ExecBatch last_col_only = + JSONToExecBatch({boolean()}, "[[true], [false], [null], [true], [true], [false]]"); { ExecBatchBuilder builder; uint16_t row_ids[3] = {0, 1, 2}; @@ -463,12 +463,12 @@ TEST(ExecBatchBuilder, AppendNulls) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); ExecBatch batch_one = - ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); - ExecBatch combined = ExecBatchFromJSON( + JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); + ExecBatch combined = JSONToExecBatch( {int64(), boolean()}, "[[1, true], [2, false], [null, null], [null, null], [null, null]]"); ExecBatch just_nulls = - ExecBatchFromJSON({int64(), boolean()}, "[[null, null], [null, null]]"); + JSONToExecBatch({int64(), boolean()}, "[[null, null], [null, null]]"); { ExecBatchBuilder builder; uint16_t row_ids[3] = {0, 1, 2};