From d95f85f22b0770228b23d7c073473fd9259efd94 Mon Sep 17 00:00:00 2001 From: den818 Date: Wed, 30 Nov 2022 21:52:22 +0400 Subject: [PATCH 1/3] improvement ColumnMap --- clickhouse/CMakeLists.txt | 1 + clickhouse/base/projected_iterator.h | 55 ++++++++++++++++++++++++++++ clickhouse/columns/array.h | 2 + clickhouse/columns/map.h | 38 ++++++++++++------- 4 files changed, 82 insertions(+), 14 deletions(-) create mode 100644 clickhouse/base/projected_iterator.h diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt index 2caeebba..dd76059c 100644 --- a/clickhouse/CMakeLists.txt +++ b/clickhouse/CMakeLists.txt @@ -102,6 +102,7 @@ INSTALL(FILES base/input.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/open_telemetry.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/output.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/platform.h DESTINATION include/clickhouse/base/) +INSTALL(FILES base/projected_iterator.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/singleton.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/socket.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/string_utils.h DESTINATION include/clickhouse/base/) diff --git a/clickhouse/base/projected_iterator.h b/clickhouse/base/projected_iterator.h new file mode 100644 index 00000000..ca5aecfe --- /dev/null +++ b/clickhouse/base/projected_iterator.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include + +namespace clickhouse { + +template ()(std::declval())), + typename Value = std::decay_t> +class ProjectedIterator { +public: + using value_type = Value; + using reference = Reference; + using pointer = Reference; + using difference_type = typename std::iterator_traits::difference_type; + using iterator_category = typename std::iterator_traits::iterator_category; + + ProjectedIterator() = default; + + inline ProjectedIterator(Iterator const& iterator, UnaryFunction functor) + : iterator_(iterator) + , functor_(std::move(functor)) { + } + + inline UnaryFunction functor() const { return functor; } + + inline Iterator const& base() const { return iterator_; } + + inline reference operator*() const { return functor_(iterator_); } + + inline ProjectedIterator& operator++() { + ++iterator_; + return *this; + } + + inline ProjectedIterator& operator--() { + --iterator_; + return *this; + } + + inline bool operator==(const ProjectedIterator& other) const { + return this->iterator_ == other.iterator_; + } + + inline bool operator!=(const ProjectedIterator& other) const { + return !(*this == other); + } + +private: + Iterator iterator_; + UnaryFunction functor_; +}; + +} // namespace clickhouse diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index f3bb23d6..06408fe7 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -167,6 +167,8 @@ class ColumnArrayT : public ColumnArray { const size_t size_; size_t index_; public: + Iterator() = default; + Iterator(std::shared_ptr typed_nested_data, size_t offset, size_t size, size_t index) : typed_nested_data_(typed_nested_data) , offset_(offset) diff --git a/clickhouse/columns/map.h b/clickhouse/columns/map.h index 298d2c80..3897be5e 100644 --- a/clickhouse/columns/map.h +++ b/clickhouse/columns/map.h @@ -1,5 +1,6 @@ #pragma once +#include "../base/projected_iterator.h" #include "array.h" #include "column.h" #include "tuple.h" @@ -122,6 +123,8 @@ class ColumnMapT : public ColumnMap { typename ArrayColumnType::ArrayValueView::Iterator data_iterator_; public: + Iterator() = default; + Iterator(typename ArrayColumnType::ArrayValueView::Iterator data_iterator) : data_iterator_(data_iterator) {} @@ -187,13 +190,16 @@ class ColumnMapT : public ColumnMap { if (size() != other.size()) { return false; } - using Vector = std::vector>; - Vector l(begin(), end()); - Vector r(other.begin(), other.end()); - auto comp = [](const auto& l, const auto& r) { return l.frist < r.first; }; - std::sort(l.begin(), l.end(), comp); - std::sort(r.begin(), r.end(), comp); - return std::equal(l.begin(), l.end(), r.begin(), r.end()); + const auto make_index = [](const auto& data) { + std::vector result{data.Size()}; + std::generate(result.begin(), result.end(), [i = 0] () mutable { return i++; }); + std::sort(result.begin(), result.end(), [&data](size_t l, size_t r) {return data[l] < data[r];}); + return result; + }; + const auto l_index = make_index(data_); + const auto r_index = make_index(other.data_); + return std::equal(l_index.begin(), l_index.end(), r_index.begin(), r_index.end(), + [&l_data = data_, &r_data = other.data_](size_t l, size_t r) { return l_data[l] == r_data[r];}); return true; } @@ -214,13 +220,17 @@ class ColumnMapT : public ColumnMap { template inline void Append(const T& value) { - // TODO Refuse to copy. - std::vector> container; - container.reserve(value.size()); - for (const auto& i : value) { - container.emplace_back(i.first, i.second); - } - typed_data_->Append(container.begin(), container.end()); + using BaseIter = typename T::const_iterator; + using KeyOfT = decltype(std::declval()->first); + using ValOfT = decltype(std::declval()->second); + using Functor = std::function(const BaseIter&)>; + using Iterator = ProjectedIterator; + + Functor functor = [](const BaseIter& i) { + return std::make_tuple(i->first, i->second); + }; + + typed_data_->Append(Iterator{value.begin(), functor}, Iterator{value.end(), functor}); } static auto Wrap(ColumnMap&& col) { From 739b2fda51d30ad6e29a9958ac1f27d80415ad44 Mon Sep 17 00:00:00 2001 From: den818 Date: Wed, 30 Nov 2022 22:04:27 +0400 Subject: [PATCH 2/3] fix --- clickhouse/columns/map.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clickhouse/columns/map.h b/clickhouse/columns/map.h index 3897be5e..0249e97b 100644 --- a/clickhouse/columns/map.h +++ b/clickhouse/columns/map.h @@ -5,6 +5,7 @@ #include "column.h" #include "tuple.h" +#include #include namespace clickhouse { From 209d3ea0b197dd51e677b7af230692e4a1bbb1c4 Mon Sep 17 00:00:00 2001 From: den818 Date: Tue, 6 Dec 2022 21:21:52 +0100 Subject: [PATCH 3/3] fix --- clickhouse/columns/array.h | 2 +- clickhouse/columns/map.h | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 06408fe7..0ea33d5a 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -272,7 +272,7 @@ class ColumnArrayT : public ColumnArray { size_t counter = 0; while (begin != end) { - nested_data.Append(*begin); + nested_data.Append(std::move(*begin)); ++begin; ++counter; } diff --git a/clickhouse/columns/map.h b/clickhouse/columns/map.h index 0249e97b..24a8b4ae 100644 --- a/clickhouse/columns/map.h +++ b/clickhouse/columns/map.h @@ -193,14 +193,17 @@ class ColumnMapT : public ColumnMap { } const auto make_index = [](const auto& data) { std::vector result{data.Size()}; - std::generate(result.begin(), result.end(), [i = 0] () mutable { return i++; }); + std::generate(result.begin(), result.end(), [i = 0] () mutable {return i++;}); std::sort(result.begin(), result.end(), [&data](size_t l, size_t r) {return data[l] < data[r];}); return result; }; - const auto l_index = make_index(data_); - const auto r_index = make_index(other.data_); - return std::equal(l_index.begin(), l_index.end(), r_index.begin(), r_index.end(), - [&l_data = data_, &r_data = other.data_](size_t l, size_t r) { return l_data[l] == r_data[r];}); + const auto index = make_index(data_); + for (const auto& val : other.data_) { + if (!std::binary_search(index.begin(), index.end(), val, + [&data = data_](const auto& l, size_t r) {return l < data[r];})) { + return false; + } + } return true; } @@ -221,14 +224,14 @@ class ColumnMapT : public ColumnMap { template inline void Append(const T& value) { - using BaseIter = typename T::const_iterator; + using BaseIter = decltype(value.begin()); using KeyOfT = decltype(std::declval()->first); using ValOfT = decltype(std::declval()->second); using Functor = std::function(const BaseIter&)>; using Iterator = ProjectedIterator; Functor functor = [](const BaseIter& i) { - return std::make_tuple(i->first, i->second); + return std::make_tuple(std::cref(i->first), std::cref(i->second)); }; typed_data_->Append(Iterator{value.begin(), functor}, Iterator{value.end(), functor});