From b18be403856808b9462f130109603a6d6456e33d Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sat, 19 Nov 2022 21:06:46 +0800 Subject: [PATCH 1/2] ColumnString improve performance(26%) by avoiding vector reallocate --- clickhouse/columns/string.cpp | 9 ++++++++- clickhouse/columns/string.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 8ea362c4..40c384aa 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -166,6 +166,13 @@ ColumnString::ColumnString() { } +ColumnString::ColumnString(size_t element_count) + : Column(Type::CreateString()) +{ + items_.reserve(element_count); + blocks_.reserve(element_count / 2); +} + ColumnString::ColumnString(const std::vector& data) : ColumnString() { @@ -291,7 +298,7 @@ size_t ColumnString::Size() const { } ColumnRef ColumnString::Slice(size_t begin, size_t len) const { - auto result = std::make_shared(); + auto result = std::make_shared(len); if (begin < items_.size()) { len = std::min(len, items_.size() - begin); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index f2216f40..9b83a088 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -78,6 +78,7 @@ class ColumnString : public Column { ColumnString(); ~ColumnString(); + explicit ColumnString(size_t element_count); explicit ColumnString(const std::vector & data); explicit ColumnString(std::vector&& data); ColumnString& operator=(const ColumnString&) = delete; From 4f463cb59becb6f1462f55186c74442b11f2f835 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 23 Nov 2022 12:07:02 +0400 Subject: [PATCH 2/2] More conservative `reserve`-ing of vectors + other - `ColumnString` c-tor: assuming that there are about ~100 rows in each `ColumnString::Block`, rather than 2. - `ColumnString::Slice`: only reserving for exact number of elements in `items_`. 
- `ColumnString::Append`: less code duplication - minor style fixes --- clickhouse/columns/string.cpp | 37 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 40c384aa..38cfb90c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) { + std::to_string(str.size()) + " bytes."); } - if (data_.capacity() - data_.size() < str.size()) - { + if (data_.capacity() - data_.size() < str.size()) { // round up to the next block size const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE; data_.reserve(new_size); @@ -129,13 +128,11 @@ struct ColumnString::Block data_(new CharT[capacity]) {} - inline auto GetAvailable() const - { + inline auto GetAvailable() const { return capacity - size; } - std::string_view AppendUnsafe(std::string_view str) - { + std::string_view AppendUnsafe(std::string_view str) { const auto pos = &data_[size]; memcpy(pos, str.data(), str.size()); @@ -144,13 +141,11 @@ struct ColumnString::Block return std::string_view(pos, str.size()); } - auto GetCurrentWritePos() - { + auto GetCurrentWritePos() { return &data_[size]; } - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) { const auto start = &data_[size]; size += len; return std::string_view(start, len); @@ -170,7 +165,8 @@ ColumnString::ColumnString(size_t element_count) : Column(Type::CreateString()) { items_.reserve(element_count); - blocks_.reserve(element_count / 2); + // 100 is an arbitrary number, based on the assumption that string values are about 40 bytes long. 
+ blocks_.reserve(std::max(1, element_count / 100)); } ColumnString::ColumnString(const std::vector& data) @@ -179,8 +175,7 @@ ColumnString::ColumnString(const std::vector& data) items_.reserve(data.size()); blocks_.emplace_back(ComputeTotalSize(data)); - for (const auto & s : data) - { + for (const auto & s : data) { AppendUnsafe(s); } }; @@ -201,8 +196,7 @@ ColumnString::~ColumnString() {} void ColumnString::Append(std::string_view str) { - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) - { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); } @@ -210,12 +204,7 @@ void ColumnString::Append(std::string_view str) { } void ColumnString::Append(const char* str) { - auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); - } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); + Append(std::string_view(str, strlen(str))); } void ColumnString::Append(std::string&& steal_value) { @@ -298,14 +287,14 @@ size_t ColumnString::Size() const { } ColumnRef ColumnString::Slice(size_t begin, size_t len) const { - auto result = std::make_shared(len); + auto result = std::make_shared(); if (begin < items_.size()) { len = std::min(len, items_.size() - begin); + result->items_.reserve(len); result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len)); - for (size_t i = begin; i < begin + len; ++i) - { + for (size_t i = begin; i < begin + len; ++i) { result->Append(items_[i]); } }