From 433b2835043dc5f066fec060b821776fae7fc4f0 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 26 Aug 2022 21:57:29 +0800 Subject: [PATCH 1/3] optimize for ColumnString. Move the column data if possible, without memory copy --- clickhouse/columns/array.cpp | 12 +++++++++ clickhouse/columns/array.h | 4 +++ clickhouse/columns/column.h | 6 +++++ clickhouse/columns/string.cpp | 30 +++++++++++++++++----- clickhouse/columns/string.h | 4 +++ tests/simple/main.cpp | 47 ++++++++++++++++++++++++++++++++++- ut/column_array_ut.cpp | 26 +++++++++++++++++++ 7 files changed, 122 insertions(+), 7 deletions(-) diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 9ef160b5..6b044b16 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -35,6 +35,18 @@ void ColumnArray::AppendAsColumn(ColumnRef array) { data_->Append(array); } +void ColumnArray::AppendAsColumnWithMove(ColumnRef array) +{ + if (!data_->Type()->IsEqual(array->Type())) { + throw ValidationError( + "can't append column of type " + array->Type()->GetName() + " " + "to column type " + data_->Type()->GetName()); + } + + AddOffset(array->Size()); + data_->AppendWithMove(array); +} + ColumnRef ColumnArray::GetAsColumn(size_t n) const { if (n >= Size()) throw ValidationError("Index is out ouf bounds: " + std::to_string(n)); diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 6144e430..bb455e11 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -35,6 +35,10 @@ class ColumnArray : public Column { /// Converts input column to array and appends as one row to the current column. void AppendAsColumn(ColumnRef array); + /// Converts input column to array and appends as one row to the current column. + /// Move the column data if possible, without memory copy. + void AppendAsColumnWithMove(ColumnRef array); + /// Convets array at pos n to column. /// Type of element of result column same as type of array element. ColumnRef GetAsColumn(size_t n) const; diff --git a/clickhouse/columns/column.h b/clickhouse/columns/column.h index b54cbdee..406eda9c 100644 --- a/clickhouse/columns/column.h +++ b/clickhouse/columns/column.h @@ -52,6 +52,12 @@ class Column : public std::enable_shared_from_this { /// Appends content of given column to the end of current one. virtual void Append(ColumnRef column) = 0; + /// Appends content of given column to the end of current one. + /// Move the column data if possible, without memory copy. + virtual void AppendWithMove(ColumnRef column) { + Append(column); + } + /// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody. /// Should be called only once from the client. Derived classes should not call it. bool Load(InputStream* input, size_t rows); diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 8ea362c4..1f0b4224 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -203,12 +203,7 @@ void ColumnString::Append(std::string_view str) { } void ColumnString::Append(const char* str) { - auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); - } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); + Append(std::string_view{ str ,strlen(str) }); } void ColumnString::Append(std::string&& steal_value) { @@ -255,6 +250,29 @@ void ColumnString::Append(ColumnRef column) { } } +void ColumnString::AppendWithMove(ColumnRef column) { + if (auto col = column->As()) { + for (auto&& block : col->blocks_) { + blocks_.emplace_back(std::move(block)); + } + col->blocks_.clear(); + col->blocks_.shrink_to_fit(); + + for (auto&& ad : col->append_data_) { + append_data_.emplace_back(std::move(ad)); + } + col->append_data_.clear(); + col->append_data_.shrink_to_fit(); + + items_.reserve(items_.size() + col->Size()); + for (auto&& item : col->items_) { + items_.emplace_back(std::move(item)); + } + col->items_.clear(); + col->items_.shrink_to_fit(); + } +} + bool ColumnString::LoadBody(InputStream* input, size_t rows) { items_.clear(); blocks_.clear(); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index f2216f40..a011d46b 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -106,6 +106,10 @@ class ColumnString : public Column { /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; + /// Appends content of given column to the end of current one. + /// Move the column data if possible, without memory copy. + void AppendWithMove(ColumnRef column) override; + /// Loads column data from input stream. bool LoadBody(InputStream* input, size_t rows) override; diff --git a/tests/simple/main.cpp b/tests/simple/main.cpp index 51340a86..37684bfe 100644 --- a/tests/simple/main.cpp +++ b/tests/simple/main.cpp @@ -63,6 +63,50 @@ inline void ArrayExample(Client& client) { client.Execute("DROP TEMPORARY TABLE test_array"); } +inline void StringArrayExample(Client& client) { + { + Block b; + + /// Create a table. + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS test_string_array (arr Array(String))"); + + auto arr = std::make_shared(std::make_shared()); + + auto id = std::make_shared(); + std::string long_time = "1234567890qwertyui"; + id->AppendNoManagedLifetime(long_time); + id->Append("hellohellohellohellohello"); + arr->AppendAsColumnWithMove(id); + + id->Append("worldworldworldworldworldworld"); + arr->AppendAsColumnWithMove(id); + + id->Append("heiheiheiheiheiheiheiheiheihei"); + arr->AppendAsColumnWithMove(id); + + id->Append("hahahahahahahahahahahahahahahaha"); + arr->AppendAsColumnWithMove(id); + + b.AppendColumn("arr", arr); + client.Insert("test_string_array", b); + + client.Select("SELECT arr FROM test_string_array", [](const Block& block) + { + for (size_t c = 0; c < block.GetRowCount(); ++c) { + auto col = block[0]->As()->GetAsColumn(c); + for (size_t i = 0; i < col->Size(); ++i) { + std::cerr << (*col->As())[i] << " "; + } + std::cerr << std::endl; + } + } + ); + + /// Delete table. + client.Execute("DROP TABLE test_string_array"); + } +} + inline void MultiArrayExample(Client& client) { Block b; @@ -477,6 +521,7 @@ inline void IPExample(Client &client) { } static void RunTests(Client& client) { + StringArrayExample(client); ArrayExample(client); CancelableExample(client); DateExample(client); @@ -496,7 +541,7 @@ static void RunTests(Client& client) { int main() { try { const auto localHostEndpoint = ClientOptions() - .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "192.168.3.163")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) diff --git a/ut/column_array_ut.cpp b/ut/column_array_ut.cpp index 001cc62e..bcbbe257 100644 --- a/ut/column_array_ut.cpp +++ b/ut/column_array_ut.cpp @@ -69,6 +69,32 @@ TEST(ColumnArray, Append) { ASSERT_EQ(col->As()->At(1), 3u); } +TEST(ColumnArray, AppendWithMove) { + auto arr = std::make_shared(std::make_shared()); + + std::string str1 = "hello clickhouse-server"; + std::string str2 = "hello clickhouse-client"; + + auto id = std::make_shared(); + std::string expect1 = str1; + id->Append(std::move(str1)); + arr->AppendAsColumnWithMove(id); + + std::string expect2 = str2; + id->Append(std::move(str2)); + arr->AppendAsColumnWithMove(id); + + ASSERT_EQ(arr->Size(), 2); + + auto col = arr->GetAsColumn(0); + ASSERT_EQ(col->Size(), 1); + ASSERT_EQ(col->As()->At(0), expect1); + + col = arr->GetAsColumn(1); + ASSERT_EQ(col->Size(), 1); + ASSERT_EQ(col->As()->At(0), expect2); +} + TEST(ColumnArray, ArrayOfDecimal) { auto column = std::make_shared(18, 10); auto array = std::make_shared(column->CloneEmpty()); From 323841c4f4e6b2a90dfd0bb20878a2aeb31b7573 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 26 Aug 2022 22:04:14 +0800 Subject: [PATCH 2/3] restore ClientOptions --- tests/simple/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/simple/main.cpp b/tests/simple/main.cpp index 37684bfe..72158072 100644 --- a/tests/simple/main.cpp +++ b/tests/simple/main.cpp @@ -541,7 +541,7 @@ static void RunTests(Client& client) { int main() { try { const auto localHostEndpoint = ClientOptions() - .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "192.168.3.163")) + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) From e7f9d30d06290f92b97cd7540931183d3799f49a Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 26 Aug 2022 22:12:53 +0800 Subject: [PATCH 3/3] fix compile error --- ut/column_array_ut.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ut/column_array_ut.cpp b/ut/column_array_ut.cpp index bcbbe257..fb75ebee 100644 --- a/ut/column_array_ut.cpp +++ b/ut/column_array_ut.cpp @@ -84,14 +84,14 @@ TEST(ColumnArray, AppendWithMove) { id->Append(std::move(str2)); arr->AppendAsColumnWithMove(id); - ASSERT_EQ(arr->Size(), 2); + ASSERT_EQ(arr->Size(), 2u); auto col = arr->GetAsColumn(0); - ASSERT_EQ(col->Size(), 1); + ASSERT_EQ(col->Size(), 1u); ASSERT_EQ(col->As()->At(0), expect1); col = arr->GetAsColumn(1); - ASSERT_EQ(col->Size(), 1); + ASSERT_EQ(col->Size(), 1u); ASSERT_EQ(col->As()->At(0), expect2); }