Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#include "types.h"

#include <cityhash/city.h>

#include <stdexcept>

namespace clickhouse {

Type::Type(const Code code) : code_(code) {}
Type::Type(const Code code)
: code_(code)
, type_unique_id_(0)
{}

std::string Type::GetName() const {
switch (code_) {
Expand Down Expand Up @@ -72,6 +77,62 @@ std::string Type::GetName() const {
return std::string();
}

uint64_t Type::GetTypeUniqueId() const {
// Helper method to optimize equality checks of types with Type::IsEqual(),
// base invariant: types with same names produce same unique id (and hence considered equal).
// As an optimization, full type name is constructed at most once, and only for complex types.
switch (code_) {
case Void:
case Int8:
case Int16:
case Int32:
case Int64:
case Int128:
case UInt8:
case UInt16:
case UInt32:
case UInt64:
case UUID:
case Float32:
case Float64:
case String:
case IPv4:
case IPv6:
case Date:
// For simple types, unique ID is the same as Type::Code
return code_;

case FixedString:
case DateTime:
case DateTime64:
case Array:
case Nullable:
case Tuple:
case Enum8:
case Enum16:
case Decimal:
case Decimal32:
case Decimal64:
case Decimal128:
case LowCardinality: {
// For complex types, exact unique ID depends on nested types and/or parameters,
// the easiest way is to lazy-compute unique ID from name once.
// Here we do not care if multiple threads are computing value simultaneosly since it is both:
// 1. going to be the same
// 2. going to be stored atomically

if (type_unique_id_ == 0) {
const auto name = GetName();
type_unique_id_ = CityHash64WithSeed(name.c_str(), name.size(), code_);
}

return type_unique_id_;
}
}
assert(false);
return 0;
}

TypeRef Type::CreateArray(TypeRef item_type) {
return TypeRef(new ArrayType(item_type));
}
Expand Down
11 changes: 10 additions & 1 deletion clickhouse/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "absl/numeric/int128.h"

#include <atomic>
#include <map>
#include <memory>
#include <string>
Expand Down Expand Up @@ -73,7 +74,12 @@ class Type {
std::string GetName() const;

/// Is given type same as current one.
bool IsEqual(const Type& other) const { return this->GetName() == other.GetName(); }
bool IsEqual(const Type& other) const {
return this == &other
// GetTypeUniqueId() is relatively heavy, so avoid calling it when comparing obviously different types.
|| (this->GetCode() == other.GetCode() && this->GetTypeUniqueId() == other.GetTypeUniqueId());
}

bool IsEqual(const TypeRef& other) const { return IsEqual(*other); }

public:
Expand Down Expand Up @@ -113,7 +119,10 @@ class Type {
static TypeRef CreateLowCardinality(TypeRef item_type);

private:
uint64_t GetTypeUniqueId() const;

const Code code_;
mutable std::atomic<uint64_t> type_unique_id_;
};

inline bool operator==(const Type & left, const Type & right) {
Expand Down
56 changes: 56 additions & 0 deletions ut/types_ut.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include <clickhouse/types/types.h>
#include <clickhouse/columns/factory.h>

#include <gtest/gtest.h>

using namespace clickhouse;
Expand Down Expand Up @@ -70,3 +72,57 @@ TEST(TypesCase, EnumTypesEmpty) {
TEST(TypesCase, DecimalTypes) {
// TODO: implement this test.
}

TEST(TypesCase, IsEqual) {
const std::string type_names[] = {
"UInt8",
"Int8",
"UInt128",
"String",
"FixedString(0)",
"FixedString(10000)",
"DateTime('UTC')",
"DateTime64(3, 'UTC')",
"Decimal(9,3)",
"Decimal(18,3)",
"Enum8()",
"Enum16()",
"Enum8('ONE' = 1)",
"Enum8('ONE' = 1, 'TWO' = 2)",
"Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)",
"Nullable(FixedString(10000))",
"Nullable(LowCardinality(FixedString(10000)))",
"Array(Int8)",
"Array(UInt8)",
"Array(String)",
"Array(Nullable(LowCardinality(FixedString(10000))))",
"Array(Enum8('ONE' = 1, 'TWO' = 2))"
"Tuple(String, Int8, Date, DateTime)",
"Nullable(Tuple(String, Int8, Date, DateTime))",
"Array(Nullable(Tuple(String, Int8, Date, DateTime)))",
"Array(Array(Nullable(Tuple(String, Int8, Date, DateTime))))",
"Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime)))))",
"Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC')))))))"
"Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC'), Tuple(LowCardinality(String), Enum8('READ'=1, 'WRITE'=0))))))))",
};

// Check that Type::IsEqual returns true only if:
// - same Type instance
// - same Type layout (matching outer type with all nested types and/or parameters)
for (const auto & type_name : type_names) {
SCOPED_TRACE(type_name);
const auto type = clickhouse::CreateColumnByType(type_name)->Type();

// Should be equal to itself
EXPECT_TRUE(type->IsEqual(type));
EXPECT_TRUE(type->IsEqual(*type));

for (const auto & other_type_name : type_names) {
const auto other_type = clickhouse::CreateColumnByType(other_type_name)->Type();

const auto should_be_equal = type_name == other_type_name;
EXPECT_EQ(should_be_equal, type->IsEqual(other_type))
<< "For types: " << type_name << " and " << other_type_name;
}
}
}