From 8056114c081f72df1571023aec61cfa5e66af17d Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Tue, 23 Sep 2025 00:27:24 +0800 Subject: [PATCH 1/8] feat: add uuid utils uuid generator for v4 and v7 FromString and ToString utilities --- src/iceberg/CMakeLists.txt | 3 +- src/iceberg/util/uuid_util.cc | 207 ++++++++++++++++++++++++++++++++++ src/iceberg/util/uuid_util.h | 60 ++++++++++ test/CMakeLists.txt | 1 + test/uuid_util_test.cc | 97 ++++++++++++++++ 5 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 src/iceberg/util/uuid_util.cc create mode 100644 src/iceberg/util/uuid_util.h create mode 100644 test/uuid_util_test.cc diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index c8fb07721..3d7f1672d 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -52,7 +52,8 @@ set(ICEBERG_SOURCES util/decimal.cc util/murmurhash3_internal.cc util/timepoint.cc - util/gzip_internal.cc) + util/gzip_internal.cc + util/uuid_util.cc) set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) diff --git a/src/iceberg/util/uuid_util.cc b/src/iceberg/util/uuid_util.cc new file mode 100644 index 000000000..304911558 --- /dev/null +++ b/src/iceberg/util/uuid_util.cc @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/uuid_util.h" + +#include +#include +#include +#include +#include + +#include "iceberg/result.h" +#include "iceberg/util/int128.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +std::array UUIDUtils::GenerateUuidV4() { + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution distrib( + std::numeric_limits::min(), std::numeric_limits::max()); + std::array uuid; + + // Generate two random 64-bit integers + uint64_t high_bits = distrib(gen); + uint64_t low_bits = distrib(gen); + + // Combine them into a uint128_t + uint128_t random_128_bit_number = (static_cast(high_bits) << 64) | low_bits; + + // Copy the bytes into the uuid array + std::memcpy(uuid.data(), &random_128_bit_number, 16); + + // Set magic numbers for a "version 4" (pseudorandom) UUID and variant, + // see https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4 + uuid[6] = (uuid[6] & 0x0F) | 0x40; + // Set variant field, top two bits are 1, 0 + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + return uuid; +} + +std::array UUIDUtils::GenerateUuidV7() { + // Get the current time in milliseconds since the Unix epoch + auto now = std::chrono::system_clock::now(); + auto duration_since_epoch = now.time_since_epoch(); + auto unix_ts_ms = + std::chrono::duration_cast(duration_since_epoch).count(); + + return GenerateUuidV7(static_cast(unix_ts_ms)); +} + +std::array UUIDUtils::GenerateUuidV7(uint64_t unix_ts_ms) { + std::array uuid = {}; + + // Set the timestamp (in milliseconds since Unix epoch) + uuid[0] = (unix_ts_ms >> 40) & 0xFF; + uuid[1] = (unix_ts_ms >> 32) & 0xFF; + uuid[2] = (unix_ts_ms >> 24) & 0xFF; + uuid[3] = (unix_ts_ms >> 16) & 0xFF; + uuid[4] = (unix_ts_ms >> 8) & 0xFF; + uuid[5] = unix_ts_ms & 0xFF; + + // Generate random bytes for the remaining fields + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution distrib( + std::numeric_limits::min(), std::numeric_limits::max()); + + // Note: uint8_t is invalid for uniform_int_distribution on Windows + for (size_t i = 6; i < 16; i += 2) { + auto rand = static_cast(distrib(gen)); + uuid[i] = (rand >> 8) & 0xFF; + uuid[i + 1] = rand & 0xFF; + } + + // Set magic numbers for a "version 7" (pseudorandom) UUID and variant, + // see https://www.rfc-editor.org/rfc/rfc9562#name-version-field + uuid[6] = (uuid[6] & 0x0F) | 0x70; + // set variant field, top two bits are 1, 0 + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + return uuid; +} + +namespace { + +constexpr std::array BuildHexTable() { + std::array buf{}; + for (int i = 0; i < 256; i++) { + if (i >= '0' && i <= '9') { + buf[i] = static_cast(i - '0'); + } else if (i >= 'a' && i <= 'f') { + buf[i] = static_cast(i - 'a' + 10); + } else if (i >= 'A' && i <= 'F') { + buf[i] = static_cast(i - 'A' + 10); + } else { + buf[i] = 0xff; + } + } + return buf; +} + +constexpr std::array BuildShl4Table() { + std::array buf{}; + for (int i = 0; i < 256; i++) { + buf[i] = static_cast(i << 4); + } + return buf; +} + +constexpr auto HEX_TABLE = BuildHexTable(); +constexpr auto SHL4_TABLE = BuildShl4Table(); + +// Parse a UUID string without dashes, e.g. "67e5504410b1426f9247bb680e5fe0c8" +inline Result> ParseSimple(std::string_view s) { + ICEBERG_DCHECK(s.size() == 32, "s must be 32 characters long"); + + std::array buf{}; + for (size_t i = 0; i < 16; i++) { + uint8_t h1 = HEX_TABLE[static_cast(s[i * 2])]; + uint8_t h2 = HEX_TABLE[static_cast(s[i * 2 + 1])]; + + if ((h1 | h2) == 0xff) { + return InvalidArgument("Invalid UUID string: {}", s); + } + + buf[i] = static_cast(SHL4_TABLE[h1] | h2); + } + return buf; +} + +// Parse a UUID string with dashes, e.g. "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" +inline Result> ParseHyphenated(std::string_view s) { + ICEBERG_DCHECK(s.size() == 36, "s must be 36 characters long"); + + // Check that dashes are in the right places + if (!(s[8] == '-' && s[13] == '-' && s[18] == '-' && s[23] == '-')) { + return InvalidArgument("Invalid UUID string: {}", s); + } + + constexpr std::array positions = {0, 4, 9, 14, 19, 24, 28, 32}; + std::array buf{}; + + for (size_t j = 0; j < 8; j++) { + size_t i = positions[j]; + uint8_t h1 = HEX_TABLE[static_cast(s[i])]; + uint8_t h2 = HEX_TABLE[static_cast(s[i + 1])]; + uint8_t h3 = HEX_TABLE[static_cast(s[i + 2])]; + uint8_t h4 = HEX_TABLE[static_cast(s[i + 3])]; + + if ((h1 | h2 | h3 | h4) == 0xff) { + return InvalidArgument("Invalid UUID string: {}", s); + } + + buf[j * 2] = static_cast(SHL4_TABLE[h1] | h2); + buf[j * 2 + 1] = static_cast(SHL4_TABLE[h3] | h4); + } + + return buf; +} + +} // namespace + +Result> UUIDUtils::FromString(std::string_view str) { + if (str.size() == 32) { + return ParseSimple(str); + } else if (str.size() == 36) { + return ParseHyphenated(str); + } else { + return InvalidArgument("Invalid UUID string: {}", str); + } +} + +std::string UUIDUtils::ToString(std::span uuid) { + static const char* hex_chars = "0123456789abcdef"; + ICEBERG_DCHECK(uuid.size() == 16, "uuid must be 16 bytes long"); + std::string str(36, '-'); + + for (size_t i = 0; i < 16; i++) { + str[i * 2 + (i >= 4 ? 1 : 0) + (i >= 6 ? 1 : 0) + (i >= 8 ? 1 : 0) + + (i >= 10 ? 1 : 0)] = hex_chars[(uuid[i] >> 4) & 0x0F]; + str[i * 2 + 1 + (i >= 4 ? 1 : 0) + (i >= 6 ? 1 : 0) + (i >= 8 ? 1 : 0) + + (i >= 10 ? 1 : 0)] = hex_chars[uuid[i] & 0x0F]; + } + + return str; +} + +} // namespace iceberg diff --git a/src/iceberg/util/uuid_util.h b/src/iceberg/util/uuid_util.h new file mode 100644 index 000000000..99460118b --- /dev/null +++ b/src/iceberg/util/uuid_util.h @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +/// \file iceberg/util/uuid_util.h +/// \brief UUID (Universally Unique Identifier) utilities. + +namespace iceberg { + +class ICEBERG_EXPORT UUIDUtils { + public: + /// \brief Generate a random UUID (version 4). + static std::array GenerateUuidV4(); + + /// \brief Generate UUID version 7 per RFC 9562, with the current timestamp. + static std::array GenerateUuidV7(); + + /// \brief Generate UUID version 7 per RFC 9562, with the given timestamp. + /// + /// UUID version 7 consists of a Unix timestamp in milliseconds (48 bits) and + /// 74 random bits, excluding the required version and variant bits. + /// + /// \param unix_ts_ms number of milliseconds since start of the UNIX epoch + /// + /// \note unix_ts_ms cannot be negative per RFC. + static std::array GenerateUuidV7(uint64_t unix_ts_ms); + + /// \brief Create a UUID from a string in standard format. + static Result> FromString(std::string_view str); + + /// \brief Convert a UUID to a string in standard format. + static std::string ToString(std::span uuid); +}; + +} // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d70b4f85b..ed4c2bd1b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -91,6 +91,7 @@ add_iceberg_test(util_test endian_test.cc formatter_test.cc string_util_test.cc + uuid_util_test.cc visit_type_test.cc) add_iceberg_test(roaring_test SOURCES roaring_test.cc) diff --git a/test/uuid_util_test.cc b/test/uuid_util_test.cc new file mode 100644 index 000000000..63dd5eca3 --- /dev/null +++ b/test/uuid_util_test.cc @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/uuid_util.h" + +#include + +#include + +#include "matchers.h" + +namespace iceberg { + +TEST(UUIDUtilTest, GenerateV4) { + auto uuid = UUIDUtils::GenerateUuidV4(); + // just ensure it runs and produces a value + EXPECT_EQ(uuid.size(), 16); + // Version 4 UUIDs have the version number (4) in the 7th byte + EXPECT_EQ((uuid[6] >> 4) & 0x0F, 4); + // Variant is in the 9th byte, the two most significant bits should be 10 + EXPECT_EQ((uuid[8] >> 6) & 0x03, 0b10); +} + +TEST(UUIDUtilTest, GenerateV7) { + auto uuid = UUIDUtils::GenerateUuidV7(); + // just ensure it runs and produces a value + EXPECT_EQ(uuid.size(), 16); + // Version 7 UUIDs have the version number (7) in the 7th byte + EXPECT_EQ((uuid[6] >> 4) & 0x0F, 7); + // Variant is in the 9th byte, the two most significant bits should be 10 + EXPECT_EQ((uuid[8] >> 6) & 0x03, 0b10); +} + +TEST(UUIDUtilTest, FromString) { + std::vector uuid_strings = { + "123e4567-e89b-12d3-a456-426614174000", + "550e8400-e29b-41d4-a716-446655440000", + "f47ac10b-58cc-4372-a567-0e02b2c3d479", + }; + + for (const auto& uuid_str : uuid_strings) { + auto result = UUIDUtils::FromString(uuid_str); + EXPECT_THAT(result, IsOk()); + auto uuid = result.value(); + EXPECT_EQ(UUIDUtils::ToString(uuid), uuid_str); + } + + std::vector> uuid_string_pairs = { + {"123e4567e89b12d3a456426614174000", "123e4567-e89b-12d3-a456-426614174000"}, + {"550E8400E29B41D4A716446655440000", "550e8400-e29b-41d4-a716-446655440000"}, + {"F47AC10B58CC4372A5670E02B2C3D479", "f47ac10b-58cc-4372-a567-0e02b2c3d479"}, + }; + + for (const auto& [input_str, expected_str] : uuid_string_pairs) { + auto result = UUIDUtils::FromString(input_str); + EXPECT_THAT(result, IsOk()); + auto uuid = result.value(); + EXPECT_EQ(UUIDUtils::ToString(uuid), expected_str); + } +} + +TEST(UUIDUtilTest, FromStringInvalid) { + std::vector invalid_uuid_strings = { + "123e4567-e89b-12d3-a456-42661417400", // too short + "123e4567-e89b-12d3-a456-4266141740000", // too long + "g23e4567-e89b-12d3-a456-426614174000", // invalid character + "123e4567e89b12d3a45642661417400", // too short without dashes + "123e4567e89b12d3a4564266141740000", // too long without dashes + "550e8400-e29b-41d4-a716-44665544000Z", // invalid character at end + "550e8400-e29b-41d4-a716-44665544000-", // invalid character at end + "550e8400-e29b-41d4-a716-4466554400", // too short + }; + + for (const auto& uuid_str : invalid_uuid_strings) { + auto result = UUIDUtils::FromString(uuid_str); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("Invalid UUID string")); + } +} + +} // namespace iceberg From bf7495ed709704d51772d50dd138ec314da61545 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Tue, 23 Sep 2025 22:33:47 +0800 Subject: [PATCH 2/8] fix: use std::format to simplify uuid ToString --- src/iceberg/util/uuid_util.cc | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/iceberg/util/uuid_util.cc b/src/iceberg/util/uuid_util.cc index 304911558..c0b70b27e 100644 --- a/src/iceberg/util/uuid_util.cc +++ b/src/iceberg/util/uuid_util.cc @@ -192,16 +192,12 @@ Result> UUIDUtils::FromString(std::string_view str) { std::string UUIDUtils::ToString(std::span uuid) { static const char* hex_chars = "0123456789abcdef"; ICEBERG_DCHECK(uuid.size() == 16, "uuid must be 16 bytes long"); - std::string str(36, '-'); - for (size_t i = 0; i < 16; i++) { - str[i * 2 + (i >= 4 ? 1 : 0) + (i >= 6 ? 1 : 0) + (i >= 8 ? 1 : 0) + - (i >= 10 ? 1 : 0)] = hex_chars[(uuid[i] >> 4) & 0x0F]; - str[i * 2 + 1 + (i >= 4 ? 1 : 0) + (i >= 6 ? 1 : 0) + (i >= 8 ? 1 : 0) + - (i >= 10 ? 1 : 0)] = hex_chars[uuid[i] & 0x0F]; - } - - return str; + return std::format( + "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}" + "{:02x}{:02x}{:02x}", + uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8], + uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]); } } // namespace iceberg From 7ca5ee82cbb88564c5ee638604c72d6314a5e56b Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Tue, 23 Sep 2025 22:41:52 +0800 Subject: [PATCH 3/8] fix: use ICEBERG_CHECK for uuid size check --- src/iceberg/util/uuid_util.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/iceberg/util/uuid_util.cc b/src/iceberg/util/uuid_util.cc index c0b70b27e..9db4de8da 100644 --- a/src/iceberg/util/uuid_util.cc +++ b/src/iceberg/util/uuid_util.cc @@ -25,6 +25,7 @@ #include #include +#include "iceberg/exception.h" #include "iceberg/result.h" #include "iceberg/util/int128.h" #include "iceberg/util/macros.h" @@ -191,7 +192,7 @@ Result> UUIDUtils::FromString(std::string_view str) { std::string UUIDUtils::ToString(std::span uuid) { static const char* hex_chars = "0123456789abcdef"; - ICEBERG_DCHECK(uuid.size() == 16, "uuid must be 16 bytes long"); + ICEBERG_CHECK(uuid.size() == 16, "uuid must be 16 bytes long"); return std::format( "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}" From 1274056f11671cf341adc49628995a8da3ec2dc2 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Wed, 24 Sep 2025 22:56:47 +0800 Subject: [PATCH 4/8] fix: make Uuid a wrapper class --- src/iceberg/CMakeLists.txt | 2 +- src/iceberg/util/{uuid_util.cc => uuid.cc} | 83 +++++++++++++--------- src/iceberg/util/{uuid_util.h => uuid.h} | 39 +++++++--- test/CMakeLists.txt | 2 +- test/{uuid_util_test.cc => uuid_test.cc} | 40 ++++++++--- 5 files changed, 112 insertions(+), 54 deletions(-) rename src/iceberg/util/{uuid_util.cc => uuid.cc} (70%) rename src/iceberg/util/{uuid_util.h => uuid.h} (62%) rename test/{uuid_util_test.cc => uuid_test.cc} (69%) diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 3d7f1672d..747d4c40e 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -53,7 +53,7 @@ set(ICEBERG_SOURCES util/murmurhash3_internal.cc util/timepoint.cc util/gzip_internal.cc - util/uuid_util.cc) + util/uuid.cc) set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) diff --git a/src/iceberg/util/uuid_util.cc b/src/iceberg/util/uuid.cc similarity index 70% rename from src/iceberg/util/uuid_util.cc rename to src/iceberg/util/uuid.cc index 9db4de8da..ed00dfff8 100644 --- a/src/iceberg/util/uuid_util.cc +++ b/src/iceberg/util/uuid.cc @@ -17,7 +17,7 @@ * under the License. */ -#include "iceberg/util/uuid_util.h" +#include "iceberg/util/uuid.h" #include #include @@ -32,7 +32,9 @@ namespace iceberg { -std::array UUIDUtils::GenerateUuidV4() { +Uuid::Uuid(std::array data) : data_(std::move(data)) {} + +Uuid Uuid::GenerateV4() { static std::random_device rd; static std::mt19937 gen(rd()); static std::uniform_int_distribution distrib( @@ -55,20 +57,20 @@ std::array UUIDUtils::GenerateUuidV4() { // Set variant field, top two bits are 1, 0 uuid[8] = (uuid[8] & 0x3F) | 0x80; - return uuid; + return Uuid(std::move(uuid)); } -std::array UUIDUtils::GenerateUuidV7() { +Uuid Uuid::GenerateV7() { // Get the current time in milliseconds since the Unix epoch auto now = std::chrono::system_clock::now(); auto duration_since_epoch = now.time_since_epoch(); auto unix_ts_ms = std::chrono::duration_cast(duration_since_epoch).count(); - return GenerateUuidV7(static_cast(unix_ts_ms)); + return GenerateV7(static_cast(unix_ts_ms)); } -std::array UUIDUtils::GenerateUuidV7(uint64_t unix_ts_ms) { +Uuid Uuid::GenerateV7(uint64_t unix_ts_ms) { std::array uuid = {}; // Set the timestamp (in milliseconds since Unix epoch) @@ -98,14 +100,14 @@ std::array UUIDUtils::GenerateUuidV7(uint64_t unix_ts_ms) { // set variant field, top two bits are 1, 0 uuid[8] = (uuid[8] & 0x3F) | 0x80; - return uuid; + return Uuid(std::move(uuid)); } namespace { constexpr std::array BuildHexTable() { std::array buf{}; - for (int i = 0; i < 256; i++) { + for (int32_t i = 0; i < 256; i++) { if (i >= '0' && i <= '9') { buf[i] = static_cast(i - '0'); } else if (i >= 'a' && i <= 'f') { @@ -121,35 +123,35 @@ constexpr std::array BuildHexTable() { constexpr std::array BuildShl4Table() { std::array buf{}; - for (int i = 0; i < 256; i++) { + for (int32_t i = 0; i < 256; i++) { buf[i] = static_cast(i << 4); } return buf; } -constexpr auto HEX_TABLE = BuildHexTable(); -constexpr auto SHL4_TABLE = BuildShl4Table(); +constexpr auto kHexTable = BuildHexTable(); +constexpr auto kShl4Table = BuildShl4Table(); // Parse a UUID string without dashes, e.g. "67e5504410b1426f9247bb680e5fe0c8" -inline Result> ParseSimple(std::string_view s) { +inline Result ParseSimple(std::string_view s) { ICEBERG_DCHECK(s.size() == 32, "s must be 32 characters long"); - std::array buf{}; + std::array uuid{}; for (size_t i = 0; i < 16; i++) { - uint8_t h1 = HEX_TABLE[static_cast(s[i * 2])]; - uint8_t h2 = HEX_TABLE[static_cast(s[i * 2 + 1])]; + uint8_t h1 = kHexTable[static_cast(s[i * 2])]; + uint8_t h2 = kHexTable[static_cast(s[i * 2 + 1])]; - if ((h1 | h2) == 0xff) { + if ((h1 | h2) == 0xFF) { return InvalidArgument("Invalid UUID string: {}", s); } - buf[i] = static_cast(SHL4_TABLE[h1] | h2); + uuid[i] = static_cast(kShl4Table[h1] | h2); } - return buf; + return Uuid(std::move(uuid)); } // Parse a UUID string with dashes, e.g. "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -inline Result> ParseHyphenated(std::string_view s) { +inline Result ParseHyphenated(std::string_view s) { ICEBERG_DCHECK(s.size() == 36, "s must be 36 characters long"); // Check that dashes are in the right places @@ -158,29 +160,29 @@ inline Result> ParseHyphenated(std::string_view s) { } constexpr std::array positions = {0, 4, 9, 14, 19, 24, 28, 32}; - std::array buf{}; + std::array uuid{}; for (size_t j = 0; j < 8; j++) { size_t i = positions[j]; - uint8_t h1 = HEX_TABLE[static_cast(s[i])]; - uint8_t h2 = HEX_TABLE[static_cast(s[i + 1])]; - uint8_t h3 = HEX_TABLE[static_cast(s[i + 2])]; - uint8_t h4 = HEX_TABLE[static_cast(s[i + 3])]; + uint8_t h1 = kHexTable[static_cast(s[i])]; + uint8_t h2 = kHexTable[static_cast(s[i + 1])]; + uint8_t h3 = kHexTable[static_cast(s[i + 2])]; + uint8_t h4 = kHexTable[static_cast(s[i + 3])]; - if ((h1 | h2 | h3 | h4) == 0xff) { + if ((h1 | h2 | h3 | h4) == 0xFF) { return InvalidArgument("Invalid UUID string: {}", s); } - buf[j * 2] = static_cast(SHL4_TABLE[h1] | h2); - buf[j * 2 + 1] = static_cast(SHL4_TABLE[h3] | h4); + uuid[j * 2] = static_cast(kShl4Table[h1] | h2); + uuid[j * 2 + 1] = static_cast(kShl4Table[h3] | h4); } - return buf; + return Uuid(std::move(uuid)); } } // namespace -Result> UUIDUtils::FromString(std::string_view str) { +Result Uuid::FromString(std::string_view str) { if (str.size() == 32) { return ParseSimple(str); } else if (str.size() == 36) { @@ -190,15 +192,30 @@ Result> UUIDUtils::FromString(std::string_view str) { } } -std::string UUIDUtils::ToString(std::span uuid) { +Result Uuid::FromBytes(std::span bytes) { + if (bytes.size() != kUuidSize) [[unlikely]] { + return InvalidArgument("UUID byte array must be exactly {} bytes, was {}", kUuidSize, + bytes.size()); + } + std::array data; + std::memcpy(data.data(), bytes.data(), kUuidSize); + return Uuid(std::move(data)); +} + +uint8_t Uuid::operator[](size_t index) const { + ICEBERG_CHECK(index < kUuidSize, "UUID index out of range: {}", index); + return data_[index]; +} + +std::string Uuid::ToString() const { static const char* hex_chars = "0123456789abcdef"; - ICEBERG_CHECK(uuid.size() == 16, "uuid must be 16 bytes long"); return std::format( "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}" "{:02x}{:02x}{:02x}", - uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8], - uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]); + data_[0], data_[1], data_[2], data_[3], data_[4], data_[5], data_[6], data_[7], + data_[8], data_[9], data_[10], data_[11], data_[12], data_[13], data_[14], + data_[15]); } } // namespace iceberg diff --git a/src/iceberg/util/uuid_util.h b/src/iceberg/util/uuid.h similarity index 62% rename from src/iceberg/util/uuid_util.h rename to src/iceberg/util/uuid.h index 99460118b..d2bed5414 100644 --- a/src/iceberg/util/uuid_util.h +++ b/src/iceberg/util/uuid.h @@ -27,18 +27,23 @@ #include "iceberg/iceberg_export.h" #include "iceberg/result.h" -/// \file iceberg/util/uuid_util.h -/// \brief UUID (Universally Unique Identifier) utilities. +/// \file iceberg/util/uuid.h +/// \brief UUID (Universally Unique Identifier) representation. namespace iceberg { -class ICEBERG_EXPORT UUIDUtils { +class ICEBERG_EXPORT Uuid { public: + Uuid() = delete; + constexpr static size_t kUuidSize = 16; + + explicit Uuid(std::array data); + /// \brief Generate a random UUID (version 4). - static std::array GenerateUuidV4(); + static Uuid GenerateV4(); /// \brief Generate UUID version 7 per RFC 9562, with the current timestamp. - static std::array GenerateUuidV7(); + static Uuid GenerateV7(); /// \brief Generate UUID version 7 per RFC 9562, with the given timestamp. /// @@ -48,13 +53,29 @@ class ICEBERG_EXPORT UUIDUtils { /// \param unix_ts_ms number of milliseconds since start of the UNIX epoch /// /// \note unix_ts_ms cannot be negative per RFC. - static std::array GenerateUuidV7(uint64_t unix_ts_ms); + static Uuid GenerateV7(uint64_t unix_ts_ms); /// \brief Create a UUID from a string in standard format. - static Result> FromString(std::string_view str); + static Result FromString(std::string_view str); + + /// \brief Create a UUID from a 16-byte array. + static Result FromBytes(std::span bytes); + + /// \brief Get the raw bytes of the UUID. + std::span bytes() const { return data_; } + + /// \brief Access individual bytes of the UUID. + uint8_t operator[](size_t index) const; + + /// \brief Convert the UUID to a string in standard format. + std::string ToString() const; + + friend bool operator==(const Uuid& lhs, const Uuid& rhs) { + return lhs.data_ == rhs.data_; + } - /// \brief Convert a UUID to a string in standard format. - static std::string ToString(std::span uuid); + private: + std::array data_; }; } // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed4c2bd1b..3c7473522 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -91,7 +91,7 @@ add_iceberg_test(util_test endian_test.cc formatter_test.cc string_util_test.cc - uuid_util_test.cc + uuid_test.cc visit_type_test.cc) add_iceberg_test(roaring_test SOURCES roaring_test.cc) diff --git a/test/uuid_util_test.cc b/test/uuid_test.cc similarity index 69% rename from test/uuid_util_test.cc rename to test/uuid_test.cc index 63dd5eca3..13ddb46d8 100644 --- a/test/uuid_util_test.cc +++ b/test/uuid_test.cc @@ -17,7 +17,7 @@ * under the License. */ -#include "iceberg/util/uuid_util.h" +#include "iceberg/util/uuid.h" #include @@ -28,9 +28,9 @@ namespace iceberg { TEST(UUIDUtilTest, GenerateV4) { - auto uuid = UUIDUtils::GenerateUuidV4(); + auto uuid = Uuid::GenerateV4(); // just ensure it runs and produces a value - EXPECT_EQ(uuid.size(), 16); + EXPECT_EQ(uuid.bytes().size(), Uuid::kUuidSize); // Version 4 UUIDs have the version number (4) in the 7th byte EXPECT_EQ((uuid[6] >> 4) & 0x0F, 4); // Variant is in the 9th byte, the two most significant bits should be 10 @@ -38,9 +38,9 @@ TEST(UUIDUtilTest, GenerateV4) { } TEST(UUIDUtilTest, GenerateV7) { - auto uuid = UUIDUtils::GenerateUuidV7(); + auto uuid = Uuid::GenerateV7(); // just ensure it runs and produces a value - EXPECT_EQ(uuid.size(), 16); + EXPECT_EQ(uuid.bytes().size(), 16); // Version 7 UUIDs have the version number (7) in the 7th byte EXPECT_EQ((uuid[6] >> 4) & 0x0F, 7); // Variant is in the 9th byte, the two most significant bits should be 10 @@ -55,10 +55,10 @@ TEST(UUIDUtilTest, FromString) { }; for (const auto& uuid_str : uuid_strings) { - auto result = UUIDUtils::FromString(uuid_str); + auto result = Uuid::FromString(uuid_str); EXPECT_THAT(result, IsOk()); auto uuid = result.value(); - EXPECT_EQ(UUIDUtils::ToString(uuid), uuid_str); + EXPECT_EQ(uuid.ToString(), uuid_str); } std::vector> uuid_string_pairs = { @@ -68,10 +68,10 @@ TEST(UUIDUtilTest, FromString) { }; for (const auto& [input_str, expected_str] : uuid_string_pairs) { - auto result = UUIDUtils::FromString(input_str); + auto result = Uuid::FromString(input_str); EXPECT_THAT(result, IsOk()); auto uuid = result.value(); - EXPECT_EQ(UUIDUtils::ToString(uuid), expected_str); + EXPECT_EQ(uuid.ToString(), expected_str); } } @@ -88,10 +88,30 @@ TEST(UUIDUtilTest, FromStringInvalid) { }; for (const auto& uuid_str : invalid_uuid_strings) { - auto result = UUIDUtils::FromString(uuid_str); + auto result = Uuid::FromString(uuid_str); EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); EXPECT_THAT(result, HasErrorMessage("Invalid UUID string")); } } +TEST(UUIDUtilTest, FromBytes) { + std::array bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, + 0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66, + 0x14, 0x17, 0x40, 0x00}; + auto result = Uuid::FromBytes(bytes); + EXPECT_THAT(result, IsOk()); + auto uuid = result.value(); + EXPECT_EQ(uuid.ToString(), "123e4567-e89b-12d3-a456-426614174000"); + EXPECT_EQ(uuid, Uuid(bytes)); +} + +TEST(UUIDUtilTest, FromBytesInvalid) { + std::array short_bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, + 0x9b, 0x12, 0xd3, 0xa4, 0x56, + 0x42, 0x66, 0x14, 0x17, 0x40}; + auto result = Uuid::FromBytes(short_bytes); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("UUID byte array must be exactly 16 bytes")); +} + } // namespace iceberg From b435dc9bbb3cdabceb551e5640f9e2a9c8f438a8 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Thu, 25 Sep 2025 22:59:23 +0800 Subject: [PATCH 5/8] fix: review comments --- src/iceberg/util/uuid.cc | 156 +++++++++++++++++++-------------------- src/iceberg/util/uuid.h | 12 ++- test/uuid_test.cc | 14 ++-- 3 files changed, 94 insertions(+), 88 deletions(-) diff --git a/src/iceberg/util/uuid.cc b/src/iceberg/util/uuid.cc index ed00dfff8..c4895184b 100644 --- a/src/iceberg/util/uuid.cc +++ b/src/iceberg/util/uuid.cc @@ -32,77 +32,6 @@ namespace iceberg { -Uuid::Uuid(std::array data) : data_(std::move(data)) {} - -Uuid Uuid::GenerateV4() { - static std::random_device rd; - static std::mt19937 gen(rd()); - static std::uniform_int_distribution distrib( - std::numeric_limits::min(), std::numeric_limits::max()); - std::array uuid; - - // Generate two random 64-bit integers - uint64_t high_bits = distrib(gen); - uint64_t low_bits = distrib(gen); - - // Combine them into a uint128_t - uint128_t random_128_bit_number = (static_cast(high_bits) << 64) | low_bits; - - // Copy the bytes into the uuid array - std::memcpy(uuid.data(), &random_128_bit_number, 16); - - // Set magic numbers for a "version 4" (pseudorandom) UUID and variant, - // see https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4 - uuid[6] = (uuid[6] & 0x0F) | 0x40; - // Set variant field, top two bits are 1, 0 - uuid[8] = (uuid[8] & 0x3F) | 0x80; - - return Uuid(std::move(uuid)); -} - -Uuid Uuid::GenerateV7() { - // Get the current time in milliseconds since the Unix epoch - auto now = std::chrono::system_clock::now(); - auto duration_since_epoch = now.time_since_epoch(); - auto unix_ts_ms = - std::chrono::duration_cast(duration_since_epoch).count(); - - return GenerateV7(static_cast(unix_ts_ms)); -} - -Uuid Uuid::GenerateV7(uint64_t unix_ts_ms) { - std::array uuid = {}; - - // Set the timestamp (in milliseconds since Unix epoch) - uuid[0] = (unix_ts_ms >> 40) & 0xFF; - uuid[1] = (unix_ts_ms >> 32) & 0xFF; - uuid[2] = (unix_ts_ms >> 24) & 0xFF; - uuid[3] = (unix_ts_ms >> 16) & 0xFF; - uuid[4] = (unix_ts_ms >> 8) & 0xFF; - uuid[5] = unix_ts_ms & 0xFF; - - // Generate random bytes for the remaining fields - static std::random_device rd; - static std::mt19937 gen(rd()); - static std::uniform_int_distribution distrib( - std::numeric_limits::min(), std::numeric_limits::max()); - - // Note: uint8_t is invalid for uniform_int_distribution on Windows - for (size_t i = 6; i < 16; i += 2) { - auto rand = static_cast(distrib(gen)); - uuid[i] = (rand >> 8) & 0xFF; - uuid[i + 1] = rand & 0xFF; - } - - // Set magic numbers for a "version 7" (pseudorandom) UUID and variant, - // see https://www.rfc-editor.org/rfc/rfc9562#name-version-field - uuid[6] = (uuid[6] & 0x0F) | 0x70; - // set variant field, top two bits are 1, 0 - uuid[8] = (uuid[8] & 0x3F) | 0x80; - - return Uuid(std::move(uuid)); -} - namespace { constexpr std::array BuildHexTable() { @@ -182,6 +111,77 @@ inline Result ParseHyphenated(std::string_view s) { } // namespace +Uuid::Uuid(std::array data) : data_(std::move(data)) {} + +Uuid Uuid::GenerateV4() { + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution distrib( + std::numeric_limits::min(), std::numeric_limits::max()); + std::array uuid; + + // Generate two random 64-bit integers + uint64_t high_bits = distrib(gen); + uint64_t low_bits = distrib(gen); + + // Combine them into a uint128_t + uint128_t random_128_bit_number = (static_cast(high_bits) << 64) | low_bits; + + // Copy the bytes into the uuid array + std::memcpy(uuid.data(), &random_128_bit_number, 16); + + // Set magic numbers for a "version 4" (pseudorandom) UUID and variant, + // see https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4 + uuid[6] = (uuid[6] & 0x0F) | 0x40; + // Set variant field, top two bits are 1, 0 + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + return Uuid(std::move(uuid)); +} + +Uuid Uuid::GenerateV7() { + // Get the current time in milliseconds since the Unix epoch + auto now = std::chrono::system_clock::now(); + auto duration_since_epoch = now.time_since_epoch(); + auto unix_ts_ms = + std::chrono::duration_cast(duration_since_epoch).count(); + + return GenerateV7(static_cast(unix_ts_ms)); +} + +Uuid Uuid::GenerateV7(uint64_t unix_ts_ms) { + std::array uuid = {}; + + // Set the timestamp (in milliseconds since Unix epoch) + uuid[0] = (unix_ts_ms >> 40) & 0xFF; + uuid[1] = (unix_ts_ms >> 32) & 0xFF; + uuid[2] = (unix_ts_ms >> 24) & 0xFF; + uuid[3] = (unix_ts_ms >> 16) & 0xFF; + uuid[4] = (unix_ts_ms >> 8) & 0xFF; + uuid[5] = unix_ts_ms & 0xFF; + + // Generate random bytes for the remaining fields + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution distrib( + std::numeric_limits::min(), std::numeric_limits::max()); + + // Note: uint8_t is invalid for uniform_int_distribution on Windows + for (size_t i = 6; i < 16; i += 2) { + auto rand = static_cast(distrib(gen)); + uuid[i] = (rand >> 8) & 0xFF; + uuid[i + 1] = rand & 0xFF; + } + + // Set magic numbers for a "version 7" (pseudorandom) UUID and variant, + // see https://www.rfc-editor.org/rfc/rfc9562#name-version-field + uuid[6] = (uuid[6] & 0x0F) | 0x70; + // set variant field, top two bits are 1, 0 + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + return Uuid(std::move(uuid)); +} + Result Uuid::FromString(std::string_view str) { if (str.size() == 32) { return ParseSimple(str); @@ -193,23 +193,23 @@ Result Uuid::FromString(std::string_view str) { } Result Uuid::FromBytes(std::span bytes) { - if (bytes.size() != kUuidSize) [[unlikely]] { - return InvalidArgument("UUID byte array must be exactly {} bytes, was {}", kUuidSize, + if (bytes.size() != kLength) [[unlikely]] { + return InvalidArgument("UUID byte array must be exactly {} bytes, was {}", kLength, bytes.size()); } - std::array data; - std::memcpy(data.data(), bytes.data(), kUuidSize); + std::array data; + std::memcpy(data.data(), bytes.data(), kLength); return Uuid(std::move(data)); } uint8_t Uuid::operator[](size_t index) const { - ICEBERG_CHECK(index < kUuidSize, "UUID index out of range: {}", index); + ICEBERG_CHECK(index < kLength, "UUID index out of range: {}", index); return data_[index]; } -std::string Uuid::ToString() const { - static const char* hex_chars = "0123456789abcdef"; +std::array Uuid::ToBigEndianBytes() const { return data_; } +std::string Uuid::ToString() const { return std::format( "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}" "{:02x}{:02x}{:02x}", diff --git a/src/iceberg/util/uuid.h b/src/iceberg/util/uuid.h index d2bed5414..6932cfb34 100644 --- a/src/iceberg/util/uuid.h +++ b/src/iceberg/util/uuid.h @@ -35,9 +35,9 @@ namespace iceberg { class ICEBERG_EXPORT Uuid { public: Uuid() = delete; - constexpr static size_t kUuidSize = 16; + constexpr static size_t kLength = 16; - explicit Uuid(std::array data); + explicit Uuid(std::array data); /// \brief Generate a random UUID (version 4). static Uuid GenerateV4(); @@ -61,10 +61,16 @@ class ICEBERG_EXPORT Uuid { /// \brief Create a UUID from a 16-byte array. static Result FromBytes(std::span bytes); + /// \brief Get the raw bytes of the UUID in big-endian order. + std::array ToBigEndianBytes() const; + /// \brief Get the raw bytes of the UUID. std::span bytes() const { return data_; } /// \brief Access individual bytes of the UUID. + /// \param index The index of the byte to access (0-15). + /// \return The byte at the specified index. + /// \throw IcebergError if index is out of bounds. uint8_t operator[](size_t index) const; /// \brief Convert the UUID to a string in standard format. @@ -75,7 +81,7 @@ class ICEBERG_EXPORT Uuid { } private: - std::array data_; + std::array data_; }; } // namespace iceberg diff --git a/test/uuid_test.cc b/test/uuid_test.cc index 13ddb46d8..3dbe573b2 100644 --- a/test/uuid_test.cc +++ b/test/uuid_test.cc @@ -30,7 +30,7 @@ namespace iceberg { TEST(UUIDUtilTest, GenerateV4) { auto uuid = Uuid::GenerateV4(); // just ensure it runs and produces a value - EXPECT_EQ(uuid.bytes().size(), Uuid::kUuidSize); + EXPECT_EQ(uuid.bytes().size(), Uuid::kLength); // Version 4 UUIDs have the version number (4) in the 7th byte EXPECT_EQ((uuid[6] >> 4) & 0x0F, 4); // Variant is in the 9th byte, the two most significant bits should be 10 @@ -95,9 +95,9 @@ TEST(UUIDUtilTest, FromStringInvalid) { } TEST(UUIDUtilTest, FromBytes) { - std::array bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, - 0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66, - 0x14, 0x17, 0x40, 0x00}; + std::array bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, + 0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66, + 0x14, 0x17, 0x40, 0x00}; auto result = Uuid::FromBytes(bytes); EXPECT_THAT(result, IsOk()); auto uuid = result.value(); @@ -106,9 +106,9 @@ TEST(UUIDUtilTest, FromBytes) { } TEST(UUIDUtilTest, FromBytesInvalid) { - std::array short_bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, - 0x9b, 0x12, 0xd3, 0xa4, 0x56, - 0x42, 0x66, 0x14, 0x17, 0x40}; + std::array short_bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, + 0x9b, 0x12, 0xd3, 0xa4, 0x56, + 0x42, 0x66, 0x14, 0x17, 0x40}; auto result = Uuid::FromBytes(short_bytes); EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); EXPECT_THAT(result, HasErrorMessage("UUID byte array must be exactly 16 bytes")); From bc6546e1f513bfec77cb756a7952bac28e5fef06 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Fri, 26 Sep 2025 09:24:05 +0800 Subject: [PATCH 6/8] fix: review comment --- src/iceberg/util/uuid.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iceberg/util/uuid.cc b/src/iceberg/util/uuid.cc index c4895184b..0e2726ccf 100644 --- a/src/iceberg/util/uuid.cc +++ b/src/iceberg/util/uuid.cc @@ -44,7 +44,7 @@ constexpr std::array BuildHexTable() { } else if (i >= 'A' && i <= 'F') { buf[i] = static_cast(i - 'A' + 10); } else { - buf[i] = 0xff; + buf[i] = 0xFF; } } return buf; From 79f00bc1ae3e38f225e0a4d924f45a7b8abea06f Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sat, 27 Sep 2025 09:26:46 +0800 Subject: [PATCH 7/8] fix: remove ToBigEndianBytes --- src/iceberg/util/uuid.cc | 2 -- src/iceberg/util/uuid.h | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/iceberg/util/uuid.cc b/src/iceberg/util/uuid.cc index 0e2726ccf..5a9c8f070 100644 --- a/src/iceberg/util/uuid.cc +++ b/src/iceberg/util/uuid.cc @@ -207,8 +207,6 @@ uint8_t Uuid::operator[](size_t index) const { return data_[index]; } -std::array Uuid::ToBigEndianBytes() const { return data_; } - std::string Uuid::ToString() const { return std::format( "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}" diff --git a/src/iceberg/util/uuid.h b/src/iceberg/util/uuid.h index 6932cfb34..c90b858dd 100644 --- a/src/iceberg/util/uuid.h +++ b/src/iceberg/util/uuid.h @@ -61,9 +61,6 @@ class ICEBERG_EXPORT Uuid { /// \brief Create a UUID from a 16-byte array. static Result FromBytes(std::span bytes); - /// \brief Get the raw bytes of the UUID in big-endian order. - std::array ToBigEndianBytes() const; - /// \brief Get the raw bytes of the UUID. std::span bytes() const { return data_; } From 9688b982ae24c422a796a8866cf9eef65b27b8af Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sun, 28 Sep 2025 22:42:05 +0800 Subject: [PATCH 8/8] fix: make Uuid inherit util::Formattable and add unlikely --- src/iceberg/util/uuid.cc | 7 ++++--- src/iceberg/util/uuid.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/iceberg/util/uuid.cc b/src/iceberg/util/uuid.cc index 5a9c8f070..14256755d 100644 --- a/src/iceberg/util/uuid.cc +++ b/src/iceberg/util/uuid.cc @@ -27,6 +27,7 @@ #include "iceberg/exception.h" #include "iceberg/result.h" +#include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/int128.h" #include "iceberg/util/macros.h" @@ -70,7 +71,7 @@ inline Result ParseSimple(std::string_view s) { uint8_t h1 = kHexTable[static_cast(s[i * 2])]; uint8_t h2 = kHexTable[static_cast(s[i * 2 + 1])]; - if ((h1 | h2) == 0xFF) { + if ((h1 | h2) == 0xFF) [[unlikely]] { return InvalidArgument("Invalid UUID string: {}", s); } @@ -84,7 +85,7 @@ inline Result ParseHyphenated(std::string_view s) { ICEBERG_DCHECK(s.size() == 36, "s must be 36 characters long"); // Check that dashes are in the right places - if (!(s[8] == '-' && s[13] == '-' && s[18] == '-' && s[23] == '-')) { + if (!(s[8] == '-' && s[13] == '-' && s[18] == '-' && s[23] == '-')) [[unlikely]] { return InvalidArgument("Invalid UUID string: {}", s); } @@ -98,7 +99,7 @@ inline Result ParseHyphenated(std::string_view s) { uint8_t h3 = kHexTable[static_cast(s[i + 2])]; uint8_t h4 = kHexTable[static_cast(s[i + 3])]; - if ((h1 | h2 | h3 | h4) == 0xFF) { + if ((h1 | h2 | h3 | h4) == 0xFF) [[unlikely]] { return InvalidArgument("Invalid UUID string: {}", s); } diff --git a/src/iceberg/util/uuid.h b/src/iceberg/util/uuid.h index c90b858dd..64db7c5d6 100644 --- a/src/iceberg/util/uuid.h +++ b/src/iceberg/util/uuid.h @@ -26,13 +26,14 @@ #include "iceberg/iceberg_export.h" #include "iceberg/result.h" +#include "iceberg/util/formattable.h" /// \file iceberg/util/uuid.h /// \brief UUID (Universally Unique Identifier) representation. namespace iceberg { -class ICEBERG_EXPORT Uuid { +class ICEBERG_EXPORT Uuid : public util::Formattable { public: Uuid() = delete; constexpr static size_t kLength = 16; @@ -71,7 +72,7 @@ class ICEBERG_EXPORT Uuid { uint8_t operator[](size_t index) const; /// \brief Convert the UUID to a string in standard format. - std::string ToString() const; + std::string ToString() const override; friend bool operator==(const Uuid& lhs, const Uuid& rhs) { return lhs.data_ == rhs.data_;