From fa3050597318cc2c601b30c30c536b8ea4c5558d Mon Sep 17 00:00:00 2001
From: monthdev
Date: Wed, 22 Apr 2026 21:36:31 -0700
Subject: [PATCH] MDEV-38180 Add XXH32() and XXH3() SQL functions

MariaDB supports XXH hash algorithms for KEY partitioning, but there is
no SQL interface for computing these hashes directly.

Add XXH32() and XXH3() scalar functions for long string data types.
Reject non-string argument data types with
ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, while preserving NULL
handling. Hash string values through the charset-aware Hasher path.

Register the functions and add MTR coverage for string literals, NULL,
collation-sensitive comparisons, illegal non-string arguments, and KEY
partitioning distribution for supported string columns.

Reviewed-by: Yuchen Pei
---
 mysql-test/main/func_xxh.result           | 61 ++++++++++++++++++++
 mysql-test/main/func_xxh.test             | 39 +++++++++++++
 mysql-test/main/func_xxh_partition.result | 68 +++++++++++++++++++++++
 mysql-test/main/func_xxh_partition.test   | 50 +++++++++++++++++
 sql/item.cc                               |  7 +++
 sql/item.h                                |  1 +
 sql/item_create.cc                        | 34 ++++++++++++
 sql/item_strfunc.cc                       | 47 ++++++++++++++++
 sql/item_strfunc.h                        | 62 +++++++++++++++++++++
 9 files changed, 369 insertions(+)
 create mode 100644 mysql-test/main/func_xxh.result
 create mode 100644 mysql-test/main/func_xxh.test
 create mode 100644 mysql-test/main/func_xxh_partition.result
 create mode 100644 mysql-test/main/func_xxh_partition.test

diff --git a/mysql-test/main/func_xxh.result b/mysql-test/main/func_xxh.result
new file mode 100644
index 0000000000000..b5c914a6edd8c
--- /dev/null
+++ b/mysql-test/main/func_xxh.result
@@ -0,0 +1,61 @@
+SELECT XXH32('abc') = 2154901205 AS xxh32_expected;
+xxh32_expected
+1
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+xxh32_eq
+1
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+xxh32_null
+1
+SELECT XXH32('') = 0 AS xxh32_empty;
+xxh32_empty
+1
+SELECT XXH32(11223344);
+ERROR HY000: Illegal parameter data type int for operation 'XXH32'
+SELECT XXH32(FROM_DAYS(735000));
+ERROR HY000: Illegal parameter data type date for operation 'XXH32'
+SELECT XXH3('abc') = 2615927343983396622 AS xxh3_expected;
+xxh3_expected
+1
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+xxh3_eq
+1
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+xxh3_null
+1
+SELECT XXH3('') = 0 AS xxh3_empty;
+xxh3_empty
+1
+SELECT XXH3(11223344);
+ERROR HY000: Illegal parameter data type int for operation 'XXH3'
+SELECT XXH3(FROM_DAYS(735000));
+ERROR HY000: Illegal parameter data type date for operation 'XXH3'
+CREATE TABLE t (c INT, d VARCHAR(3)) DEFAULT CHARSET=latin1;
+INSERT INTO t VALUES (11223344, 'abc');
+SELECT XXH32(d) = XXH32('abc') AS xxh32_str_col_eq
+FROM t;
+xxh32_str_col_eq
+1
+SELECT XXH3(d) = XXH3('abc') AS xxh3_str_col_eq
+FROM t;
+xxh3_str_col_eq
+1
+SELECT XXH32(c) FROM t;
+ERROR HY000: Illegal parameter data type int for operation 'XXH32'
+SELECT XXH3(c) FROM t;
+ERROR HY000: Illegal parameter data type int for operation 'XXH3'
+DROP TABLE t;
+SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
+xxh32_space_eq
+1
+SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;
+xxh3_space_eq
+1
+SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
+xxh32_koi8u_eq
+1
+SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
+xxh3_koi8u_eq
+1
diff --git a/mysql-test/main/func_xxh.test b/mysql-test/main/func_xxh.test
new file mode 100644
index 0000000000000..a4fe38240b7d8
--- /dev/null
+++ b/mysql-test/main/func_xxh.test
@@ -0,0 +1,39 @@
+SELECT XXH32('abc') = 2154901205 AS xxh32_expected;
+SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
+SELECT XXH32(NULL) IS NULL AS xxh32_null;
+SELECT XXH32('') = 0 AS xxh32_empty;
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH32(11223344);
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH32(FROM_DAYS(735000));
+
+SELECT XXH3('abc') = 2615927343983396622 AS xxh3_expected;
+SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
+SELECT XXH3(NULL) IS NULL AS xxh3_null;
+SELECT XXH3('') = 0 AS xxh3_empty;
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH3(11223344);
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH3(FROM_DAYS(735000));
+
+CREATE TABLE t (c INT, d VARCHAR(3)) DEFAULT CHARSET=latin1;
+INSERT INTO t VALUES (11223344, 'abc');
+
+SELECT XXH32(d) = XXH32('abc') AS xxh32_str_col_eq
+FROM t;
+SELECT XXH3(d) = XXH3('abc') AS xxh3_str_col_eq
+FROM t;
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH32(c) FROM t;
+--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
+SELECT XXH3(c) FROM t;
+
+DROP TABLE t;
+
+SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
+SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;
+
+SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
+SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
+XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
diff --git a/mysql-test/main/func_xxh_partition.result b/mysql-test/main/func_xxh_partition.result
new file mode 100644
index 0000000000000..99bc96bdedb8f
--- /dev/null
+++ b/mysql-test/main/func_xxh_partition.result
@@ -0,0 +1,68 @@
+## XXH32: sql function modulo distribution agrees with KEY partitioning
+CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
+PARTITION BY KEY ALGORITHM=XXH32 (c1) PARTITIONS 7;
+INSERT INTO t1
+SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
+SELECT partition_name, table_rows
+FROM information_schema.partitions
+WHERE table_schema=DATABASE() AND table_name='t1'
+ORDER BY partition_name;
+partition_name	table_rows
+p0	131
+p1	150
+p2	125
+p3	155
+p4	161
+p5	145
+p6	134
+CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
+INSERT INTO t2
+SELECT MOD(XXH32(c1), 7) AS part_id, COUNT(*)
+FROM t1
+GROUP BY part_id;
+SELECT CONCAT('p', part_id) AS partition_name, part_rows
+FROM t2
+ORDER BY partition_name;
+partition_name	part_rows
+p0	131
+p1	150
+p2	125
+p3	155
+p4	161
+p5	145
+p6	134
+DROP TABLE t1, t2;
+## XXH3: sql function modulo distribution agrees with KEY partitioning
+CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
+PARTITION BY KEY ALGORITHM=XXH3 (c1) PARTITIONS 7;
+INSERT INTO t1
+SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
+SELECT partition_name, table_rows
+FROM information_schema.partitions
+WHERE table_schema=DATABASE() AND table_name='t1'
+ORDER BY partition_name;
+partition_name	table_rows
+p0	146
+p1	136
+p2	155
+p3	146
+p4	148
+p5	138
+p6	132
+CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
+INSERT INTO t2
+SELECT MOD(XXH3(c1), 7) AS part_id, COUNT(*)
+FROM t1
+GROUP BY part_id;
+SELECT CONCAT('p', part_id) AS partition_name, part_rows
+FROM t2
+ORDER BY partition_name;
+partition_name	part_rows
+p0	146
+p1	136
+p2	155
+p3	146
+p4	148
+p5	138
+p6	132
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/func_xxh_partition.test b/mysql-test/main/func_xxh_partition.test
new file mode 100644
index 0000000000000..f1e551e9c4b9d
--- /dev/null
+++ b/mysql-test/main/func_xxh_partition.test
@@ -0,0 +1,50 @@
+--source include/have_partition.inc
+--source include/have_sequence.inc
+
+--echo ## XXH32: sql function modulo distribution agrees with KEY partitioning
+CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
+PARTITION BY KEY ALGORITHM=XXH32 (c1) PARTITIONS 7;
+
+INSERT INTO t1
+SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
+
+SELECT partition_name, table_rows
+FROM information_schema.partitions
+WHERE table_schema=DATABASE() AND table_name='t1'
+ORDER BY partition_name;
+
+CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
+INSERT INTO t2
+SELECT MOD(XXH32(c1), 7) AS part_id, COUNT(*)
+FROM t1
+GROUP BY part_id;
+
+SELECT CONCAT('p', part_id) AS partition_name, part_rows
+FROM t2
+ORDER BY partition_name;
+
+DROP TABLE t1, t2;
+
+--echo ## XXH3: sql function modulo distribution agrees with KEY partitioning
+CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
+PARTITION BY KEY ALGORITHM=XXH3 (c1) PARTITIONS 7;
+
+INSERT INTO t1
+SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
+
+SELECT partition_name, table_rows
+FROM information_schema.partitions
+WHERE table_schema=DATABASE() AND table_name='t1'
+ORDER BY partition_name;
+
+CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
+INSERT INTO t2
+SELECT MOD(XXH3(c1), 7) AS part_id, COUNT(*)
+FROM t1
+GROUP BY part_id;
+
+SELECT CONCAT('p', part_id) AS partition_name, part_rows
+FROM t2
+ORDER BY partition_name;
+
+DROP TABLE t1, t2;
diff --git a/sql/item.cc b/sql/item.cc
index f29ed13a3e9ca..b94bcc0dca668 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1151,6 +1151,13 @@ bool Item::check_type_can_return_text(const LEX_CSTRING &opname) const
 }
 
 
+void Item::hash_val_str(Hasher *hasher, String *buffer)
+{
+  if (String *res= val_str(buffer))
+    hasher->add(res->charset(), res->ptr(), res->length());
+}
+
+
 bool Item::check_type_scalar(const LEX_CSTRING &opname) const
 {
   /*
diff --git a/sql/item.h b/sql/item.h
index 43d0b57a7af22..4c9e08823a72f 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1007,6 +1007,7 @@ class Item :public Value_source,
   {
     return null_to_empty ? val_str_null_to_empty(to) : val_str(to);
   }
+  void hash_val_str(Hasher *hasher, String *buffer);
   virtual Item_func *get_item_func() { return NULL; }
 
   const MY_LOCALE *locale_from_val_str();
diff --git a/sql/item_create.cc b/sql/item_create.cc
index f2716e643668a..3c01f468b5163 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -633,6 +633,25 @@ class Create_func_crc32c : public Create_native_func
   virtual ~Create_func_crc32c() = default;
 };
 
+class Create_func_xxh32 : public Create_func_arg1
+{
+public:
+  Item *create_1_arg(THD *thd, Item *arg1) override;
+  static Create_func_xxh32 s_singleton;
+protected:
+  Create_func_xxh32() = default;
+  ~Create_func_xxh32() override = default;
+};
+
+class Create_func_xxh3 : public Create_func_arg1
+{
+public:
+  Item *create_1_arg(THD *thd, Item *arg1) override;
+  static Create_func_xxh3 s_singleton;
+protected:
+  Create_func_xxh3() = default;
+  ~Create_func_xxh3() override = default;
+};
 
 class Create_func_datediff : public Create_func_arg2
 {
@@ -3682,6 +3701,19 @@ Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name,
     : new (thd->mem_root) Item_func_crc32(thd, true, arg1);
 }
 
+Create_func_xxh32 Create_func_xxh32::s_singleton;
+
+Item *Create_func_xxh32::create_1_arg(THD *thd, Item *arg1)
+{
+  return new (thd->mem_root) Item_func_xxh32(thd, arg1);
+}
+
+Create_func_xxh3 Create_func_xxh3::s_singleton;
+
+Item *Create_func_xxh3::create_1_arg(THD *thd, Item *arg1)
+{
+  return new (thd->mem_root) Item_func_xxh3(thd, arg1);
+}
 
 Create_func_datediff Create_func_datediff::s_singleton;
 
@@ -6551,6 +6583,8 @@ const Native_func_registry func_array[] =
   { { STRING_WITH_LEN("WSREP_LAST_SEEN_GTID") }, BUILDER(Create_func_wsrep_last_seen_gtid)},
   { { STRING_WITH_LEN("WSREP_SYNC_WAIT_UPTO_GTID") }, BUILDER(Create_func_wsrep_sync_wait_upto)},
 #endif /* WITH_WSREP */
+  { { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3) },
+  { { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32) },
   { { STRING_WITH_LEN("YEARWEEK") }, BUILDER(Create_func_year_week)}
 };
 
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index c24ef8ec7e991..640ba780fb2f8 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -4574,6 +4574,53 @@ longlong Item_func_crc32::val_int()
     (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())});
 }
 
+static bool check_xxh_arg_type(Item *arg, const LEX_CSTRING &func_name)
+{
+  const Type_handler *handler= arg->type_handler();
+  if (handler == &type_handler_null ||
+      dynamic_cast<const Type_handler_longstr*>(handler) != nullptr)
+    return false;
+
+  my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
+           handler->name().ptr(), func_name.str);
+  return true;
+}
+
+
+bool Item_func_xxh32::check_arguments() const
+{
+  return check_xxh_arg_type(args[0], func_name_cstring());
+}
+
+longlong Item_func_xxh32::val_int()
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  Hasher hasher(my_hasher_xxh32());
+  args[0]->hash_val_str(&hasher, &buffer);
+  if ((null_value= args[0]->null_value))
+    return 0;
+  return static_cast<longlong>(hasher.finalize());
+}
+
+bool Item_func_xxh3::check_arguments() const
+{
+  return check_xxh_arg_type(args[0], func_name_cstring());
+}
+
+longlong Item_func_xxh3::val_int()
+{
+  DBUG_ASSERT(fixed());
+  DBUG_ASSERT(arg_count == 1);
+
+  Hasher hasher(my_hasher_xxh3());
+  args[0]->hash_val_str(&hasher, &buffer);
+  if ((null_value= args[0]->null_value))
+    return 0;
+  return static_cast<longlong>(hasher.finalize());
+}
+
 #ifdef HAVE_COMPRESS
 #include "zlib.h"
 
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 4383f86178971..ad953a5616e46 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -2317,6 +2317,68 @@ class Item_func_crc32 :public Item_long_func
   { return get_item_copy<Item_func_crc32>(thd, this); }
 };
 
+class Item_func_xxh32 : public Item_long_func
+{
+  String buffer;
+public:
+  Item_func_xxh32(THD *thd, Item *arg) : Item_long_func(thd, arg)
+  {
+    unsigned_flag= true;
+  }
+
+  longlong val_int() override;
+  bool check_arguments() const override;
+
+  bool fix_length_and_dec(THD *) override
+  {
+    max_length= 10;
+    set_maybe_null();
+    return false;
+  }
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING name= {STRING_WITH_LEN("XXH32")};
+    return name;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  {
+    return get_item_copy<Item_func_xxh32>(thd, this);
+  }
+};
+
+class Item_func_xxh3 : public Item_longlong_func
+{
+  String buffer;
+public:
+  Item_func_xxh3(THD *thd, Item *arg) : Item_longlong_func(thd, arg)
+  {
+    unsigned_flag= true;
+  }
+
+  longlong val_int() override;
+  bool check_arguments() const override;
+
+  bool fix_length_and_dec(THD *) override
+  {
+    max_length= 20;
+    set_maybe_null();
+    return false;
+  }
+
+  LEX_CSTRING func_name_cstring() const override
+  {
+    static LEX_CSTRING name= {STRING_WITH_LEN("XXH3")};
+    return name;
+  }
+
+  Item *shallow_copy(THD *thd) const override
+  {
+    return get_item_copy<Item_func_xxh3>(thd, this);
+  }
+};
+
 class Item_func_uncompressed_length : public Item_long_func_length
 {
   String value;