Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions mysql-test/main/func_xxh.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
SELECT XXH32('abc') = 2154901205 AS xxh32_expected;
xxh32_expected
1
SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
xxh32_eq
1
SELECT XXH32(NULL) IS NULL AS xxh32_null;
xxh32_null
1
Comment on lines +7 to +9
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On one hand, we want xxh32 and xxh3 to return the same values as in partitioning. In partitioning NULL and empty string produce the same hash values.

On the other hand, it looks like returning NULL makes sense, given CRC32C(NULL) is also NULL.

What do you think should be the value of XXH32(NULL) @abarkov?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think XXH32(NULL) should return NULL.

SELECT XXH32('') = 0 AS xxh32_empty;
xxh32_empty
1
SELECT XXH32(11223344);
ERROR HY000: Illegal parameter data type int for operation 'XXH32'
SELECT XXH32(FROM_DAYS(735000));
ERROR HY000: Illegal parameter data type date for operation 'XXH32'
SELECT XXH3('abc') = 2615927343983396622 AS xxh3_expected;
xxh3_expected
1
SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
xxh3_eq
1
SELECT XXH3(NULL) IS NULL AS xxh3_null;
xxh3_null
1
SELECT XXH3('') = 0 AS xxh3_empty;
xxh3_empty
1
SELECT XXH3(11223344);
ERROR HY000: Illegal parameter data type int for operation 'XXH3'
SELECT XXH3(FROM_DAYS(735000));
ERROR HY000: Illegal parameter data type date for operation 'XXH3'
CREATE TABLE t (c INT, d VARCHAR(3)) DEFAULT CHARSET=latin1;
INSERT INTO t VALUES (11223344, 'abc');
SELECT XXH32(d) = XXH32('abc') AS xxh32_str_col_eq
FROM t;
xxh32_str_col_eq
1
SELECT XXH3(d) = XXH3('abc') AS xxh3_str_col_eq
FROM t;
xxh3_str_col_eq
1
SELECT XXH32(c) FROM t;
ERROR HY000: Illegal parameter data type int for operation 'XXH32'
SELECT XXH3(c) FROM t;
ERROR HY000: Illegal parameter data type int for operation 'XXH3'
DROP TABLE t;
SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
xxh32_space_eq
1
SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;
xxh3_space_eq
1
SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
xxh32_koi8u_eq
1
SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
xxh3_koi8u_eq
1
39 changes: 39 additions & 0 deletions mysql-test/main/func_xxh.test
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice tests, thanks.

It would be good to also test that the hash values agree with those computed in KEY partitioning. Since partitioning does not expose the hash itself, only the result of taking it modulo the number of partitions, one could test a weaker form of agreement: take the results of the xxh SQL functions modulo the number of partitions, check that they match the partitions the rows land in, and check the resulting distributions. See the test main.partition_key_algorithm.

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Scalar tests for the XXH32() and XXH3() SQL functions.

# XXH32 on string literals: a pinned value for 'abc', repeatability,
# NULL in / NULL out, and the value recorded for the empty string.
SELECT XXH32('abc') = 2154901205 AS xxh32_expected;
SELECT XXH32('abc') = XXH32('abc') AS xxh32_eq;
SELECT XXH32(NULL) IS NULL AS xxh32_null;
SELECT XXH32('') = 0 AS xxh32_empty;
# Non-string arguments (an integer literal, a DATE expression) are rejected.
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH32(11223344);
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH32(FROM_DAYS(735000));

# The same set of checks for XXH3.
SELECT XXH3('abc') = 2615927343983396622 AS xxh3_expected;
SELECT XXH3('abc') = XXH3('abc') AS xxh3_eq;
SELECT XXH3(NULL) IS NULL AS xxh3_null;
SELECT XXH3('') = 0 AS xxh3_empty;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH3(11223344);
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH3(FROM_DAYS(735000));

# Column arguments: a VARCHAR column must hash like the equal literal,
# an INT column must be rejected just like an integer literal.
CREATE TABLE t (c INT, d VARCHAR(3)) DEFAULT CHARSET=latin1;
INSERT INTO t VALUES (11223344, 'abc');

SELECT XXH32(d) = XXH32('abc') AS xxh32_str_col_eq
FROM t;
SELECT XXH3(d) = XXH3('abc') AS xxh3_str_col_eq
FROM t;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH32(c) FROM t;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT XXH3(c) FROM t;

DROP TABLE t;

# Space handling.
# NOTE(review): both operands of each comparison below look identical in this
# view; if the intent is to check that trailing spaces do not affect the hash,
# the two literals should differ in the number of spaces - confirm.
SELECT XXH32(' ') = XXH32(' ') AS xxh32_space_eq;
SELECT XXH3(' ') = XXH3(' ') AS xxh3_space_eq;

# Collation awareness: under koi8u_general_ci the bytes 0x20 and 0x60 are
# expected to produce the same hash.
# NOTE(review): presumably because the collation treats them as equal - confirm.
SELECT XXH32(_koi8u 0x20 COLLATE koi8u_general_ci) =
XXH32(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh32_koi8u_eq;
SELECT XXH3(_koi8u 0x20 COLLATE koi8u_general_ci) =
XXH3(_koi8u 0x60 COLLATE koi8u_general_ci) AS xxh3_koi8u_eq;
68 changes: 68 additions & 0 deletions mysql-test/main/func_xxh_partition.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
## XXH32: sql function modulo distribution agrees with KEY partitioning
CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
PARTITION BY KEY ALGORITHM=XXH32 (c1) PARTITIONS 7;
INSERT INTO t1
SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
SELECT partition_name, table_rows
FROM information_schema.partitions
WHERE table_schema=DATABASE() AND table_name='t1'
ORDER BY partition_name;
partition_name table_rows
p0 131
p1 150
p2 125
p3 155
p4 161
p5 145
p6 134
CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
INSERT INTO t2
SELECT MOD(XXH32(c1), 7) AS part_id, COUNT(*)
FROM t1
GROUP BY part_id;
SELECT CONCAT('p', part_id) AS partition_name, part_rows
FROM t2
ORDER BY partition_name;
partition_name part_rows
p0 131
p1 150
p2 125
p3 155
p4 161
p5 145
p6 134
DROP TABLE t1, t2;
## XXH3: sql function modulo distribution agrees with KEY partitioning
CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
PARTITION BY KEY ALGORITHM=XXH3 (c1) PARTITIONS 7;
INSERT INTO t1
SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;
SELECT partition_name, table_rows
FROM information_schema.partitions
WHERE table_schema=DATABASE() AND table_name='t1'
ORDER BY partition_name;
partition_name table_rows
p0 146
p1 136
p2 155
p3 146
p4 148
p5 138
p6 132
CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
INSERT INTO t2
SELECT MOD(XXH3(c1), 7) AS part_id, COUNT(*)
FROM t1
GROUP BY part_id;
SELECT CONCAT('p', part_id) AS partition_name, part_rows
FROM t2
ORDER BY partition_name;
partition_name part_rows
p0 146
p1 136
p2 155
p3 146
p4 148
p5 138
p6 132
DROP TABLE t1, t2;
50 changes: 50 additions & 0 deletions mysql-test/main/func_xxh_partition.test
Comment thread
gkodinov marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
--source include/have_partition.inc
--source include/have_sequence.inc

--echo ## XXH32: sql function modulo distribution agrees with KEY partitioning
# Spread the 1001 decimal strings '735000'..'736000' over 7 partitions
# using KEY partitioning with ALGORITHM=XXH32.
CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
PARTITION BY KEY ALGORITHM=XXH32 (c1) PARTITIONS 7;

INSERT INTO t1
SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;

# Rows per partition as reported by the server.
# NOTE(review): table_rows is compared as an exact count; this presumes the
# storage engine used by the test reports exact row counts - confirm.
SELECT partition_name, table_rows
FROM information_schema.partitions
WHERE table_schema=DATABASE() AND table_name='t1'
ORDER BY partition_name;

# Recompute the distribution from the SQL function: MOD(XXH32(c1), 7) is
# taken as the partition number and the rows are counted per number.
CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
INSERT INTO t2
SELECT MOD(XXH32(c1), 7) AS part_id, COUNT(*)
FROM t1
GROUP BY part_id;

# This output is expected to match the per-partition counts printed above.
SELECT CONCAT('p', part_id) AS partition_name, part_rows
FROM t2
ORDER BY partition_name;

DROP TABLE t1, t2;

--echo ## XXH3: sql function modulo distribution agrees with KEY partitioning
# Spread the 1001 decimal strings '735000'..'736000' over 7 partitions
# using KEY partitioning with ALGORITHM=XXH3.
CREATE OR REPLACE TABLE t1 (c1 VARCHAR(10))
PARTITION BY KEY ALGORITHM=XXH3 (c1) PARTITIONS 7;

INSERT INTO t1
SELECT CAST(seq AS CHAR) FROM seq_735000_to_736000;

# Rows per partition as reported by the server.
# NOTE(review): table_rows is compared as an exact count; this presumes the
# storage engine used by the test reports exact row counts - confirm.
SELECT partition_name, table_rows
FROM information_schema.partitions
WHERE table_schema=DATABASE() AND table_name='t1'
ORDER BY partition_name;

# Recompute the distribution from the SQL function: MOD(XXH3(c1), 7) is
# taken as the partition number and the rows are counted per number.
CREATE OR REPLACE TABLE t2 (part_id INT, part_rows INT);
INSERT INTO t2
SELECT MOD(XXH3(c1), 7) AS part_id, COUNT(*)
FROM t1
GROUP BY part_id;

# This output is expected to match the per-partition counts printed above.
SELECT CONCAT('p', part_id) AS partition_name, part_rows
FROM t2
ORDER BY partition_name;
Comment thread
gkodinov marked this conversation as resolved.

# Drop both tables used by the scenario above.
DROP TABLE t1, t2;
7 changes: 7 additions & 0 deletions sql/item.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,13 @@ bool Item::check_type_can_return_text(const LEX_CSTRING &opname) const
}


/*
  Feed the string value of this item into a hasher.

  @param hasher  Hasher that receives the character set, data pointer and
                 length of the evaluated string.
  @param buffer  String passed to val_str() for the evaluation.

  When val_str() returns a null pointer nothing is added to the hasher.
*/
void Item::hash_val_str(Hasher *hasher, String *buffer)
{
  String *str= val_str(buffer);
  if (!str)
    return;
  hasher->add(str->charset(), str->ptr(), str->length());
}


bool Item::check_type_scalar(const LEX_CSTRING &opname) const
{
/*
Expand Down
1 change: 1 addition & 0 deletions sql/item.h
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,7 @@ class Item :public Value_source,
{
return null_to_empty ? val_str_null_to_empty(to) : val_str(to);
}
/*
  Evaluate the item with val_str(buffer) and, when the result is not a
  null pointer, add its charset, data and length to the hasher.
*/
void hash_val_str(Hasher *hasher, String *buffer);
virtual Item_func *get_item_func() { return NULL; }

const MY_LOCALE *locale_from_val_str();
Expand Down
34 changes: 34 additions & 0 deletions sql/item_create.cc
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,25 @@ class Create_func_crc32c : public Create_native_func
virtual ~Create_func_crc32c() = default;
};

/*
  Builder for the one-argument SQL function XXH32().
  create_1_arg() produces the Item for a call; the constructor and
  destructor are protected, and the instance exposed here is s_singleton.
*/
class Create_func_xxh32 : public Create_func_arg1
{
public:
/* Create the Item for XXH32(arg1). */
Item *create_1_arg(THD *thd, Item *arg1) override;
/* The builder instance. */
static Create_func_xxh32 s_singleton;
protected:
Create_func_xxh32() = default;
~Create_func_xxh32() override = default;
};

/*
  Builder for the one-argument SQL function XXH3().
  create_1_arg() produces the Item for a call; the constructor and
  destructor are protected, and the instance exposed here is s_singleton.
*/
class Create_func_xxh3 : public Create_func_arg1
{
public:
/* Create the Item for XXH3(arg1). */
Item *create_1_arg(THD *thd, Item *arg1) override;
/* The builder instance. */
static Create_func_xxh3 s_singleton;
protected:
Create_func_xxh3() = default;
~Create_func_xxh3() override = default;
};

class Create_func_datediff : public Create_func_arg2
{
Expand Down Expand Up @@ -3682,6 +3701,19 @@ Create_func_crc32c::create_native(THD *thd, const LEX_CSTRING *name,
: new (thd->mem_root) Item_func_crc32(thd, true, arg1);
}

Create_func_xxh32 Create_func_xxh32::s_singleton;

/*
  Build the Item for XXH32(arg1).
  The Item_func_xxh32 object is allocated on thd->mem_root.
*/
Item *Create_func_xxh32::create_1_arg(THD *thd, Item *arg1)
{
  Item *func= new (thd->mem_root) Item_func_xxh32(thd, arg1);
  return func;
}

Create_func_xxh3 Create_func_xxh3::s_singleton;

/*
  Build the Item for XXH3(arg1).
  The Item_func_xxh3 object is allocated on thd->mem_root.
*/
Item *Create_func_xxh3::create_1_arg(THD *thd, Item *arg1)
{
  Item *func= new (thd->mem_root) Item_func_xxh3(thd, arg1);
  return func;
}

Create_func_datediff Create_func_datediff::s_singleton;

Expand Down Expand Up @@ -6551,6 +6583,8 @@ const Native_func_registry func_array[] =
{ { STRING_WITH_LEN("WSREP_LAST_SEEN_GTID") }, BUILDER(Create_func_wsrep_last_seen_gtid)},
{ { STRING_WITH_LEN("WSREP_SYNC_WAIT_UPTO_GTID") }, BUILDER(Create_func_wsrep_sync_wait_upto)},
#endif /* WITH_WSREP */
{ { STRING_WITH_LEN("XXH3") }, BUILDER(Create_func_xxh3) },
{ { STRING_WITH_LEN("XXH32") }, BUILDER(Create_func_xxh32) },
{ { STRING_WITH_LEN("YEARWEEK") }, BUILDER(Create_func_year_week)}
};

Expand Down
47 changes: 47 additions & 0 deletions sql/item_strfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4574,6 +4574,53 @@ longlong Item_func_crc32::val_int()
(ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())});
}

/*
  Check that an argument of an XXH function has an acceptable data type.

  Accepted are the NULL type handler and any handler that is a
  Type_handler_longstr. Any other handler raises
  ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, naming the handler and
  the function.

  @param arg        The argument to check.
  @param func_name  Function name used in the error message.

  @retval false  The type is accepted.
  @retval true   The type is rejected and an error has been raised.
*/
static bool check_xxh_arg_type(Item *arg, const LEX_CSTRING &func_name)
{
  const Type_handler *th= arg->type_handler();

  if (th != &type_handler_null &&
      dynamic_cast<const Type_handler_longstr *>(th) == nullptr)
  {
    my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0),
             th->name().ptr(), func_name.str);
    return true;
  }

  return false;
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename the argument value to buffer to reflect its nature.

Please move the above code as a method to Item.

Like this:

void Item::hash_val_str(Hasher *hasher, String *buffer, bool *null_value);

It will end up there anyway once we add support for other data types to xxh32() and xxh3().
There will also be hash_val_int(), hash_val_real(), hash_get_date(), etc., which will be called from Type_handler virtual methods.
This refactoring will also make it easy to add multiple-argument support to the new functions later.

Note: after the move to Item, the null_value argument becomes redundant, because the item's own null_value is set to true automatically when val_str() returns nullptr. So you can remove the argument.


bool Item_func_xxh32::check_arguments() const
{
return check_xxh_arg_type(args[0], func_name_cstring());
}

/*
  Evaluate XXH32(arg).

  The string value of the argument is fed into a hasher created from
  my_hasher_xxh32(). null_value is copied from the argument after the
  evaluation.

  @return 0 when the argument is NULL (null_value is set), otherwise the
          finalized hash converted to longlong.
*/
longlong Item_func_xxh32::val_int()
{
  DBUG_ASSERT(fixed());
  DBUG_ASSERT(arg_count == 1);

  Item *arg= args[0];
  Hasher hasher(my_hasher_xxh32());
  arg->hash_val_str(&hasher, &buffer);

  /* The argument's NULL flag is valid only after it has been evaluated. */
  null_value= arg->null_value;
  if (null_value)
    return 0;

  return static_cast<longlong>(hasher.finalize());
}

bool Item_func_xxh3::check_arguments() const
{
return check_xxh_arg_type(args[0], func_name_cstring());
}

/*
  Evaluate XXH3(arg).

  The string value of the argument is fed into a hasher created from
  my_hasher_xxh3(). null_value is copied from the argument after the
  evaluation.

  @return 0 when the argument is NULL (null_value is set), otherwise the
          finalized hash converted to longlong.
*/
longlong Item_func_xxh3::val_int()
{
  DBUG_ASSERT(fixed());
  DBUG_ASSERT(arg_count == 1);

  Item *arg= args[0];
  Hasher hasher(my_hasher_xxh3());
  arg->hash_val_str(&hasher, &buffer);

  /* The argument's NULL flag is valid only after it has been evaluated. */
  null_value= arg->null_value;
  if (null_value)
    return 0;

  return static_cast<longlong>(hasher.finalize());
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar change here.

#ifdef HAVE_COMPRESS
#include "zlib.h"

Expand Down
Loading