diff --git a/cpp/src/arrow/util/bpacking_simd128_generated.h b/cpp/src/arrow/util/bpacking_simd128_generated.h index f7700fd0e765..dca692971d60 100644 --- a/cpp/src/arrow/util/bpacking_simd128_generated.h +++ b/cpp/src/arrow/util/bpacking_simd128_generated.h @@ -36,7 +36,13 @@ using ::arrow::util::SafeLoad; template struct UnpackBits128 { -using simd_batch = xsimd::batch; +#ifdef ARROW_HAVE_NEON +using simd_arch = xsimd::neon64; +#else +using simd_arch = xsimd::sse4_2; +#endif + +using simd_batch = xsimd::batch; inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) { memset(out, 0x0, 32 * sizeof(*out)); diff --git a/cpp/src/arrow/util/bpacking_simd256_generated.h b/cpp/src/arrow/util/bpacking_simd256_generated.h index a73bafe17e53..9fa0ded9878c 100644 --- a/cpp/src/arrow/util/bpacking_simd256_generated.h +++ b/cpp/src/arrow/util/bpacking_simd256_generated.h @@ -36,7 +36,8 @@ using ::arrow::util::SafeLoad; template struct UnpackBits256 { -using simd_batch = xsimd::batch; +using simd_arch = xsimd::avx2; +using simd_batch = xsimd::batch; inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) { memset(out, 0x0, 32 * sizeof(*out)); diff --git a/cpp/src/arrow/util/bpacking_simd512_generated.h b/cpp/src/arrow/util/bpacking_simd512_generated.h index 2a62c962cd0b..d5d643878fab 100644 --- a/cpp/src/arrow/util/bpacking_simd512_generated.h +++ b/cpp/src/arrow/util/bpacking_simd512_generated.h @@ -36,7 +36,8 @@ using ::arrow::util::SafeLoad; template struct UnpackBits512 { -using simd_batch = xsimd::batch; +using simd_arch = xsimd::avx512bw; +using simd_batch = xsimd::batch; inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) { memset(out, 0x0, 32 * sizeof(*out)); diff --git a/cpp/src/arrow/util/bpacking_simd_codegen.py b/cpp/src/arrow/util/bpacking_simd_codegen.py index 805735e39187..9bdc22569ec0 100644 --- a/cpp/src/arrow/util/bpacking_simd_codegen.py +++ b/cpp/src/arrow/util/bpacking_simd_codegen.py @@ -152,6 +152,19 @@ def main(simd_width): struct_name = f"UnpackBits{simd_width}" + define_simd_arch = { + # ugly format to get aligned output + 128: dedent("""\ + #ifdef ARROW_HAVE_NEON + using simd_arch = xsimd::neon64; + #else + using simd_arch = xsimd::sse4_2; + #endif + """), + 256: "using simd_arch = xsimd::avx2;", + 512: "using simd_arch = xsimd::avx512bw;" + } + # NOTE: templating the UnpackBits struct on the dispatch level avoids # potential name collisions if there are several UnpackBits generations # with the same SIMD width on a given architecture. @@ -176,7 +189,8 @@ def main(simd_width): template struct {struct_name} {{ - using simd_batch = xsimd::batch; + {define_simd_arch[simd_width]} + using simd_batch = xsimd::batch; """)) gen = UnpackGenerator(simd_width) diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h index c5a100ff267d..cb473dd1d6e3 100644 --- a/cpp/src/arrow/util/utf8.h +++ b/cpp/src/arrow/util/utf8.h @@ -248,7 +248,11 @@ inline bool ValidateAsciiSw(const uint8_t* data, int64_t len) { #if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2) inline bool ValidateAsciiSimd(const uint8_t* data, int64_t len) { - using simd_batch = xsimd::batch; +#ifdef ARROW_HAVE_NEON + using simd_batch = xsimd::batch; +#else + using simd_batch = xsimd::batch; +#endif if (len >= 32) { const simd_batch zero(static_cast(0)); @@ -256,8 +260,8 @@ inline bool ValidateAsciiSimd(const uint8_t* data, int64_t len) { simd_batch or1 = zero, or2 = zero; while (len >= 32) { - or1 |= simd_batch(reinterpret_cast(data), xsimd::unaligned_mode{}); - or2 |= simd_batch(reinterpret_cast(data2), xsimd::unaligned_mode{}); + or1 |= simd_batch::load_unaligned(reinterpret_cast(data)); + or2 |= simd_batch::load_unaligned(reinterpret_cast(data2)); data += 32; data2 += 32; len -= 32; diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 4223286fb2d3..6e1d2c1ad3e1 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -82,8 +82,8 @@ ARROW_THRIFT_BUILD_VERSION=0.13.0 ARROW_THRIFT_BUILD_SHA256_CHECKSUM=7ad348b88033af46ce49148097afe354d513c1fca7c607b59c33ebb6064b5179 ARROW_UTF8PROC_BUILD_VERSION=v2.6.1 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4c06a9dc4017e8a2438ef80ee371d45868bda2237a98b26554de7a95406b283b -ARROW_XSIMD_BUILD_VERSION=e9234cd6e6f4428fc260073b2c34ffe86fda1f34 -ARROW_XSIMD_BUILD_SHA256_CHECKSUM=1e98bae41abae7f3f6fa4c70ec2dcad008d831876009aa047fb69fd5b24076fd +ARROW_XSIMD_BUILD_VERSION=f212f3c3801924bf218bc39705230a747467edcb +ARROW_XSIMD_BUILD_SHA256_CHECKSUM=8f362dfbc12026e6689563cbe5f1680443d50c2e133110245d270da5e2edc0a9 ARROW_ZLIB_BUILD_VERSION=1.2.11 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 ARROW_ZSTD_BUILD_VERSION=v1.5.0