From 64758be07c6442d24a2222c68d6701a99ef56fa8 Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Mon, 17 Feb 2025 10:59:51 +0100
Subject: [PATCH 1/5] implement limits for new fp types

---
 .../cuda/std/__cccl/extended_data_types.h     |  12 +
 .../is_extended_floating_point.h              |  43 +++
 libcudacxx/include/cuda/std/limits            | 350 ++++++++++++++++++
 .../limits/is_specialized.pass.cpp            |  14 +
 .../limits/numeric.limits.members/common.h    |  83 ++++-
 .../const_data_members.pass.cpp               |  10 +
 .../denorm_min.pass.cpp                       |  11 +
 .../numeric.limits.members/digits.pass.cpp    |  10 +
 .../numeric.limits.members/digits10.pass.cpp  |  10 +
 .../numeric.limits.members/epsilon.pass.cpp   |  10 +
 .../has_denorm.pass.cpp                       |  10 +
 .../has_denorm_loss.pass.cpp                  |  10 +
 .../has_infinity.pass.cpp                     |  10 +
 .../has_quiet_NaN.pass.cpp                    |  10 +
 .../has_signaling_NaN.pass.cpp                |  10 +
 .../numeric.limits.members/infinity.pass.cpp  |  72 ++--
 .../is_bounded.pass.cpp                       |  10 +
 .../numeric.limits.members/is_exact.pass.cpp  |  10 +
 .../numeric.limits.members/is_iec559.pass.cpp |  10 +
 .../is_integer.pass.cpp                       |  10 +
 .../numeric.limits.members/is_modulo.pass.cpp |  10 +
 .../numeric.limits.members/is_signed.pass.cpp |  10 +
 .../numeric.limits.members/lowest.pass.cpp    |  10 +
 .../numeric.limits.members/max.pass.cpp       |  10 +
 .../max_digits10.pass.cpp                     |  10 +
 .../max_exponent.pass.cpp                     |  10 +
 .../max_exponent10.pass.cpp                   |  10 +
 .../numeric.limits.members/min.pass.cpp       |  10 +
 .../min_exponent.pass.cpp                     |  10 +
 .../min_exponent10.pass.cpp                   |  10 +
 .../numeric.limits.members/quiet_NaN.pass.cpp |  44 +--
 .../numeric.limits.members/radix.pass.cpp     |  10 +
 .../round_error.pass.cpp                      |  10 +
 .../round_style.pass.cpp                      |  10 +
 .../signaling_NaN.pass.cpp                    |  37 +-
 .../tinyness_before.pass.cpp                  |  10 +
 .../numeric.limits.members/traps.pass.cpp     |  10 +
 37 files changed, 849 insertions(+), 97 deletions(-)

diff --git a/libcudacxx/include/cuda/std/__cccl/extended_data_types.h b/libcudacxx/include/cuda/std/__cccl/extended_data_types.h
index 1354389e24c..7c4638462bf 100644
--- a/libcudacxx/include/cuda/std/__cccl/extended_data_types.h
+++ b/libcudacxx/include/cuda/std/__cccl/extended_data_types.h
@@ -27,6 +27,8 @@
 #include <cuda/std/__cccl/preprocessor.h>
 
 #define _CCCL_HAS_INT128()   0
+#define _CCCL_HAS_NVFP4()    0
+#define _CCCL_HAS_NVFP6()    0
 #define _CCCL_HAS_NVFP8()    0
 #define _CCCL_HAS_NVFP16()   0
 #define _CCCL_HAS_NVBF16()   0
@@ -54,6 +56,16 @@
 #  define _CCCL_HAS_NVFP8() 1
 #endif
 
+#if _CCCL_HAS_INCLUDE(<cuda_fp6.h>) && _CCCL_HAS_NVFP8() && !defined(CCCL_DISABLE_NVFP6_SUPPORT)
+#  undef _CCCL_HAS_NVFP6
+#  define _CCCL_HAS_NVFP6() 1
+#endif
+
+#if _CCCL_HAS_INCLUDE(<cuda_fp4.h>) && _CCCL_HAS_NVFP6() && !defined(CCCL_DISABLE_NVFP4_SUPPORT)
+#  undef _CCCL_HAS_NVFP4
+#  define _CCCL_HAS_NVFP4() 1
+#endif
+
 // NVC++ supports float128 only in host code
 #if !defined(CCCL_DISABLE_FLOAT128_SUPPORT) && _CCCL_OS(LINUX)                     \
   && ((_CCCL_COMPILER(NVRTC) && defined(__CUDACC_RTC_FLOAT128__)) /*NVRTC*/        \
diff --git a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
index 6ea27552011..240f6f81e89 100644
--- a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
+++ b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
@@ -37,6 +37,14 @@ _CCCL_DIAG_POP
 #  include <cuda_fp8.h>
 #endif // _CCCL_HAS_NVFP8()
 
+#if _CCCL_HAS_NVFP6()
+#  include <cuda_fp6.h>
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+#  include <cuda_fp4.h>
+#endif // _CCCL_HAS_NVFP4()
+
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
@@ -83,14 +91,49 @@ template <>
 struct __is_extended_floating_point<__nv_fp8_e5m2> : true_type
 {};
 
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+template <>
+struct __is_extended_floating_point<__nv_fp8_e8m0> : true_type
+{};
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
+
 #  ifndef _CCCL_NO_INLINE_VARIABLES
 template <>
 _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp8_e4m3> = true;
 template <>
 _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp8_e5m2> = true;
+#    if _CCCL_CUDACC_AT_LEAST(12, 8)
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp8_e8m0> = true;
+#    endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #  endif // !_CCCL_NO_INLINE_VARIABLES
 #endif // _CCCL_HAS_NVFP8()
 
+#if _CCCL_HAS_NVFP6()
+template <>
+struct __is_extended_floating_point<__nv_fp6_e2m3> : true_type
+{};
+template <>
+struct __is_extended_floating_point<__nv_fp6_e3m2> : true_type
+{};
+#  ifndef _CCCL_NO_INLINE_VARIABLES
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp6_e2m3> = true;
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp6_e3m2> = true;
+#  endif // !_CCCL_NO_INLINE_VARIABLES
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+template <>
+struct __is_extended_floating_point<__nv_fp4_e2m1> : true_type
+{};
+#  ifndef _CCCL_NO_INLINE_VARIABLES
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__nv_fp4_e2m1> = true;
+#  endif // !_CCCL_NO_INLINE_VARIABLES
+#endif // _CCCL_HAS_NVFP4()
+
 _LIBCUDACXX_END_NAMESPACE_STD
 
 #endif // _LIBCUDACXX___TYPE_TRAITS_IS_EXTENDED_FLOATING_POINT_H
diff --git a/libcudacxx/include/cuda/std/limits b/libcudacxx/include/cuda/std/limits
index 1c6eeac23b9..19464640d90 100644
--- a/libcudacxx/include/cuda/std/limits
+++ b/libcudacxx/include/cuda/std/limits
@@ -44,6 +44,14 @@ _CCCL_DIAG_POP
 #  include <cuda_fp8.h>
 #endif // _CCCL_HAS_NVFP8()
 
+#if _CCCL_HAS_NVFP6()
+#  include <cuda_fp6.h>
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+#  include <cuda_fp4.h>
+#endif // _CCCL_HAS_NVFP4()
+
 _CCCL_PUSH_MACROS
 
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
@@ -940,8 +948,350 @@ public:
   static constexpr bool tinyness_before          = false;
   static constexpr float_round_style round_style = round_to_nearest;
 };
+
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+template <>
+class __numeric_limits_impl<__nv_fp8_e8m0, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e8m0 __make_value(__nv_fp8_storage_t __val)
+  {
+#    if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp8_e8m0>(__val);
+#    else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp8_e8m0 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#    endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp8_e8m0;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = false;
+  static constexpr int digits       = 0;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 1;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xfeu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
+  }
+
+  static constexpr int min_exponent   = -127;
+  static constexpr int min_exponent10 = -39;
+  static constexpr int max_exponent   = 127;
+  static constexpr int max_exponent10 = 38;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_absent;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xffu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return type{};
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_toward_zero;
+};
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
 
+#if _CCCL_HAS_NVFP6()
+template <>
+class __numeric_limits_impl<__nv_fp6_e2m3, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e2m3 __make_value(__nv_fp6_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp6_e2m3>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp6_e2m3 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp6_e2m3;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 3;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 2;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+template <>
+class __numeric_limits_impl<__nv_fp6_e3m2, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e3m2 __make_value(__nv_fp6_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp6_e3m2>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp6_e3m2 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp6_e3m2;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 2;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
+  }
+
+  static constexpr int min_exponent   = -2;
+  static constexpr int min_exponent10 = -1;
+  static constexpr int max_exponent   = 4;
+  static constexpr int max_exponent10 = 1;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+template <>
+class __numeric_limits_impl<__nv_fp4_e2m1, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp4_e2m1 __make_value(__nv_fp4_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp4_e2m1>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp4_e2m1 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp4_e2m1;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 1;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x2u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x7u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0xfu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 2;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#endif // _CCCL_HAS_NVFP4()
+
 template <class _Tp>
 class numeric_limits : public __numeric_limits_impl<_Tp>
 {};
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/is_specialized.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/is_specialized.pass.cpp
index 3030c6b15db..e5a2ffc01c5 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/is_specialized.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/is_specialized.pass.cpp
@@ -74,6 +74,20 @@ int main(int, char**)
 #if _CCCL_HAS_NVBF16()
   test<__nv_bfloat16>();
 #endif // _CCCL_HAS_NVBF16
+#if _CCCL_HAS_NVFP8()
+  test<__nv_fp8_e4m3>();
+  test<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>();
+#  endif // _CUDACC_AT_LEAST(12, 8)
+#endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>();
+  test<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4()
 
   static_assert(!cuda::std::numeric_limits<cuda::std::complex<double>>::is_specialized,
                 "!cuda::std::numeric_limits<cuda::std::complex<double> >::is_specialized");
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/common.h b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/common.h
index 3d91abb6255..27ab6d0f699 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/common.h
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/common.h
@@ -11,11 +11,16 @@
 #define NUMERIC_LIMITS_MEMBERS_COMMON_H
 
 // Disable all the extended floating point operations and conversions
-#define __CUDA_NO_FP8_CONVERSIONS__      1
-#define __CUDA_NO_HALF_CONVERSIONS__     1
-#define __CUDA_NO_HALF_OPERATORS__       1
-#define __CUDA_NO_BFLOAT16_CONVERSIONS__ 1
-#define __CUDA_NO_BFLOAT16_OPERATORS__   1
+#define __CUDA_NO_FP4_CONVERSIONS__          1
+#define __CUDA_NO_FP4_CONVERSION_OPERATORS__ 1
+#define __CUDA_NO_FP6_CONVERSIONS__          1
+#define __CUDA_NO_FP6_CONVERSION_OPERATORS__ 1
+#define __CUDA_NO_FP8_CONVERSIONS__          1
+#define __CUDA_NO_FP8_CONVERSION_OPERATORS__ 1
+#define __CUDA_NO_HALF_CONVERSIONS__         1
+#define __CUDA_NO_HALF_OPERATORS__           1
+#define __CUDA_NO_BFLOAT16_CONVERSIONS__     1
+#define __CUDA_NO_BFLOAT16_OPERATORS__       1
 
 #include <cuda/std/__bit/bit_cast.h>
 #include <cuda/std/limits>
@@ -26,6 +31,46 @@ __host__ __device__ bool float_eq(T x, T y)
   return x == y;
 }
 
+#if _CCCL_HAS_NVFP4()
+__host__ __device__ inline __nv_fp4_e2m1 make_fp4_e2m1(double x)
+{
+  __nv_fp4_e2m1 res;
+  res.__x = __nv_cvt_double_to_fp4(x, __NV_E2M1, cudaRoundNearest);
+  return res;
+}
+
+__host__ __device__ inline bool float_eq(__nv_fp4_e2m1 x, __nv_fp4_e2m1 y)
+{
+  return x.__x == y.__x;
+}
+#endif // _CCCL_HAS_NVFP4
+
+#if _CCCL_HAS_NVFP6()
+__host__ __device__ inline __nv_fp6_e2m3 make_fp6_e2m3(double x)
+{
+  __nv_fp6_e2m3 res;
+  res.__x = __nv_cvt_double_to_fp6(x, __NV_E2M3, cudaRoundNearest);
+  return res;
+}
+
+__host__ __device__ inline __nv_fp6_e3m2 make_fp6_e3m2(double x)
+{
+  __nv_fp6_e3m2 res;
+  res.__x = __nv_cvt_double_to_fp6(x, __NV_E3M2, cudaRoundNearest);
+  return res;
+}
+
+__host__ __device__ inline bool float_eq(__nv_fp6_e2m3 x, __nv_fp6_e2m3 y)
+{
+  return x.__x == y.__x;
+}
+
+__host__ __device__ inline bool float_eq(__nv_fp6_e3m2 x, __nv_fp6_e3m2 y)
+{
+  return x.__x == y.__x;
+}
+#endif // _CCCL_HAS_NVFP6
+
 #if _CCCL_HAS_NVFP8()
 __host__ __device__ inline __nv_fp8_e4m3 make_fp8_e4m3(double x, __nv_saturation_t sat = __NV_NOSAT)
 {
@@ -41,23 +86,31 @@ __host__ __device__ inline __nv_fp8_e5m2 make_fp8_e5m2(double x, __nv_saturation
   return res;
 }
 
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+__host__ __device__ inline __nv_fp8_e8m0 make_fp8_e8m0(double x, __nv_saturation_t sat = __NV_NOSAT)
+{
+  __nv_fp8_e8m0 res;
+  res.__x = __nv_cvt_double_to_e8m0(x, sat, cudaRoundZero);
+  return res;
+}
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
+
 __host__ __device__ inline bool float_eq(__nv_fp8_e4m3 x, __nv_fp8_e4m3 y)
 {
-#  if _CCCL_CUDACC_AT_LEAST(12, 2)
-  return float_eq(__half{__nv_cvt_fp8_to_halfraw(x.__x, __NV_E4M3)}, __half{__nv_cvt_fp8_to_halfraw(y.__x, __NV_E4M3)});
-#  else
-  return ::cuda::std::bit_cast<unsigned char>(x) == ::cuda::std::bit_cast<unsigned char>(y);
-#  endif
+  return x.__x == y.__x;
 }
 
 __host__ __device__ inline bool float_eq(__nv_fp8_e5m2 x, __nv_fp8_e5m2 y)
 {
-#  if _CCCL_CUDACC_AT_LEAST(12, 2)
-  return float_eq(__half{__nv_cvt_fp8_to_halfraw(x.__x, __NV_E5M2)}, __half{__nv_cvt_fp8_to_halfraw(y.__x, __NV_E5M2)});
-#  else
-  return ::cuda::std::bit_cast<unsigned char>(x) == ::cuda::std::bit_cast<unsigned char>(y);
-#  endif
+  return x.__x == y.__x;
+}
+
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+__host__ __device__ inline bool float_eq(__nv_fp8_e8m0 x, __nv_fp8_e8m0 y)
+{
+  return x.__x == y.__x;
 }
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8
 
 #if _CCCL_HAS_NVFP16()
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
index 49545ca6f15..e26b80689a1 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
@@ -119,7 +119,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test_type<__nv_fp8_e4m3>();
   test_type<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test_type<__nv_fp8_e8m0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test_type<__nv_fp6_e2m3>();
+  test_type<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test_type<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
index 9c8a6a97bc7..494cf76b23b 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
@@ -75,7 +75,18 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(0.001953125));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(0.0000152587890625));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(__nv_fp8_e8m0{});
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(0.125));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(0.0625));
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(0.5));
+#endif // _CCCL_HAS_NVFP4
+
 #if !defined(__FLT_DENORM_MIN__) && !defined(FLT_TRUE_MIN)
 #  error Test has no expected values for floating point types
 #endif
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
index 7e32bd3bde0..14ad49ea2f7 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
@@ -64,7 +64,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, 3>();
   test<__nv_fp8_e5m2, 2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, 0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, 3>();
+  test<__nv_fp6_e3m2, 2>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, 1>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
index 63ef4f23fed..f71674777a3 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
@@ -83,7 +83,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>();
   test<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>();
+  test<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
index ddaabf41987..9b3d9de9f8a 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
@@ -66,7 +66,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(0.125));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(0.25));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(make_fp8_e8m0(1.0));
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(0.125));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(0.25));
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(0.5));
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
index 7cdb61fc428..35efdd8646d 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, cuda::std::denorm_present>();
   test<__nv_fp8_e5m2, cuda::std::denorm_present>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, cuda::std::denorm_absent>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, cuda::std::denorm_present>();
+  test<__nv_fp6_e3m2, cuda::std::denorm_present>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, cuda::std::denorm_present>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
index 7b729a642c7..b3a3b3e5cd8 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
index 8f00af449ee..4c922e0cc68 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, true>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
index 75ce85c5148..600f1bbeb2e 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, true>();
   test<__nv_fp8_e5m2, true>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, true>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
index 664b3488acd..c0427d145a6 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, true>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
index 5cc08a755f1..796f13ad79c 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
@@ -14,13 +14,11 @@
 #include <cuda/std/cfloat>
 #include <cuda/std/limits>
 
-#include "common.h"
-
-// MSVC has issues with producing INF with divisions by zero.
-#if defined(_MSC_VER)
+#if _CCCL_COMPILER(MSVC)
 #  include <cmath>
-#endif
+#endif // _CCCL_COMPILER(MSVC)
 
+#include "common.h"
 #include "test_macros.h"
 
 template <class T>
@@ -34,6 +32,13 @@ __host__ __device__ void test(T expected)
 
 int main(int, char**)
 {
+  // MSVC has problems producing infinity from 1.0 / 0.0
+#if _CCCL_COMPILER(MSVC)
+  const double inf = INFINITY;
+#else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC) vvv
+  const double inf = 1.0 / 0.0;
+#endif // ^^^ !_CCCL_COMPILER(MSVC) ^^^
+
   test<bool>(false);
   test<char>(0);
   test<signed char>(0);
@@ -58,40 +63,31 @@ int main(int, char**)
   test<__int128_t>(0);
   test<__uint128_t>(0);
 #endif // _CCCL_HAS_INT128()
-#if !defined(_MSC_VER)
-  test<float>(1.f / 0.f);
-  test<double>(1. / 0.);
-#  ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
-  test<long double>(1. / 0.);
-#  endif
-#  if _CCCL_HAS_NVFP16()
-  test<__half>(__double2half(1.0 / 0.0));
-#  endif // _CCCL_HAS_NVFP16
-#  if _CCCL_HAS_NVBF16()
-  test<__nv_bfloat16>(__double2bfloat16(1.0 / 0.0));
-#  endif // _CCCL_HAS_NVBF16
-#  if _CCCL_HAS_NVFP8()
-  test<__nv_fp8_e4m3>(__nv_fp8_e4m3{});
-  test<__nv_fp8_e5m2>(make_fp8_e5m2(1.0 / 0.0));
-#  endif // _CCCL_HAS_NVFP8()
-// MSVC has issues with producing INF with divisions by zero.
-#else
-  test<float>(INFINITY);
-  test<double>(INFINITY);
-#  ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
-  test<long double>(INFINITY);
-#  endif
-#  if _CCCL_HAS_NVFP16()
-  test<__half>(__double2half(INFINITY));
-#  endif // _CCCL_HAS_NVFP16
-#  if _CCCL_HAS_NVBF16()
-  test<__nv_bfloat16>(__double2bfloat16(INFINITY));
-#  endif // _CCCL_HAS_NVBF16
-#  if _CCCL_HAS_NVFP8()
-  test<__nv_fp8_e4m3>(__nv_fp8_e4m3{});
-  test<__nv_fp8_e5m2>(make_fp8_e5m2(INFINITY));
-#  endif // _CCCL_HAS_NVFP8()
+  test<float>(inf);
+  test<double>(inf);
+#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
+  test<long double>(inf);
 #endif
+#if _CCCL_HAS_NVFP16()
+  test<__half>(__double2half(inf));
+#endif // _CCCL_HAS_NVFP16
+#if _CCCL_HAS_NVBF16()
+  test<__nv_bfloat16>(__double2bfloat16(inf));
+#endif // _CCCL_HAS_NVBF16
+#if _CCCL_HAS_NVFP8()
+  test<__nv_fp8_e4m3>(__nv_fp8_e4m3{});
+  test<__nv_fp8_e5m2>(make_fp8_e5m2(inf));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(__nv_fp8_e8m0{});
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
+#endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(__nv_fp6_e2m3{});
+  test<__nv_fp6_e3m2>(__nv_fp6_e3m2{});
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(__nv_fp4_e2m1{});
+#endif // _CCCL_HAS_NVFP4
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
index f5d8e650ab9..ee165007fa1 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, true>();
   test<__nv_fp8_e5m2, true>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, true>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, true>();
+  test<__nv_fp6_e3m2, true>();
+#endif // _CCCL_HAS_NVFP6
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, true>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
index b88695cc0e6..a16ba79d59d 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
index 7128802061d..b9b19f15772 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
index 1b5c07fc794..d9555507604 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
index 12739d25b18..e30fb9ba75c 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
index 6080b041ca4..191e5f4e344 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, true>();
   test<__nv_fp8_e5m2, true>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, true>();
+  test<__nv_fp6_e3m2, true>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, true>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
index 895408b38cd..721519316cc 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
@@ -75,7 +75,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(-448.0));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(-57344.0));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(make_fp8_e8m0(5.8774717541114375398436826861112283890933277838604376075437585313920e-39));
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(-7.5));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(-28.0));
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(-6.0));
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
index 1599fe0191f..b5c183deb9f 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
@@ -74,7 +74,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(448.0));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(57344.0));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(make_fp8_e8m0(3.40282366920938463463374607431768211456e+38));
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(7.5));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(28.0));
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(6.0));
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
index c7a1029a44e..c05539bfa82 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
@@ -78,7 +78,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>();
   test<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>();
+  test<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
index ab99a925adf..51de4a9fb22 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
@@ -71,7 +71,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, 8>();
   test<__nv_fp8_e5m2, 15>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, 127>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, 2>();
+  test<__nv_fp6_e3m2, 4>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, 2>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
index 1ccd0ea768f..875ea9d35f7 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
@@ -71,7 +71,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, 2>();
   test<__nv_fp8_e5m2, 4>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, 38>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, 0>();
+  test<__nv_fp6_e3m2, 1>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, 0>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
index 1bf5a4042d3..2cb8792f099 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
@@ -75,7 +75,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(0.015625));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(0.000061035));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(make_fp8_e8m0(5.8774717541114375398436826861112283890933277838604376075437585313920e-39));
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(1.0));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(0.25));
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(1.0));
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
index c4beb42cdd4..4fa8aea2f1b 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
@@ -71,7 +71,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, -6>();
   test<__nv_fp8_e5m2, -15>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, -127>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, 0>();
+  test<__nv_fp6_e3m2, -2>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, 0>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
index b4a981f33bf..f650aed220b 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
@@ -71,7 +71,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, -2>();
   test<__nv_fp8_e5m2, -5>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, -39>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, 0>();
+  test<__nv_fp6_e3m2, -1>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, 0>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
index 20755805896..4c929cdfe10 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
@@ -15,6 +15,7 @@
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
+#include "common.h"
 #include "test_macros.h"
 
 template <class T>
@@ -33,6 +34,13 @@ __host__ __device__ bool is_nan(__nv_fp8_e5m2 x)
 {
   return is_nan(__half{__nv_cvt_fp8_to_halfraw(x.__x, __NV_E5M2)});
 }
+
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+__host__ __device__ bool is_nan(__nv_fp8_e8m0 x)
+{
+  return x.__x == static_cast<__nv_fp8_storage_t>(0xffu);
+}
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
 
 template <class T>
@@ -44,31 +52,13 @@ __host__ __device__ void test_impl(cuda::std::true_type)
   assert(is_nan(cuda::std::numeric_limits<const volatile T>::quiet_NaN()));
 }
 
-template <class T>
-__host__ __device__ bool equal_to(T x, T y)
-{
-  return x == y;
-}
-
-#if _CCCL_HAS_NVFP8()
-__host__ __device__ bool equal_to(__nv_fp8_e4m3 x, __nv_fp8_e4m3 y)
-{
-  return x.__x == y.__x;
-}
-
-__host__ __device__ bool equal_to(__nv_fp8_e5m2 x, __nv_fp8_e5m2 y)
-{
-  return x.__x == y.__x;
-}
-#endif // _CCCL_HAS_NVFP8()
-
 template <class T>
 __host__ __device__ void test_impl(cuda::std::false_type)
 {
-  assert(equal_to(cuda::std::numeric_limits<T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<const T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<volatile T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<const volatile T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<const T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<volatile T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<const volatile T>::signaling_NaN(), T()));
 }
 
 template <class T>
@@ -117,7 +107,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>();
   test<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>();
+  test<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
index 5ce183910f7..49d8d7d51dd 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
@@ -64,7 +64,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, FLT_RADIX>();
   test<__nv_fp8_e5m2, FLT_RADIX>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, FLT_RADIX>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, FLT_RADIX>();
+  test<__nv_fp6_e3m2, FLT_RADIX>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, FLT_RADIX>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
index 2f215284025..0e7fee20cfa 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
@@ -66,7 +66,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>(make_fp8_e4m3(0.5));
   test<__nv_fp8_e5m2>(make_fp8_e5m2(0.5));
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>(make_fp8_e8m0(1.0));
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>(make_fp6_e2m3(0.5));
+  test<__nv_fp6_e3m2>(make_fp6_e3m2(0.5));
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>(make_fp4_e2m1(0.5));
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
index 70fbb0544dc..37d3ae36a05 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, cuda::std::round_to_nearest>();
   test<__nv_fp8_e5m2, cuda::std::round_to_nearest>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, cuda::std::round_toward_zero>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, cuda::std::round_to_nearest>();
+  test<__nv_fp6_e3m2, cuda::std::round_to_nearest>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, cuda::std::round_to_nearest>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
index a3505479143..267b6e3e2ef 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
@@ -15,6 +15,7 @@
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
+#include "common.h"
 #include "test_macros.h"
 
 template <class T>
@@ -44,31 +45,13 @@ __host__ __device__ void test_impl(cuda::std::true_type)
   assert(is_nan(cuda::std::numeric_limits<const volatile T>::signaling_NaN()));
 }
 
-template <class T>
-__host__ __device__ bool equal_to(T x, T y)
-{
-  return x == y;
-}
-
-#if _CCCL_HAS_NVFP8()
-__host__ __device__ bool equal_to(__nv_fp8_e4m3 x, __nv_fp8_e4m3 y)
-{
-  return x.__x == y.__x;
-}
-
-__host__ __device__ bool equal_to(__nv_fp8_e5m2 x, __nv_fp8_e5m2 y)
-{
-  return x.__x == y.__x;
-}
-#endif // _CCCL_HAS_NVFP8()
-
 template <class T>
 __host__ __device__ void test_impl(cuda::std::false_type)
 {
-  assert(equal_to(cuda::std::numeric_limits<T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<const T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<volatile T>::signaling_NaN(), T()));
-  assert(equal_to(cuda::std::numeric_limits<const volatile T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<const T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<volatile T>::signaling_NaN(), T()));
+  assert(float_eq(cuda::std::numeric_limits<const volatile T>::signaling_NaN(), T()));
 }
 
 template <class T>
@@ -117,7 +100,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3>();
   test<__nv_fp8_e5m2>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3>();
+  test<__nv_fp6_e3m2>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
index 3d2700b0061..9469e2b7400 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
@@ -63,7 +63,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
index 7d99e880bcc..577442e80a5 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
@@ -69,7 +69,17 @@ int main(int, char**)
 #if _CCCL_HAS_NVFP8()
   test<__nv_fp8_e4m3, false>();
   test<__nv_fp8_e5m2, false>();
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+  test<__nv_fp8_e8m0, false>();
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
 #endif // _CCCL_HAS_NVFP8()
+#if _CCCL_HAS_NVFP6()
+  test<__nv_fp6_e2m3, false>();
+  test<__nv_fp6_e3m2, false>();
+#endif // _CCCL_HAS_NVFP6()
+#if _CCCL_HAS_NVFP4()
+  test<__nv_fp4_e2m1, false>();
+#endif // _CCCL_HAS_NVFP4()
 
   return 0;
 }

From d8afc2db5b77da8219bc17aa130ea1135dfef728 Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Mon, 17 Feb 2025 12:06:18 +0100
Subject: [PATCH 2/5] modularize `numeric_limits`

---
 .../cuda/std/__limits/numeric_limits.h        |  612 ++++++++
 .../cuda/std/__limits/numeric_limits_ext.h    |  731 ++++++++++
 libcudacxx/include/cuda/std/limits            | 1289 +----------------
 3 files changed, 1346 insertions(+), 1286 deletions(-)
 create mode 100644 libcudacxx/include/cuda/std/__limits/numeric_limits.h
 create mode 100644 libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h

diff --git a/libcudacxx/include/cuda/std/__limits/numeric_limits.h b/libcudacxx/include/cuda/std/__limits/numeric_limits.h
new file mode 100644
index 00000000000..fdb5660c5a4
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__limits/numeric_limits.h
@@ -0,0 +1,612 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_H
+#define _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__bit/bit_cast.h>
+#include <cuda/std/__type_traits/is_extended_floating_point.h>
+#include <cuda/std/__type_traits/is_floating_point.h>
+#include <cuda/std/__type_traits/is_integral.h>
+#include <cuda/std/__type_traits/is_same.h>
+#include <cuda/std/climits>
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+enum float_round_style
+{
+  round_indeterminate       = -1,
+  round_toward_zero         = 0,
+  round_to_nearest          = 1,
+  round_toward_infinity     = 2,
+  round_toward_neg_infinity = 3
+};
+
+enum float_denorm_style
+{
+  denorm_indeterminate = -1,
+  denorm_absent        = 0,
+  denorm_present       = 1
+};
+
+enum class __numeric_limits_type
+{
+  __integral,
+  __bool,
+  __floating_point,
+  __other,
+};
+
+template <class _Tp>
+_LIBCUDACXX_HIDE_FROM_ABI constexpr __numeric_limits_type __make_numeric_limits_type()
+{
+#if !defined(_CCCL_NO_IF_CONSTEXPR)
+  if constexpr (_CCCL_TRAIT(is_same, _Tp, bool))
+  {
+    return __numeric_limits_type::__bool;
+  }
+  else if constexpr (_CCCL_TRAIT(is_integral, _Tp))
+  {
+    return __numeric_limits_type::__integral;
+  }
+  else if constexpr (_CCCL_TRAIT(is_floating_point, _Tp) || _CCCL_TRAIT(__is_extended_floating_point, _Tp))
+  {
+    return __numeric_limits_type::__floating_point;
+  }
+  else
+  {
+    return __numeric_limits_type::__other;
+  }
+#else // ^^^ !_CCCL_NO_IF_CONSTEXPR ^^^ // vvv _CCCL_NO_IF_CONSTEXPR vvv
+  return _CCCL_TRAIT(is_same, _Tp, bool)
+         ? __numeric_limits_type::__bool
+         : (_CCCL_TRAIT(is_integral, _Tp)
+              ? __numeric_limits_type::__integral
+              : (_CCCL_TRAIT(is_floating_point, _Tp) || _CCCL_TRAIT(__is_extended_floating_point, _Tp)
+                   ? __numeric_limits_type::__floating_point
+                   : __numeric_limits_type::__other));
+#endif // _CCCL_NO_IF_CONSTEXPR
+}
+
+template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>()>
+class __numeric_limits_impl
+{
+public:
+  using type = _Tp;
+
+  static constexpr bool is_specialized = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return type();
+  }
+
+  static constexpr int digits       = 0;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 0;
+  static constexpr bool is_signed   = false;
+  static constexpr bool is_integer  = false;
+  static constexpr bool is_exact    = false;
+  static constexpr int radix        = 0;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return type();
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 0;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_absent;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return type();
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = false;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_toward_zero;
+};
+
+// MSVC warns about overflowing left shift
+_CCCL_DIAG_PUSH
+_CCCL_DIAG_SUPPRESS_MSVC(4309)
+template <class _Tp, int __digits, bool _IsSigned>
+struct __int_min
+{
+  static constexpr _Tp value = static_cast<_Tp>(_Tp(1) << __digits);
+};
+_CCCL_DIAG_POP
+
+template <class _Tp, int __digits>
+struct __int_min<_Tp, __digits, false>
+{
+  static constexpr _Tp value = _Tp(0);
+};
+
+template <class _Tp>
+class __numeric_limits_impl<_Tp, __numeric_limits_type::__integral>
+{
+public:
+  using type = _Tp;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = type(-1) < type(0);
+  static constexpr int digits       = static_cast<int>(sizeof(type) * __CHAR_BIT__ - is_signed);
+  static constexpr int digits10     = digits * 3 / 10;
+  static constexpr int max_digits10 = 0;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __int_min<type, digits, is_signed>::value;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return is_signed ? type(type(~0) ^ min()) : type(~0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return min();
+  }
+
+  static constexpr bool is_integer = true;
+  static constexpr bool is_exact   = true;
+  static constexpr int radix       = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return type(0);
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 0;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_absent;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return type(0);
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = !is_signed;
+
+#if (_CCCL_ARCH(X86_64) && _CCCL_OS(LINUX)) || defined(__pnacl__) || defined(__wasm__)
+  static constexpr bool traps = true;
+#else
+  static constexpr bool traps = false;
+#endif
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_toward_zero;
+};
+
+template <>
+class __numeric_limits_impl<bool, __numeric_limits_type::__bool>
+{
+public:
+  using type = bool;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = false;
+  static constexpr int digits       = 1;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 0;
+
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return false;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return true;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return min();
+  }
+
+  static constexpr bool is_integer = true;
+  static constexpr bool is_exact   = true;
+  static constexpr int radix       = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return type(0);
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 0;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_absent;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type(0);
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return type(0);
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_toward_zero;
+};
+
+template <>
+class __numeric_limits_impl<float, __numeric_limits_type::__floating_point>
+{
+public:
+  using type = float;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = __FLT_MANT_DIG__;
+  static constexpr int digits10     = __FLT_DIG__;
+  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __FLT_MIN__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __FLT_MAX__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return -max();
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __FLT_EPSILON__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return 0.5F;
+  }
+
+  static constexpr int min_exponent   = __FLT_MIN_EXP__;
+  static constexpr int min_exponent10 = __FLT_MIN_10_EXP__;
+  static constexpr int max_exponent   = __FLT_MAX_EXP__;
+  static constexpr int max_exponent10 = __FLT_MAX_10_EXP__;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+
+#if defined(_CCCL_BUILTIN_HUGE_VALF)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return _CCCL_BUILTIN_HUGE_VALF();
+  }
+#else // ^^^ _CCCL_BUILTIN_HUGE_VALF ^^^ // vvv !_CCCL_BUILTIN_HUGE_VALF vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type infinity() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7f800000);
+  }
+#endif // !_CCCL_BUILTIN_HUGE_VALF
+#if defined(_CCCL_BUILTIN_NANF)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NANF("");
+  }
+#else // ^^^ _CCCL_BUILTIN_NANF ^^^ // vvv !_CCCL_BUILTIN_NANF vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type quiet_NaN() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7fc00000);
+  }
+#endif // !_CCCL_BUILTIN_NANF
+#if defined(_CCCL_BUILTIN_NANSF)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NANSF("");
+  }
+#else // ^^^ _CCCL_BUILTIN_NANSF ^^^ // vvv !_CCCL_BUILTIN_NANSF vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type signaling_NaN() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7fa00000);
+  }
+#endif // !_CCCL_BUILTIN_NANSF
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __FLT_DENORM_MIN__;
+  }
+
+  static constexpr bool is_iec559  = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+template <>
+class __numeric_limits_impl<double, __numeric_limits_type::__floating_point>
+{
+public:
+  using type = double;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = __DBL_MANT_DIG__;
+  static constexpr int digits10     = __DBL_DIG__;
+  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __DBL_MIN__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __DBL_MAX__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return -max();
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __DBL_EPSILON__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return 0.5;
+  }
+
+  static constexpr int min_exponent   = __DBL_MIN_EXP__;
+  static constexpr int min_exponent10 = __DBL_MIN_10_EXP__;
+  static constexpr int max_exponent   = __DBL_MAX_EXP__;
+  static constexpr int max_exponent10 = __DBL_MAX_10_EXP__;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+
+#if defined(_CCCL_BUILTIN_HUGE_VAL)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return _CCCL_BUILTIN_HUGE_VAL();
+  }
+#else // ^^^ _CCCL_BUILTIN_HUGE_VAL ^^^ // vvv !_CCCL_BUILTIN_HUGE_VAL vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type infinity() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7ff0000000000000);
+  }
+#endif // !_CCCL_BUILTIN_HUGE_VAL
+#if defined(_CCCL_BUILTIN_NAN)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NAN("");
+  }
+#else // ^^^ _CCCL_BUILTIN_NAN ^^^ // vvv !_CCCL_BUILTIN_NAN vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type quiet_NaN() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7ff8000000000000);
+  }
+#endif // !_CCCL_BUILTIN_NAN
+#if defined(_CCCL_BUILTIN_NANS)
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NANS("");
+  }
+#else // ^^^ _CCCL_BUILTIN_NANS ^^^ // vvv !_CCCL_BUILTIN_NANS vvv
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type signaling_NaN() noexcept
+  {
+    return _CUDA_VSTD::bit_cast<type>(0x7ff4000000000000);
+  }
+#endif // !_CCCL_BUILTIN_NANS
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __DBL_DENORM_MIN__;
+  }
+
+  static constexpr bool is_iec559  = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+template <>
+class __numeric_limits_impl<long double, __numeric_limits_type::__floating_point>
+{
+#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
+
+public:
+  using type = long double;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = __LDBL_MANT_DIG__;
+  static constexpr int digits10     = __LDBL_DIG__;
+  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __LDBL_MIN__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __LDBL_MAX__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return -max();
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __LDBL_EPSILON__;
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return 0.5L;
+  }
+
+  static constexpr int min_exponent   = __LDBL_MIN_EXP__;
+  static constexpr int min_exponent10 = __LDBL_MIN_10_EXP__;
+  static constexpr int max_exponent   = __LDBL_MAX_EXP__;
+  static constexpr int max_exponent10 = __LDBL_MAX_10_EXP__;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return _CCCL_BUILTIN_HUGE_VALL();
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NANL("");
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return _CCCL_BUILTIN_NANSL("");
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __LDBL_DENORM_MIN__;
+  }
+
+  static constexpr bool is_iec559  = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE
+};
+
+template <class _Tp>
+class numeric_limits : public __numeric_limits_impl<_Tp>
+{};
+
+template <class _Tp>
+class numeric_limits<const _Tp> : public numeric_limits<_Tp>
+{};
+
+template <class _Tp>
+class numeric_limits<volatile _Tp> : public numeric_limits<_Tp>
+{};
+
+template <class _Tp>
+class numeric_limits<const volatile _Tp> : public numeric_limits<_Tp>
+{};
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#endif // _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_H
diff --git a/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
new file mode 100644
index 00000000000..a2ba033d4c8
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
@@ -0,0 +1,731 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_EXT_H
+#define _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_EXT_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__bit/bit_cast.h>
+#include <cuda/std/__limits/numeric_limits.h>
+
+#if defined(_LIBCUDACXX_HAS_NVFP16)
+#  include <cuda_fp16.h>
+#endif // _LIBCUDACXX_HAS_NVFP16
+
+#if defined(_LIBCUDACXX_HAS_NVBF16)
+_CCCL_DIAG_PUSH
+_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
+#  include <cuda_bf16.h>
+_CCCL_DIAG_POP
+#endif // _LIBCUDACXX_HAS_NVBF16
+
+#if _CCCL_HAS_NVFP8()
+#  include <cuda_fp8.h>
+#endif // _CCCL_HAS_NVFP8()
+
+#if _CCCL_HAS_NVFP6()
+#  include <cuda_fp6.h>
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+#  include <cuda_fp4.h>
+#endif // _CCCL_HAS_NVFP4()
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+#if _CCCL_HAS_NVFP16()
+#  ifdef _LIBCUDACXX_HAS_NVFP16
+#    define _LIBCUDACXX_FP16_CONSTEXPR constexpr
+#  else //_LIBCUDACXX_HAS_NVFP16
+#    define _LIBCUDACXX_FP16_CONSTEXPR
+#  endif //_LIBCUDACXX_HAS_NVFP16
+
+template <>
+class __numeric_limits_impl<__half, __numeric_limits_type::__floating_point>
+{
+public:
+  using type = __half;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 11;
+  static constexpr int digits10     = 3;
+  static constexpr int max_digits10 = 5;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type min() noexcept
+  {
+    return type(__half_raw{0x0400u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type max() noexcept
+  {
+    return type(__half_raw{0x7bffu});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type lowest() noexcept
+  {
+    return type(__half_raw{0xfbffu});
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type epsilon() noexcept
+  {
+    return type(__half_raw{0x1400u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type round_error() noexcept
+  {
+    return type(__half_raw{0x3800u});
+  }
+
+  static constexpr int min_exponent   = -13;
+  static constexpr int min_exponent10 = -4;
+  static constexpr int max_exponent   = 16;
+  static constexpr int max_exponent10 = 4;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type infinity() noexcept
+  {
+    return type(__half_raw{0x7c00u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type quiet_NaN() noexcept
+  {
+    return type(__half_raw{0x7e00u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type signaling_NaN() noexcept
+  {
+    return type(__half_raw{0x7d00u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type denorm_min() noexcept
+  {
+    return type(__half_raw{0x0001u});
+  }
+
+  static constexpr bool is_iec559  = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#  undef _LIBCUDACXX_FP16_CONSTEXPR
+#endif // _CCCL_HAS_NVFP16
+
+#if _CCCL_HAS_NVBF16()
+#  ifdef _LIBCUDACXX_HAS_NVBF16
+#    define _LIBCUDACXX_BF16_CONSTEXPR constexpr
+#  else //_LIBCUDACXX_HAS_NVBF16
+#    define _LIBCUDACXX_BF16_CONSTEXPR
+#  endif //_LIBCUDACXX_HAS_NVBF16
+
+template <>
+class __numeric_limits_impl<__nv_bfloat16, __numeric_limits_type::__floating_point>
+{
+public:
+  using type = __nv_bfloat16;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 8;
+  static constexpr int digits10     = 2;
+  static constexpr int max_digits10 = 4;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type min() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x0080u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type max() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x7f7fu});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type lowest() noexcept
+  {
+    return type(__nv_bfloat16_raw{0xff7fu});
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type epsilon() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x3c00u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type round_error() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x3f00u});
+  }
+
+  static constexpr int min_exponent   = -125;
+  static constexpr int min_exponent10 = -37;
+  static constexpr int max_exponent   = 128;
+  static constexpr int max_exponent10 = 38;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type infinity() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x7f80u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type quiet_NaN() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x7fc0u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type signaling_NaN() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x7fa0u});
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type denorm_min() noexcept
+  {
+    return type(__nv_bfloat16_raw{0x0001u});
+  }
+
+  static constexpr bool is_iec559  = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#  undef _LIBCUDACXX_BF16_CONSTEXPR
+#endif // _CCCL_HAS_NVBF16
+
+#if _CCCL_HAS_NVFP8()
+template <>
+class __numeric_limits_impl<__nv_fp8_e4m3, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e4m3 __make_value(__nv_fp8_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp8_e4m3>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp8_e4m3 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp8_e4m3;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 3;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x08u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7eu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xfeu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x20u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x30u));
+  }
+
+  static constexpr int min_exponent   = -6;
+  static constexpr int min_exponent10 = -2;
+  static constexpr int max_exponent   = 8;
+  static constexpr int max_exponent10 = 2;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+template <>
+class __numeric_limits_impl<__nv_fp8_e5m2, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e5m2 __make_value(__nv_fp8_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp8_e5m2>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp8_e5m2 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp8_e5m2;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 2;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x04u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7bu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xfbu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x34u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x38u));
+  }
+
+  static constexpr int min_exponent   = -15;
+  static constexpr int min_exponent10 = -5;
+  static constexpr int max_exponent   = 15;
+  static constexpr int max_exponent10 = 4;
+
+  static constexpr bool has_infinity             = true;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = true;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7cu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7eu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7du));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+#  if _CCCL_CUDACC_AT_LEAST(12, 8)
+template <>
+class __numeric_limits_impl<__nv_fp8_e8m0, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e8m0 __make_value(__nv_fp8_storage_t __val)
+  {
+#    if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp8_e8m0>(__val);
+#    else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp8_e8m0 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#    endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp8_e8m0;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = false;
+  static constexpr int digits       = 0;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 1;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xfeu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
+  }
+
+  static constexpr int min_exponent   = -127;
+  static constexpr int min_exponent10 = -39;
+  static constexpr int max_exponent   = 127;
+  static constexpr int max_exponent10 = 38;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = true;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_absent;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return __make_value(static_cast<__nv_fp8_storage_t>(0xffu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return type{};
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_toward_zero;
+};
+#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
+#endif // _CCCL_HAS_NVFP8()
+
+#if _CCCL_HAS_NVFP6()
+template <>
+class __numeric_limits_impl<__nv_fp6_e2m3, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e2m3 __make_value(__nv_fp6_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp6_e2m3>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp6_e2m3 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp6_e2m3;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 3;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 2;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+
+template <>
+class __numeric_limits_impl<__nv_fp6_e3m2, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e3m2 __make_value(__nv_fp6_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp6_e3m2>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp6_e3m2 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp6_e3m2;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 2;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
+  }
+
+  static constexpr int min_exponent   = -2;
+  static constexpr int min_exponent10 = -1;
+  static constexpr int max_exponent   = 4;
+  static constexpr int max_exponent10 = 1;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#endif // _CCCL_HAS_NVFP6()
+
+#if _CCCL_HAS_NVFP4()
+template <>
+class __numeric_limits_impl<__nv_fp4_e2m1, __numeric_limits_type::__floating_point>
+{
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp4_e2m1 __make_value(__nv_fp4_storage_t __val)
+  {
+#  if defined(_CCCL_BUILTIN_BIT_CAST)
+    return _CUDA_VSTD::bit_cast<__nv_fp4_e2m1>(__val);
+#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
+    __nv_fp4_e2m1 __ret{};
+    __ret.__x = __val;
+    return __ret;
+#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
+  }
+
+public:
+  using type = __nv_fp4_e2m1;
+
+  static constexpr bool is_specialized = true;
+
+  static constexpr bool is_signed   = true;
+  static constexpr int digits       = 1;
+  static constexpr int digits10     = 0;
+  static constexpr int max_digits10 = 2;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x2u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x7u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0xfu));
+  }
+
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact   = false;
+  static constexpr int radix       = __FLT_RADIX__;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+
+  static constexpr int min_exponent   = 0;
+  static constexpr int min_exponent10 = 0;
+  static constexpr int max_exponent   = 2;
+  static constexpr int max_exponent10 = 0;
+
+  static constexpr bool has_infinity             = false;
+  static constexpr bool has_quiet_NaN            = false;
+  static constexpr bool has_signaling_NaN        = false;
+  static constexpr float_denorm_style has_denorm = denorm_present;
+  static constexpr bool has_denorm_loss          = false;
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
+  {
+    return type{};
+  }
+  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
+  {
+    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
+  }
+
+  static constexpr bool is_iec559  = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo  = false;
+
+  static constexpr bool traps                    = false;
+  static constexpr bool tinyness_before          = false;
+  static constexpr float_round_style round_style = round_to_nearest;
+};
+#endif // _CCCL_HAS_NVFP4()
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#endif // _LIBCUDACXX___LIMITS_NUMERIC_LIMITS_EXT_H
diff --git a/libcudacxx/include/cuda/std/limits b/libcudacxx/include/cuda/std/limits
index 19464640d90..3e0112e9e92 100644
--- a/libcudacxx/include/cuda/std/limits
+++ b/libcudacxx/include/cuda/std/limits
@@ -21,1294 +21,11 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__bit/bit_cast.h>
-#include <cuda/std/__type_traits/integral_constant.h>
-#include <cuda/std/__type_traits/is_extended_floating_point.h>
-#include <cuda/std/__type_traits/is_floating_point.h>
-#include <cuda/std/__type_traits/is_integral.h>
-#include <cuda/std/climits>
-#include <cuda/std/version>
-
-#if defined(_LIBCUDACXX_HAS_NVFP16)
-#  include <cuda_fp16.h>
-#endif // _LIBCUDACXX_HAS_NVFP16
-
-#if defined(_LIBCUDACXX_HAS_NVBF16)
-_CCCL_DIAG_PUSH
-_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
-#  include <cuda_bf16.h>
-_CCCL_DIAG_POP
-#endif // _LIBCUDACXX_HAS_NVBF16
-
-#if _CCCL_HAS_NVFP8()
-#  include <cuda_fp8.h>
-#endif // _CCCL_HAS_NVFP8()
-
-#if _CCCL_HAS_NVFP6()
-#  include <cuda_fp6.h>
-#endif // _CCCL_HAS_NVFP6()
-
-#if _CCCL_HAS_NVFP4()
-#  include <cuda_fp4.h>
-#endif // _CCCL_HAS_NVFP4()
-
 _CCCL_PUSH_MACROS
 
-_LIBCUDACXX_BEGIN_NAMESPACE_STD
-
-enum float_round_style
-{
-  round_indeterminate       = -1,
-  round_toward_zero         = 0,
-  round_to_nearest          = 1,
-  round_toward_infinity     = 2,
-  round_toward_neg_infinity = 3
-};
-
-enum float_denorm_style
-{
-  denorm_indeterminate = -1,
-  denorm_absent        = 0,
-  denorm_present       = 1
-};
-
-enum class __numeric_limits_type
-{
-  __integral,
-  __bool,
-  __floating_point,
-  __other,
-};
-
-template <class _Tp>
-_LIBCUDACXX_HIDE_FROM_ABI constexpr __numeric_limits_type __make_numeric_limits_type()
-{
-#if !defined(_CCCL_NO_IF_CONSTEXPR)
-  if constexpr (_CCCL_TRAIT(is_same, _Tp, bool))
-  {
-    return __numeric_limits_type::__bool;
-  }
-  else if constexpr (_CCCL_TRAIT(is_integral, _Tp))
-  {
-    return __numeric_limits_type::__integral;
-  }
-  else if constexpr (_CCCL_TRAIT(is_floating_point, _Tp) || _CCCL_TRAIT(__is_extended_floating_point, _Tp))
-  {
-    return __numeric_limits_type::__floating_point;
-  }
-  else
-  {
-    return __numeric_limits_type::__other;
-  }
-#else // ^^^ !_CCCL_NO_IF_CONSTEXPR ^^^ // vvv _CCCL_NO_IF_CONSTEXPR vvv
-  return _CCCL_TRAIT(is_same, _Tp, bool)
-         ? __numeric_limits_type::__bool
-         : (_CCCL_TRAIT(is_integral, _Tp)
-              ? __numeric_limits_type::__integral
-              : (_CCCL_TRAIT(is_floating_point, _Tp) || _CCCL_TRAIT(__is_extended_floating_point, _Tp)
-                   ? __numeric_limits_type::__floating_point
-                   : __numeric_limits_type::__other));
-#endif // _CCCL_NO_IF_CONSTEXPR
-}
-
-template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>()>
-class __numeric_limits_impl
-{
-public:
-  using type = _Tp;
-
-  static constexpr bool is_specialized = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return type();
-  }
-
-  static constexpr int digits       = 0;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 0;
-  static constexpr bool is_signed   = false;
-  static constexpr bool is_integer  = false;
-  static constexpr bool is_exact    = false;
-  static constexpr int radix        = 0;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return type();
-  }
-
-  static constexpr int min_exponent   = 0;
-  static constexpr int min_exponent10 = 0;
-  static constexpr int max_exponent   = 0;
-  static constexpr int max_exponent10 = 0;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_absent;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return type();
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = false;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_toward_zero;
-};
-
-// MSVC warns about overflowing left shift
-_CCCL_DIAG_PUSH
-_CCCL_DIAG_SUPPRESS_MSVC(4309)
-template <class _Tp, int __digits, bool _IsSigned>
-struct __int_min
-{
-  static constexpr _Tp value = static_cast<_Tp>(_Tp(1) << __digits);
-};
-_CCCL_DIAG_POP
-
-template <class _Tp, int __digits>
-struct __int_min<_Tp, __digits, false>
-{
-  static constexpr _Tp value = _Tp(0);
-};
-
-template <class _Tp>
-class __numeric_limits_impl<_Tp, __numeric_limits_type::__integral>
-{
-public:
-  using type = _Tp;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = type(-1) < type(0);
-  static constexpr int digits       = static_cast<int>(sizeof(type) * __CHAR_BIT__ - is_signed);
-  static constexpr int digits10     = digits * 3 / 10;
-  static constexpr int max_digits10 = 0;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __int_min<type, digits, is_signed>::value;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return is_signed ? type(type(~0) ^ min()) : type(~0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return min();
-  }
-
-  static constexpr bool is_integer = true;
-  static constexpr bool is_exact   = true;
-  static constexpr int radix       = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return type(0);
-  }
-
-  static constexpr int min_exponent   = 0;
-  static constexpr int min_exponent10 = 0;
-  static constexpr int max_exponent   = 0;
-  static constexpr int max_exponent10 = 0;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_absent;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return type(0);
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = !is_signed;
-
-#if (_CCCL_ARCH(X86_64) && _CCCL_OS(LINUX)) || defined(__pnacl__) || defined(__wasm__)
-  static constexpr bool traps = true;
-#else
-  static constexpr bool traps = false;
-#endif
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_toward_zero;
-};
-
-template <>
-class __numeric_limits_impl<bool, __numeric_limits_type::__bool>
-{
-public:
-  using type = bool;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = false;
-  static constexpr int digits       = 1;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 0;
-
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return false;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return true;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return min();
-  }
-
-  static constexpr bool is_integer = true;
-  static constexpr bool is_exact   = true;
-  static constexpr int radix       = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return type(0);
-  }
-
-  static constexpr int min_exponent   = 0;
-  static constexpr int min_exponent10 = 0;
-  static constexpr int max_exponent   = 0;
-  static constexpr int max_exponent10 = 0;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_absent;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type(0);
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return type(0);
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_toward_zero;
-};
-
-template <>
-class __numeric_limits_impl<float, __numeric_limits_type::__floating_point>
-{
-public:
-  using type = float;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = __FLT_MANT_DIG__;
-  static constexpr int digits10     = __FLT_DIG__;
-  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __FLT_MIN__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __FLT_MAX__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return -max();
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __FLT_EPSILON__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return 0.5F;
-  }
-
-  static constexpr int min_exponent   = __FLT_MIN_EXP__;
-  static constexpr int min_exponent10 = __FLT_MIN_10_EXP__;
-  static constexpr int max_exponent   = __FLT_MAX_EXP__;
-  static constexpr int max_exponent10 = __FLT_MAX_10_EXP__;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-
-#if defined(_CCCL_BUILTIN_HUGE_VALF)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return _CCCL_BUILTIN_HUGE_VALF();
-  }
-#else // ^^^ _CCCL_BUILTIN_HUGE_VALF ^^^ // vvv !_CCCL_BUILTIN_HUGE_VALF vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type infinity() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7f800000);
-  }
-#endif // !_CCCL_BUILTIN_HUGE_VALF
-#if defined(_CCCL_BUILTIN_NANF)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NANF("");
-  }
-#else // ^^^ _CCCL_BUILTIN_NANF ^^^ // vvv !_CCCL_BUILTIN_NANF vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type quiet_NaN() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7fc00000);
-  }
-#endif // !_CCCL_BUILTIN_NANF
-#if defined(_CCCL_BUILTIN_NANSF)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NANSF("");
-  }
-#else // ^^^ _CCCL_BUILTIN_NANSF ^^^ // vvv !_CCCL_BUILTIN_NANSF vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type signaling_NaN() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7fa00000);
-  }
-#endif // !_CCCL_BUILTIN_NANSF
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __FLT_DENORM_MIN__;
-  }
-
-  static constexpr bool is_iec559  = true;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-
-template <>
-class __numeric_limits_impl<double, __numeric_limits_type::__floating_point>
-{
-public:
-  using type = double;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = __DBL_MANT_DIG__;
-  static constexpr int digits10     = __DBL_DIG__;
-  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __DBL_MIN__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __DBL_MAX__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return -max();
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __DBL_EPSILON__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return 0.5;
-  }
-
-  static constexpr int min_exponent   = __DBL_MIN_EXP__;
-  static constexpr int min_exponent10 = __DBL_MIN_10_EXP__;
-  static constexpr int max_exponent   = __DBL_MAX_EXP__;
-  static constexpr int max_exponent10 = __DBL_MAX_10_EXP__;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-
-#if defined(_CCCL_BUILTIN_HUGE_VAL)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return _CCCL_BUILTIN_HUGE_VAL();
-  }
-#else // ^^^ _CCCL_BUILTIN_HUGE_VAL ^^^ // vvv !_CCCL_BUILTIN_HUGE_VAL vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type infinity() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7ff0000000000000);
-  }
-#endif // !_CCCL_BUILTIN_HUGE_VAL
-#if defined(_CCCL_BUILTIN_NAN)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NAN("");
-  }
-#else // ^^^ _CCCL_BUILTIN_NAN ^^^ // vvv !_CCCL_BUILTIN_NAN vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type quiet_NaN() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7ff8000000000000);
-  }
-#endif // !_CCCL_BUILTIN_NAN
-#if defined(_CCCL_BUILTIN_NANS)
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NANS("");
-  }
-#else // ^^^ _CCCL_BUILTIN_NANS ^^^ // vvv !_CCCL_BUILTIN_NANS vvv
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_CONSTEXPR_BIT_CAST type signaling_NaN() noexcept
-  {
-    return _CUDA_VSTD::bit_cast<type>(0x7ff4000000000000);
-  }
-#endif // !_CCCL_BUILTIN_NANS
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __DBL_DENORM_MIN__;
-  }
-
-  static constexpr bool is_iec559  = true;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-
-template <>
-class __numeric_limits_impl<long double, __numeric_limits_type::__floating_point>
-{
-#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
-
-public:
-  using type = long double;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = __LDBL_MANT_DIG__;
-  static constexpr int digits10     = __LDBL_DIG__;
-  static constexpr int max_digits10 = 2 + (digits * 30103l) / 100000l;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __LDBL_MIN__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __LDBL_MAX__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return -max();
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __LDBL_EPSILON__;
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return 0.5L;
-  }
-
-  static constexpr int min_exponent   = __LDBL_MIN_EXP__;
-  static constexpr int min_exponent10 = __LDBL_MIN_10_EXP__;
-  static constexpr int max_exponent   = __LDBL_MAX_EXP__;
-  static constexpr int max_exponent10 = __LDBL_MAX_10_EXP__;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return _CCCL_BUILTIN_HUGE_VALL();
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NANL("");
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return _CCCL_BUILTIN_NANSL("");
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __LDBL_DENORM_MIN__;
-  }
-
-  static constexpr bool is_iec559  = true;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE
-};
-
-#if _CCCL_HAS_NVFP16()
-#  ifdef _LIBCUDACXX_HAS_NVFP16
-#    define _LIBCUDACXX_FP16_CONSTEXPR constexpr
-#  else //_LIBCUDACXX_HAS_NVFP16
-#    define _LIBCUDACXX_FP16_CONSTEXPR
-#  endif //_LIBCUDACXX_HAS_NVFP16
-
-template <>
-class __numeric_limits_impl<__half, __numeric_limits_type::__floating_point>
-{
-public:
-  using type = __half;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 11;
-  static constexpr int digits10     = 3;
-  static constexpr int max_digits10 = 5;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type min() noexcept
-  {
-    return type(__half_raw{0x0400u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type max() noexcept
-  {
-    return type(__half_raw{0x7bffu});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type lowest() noexcept
-  {
-    return type(__half_raw{0xfbffu});
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type epsilon() noexcept
-  {
-    return type(__half_raw{0x1400u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type round_error() noexcept
-  {
-    return type(__half_raw{0x3800u});
-  }
-
-  static constexpr int min_exponent   = -13;
-  static constexpr int min_exponent10 = -4;
-  static constexpr int max_exponent   = 16;
-  static constexpr int max_exponent10 = 4;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type infinity() noexcept
-  {
-    return type(__half_raw{0x7c00u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type quiet_NaN() noexcept
-  {
-    return type(__half_raw{0x7e00u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type signaling_NaN() noexcept
-  {
-    return type(__half_raw{0x7d00u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_FP16_CONSTEXPR type denorm_min() noexcept
-  {
-    return type(__half_raw{0x0001u});
-  }
-
-  static constexpr bool is_iec559  = true;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-#  undef _LIBCUDACXX_FP16_CONSTEXPR
-#endif // _CCCL_HAS_NVFP16
-
-#if _CCCL_HAS_NVBF16()
-#  ifdef _LIBCUDACXX_HAS_NVBF16
-#    define _LIBCUDACXX_BF16_CONSTEXPR constexpr
-#  else //_LIBCUDACXX_HAS_NVBF16
-#    define _LIBCUDACXX_BF16_CONSTEXPR
-#  endif //_LIBCUDACXX_HAS_NVBF16
-
-template <>
-class __numeric_limits_impl<__nv_bfloat16, __numeric_limits_type::__floating_point>
-{
-public:
-  using type = __nv_bfloat16;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 8;
-  static constexpr int digits10     = 2;
-  static constexpr int max_digits10 = 4;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type min() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x0080u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type max() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x7f7fu});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type lowest() noexcept
-  {
-    return type(__nv_bfloat16_raw{0xff7fu});
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type epsilon() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x3c00u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type round_error() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x3f00u});
-  }
-
-  static constexpr int min_exponent   = -125;
-  static constexpr int min_exponent10 = -37;
-  static constexpr int max_exponent   = 128;
-  static constexpr int max_exponent10 = 38;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type infinity() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x7f80u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type quiet_NaN() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x7fc0u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type signaling_NaN() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x7fa0u});
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static _LIBCUDACXX_BF16_CONSTEXPR type denorm_min() noexcept
-  {
-    return type(__nv_bfloat16_raw{0x0001u});
-  }
-
-  static constexpr bool is_iec559  = true;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-#  undef _LIBCUDACXX_BF16_CONSTEXPR
-#endif // _CCCL_HAS_NVBF16
-
-#if _CCCL_HAS_NVFP8()
-template <>
-class __numeric_limits_impl<__nv_fp8_e4m3, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e4m3 __make_value(__nv_fp8_storage_t __val)
-  {
-#  if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp8_e4m3>(__val);
-#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp8_e4m3 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp8_e4m3;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 3;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x08u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7eu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0xfeu));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x20u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x30u));
-  }
-
-  static constexpr int min_exponent   = -6;
-  static constexpr int min_exponent10 = -2;
-  static constexpr int max_exponent   = 8;
-  static constexpr int max_exponent10 = 2;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x01u));
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-
-template <>
-class __numeric_limits_impl<__nv_fp8_e5m2, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e5m2 __make_value(__nv_fp8_storage_t __val)
-  {
-#  if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp8_e5m2>(__val);
-#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp8_e5m2 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp8_e5m2;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 2;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x04u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7bu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0xfbu));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x34u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x38u));
-  }
-
-  static constexpr int min_exponent   = -15;
-  static constexpr int min_exponent10 = -5;
-  static constexpr int max_exponent   = 15;
-  static constexpr int max_exponent10 = 4;
-
-  static constexpr bool has_infinity             = true;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = true;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7cu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7eu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7du));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x01u));
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-
-#  if _CCCL_CUDACC_AT_LEAST(12, 8)
-template <>
-class __numeric_limits_impl<__nv_fp8_e8m0, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp8_e8m0 __make_value(__nv_fp8_storage_t __val)
-  {
-#    if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp8_e8m0>(__val);
-#    else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp8_e8m0 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#    endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp8_e8m0;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = false;
-  static constexpr int digits       = 0;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 1;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0xfeu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x00u));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0x7fu));
-  }
-
-  static constexpr int min_exponent   = -127;
-  static constexpr int min_exponent10 = -39;
-  static constexpr int max_exponent   = 127;
-  static constexpr int max_exponent10 = 38;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = true;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_absent;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return __make_value(static_cast<__nv_fp8_storage_t>(0xffu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return type{};
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_toward_zero;
-};
-#  endif // _CCCL_CUDACC_AT_LEAST(12, 8)
-#endif // _CCCL_HAS_NVFP8()
-
-#if _CCCL_HAS_NVFP6()
-template <>
-class __numeric_limits_impl<__nv_fp6_e2m3, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e2m3 __make_value(__nv_fp6_storage_t __val)
-  {
-#  if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp6_e2m3>(__val);
-#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp6_e2m3 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp6_e2m3;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 3;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
-  }
-
-  static constexpr int min_exponent   = 0;
-  static constexpr int min_exponent10 = 0;
-  static constexpr int max_exponent   = 2;
-  static constexpr int max_exponent10 = 0;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-
-template <>
-class __numeric_limits_impl<__nv_fp6_e3m2, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp6_e3m2 __make_value(__nv_fp6_storage_t __val)
-  {
-#  if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp6_e3m2>(__val);
-#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp6_e3m2 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp6_e3m2;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 2;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x1fu));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x3fu));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x04u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x08u));
-  }
-
-  static constexpr int min_exponent   = -2;
-  static constexpr int min_exponent10 = -1;
-  static constexpr int max_exponent   = 4;
-  static constexpr int max_exponent10 = 1;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp6_storage_t>(0x01u));
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-#endif // _CCCL_HAS_NVFP6()
-
-#if _CCCL_HAS_NVFP4()
-template <>
-class __numeric_limits_impl<__nv_fp4_e2m1, __numeric_limits_type::__floating_point>
-{
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr __nv_fp4_e2m1 __make_value(__nv_fp4_storage_t __val)
-  {
-#  if defined(_CCCL_BUILTIN_BIT_CAST)
-    return _CUDA_VSTD::bit_cast<__nv_fp4_e2m1>(__val);
-#  else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ // vvv !_CCCL_BUILTIN_BIT_CAST vvv
-    __nv_fp4_e2m1 __ret{};
-    __ret.__x = __val;
-    return __ret;
-#  endif // ^^^ !_CCCL_BUILTIN_BIT_CAST ^^^
-  }
-
-public:
-  using type = __nv_fp4_e2m1;
-
-  static constexpr bool is_specialized = true;
-
-  static constexpr bool is_signed   = true;
-  static constexpr int digits       = 1;
-  static constexpr int digits10     = 0;
-  static constexpr int max_digits10 = 2;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0x2u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type max() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0x7u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type lowest() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0xfu));
-  }
-
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact   = false;
-  static constexpr int radix       = __FLT_RADIX__;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type epsilon() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type round_error() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
-  }
-
-  static constexpr int min_exponent   = 0;
-  static constexpr int min_exponent10 = 0;
-  static constexpr int max_exponent   = 2;
-  static constexpr int max_exponent10 = 0;
-
-  static constexpr bool has_infinity             = false;
-  static constexpr bool has_quiet_NaN            = false;
-  static constexpr bool has_signaling_NaN        = false;
-  static constexpr float_denorm_style has_denorm = denorm_present;
-  static constexpr bool has_denorm_loss          = false;
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type infinity() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type quiet_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type signaling_NaN() noexcept
-  {
-    return type{};
-  }
-  _LIBCUDACXX_HIDE_FROM_ABI static constexpr type denorm_min() noexcept
-  {
-    return __make_value(static_cast<__nv_fp4_storage_t>(0x1u));
-  }
-
-  static constexpr bool is_iec559  = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo  = false;
-
-  static constexpr bool traps                    = false;
-  static constexpr bool tinyness_before          = false;
-  static constexpr float_round_style round_style = round_to_nearest;
-};
-#endif // _CCCL_HAS_NVFP4()
-
-template <class _Tp>
-class numeric_limits : public __numeric_limits_impl<_Tp>
-{};
-
-template <class _Tp>
-class numeric_limits<const _Tp> : public numeric_limits<_Tp>
-{};
-
-template <class _Tp>
-class numeric_limits<volatile _Tp> : public numeric_limits<_Tp>
-{};
-
-template <class _Tp>
-class numeric_limits<const volatile _Tp> : public numeric_limits<_Tp>
-{};
-
-_LIBCUDACXX_END_NAMESPACE_STD
+#include <cuda/std/__limits/numeric_limits.h>
+#include <cuda/std/__limits/numeric_limits_ext.h>
+#include <cuda/std/version>
 
 _CCCL_POP_MACROS
 

From 89fc6d46c6ac05e0251259cc85c4e704f0522eb2 Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Mon, 17 Feb 2025 14:34:30 +0100
Subject: [PATCH 3/5] suppress unused parameter warnings

---
 libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
index a2ba033d4c8..7e691d21993 100644
--- a/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
+++ b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
@@ -45,7 +45,11 @@ _CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP6()
 
 #if _CCCL_HAS_NVFP4()
+_CCCL_DIAG_PUSH
+_CCCL_DIAG_SUPPRESS_GCC("-Wunused-parameter")
+_CCCL_DIAG_SUPPRESS_MSVC(2220) // unreferenced formal parameter
 #  include <cuda_fp4.h>
+_CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP4()
 
 _LIBCUDACXX_BEGIN_NAMESPACE_STD

From 034489e5a03bb6081a1b54f327f4a244f66ad3f9 Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Mon, 17 Feb 2025 15:12:18 +0100
Subject: [PATCH 4/5] oops, suppress unused parameter warning everywhere

---
 .../cuda/std/__type_traits/is_extended_floating_point.h       | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
index 240f6f81e89..03b46502530 100644
--- a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
+++ b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
@@ -42,7 +42,11 @@ _CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP6()
 
 #if _CCCL_HAS_NVFP4()
+_CCCL_DIAG_PUSH
+_CCCL_DIAG_SUPPRESS_GCC("-Wunused-parameter")
+_CCCL_DIAG_SUPPRESS_MSVC(2220) // unreferenced formal parameter
 #  include <cuda_fp4.h>
+_CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP4()
 
 _LIBCUDACXX_BEGIN_NAMESPACE_STD

From 15d2dc77405f8a30cd07fa04fccb5846adb05b0b Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Mon, 17 Feb 2025 15:32:32 +0100
Subject: [PATCH 5/5] its C4100, not C2200

---
 libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h       | 2 +-
 .../include/cuda/std/__type_traits/is_extended_floating_point.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
index 7e691d21993..5d995e1f030 100644
--- a/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
+++ b/libcudacxx/include/cuda/std/__limits/numeric_limits_ext.h
@@ -47,7 +47,7 @@ _CCCL_DIAG_POP
 #if _CCCL_HAS_NVFP4()
 _CCCL_DIAG_PUSH
 _CCCL_DIAG_SUPPRESS_GCC("-Wunused-parameter")
-_CCCL_DIAG_SUPPRESS_MSVC(2220) // unreferenced formal parameter
+_CCCL_DIAG_SUPPRESS_MSVC(4100) // unreferenced formal parameter
 #  include <cuda_fp4.h>
 _CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP4()
diff --git a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
index 03b46502530..6422f352e85 100644
--- a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
+++ b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h
@@ -44,7 +44,7 @@ _CCCL_DIAG_POP
 #if _CCCL_HAS_NVFP4()
 _CCCL_DIAG_PUSH
 _CCCL_DIAG_SUPPRESS_GCC("-Wunused-parameter")
-_CCCL_DIAG_SUPPRESS_MSVC(2220) // unreferenced formal parameter
+_CCCL_DIAG_SUPPRESS_MSVC(4100) // unreferenced formal parameter
 #  include <cuda_fp4.h>
 _CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP4()