Skip to content

Commit 1c5cdb8

Browse files
authored
Add tfloat32 datatype (apache#31)
* Add the tfloat32 datatype. * Fix: change the tfloat32 type code to 130. * Minor fixes.
1 parent 882a774 commit 1c5cdb8

7 files changed

Lines changed: 61 additions & 10 deletions

File tree

include/tvm/runtime/data_type.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ class DataType {
7272
kFloat6_e2m3fn = kDLFloat6_e2m3fn,
7373
kFloat6_e3m2fn = kDLFloat6_e3m2fn,
7474
kFloat4_e2m1fn = kDLFloat4_e2m1fn,
75-
kCustomBegin = 129
75+
kCustomBegin = 129,
76+
kTensorFloat32 = 130
7677
};
7778
/*! \brief default constructor */
7879
DataType() { data_ = DataType::Void(); }
@@ -109,6 +110,9 @@ class DataType {
109110
if (code == kFloat4_e2m1fn) {
110111
ICHECK_EQ(bits, 4);
111112
}
113+
if (code == kTensorFloat32) {
114+
ICHECK_EQ(bits, 32);
115+
}
112116
}
113117
/*! \return The type code. */
114118
int code() const { return static_cast<int>(data_.code); }
@@ -146,6 +150,8 @@ class DataType {
146150
bool is_float() const { return code() == DataType::kFloat; }
147151
/*! \return whether type is a bfloat type. */
148152
bool is_bfloat() const { return code() == DataType::kBFloat; }
153+
/*! \return whether type is a tfloat type. */
154+
bool is_tfloat() const { return code() == DataType::kTensorFloat32; }
149155
/*! \return whether type is any 8-bit custom Float8 variant. */
150156
bool is_float8() const {
151157
return bits() == 8 &&
@@ -185,6 +191,8 @@ class DataType {
185191
bool is_float6_e3m2fn() const { return bits() == 6 && code() == DataType::kFloat6_e3m2fn; }
186192
/*! \return whether type is Float4E2M1FN. */
187193
bool is_float4_e2m1fn() const { return bits() == 4 && code() == DataType::kFloat4_e2m1fn; }
194+
/*! \return whether type is a tfloat32 type. */
195+
bool is_tfloat32() const { return bits() == 32 && code() == DataType::kTensorFloat32; }
188196
/*! \return whether type is a float16 type. */
189197
bool is_float16() const { return is_float() && bits() == 16; }
190198
/*! \return whether type is a bfloat16 type. */
@@ -377,6 +385,14 @@ class DataType {
377385
* \return The constructed data type.
378386
*/
379387
static DataType Float4E2M1FN(int lanes = 1) { return DataType(kFloat4_e2m1fn, 4, lanes); }
388+
389+
/*!
390+
* \brief Construct a tensorfloat32 datatype.
391+
* \param lanes The number of lanes
392+
* \return The constructed data type.
393+
*/
394+
static DataType TensorFloat32(int lanes = 1) { return DataType(kTensorFloat32, 32, lanes); }
395+
380396
/*!
381397
* \brief Construct a bool type.
382398
* \param lanes The number of lanes.

include/tvm/script/ir_builder/tir/ir.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,8 @@ TVM_TIR_IR_BUILDER_DEF_DTYPE_CAST_LANES_FIXED_SIZE(Float6E3M2FN, DataType::Float
529529

530530
TVM_TIR_IR_BUILDER_DEF_DTYPE_CAST_LANES_FIXED_SIZE(Float4E2M1FN, DataType::Float4E2M1FN);
531531

532+
TVM_TIR_IR_BUILDER_DEF_DTYPE_CAST_LANES_FIXED_SIZE(TensorFloat32, DataType::TensorFloat32);
533+
532534
TVM_TIR_IR_BUILDER_DEF_DTYPE_CAST(Boolean, DataType::Bool());
533535
TVM_TIR_IR_BUILDER_DEF_DTYPE_CAST(Void, DataType::Void());
534536

python/tvm/script/ir_builder/tir/ir.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,13 @@ class bfloat16x8: ...
15841584
class bfloat16x16: ...
15851585
class bfloat16x32: ...
15861586
class bfloat16x64: ...
1587+
class tfloat32: ...
1588+
class tfloat32x2: ...
1589+
class tfloat32x4: ...
1590+
class tfloat32x8: ...
1591+
class tfloat32x16: ...
1592+
class tfloat32x32: ...
1593+
class tfloat32x64: ...
15871594
else:
15881595
# pylint: disable=invalid-name
15891596
int8 = func_gen(("Int8"))
@@ -1756,6 +1763,14 @@ class bfloat16x64: ...
17561763
bfloat16x16 = func_gen(("BFloat16x16"))
17571764
bfloat16x32 = func_gen(("BFloat16x32"))
17581765
bfloat16x64 = func_gen(("BFloat16x64"))
1766+
1767+
tfloat32 = func_gen(("TensorFloat32"))
1768+
tfloat32x2 = func_gen(("TensorFloat32x2"))
1769+
tfloat32x4 = func_gen(("TensorFloat32x4"))
1770+
tfloat32x8 = func_gen(("TensorFloat32x8"))
1771+
tfloat32x16 = func_gen(("TensorFloat32x16"))
1772+
tfloat32x32 = func_gen(("TensorFloat32x32"))
1773+
tfloat32x64 = func_gen(("TensorFloat32x64"))
17591774
# pylint: enable=invalid-name
17601775

17611776

@@ -2337,6 +2352,13 @@ def wrapped(*args, **kwargs):
23372352
"bfloat16x16",
23382353
"bfloat16x32",
23392354
"bfloat16x64",
2355+
"tfloat32",
2356+
"tfloat32x2",
2357+
"tfloat32x4",
2358+
"tfloat32x8",
2359+
"tfloat32x16",
2360+
"tfloat32x32",
2361+
"tfloat32x64",
23402362
"buffer",
23412363
"buffer_decl",
23422364
"prim_func",

src/script/ir_builder/tir/ir.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,13 @@ TVM_FFI_STATIC_INIT_BLOCK() {
894894
.TVM_FFI_REFL_DEF_GLOBAL_LANES("script.ir_builder.tir.Float4E2M1FN", Float4E2M1FN);
895895
}
896896

897+
TVM_FFI_STATIC_INIT_BLOCK() {
898+
namespace refl = tvm::ffi::reflection;
899+
refl::GlobalDef()
900+
.def("script.ir_builder.tir.TensorFloat32", TensorFloat32)
901+
.TVM_FFI_REFL_DEF_GLOBAL_LANES("script.ir_builder.tir.TensorFloat32", TensorFloat32);
902+
}
903+
897904
TVM_FFI_STATIC_INIT_BLOCK() {
898905
namespace refl = tvm::ffi::reflection;
899906
refl::GlobalDef()

src/target/datatype/registry.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ TVM_FFI_STATIC_INIT_BLOCK() {
4747
.def_packed("runtime._datatype_get_type_registered", [](ffi::PackedArgs args, ffi::Any* ret) {
4848
*ret = Registry::Global()->GetTypeRegistered(args[0].cast<int>());
4949
});
50+
// Register tfloat32 as a custom datatype with type code 130
51+
Registry::Global()->Register("tfloat32", 130);
5052
}
5153

5254
Registry* Registry::Global() {

src/target/source/intrin_rule_cuda.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ struct CUDAMath {
5252
default:
5353
return "";
5454
}
55+
} else if (t.is_tfloat32()) {
56+
if (name == "fabs") {
57+
return "abs";
58+
}
5559
} else if (t.is_bfloat16()) {
5660
if (name == "fabs") {
5761
return "__habs";

src/tir/op/op.cc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ PrimExpr max_value(const DataType& dtype, Span span) {
301301
} else if (dtype.bits() == 16) {
302302
return FloatImm(dtype, 65504.0, span);
303303
}
304+
} else if (dtype.is_tfloat32()) {
305+
return FloatImm(dtype, std::numeric_limits<float>::max(), span);
304306
} else if (dtype.is_bfloat16()) {
305307
return FloatImm(dtype, std::numeric_limits<float>::max(), span);
306308
} else if (dtype.is_float8()) {
@@ -336,14 +338,7 @@ PrimExpr max_value(const DataType& dtype, Span span) {
336338
PrimExpr min_value(const DataType& dtype, Span span) {
337339
using namespace tir;
338340
ICHECK_EQ(dtype.lanes(), 1);
339-
if (datatype::Registry::Global()->GetTypeRegistered(dtype.code())) {
340-
// TODO(tkonolige): need to convert all registered min functions to use the span.
341-
auto f = datatype::GetMinFunc(dtype.code());
342-
ICHECK(f) << "No minimum function registered for custom dtype " << (unsigned int)dtype.code();
343-
// TODO(@hypercubestart) Document this change (and others associated with the overflowing
344-
// floatimm min bug)
345-
return (*f)(dtype.bits()).cast<PrimExpr>();
346-
} else if (dtype.is_int()) {
341+
if (dtype.is_int()) {
347342
if (dtype.bits() == 64) {
348343
return IntImm(dtype, std::numeric_limits<int64_t>::lowest(), span);
349344
} else if (dtype.bits() < 64) {
@@ -361,6 +356,9 @@ PrimExpr min_value(const DataType& dtype, Span span) {
361356
} else if (dtype.bits() == 16) {
362357
return FloatImm(dtype, -65504.0, span);
363358
}
359+
}
360+
else if (dtype.is_tfloat32()) {
361+
return FloatImm(dtype, std::numeric_limits<float>::lowest(), span);
364362
} else if (dtype.is_bfloat16()) {
365363
return FloatImm(dtype, std::numeric_limits<float>::lowest(), span);
366364
} else if (dtype.is_float8()) {
@@ -888,7 +886,7 @@ PrimExpr abs(PrimExpr x, Span span) {
888886
return IntImm(x.dtype(), std::abs(px->value), px->span);
889887
}
890888
return tir::Select(x >= make_zero(x.dtype()), x, -x, span);
891-
} else if (x.dtype().is_float() || x.dtype().is_bfloat()) {
889+
} else if (x.dtype().is_float() || x.dtype().is_bfloat() || x.dtype().is_tfloat()) {
892890
using tir::FloatImmNode;
893891
const FloatImmNode* fx = x.as<FloatImmNode>();
894892
if (fx) {

0 commit comments

Comments (0)