From 0eeb7d0eb301b71ce63792eb1fe011e8a4d0209b Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Wed, 1 Jul 2026 17:39:54 +0800 Subject: [PATCH 01/15] Align runtime API with generated wrappers --- README.md | 10 +- scripts/generate_public_headers.py | 281 +++++++++++++-------------- src/native/ascend/runtime_.h | 18 +- src/native/cambricon/runtime_.h | 18 +- src/native/cpu/runtime_.h | 91 +++++++-- src/native/cuda/hygon/runtime_.h | 22 ++- src/native/cuda/iluvatar/runtime_.h | 14 +- src/native/cuda/metax/runtime_.h | 16 +- src/native/cuda/moore/runtime_.h | 16 +- src/native/cuda/nvidia/runtime_.h | 14 +- src/native/cuda/runtime_.h | 10 +- src/runtime.h | 28 +-- tests/CMakeLists.txt | 20 +- tests/compile_install_consumer.cmake | 11 ++ tests/install_consumer_smoke.cc | 64 +++++- tests/test_cpu_runtime.cc | 18 +- tests/test_nvidia_runtime.cc | 6 +- tests/test_runtime_dispatch.cc | 65 ------- 18 files changed, 416 insertions(+), 306 deletions(-) delete mode 100644 tests/test_runtime_dispatch.cc diff --git a/README.md b/README.md index b93b7be..d8883e3 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ cmake --install build #include int main() { - infini::rt::SetDevice({infini::rt::Device::Type::kCpu, 0}); + infini::rt::SetDevice(0); constexpr std::size_t size = 1024; void* ptr = nullptr; @@ -79,12 +79,16 @@ int main() { } ``` -For NVIDIA: +When a GPU backend is enabled, the top-level runtime API targets that backend, +matching CUDA Runtime API behavior: ```cpp -infini::rt::SetDevice({infini::rt::Device::Type::kNvidia, 0}); +infini::rt::SetDevice(0); ``` +Use `infini::rt::Runtime` when CPU runtime +calls are needed explicitly in a build that also enables an accelerator backend. + ## Using Installed InfiniRT From Another Project Downstream projects should consume the installed headers and libraries instead of depending on the InfiniRT source tree. diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 93ceee3..74dde79 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -53,21 +53,23 @@ "cpu": "Device::Type::kCpu", "nvidia": "Device::Type::kNvidia", "iluvatar": "Device::Type::kIluvatar", + "hygon": "Device::Type::kHygon", "metax": "Device::Type::kMetax", "moore": "Device::Type::kMoore", "cambricon": "Device::Type::kCambricon", "ascend": "Device::Type::kAscend", } -_RUNTIME_HEADERS = { - "cpu": "native/cpu/runtime_.h", - "nvidia": "native/cuda/nvidia/runtime_.h", - "iluvatar": "native/cuda/iluvatar/runtime_.h", - "metax": "native/cuda/metax/runtime_.h", - "moore": "native/cuda/moore/runtime_.h", - "cambricon": "native/cambricon/runtime_.h", - "ascend": "native/ascend/runtime_.h", -} +_DEFAULT_DEVICE_PRIORITY = ( + "nvidia", + "iluvatar", + "hygon", + "metax", + "moore", + "cambricon", + "ascend", + "cpu", +) def _guard(path): @@ -155,12 +157,16 @@ def _write_detail_headers(include_root, source_root, devices): def _write_generated_header(include_root, devices): + default_device = _default_device(devices) + default_device_type = _DEVICE_TYPES[default_device] includes = [ + "#include ", f"#include {_detail_include('data_type.h')}", f"#include {_detail_include('device.h')}", f"#include {_detail_include('hash.h')}", f"#include {_detail_include('runtime.h')}", f"#include {_detail_include('tensor_view.h')}", + f"#include ", ] for device in devices: @@ -174,6 +180,51 @@ def _write_generated_header(include_root, devices): {chr(10).join(includes)} +namespace infini::rt {{ +namespace generated_detail {{ + +using DefaultRuntime = Runtime<{default_device_type}>; + +}} // namespace generated_detail + +using Error = typename generated_detail::DefaultRuntime::Error; + +using Stream = typename generated_detail::DefaultRuntime::Stream; + +inline constexpr Error kSuccess = generated_detail::DefaultRuntime::kSuccess; + +enum class MemcpyKind {{ + kMemcpyHostToHost = + static_cast(generated_detail::DefaultRuntime::kMemcpyHostToHost), + kMemcpyHostToDevice = + static_cast(generated_detail::DefaultRuntime::kMemcpyHostToDevice), + kMemcpyDeviceToHost = + static_cast(generated_detail::DefaultRuntime::kMemcpyDeviceToHost), + kMemcpyDeviceToDevice = + static_cast(generated_detail::DefaultRuntime::kMemcpyDeviceToDevice), +}}; + +Error SetDevice(int device); + +Error GetDevice(int* device); + +Error GetDeviceCount(int* count); + +Error DeviceSynchronize(); + +Error Malloc(void** ptr, std::size_t size); + +Error Free(void* ptr); + +Error Memset(void* ptr, int value, std::size_t count); + +Error Memcpy(void* dst, const void* src, std::size_t count, MemcpyKind kind); + +Error MemcpyAsync(void* dst, const void* src, std::size_t count, + MemcpyKind kind, Stream stream); + +}} // namespace infini::rt + #endif """ ) @@ -198,58 +249,65 @@ def params_decl(self): return ", ".join(f"{param.type} {param.name}" for param in self.params) -def _parse_param(param): - param_type, param_name = param.strip().rsplit(" ", 1) - - return _Param(param_type, param_name) - - -def _parse_runtime_functions(runtime_header): - text = pathlib.Path(runtime_header).read_text() - return tuple( - _Function( - return_type, - name, - tuple(_parse_param(param) for param in params.split(", ") if param), - ) - for return_type, name, params in re.findall( - r"^(void) ([A-Z]\w*)\(([^()]*)\);$", text, re.MULTILINE - ) - ) - - -def _abort_statement(message): - return f""" assert(false && "{message}"); - std::abort();""" - - -def _dispatch_cases(devices, statements): - return "\n".join( - f""" case {_DEVICE_TYPES[device]}: {{ -{statements.replace("__DEVICE_TYPE__", _DEVICE_TYPES[device])} - return; - }}""" - for device in devices - ) +_PUBLIC_RUNTIME_FUNCTIONS = ( + _Function("Error", "SetDevice", (_Param("int", "device"),)), + _Function("Error", "GetDevice", (_Param("int*", "device"),)), + _Function("Error", "GetDeviceCount", (_Param("int*", "count"),)), + _Function("Error", "DeviceSynchronize", ()), + _Function( + "Error", + "Malloc", + (_Param("void**", "ptr"), _Param("std::size_t", "size")), + ), + _Function("Error", "Free", (_Param("void*", "ptr"),)), + _Function( + "Error", + "Memset", + ( + _Param("void*", "ptr"), + _Param("int", "value"), + _Param("std::size_t", "count"), + ), + ), + _Function( + "Error", + "Memcpy", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + ), + ), + _Function( + "Error", + "MemcpyAsync", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + _Param("Stream", "stream"), + ), + ), +) -def _selector(function): - for param in function.params: - if param.type == "Device": - return f"{param.name}.type()" - if param.type == "Device::Type": - return param.name +def _default_device(devices): + for device in _DEFAULT_DEVICE_PRIORITY: + if device in devices: + return device - return "current_device.type()" + raise ValueError("at least one device is required") def _runtime_arg(param): - if param.type == "Device": - return f"{param.name}.index()" - if param.type == "Device::Type": - return None if param.type == "MemcpyKind": - return f"RuntimeMemcpyKind<__DEVICE_TYPE__>({param.name})" + return f"RuntimeMemcpyKind({param.name})" + if param.type == "Stream": + return ( + f"reinterpret_cast({param.name})" + ) return param.name @@ -260,134 +318,67 @@ def _runtime_args(function): return ", ".join(arg for arg in args if arg is not None) -def _preconditions(function): - required_pointer_names = { - "GetDevice": {"device"}, - "GetDeviceCount": {"count"}, - } - checks = [] - for param in function.params: - if param.type.endswith("**") or param.name in required_pointer_names.get( - function.name, set() - ): - checks.append(f" assert({param.name} != nullptr);") - - return "\n".join(checks) - - -def _post_dispatch(function): - if function.name == "SetDevice": - return "\n current_device = device;" - - return "" - - def _runtime_call(function): args = _runtime_args(function) if args: - return f"Runtime<__DEVICE_TYPE__>::{function.name}({args})" + return f"DefaultRuntime::{function.name}({args})" - return f"Runtime<__DEVICE_TYPE__>::{function.name}()" + return f"DefaultRuntime::{function.name}()" -def _write_get_device(function, devices): - device_param = function.params[0].name - cases = _dispatch_cases( - devices, - f""" int index = current_device.index(); - CheckCall([&] {{ return Runtime<__DEVICE_TYPE__>::GetDevice(&index); }}); - current_device = Device{{current_device.type(), index}}; - *{device_param} = current_device;""", - ) - - return f"""void GetDevice(Device* {device_param}) {{ - assert({device_param} != nullptr); - - switch (current_device.type()) {{ -{cases} - default: -{_abort_statement("runtime device is not enabled")} - }} -}} -""" - - -def _write_dispatch_function(function, devices): - if function.name == "GetDevice": - return _write_get_device(function, devices) - - cases = _dispatch_cases( - devices, - f""" CheckCall([&] {{ return {_runtime_call(function)}; }});{_post_dispatch(function)}""", - ) - preconditions = _preconditions(function) - if preconditions: - preconditions = f"{preconditions}\n\n" - +def _write_dispatch_function(function): return f"""{function.signature()} {{ -{preconditions} switch ({_selector(function)}) {{ -{cases} - default: -{_abort_statement("runtime device is not enabled")} - }} + return CheckCall([&] {{ return {_runtime_call(function)}; }}); }} """ -def _write_runtime_dispatch(source_path, runtime_header, devices): - first_device_type = _DEVICE_TYPES[devices[0]] - includes = ['#include "runtime.h"'] - includes.extend(f'#include "{_RUNTIME_HEADERS[device]}"' for device in devices) - functions = _parse_runtime_functions(runtime_header) +def _write_runtime_dispatch(source_path): + functions = _PUBLIC_RUNTIME_FUNCTIONS dispatch_functions = "\n".join( - _write_dispatch_function(function, devices) for function in functions + _write_dispatch_function(function) for function in functions ) source_path.parent.mkdir(parents=True, exist_ok=True) source_path.write_text( f"""#include -#include +#include #include #include -{chr(10).join(includes)} +#include namespace infini::rt {{ namespace {{ -thread_local Device current_device{{{first_device_type}, 0}}; +using DefaultRuntime = generated_detail::DefaultRuntime; template -void CheckCall(Func&& func) {{ +Error CheckCall(Func&& func) {{ using ReturnType = decltype(std::forward(func)()); if constexpr (std::is_void_v) {{ std::forward(func)(); + return kSuccess; }} else {{ - ReturnType status = std::forward(func)(); - if (status != ReturnType{{}}) {{ - assert(false && "runtime call failed"); - std::abort(); - }} + return static_cast(std::forward(func)()); }} }} -template auto RuntimeMemcpyKind(MemcpyKind kind) {{ switch (kind) {{ - case MemcpyKind::kHostToHost: - return Runtime::MemcpyHostToHost; - case MemcpyKind::kHostToDevice: - return Runtime::MemcpyHostToDevice; - case MemcpyKind::kDeviceToHost: - return Runtime::MemcpyDeviceToHost; - case MemcpyKind::kDeviceToDevice: - return Runtime::MemcpyDeviceToDevice; + case MemcpyKind::kMemcpyHostToHost: + return DefaultRuntime::kMemcpyHostToHost; + case MemcpyKind::kMemcpyHostToDevice: + return DefaultRuntime::kMemcpyHostToDevice; + case MemcpyKind::kMemcpyDeviceToHost: + return DefaultRuntime::kMemcpyDeviceToHost; + case MemcpyKind::kMemcpyDeviceToDevice: + return DefaultRuntime::kMemcpyDeviceToDevice; }} assert(false && "unsupported memcpy kind"); - std::abort(); - return Runtime::MemcpyHostToHost; + return DefaultRuntime::kMemcpyHostToHost; }} }} // namespace @@ -422,9 +413,7 @@ def main(): _write_wrapper(include_root, wrapper_device, header_name, target) _write_generated_header(include_root, devices) - _write_runtime_dispatch( - pathlib.Path(args.source_output), args.runtime_header, devices - ) + _write_runtime_dispatch(pathlib.Path(args.source_output)) if __name__ == "__main__": diff --git a/src/native/ascend/runtime_.h b/src/native/ascend/runtime_.h index 8b33e54..191beef 100644 --- a/src/native/ascend/runtime_.h +++ b/src/native/ascend/runtime_.h @@ -16,10 +16,14 @@ namespace infini::rt { template <> struct Runtime : DeviceRuntime> { + using Error = aclError; + using Stream = aclrtStream; static constexpr Device::Type kDeviceType = Device::Type::kAscend; + static constexpr Error kSuccess = ACL_SUCCESS; + static constexpr auto SetDevice = aclrtSetDevice; static constexpr auto GetDevice = aclrtGetDevice; @@ -45,13 +49,19 @@ struct Runtime return aclrtMemcpy(dst, count, src, count, kind); }; - static constexpr auto MemcpyHostToHost = ACL_MEMCPY_HOST_TO_HOST; + static constexpr auto MemcpyAsync = [](void* dst, const void* src, + size_t count, aclrtMemcpyKind kind, + Stream stream) { + return aclrtMemcpyAsync(dst, count, src, count, kind, stream); + }; + + static constexpr auto kMemcpyHostToHost = ACL_MEMCPY_HOST_TO_HOST; - static constexpr auto MemcpyHostToDevice = ACL_MEMCPY_HOST_TO_DEVICE; + static constexpr auto kMemcpyHostToDevice = ACL_MEMCPY_HOST_TO_DEVICE; - static constexpr auto MemcpyDeviceToHost = ACL_MEMCPY_DEVICE_TO_HOST; + static constexpr auto kMemcpyDeviceToHost = ACL_MEMCPY_DEVICE_TO_HOST; - static constexpr auto MemcpyDeviceToDevice = ACL_MEMCPY_DEVICE_TO_DEVICE; + static constexpr auto kMemcpyDeviceToDevice = ACL_MEMCPY_DEVICE_TO_DEVICE; static constexpr auto Memset = [](void* ptr, int value, size_t count) { return aclrtMemset(ptr, count, value, count); diff --git a/src/native/cambricon/runtime_.h b/src/native/cambricon/runtime_.h index 4db4920..d892df8 100644 --- a/src/native/cambricon/runtime_.h +++ b/src/native/cambricon/runtime_.h @@ -14,10 +14,14 @@ namespace infini::rt { template <> struct Runtime : DeviceRuntime> { + using Error = cnrtRet_t; + using Stream = cnrtQueue_t; static constexpr Device::Type kDeviceType = Device::Type::kCambricon; + static constexpr Error kSuccess = CNRT_RET_SUCCESS; + static constexpr auto SetDevice = cnrtSetDevice; static constexpr auto GetDevice = cnrtGetDevice; @@ -41,13 +45,19 @@ struct Runtime return cnrtMemcpy(dst, const_cast(src), size, kind); }; - static constexpr auto MemcpyHostToHost = cnrtMemcpyHostToHost; + static constexpr auto MemcpyAsync = [](void* dst, const void* src, + std::size_t size, auto kind, + Stream stream) { + return cnrtMemcpyAsync(dst, const_cast(src), size, kind, stream); + }; + + static constexpr auto kMemcpyHostToHost = cnrtMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cnrtMemcpyHostToDev; + static constexpr auto kMemcpyHostToDevice = cnrtMemcpyHostToDev; - static constexpr auto MemcpyDeviceToHost = cnrtMemcpyDevToHost; + static constexpr auto kMemcpyDeviceToHost = cnrtMemcpyDevToHost; - static constexpr auto MemcpyDeviceToDevice = cnrtMemcpyDevToDev; + static constexpr auto kMemcpyDeviceToDevice = cnrtMemcpyDevToDev; static constexpr auto Memset = cnrtMemset; }; diff --git a/src/native/cpu/runtime_.h b/src/native/cpu/runtime_.h index bf5a81c..c2bcf4f 100644 --- a/src/native/cpu/runtime_.h +++ b/src/native/cpu/runtime_.h @@ -1,7 +1,6 @@ #ifndef INFINI_RT_CPU_RUNTIME__H_ #define INFINI_RT_CPU_RUNTIME__H_ -#include #include #include @@ -13,44 +12,98 @@ template <> struct Runtime : RuntimeBase> { static constexpr Device::Type kDeviceType = Device::Type::kCpu; - static void SetDevice(int index) { - if (index != 0) { - assert(false && "CPU device index must be 0"); - std::abort(); + using Error = int; + + using Stream = void*; + + static constexpr Error kSuccess = 0; + + static constexpr Error kErrorInvalidValue = 1; + + static constexpr Error kErrorMemoryAllocation = 2; + + static Error SetDevice(int device) { + if (device != 0) { + return kErrorInvalidValue; } + + return kSuccess; } - static void GetDevice(int* index) { - assert(index != nullptr); - *index = 0; + static Error GetDevice(int* device) { + if (device == nullptr) { + return kErrorInvalidValue; + } + + *device = 0; + + return kSuccess; } - static void GetDeviceCount(int* count) { - assert(count != nullptr); + static Error GetDeviceCount(int* count) { + if (count == nullptr) { + return kErrorInvalidValue; + } + *count = 1; + + return kSuccess; + } + + static Error DeviceSynchronize() { return kSuccess; } + + static Error Malloc(void** ptr, std::size_t size) { + if (ptr == nullptr) { + return kErrorInvalidValue; + } + + *ptr = std::malloc(size); + + if (size != 0 && *ptr == nullptr) { + return kErrorMemoryAllocation; + } + + return kSuccess; } - static void DeviceSynchronize() {} + static Error Free(void* ptr) { + std::free(ptr); - static void Malloc(void** ptr, std::size_t size) { *ptr = std::malloc(size); } + return kSuccess; + } - static void Free(void* ptr) { std::free(ptr); } + static Error Memcpy(void* dst, const void* src, std::size_t size, int) { + if ((dst == nullptr || src == nullptr) && size != 0) { + return kErrorInvalidValue; + } - static void Memcpy(void* dst, const void* src, std::size_t size, int) { std::memcpy(dst, src, size); + + return kSuccess; } - static void Memset(void* ptr, int value, std::size_t count) { + static Error Memset(void* ptr, int value, std::size_t count) { + if (ptr == nullptr && count != 0) { + return kErrorInvalidValue; + } + std::memset(ptr, value, count); + + return kSuccess; + } + + static Error MemcpyAsync(void* dst, const void* src, std::size_t size, + int kind, Stream) { + return kErrorInvalidValue; } - static constexpr int MemcpyHostToHost = 0; + static constexpr int kMemcpyHostToHost = 0; - static constexpr int MemcpyHostToDevice = 0; + static constexpr int kMemcpyHostToDevice = 1; - static constexpr int MemcpyDeviceToHost = 1; + static constexpr int kMemcpyDeviceToHost = 2; - static constexpr int MemcpyDeviceToDevice = 0; + static constexpr int kMemcpyDeviceToDevice = 3; }; static_assert(Runtime::Validate()); diff --git a/src/native/cuda/hygon/runtime_.h b/src/native/cuda/hygon/runtime_.h index a9f41ec..520e8f4 100644 --- a/src/native/cuda/hygon/runtime_.h +++ b/src/native/cuda/hygon/runtime_.h @@ -15,23 +15,41 @@ namespace infini::rt { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kHygon; + static constexpr Error kSuccess = cudaSuccess; + + static constexpr auto SetDevice = cudaSetDevice; + + static constexpr auto GetDevice = cudaGetDevice; + + static constexpr auto GetDeviceCount = cudaGetDeviceCount; + + static constexpr auto DeviceSynchronize = cudaDeviceSynchronize; + static constexpr auto Malloc = [](auto&&... args) { return cudaMalloc(std::forward(args)...); }; static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = [](auto&&... args) { return cudaFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; + + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; + + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; diff --git a/src/native/cuda/iluvatar/runtime_.h b/src/native/cuda/iluvatar/runtime_.h index 8a1b649..87558bf 100644 --- a/src/native/cuda/iluvatar/runtime_.h +++ b/src/native/cuda/iluvatar/runtime_.h @@ -15,10 +15,14 @@ namespace infini::rt { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kIluvatar; + static constexpr Error kSuccess = cudaSuccess; + static constexpr auto SetDevice = cudaSetDevice; static constexpr auto GetDevice = cudaGetDevice; @@ -33,15 +37,17 @@ struct Runtime static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = cudaFree; - static constexpr auto MemcpyHostToHost = cudaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; diff --git a/src/native/cuda/metax/runtime_.h b/src/native/cuda/metax/runtime_.h index 5785a51..14ed18b 100644 --- a/src/native/cuda/metax/runtime_.h +++ b/src/native/cuda/metax/runtime_.h @@ -13,10 +13,14 @@ namespace infini::rt { template <> struct Runtime : CudaRuntime> { + using Error = mcError_t; + using Stream = mcStream_t; static constexpr Device::Type kDeviceType = Device::Type::kMetax; + static constexpr Error kSuccess = mcSuccess; + static constexpr auto SetDevice = mcSetDevice; static constexpr auto GetDevice = mcGetDevice; @@ -33,17 +37,21 @@ struct Runtime return mcMemcpy(std::forward(args)...); }; + static constexpr auto MemcpyAsync = [](auto&&... args) { + return mcMemcpyAsync(std::forward(args)...); + }; + static constexpr auto Free = [](auto&&... args) { return mcFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToHost = mcMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = mcMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = mcMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = mcMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = mcMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = mcMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = mcMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = mcMemcpyDeviceToDevice; static constexpr auto Memset = mcMemset; }; diff --git a/src/native/cuda/moore/runtime_.h b/src/native/cuda/moore/runtime_.h index 8ced2ed..fd0a215 100644 --- a/src/native/cuda/moore/runtime_.h +++ b/src/native/cuda/moore/runtime_.h @@ -13,10 +13,14 @@ namespace infini::rt { template <> struct Runtime : CudaRuntime> { + using Error = musaError_t; + using Stream = musaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kMoore; + static constexpr Error kSuccess = musaSuccess; + static constexpr auto SetDevice = musaSetDevice; static constexpr auto GetDevice = [](auto&&... args) { @@ -39,17 +43,21 @@ struct Runtime return musaMemcpy(std::forward(args)...); }; + static constexpr auto MemcpyAsync = [](auto&&... args) { + return musaMemcpyAsync(std::forward(args)...); + }; + static constexpr auto Free = [](auto&&... args) { return musaFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToHost = musaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = musaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = musaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = musaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = musaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = musaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = musaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = musaMemcpyDeviceToDevice; static constexpr auto Memset = musaMemset; }; diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index f6a9f2d..c910198 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -15,10 +15,14 @@ namespace infini::rt { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kNvidia; + static constexpr Error kSuccess = cudaSuccess; + static constexpr auto SetDevice = cudaSetDevice; static constexpr auto GetDevice = cudaGetDevice; @@ -33,15 +37,17 @@ struct Runtime static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = cudaFree; - static constexpr auto MemcpyHostToHost = cudaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; diff --git a/src/native/cuda/runtime_.h b/src/native/cuda/runtime_.h index 8765a05..7d1899d 100644 --- a/src/native/cuda/runtime_.h +++ b/src/native/cuda/runtime_.h @@ -17,9 +17,15 @@ struct CudaRuntime : DeviceRuntime { DeviceRuntime::Validate(); static_assert( std::is_invocable_v, + size_t, decltype(Derived::kMemcpyHostToDevice)>, "`Runtime::Memcpy` must be callable with " - "`(void*, const void*, size_t, MemcpyHostToDevice)`."); + "`(void*, const void*, size_t, kMemcpyHostToDevice)`."); + static_assert( + std::is_invocable_v, + "`Runtime::MemcpyAsync` must be callable with " + "`(void*, const void*, size_t, kMemcpyHostToDevice, Stream)`."); return true; } }; diff --git a/src/runtime.h b/src/runtime.h index ebc2698..43ba9da 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -30,6 +30,11 @@ struct RuntimeBase { std::is_same_v, Device::Type>, "`Runtime` must define `static constexpr Device::Type kDeviceType`."); + static_assert(sizeof(typename Derived::Error) > 0, + "`Runtime` must define an `Error` type alias."); + static_assert(std::is_same_v, + typename Derived::Error>, + "`Runtime` must define `static constexpr Error kSuccess`."); return true; } }; @@ -51,29 +56,6 @@ struct DeviceRuntime : RuntimeBase { } }; -enum class MemcpyKind { - kHostToHost, - kHostToDevice, - kDeviceToHost, - kDeviceToDevice, -}; - -void SetDevice(Device device); - -void GetDevice(Device* device); - -void GetDeviceCount(int* count, Device::Type type); - -void DeviceSynchronize(); - -void Malloc(void** ptr, std::size_t size); - -void Free(void* ptr); - -void Memset(void* ptr, int value, std::size_t count); - -void Memcpy(void* dst, const void* src, std::size_t count, MemcpyKind kind); - } // namespace infini::rt #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ab54530..2bb0812 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -11,10 +11,6 @@ if(WITH_CPU) add_infini_rt_test(test_cpu_runtime test_cpu_runtime.cc) endif() -if(WITH_CPU OR WITH_NVIDIA) - add_infini_rt_test(test_runtime_dispatch test_runtime_dispatch.cc) -endif() - if(WITH_NVIDIA) add_infini_rt_test(test_nvidia_runtime test_nvidia_runtime.cc) endif() @@ -24,12 +20,20 @@ set(INFINI_RT_TEST_INSTALL_PREFIX set(INFINI_RT_TEST_CONSUMER_BINARY "${CMAKE_CURRENT_BINARY_DIR}/install_consumer_smoke") set(INFINI_RT_TEST_EXTRA_LIBRARY_DIRS "") +set(INFINI_RT_TEST_EXTRA_INCLUDE_DIRS "") set(INFINI_RT_TEST_CONSUMER_BACKEND NONE) -if(WITH_CPU) - set(INFINI_RT_TEST_CONSUMER_BACKEND CPU) -elseif(WITH_NVIDIA) +if(WITH_NVIDIA) set(INFINI_RT_TEST_CONSUMER_BACKEND NVIDIA) + if(CUDAToolkit_INCLUDE_DIRS) + list(APPEND INFINI_RT_TEST_EXTRA_INCLUDE_DIRS + ${CUDAToolkit_INCLUDE_DIRS}) + elseif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) + list(APPEND INFINI_RT_TEST_EXTRA_INCLUDE_DIRS + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + endif() +elseif(WITH_CPU) + set(INFINI_RT_TEST_CONSUMER_BACKEND CPU) endif() if(WITH_ASCEND) @@ -44,6 +48,7 @@ if(WITH_ASCEND) endif() list(JOIN INFINI_RT_TEST_EXTRA_LIBRARY_DIRS ":" INFINI_RT_TEST_EXTRA_LIBRARY_PATHS) +list(JOIN INFINI_RT_TEST_EXTRA_INCLUDE_DIRS ":" INFINI_RT_TEST_EXTRA_INCLUDE_PATHS) add_test( NAME test_install @@ -59,6 +64,7 @@ add_test( "-DINFINI_RT_CONSUMER_BINARY=${INFINI_RT_TEST_CONSUMER_BINARY}" "-DINFINI_RT_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DINFINI_RT_EXTRA_LIBRARY_PATHS=${INFINI_RT_TEST_EXTRA_LIBRARY_PATHS}" + "-DINFINI_RT_EXTRA_INCLUDE_PATHS=${INFINI_RT_TEST_EXTRA_INCLUDE_PATHS}" "-DINFINI_RT_CONSUMER_BACKEND=${INFINI_RT_TEST_CONSUMER_BACKEND}" -P "${CMAKE_CURRENT_SOURCE_DIR}/compile_install_consumer.cmake") set_tests_properties(test_install_consumer PROPERTIES diff --git a/tests/compile_install_consumer.cmake b/tests/compile_install_consumer.cmake index 169311e..1bb96d1 100644 --- a/tests/compile_install_consumer.cmake +++ b/tests/compile_install_consumer.cmake @@ -17,6 +17,17 @@ set(INFINI_RT_EXTRA_LINK_ARGS "") set(INFINI_RT_EXTRA_COMPILE_ARGS "") set(INFINI_RT_LD_LIBRARY_PATH "${INFINI_RT_LIBRARY_DIR}") +if(INFINI_RT_EXTRA_INCLUDE_PATHS) + string(REPLACE ":" ";" INFINI_RT_EXTRA_INCLUDE_DIRS + "${INFINI_RT_EXTRA_INCLUDE_PATHS}") + foreach(INFINI_RT_EXTRA_INCLUDE_DIR ${INFINI_RT_EXTRA_INCLUDE_DIRS}) + if(EXISTS "${INFINI_RT_EXTRA_INCLUDE_DIR}") + list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS + "-I${INFINI_RT_EXTRA_INCLUDE_DIR}") + endif() + endforeach() +endif() + if(INFINI_RT_CONSUMER_BACKEND AND NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS "-DINFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1") diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 97442b5..9414e71 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -20,19 +21,64 @@ int main() { #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) || \ defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) -#if defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) - const infini::rt::Device runtime_device{infini::rt::Device::Type::kNvidia}; -#else - const infini::rt::Device runtime_device{infini::rt::Device::Type::kCpu}; -#endif - + std::array input{1, 2, 3, 4}; + std::array output{}; void* ptr = nullptr; - infini::rt::SetDevice(runtime_device); - infini::rt::Malloc(&ptr, sizeof(std::uint32_t)); + if (infini::rt::SetDevice(0) != infini::rt::kSuccess) { + return 1; + } + int current_device = -1; + if (infini::rt::GetDevice(¤t_device) != infini::rt::kSuccess) { + return 1; + } + if (current_device != 0) { + return 1; + } + int device_count = 0; + if (infini::rt::GetDeviceCount(&device_count) != infini::rt::kSuccess) { + return 1; + } + if (device_count <= 0) { + return 1; + } + if (infini::rt::Malloc(&ptr, input.size()) != infini::rt::kSuccess) { + return 1; + } if (ptr == nullptr) { return 1; } - infini::rt::Free(ptr); + if (infini::rt::Memcpy(ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice) != + infini::rt::kSuccess) { + return 1; + } +#if defined(INFINI_RT_CONSUMER_BACKEND_CPU) + if (infini::rt::MemcpyAsync(ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice, + nullptr) == infini::rt::kSuccess) { + return 1; + } +#else + if (infini::rt::MemcpyAsync(ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice, + nullptr) != infini::rt::kSuccess) { + return 1; + } +#endif + if (infini::rt::DeviceSynchronize() != infini::rt::kSuccess) { + return 1; + } + if (infini::rt::Memcpy(output.data(), ptr, output.size(), + infini::rt::MemcpyKind::kMemcpyDeviceToHost) != + infini::rt::kSuccess) { + return 1; + } + if (output != input) { + return 1; + } + if (infini::rt::Free(ptr) != infini::rt::kSuccess) { + return 1; + } #endif return 0; diff --git a/tests/test_cpu_runtime.cc b/tests/test_cpu_runtime.cc index 20edce9..d307f3c 100644 --- a/tests/test_cpu_runtime.cc +++ b/tests/test_cpu_runtime.cc @@ -32,15 +32,26 @@ void TestMemcpyRoundTrip(infini::rt::test::TestContext* context) { } CpuRuntime::Memcpy(ptr, input.data(), input.size(), - CpuRuntime::MemcpyHostToDevice); + CpuRuntime::kMemcpyHostToDevice); CpuRuntime::Memcpy(output.data(), ptr, output.size(), - CpuRuntime::MemcpyDeviceToHost); + CpuRuntime::kMemcpyDeviceToHost); CpuRuntime::Free(ptr); context->ExpectEqual(output, input, "CPU runtime should copy data through runtime memory."); } +void TestMemcpyAsyncUnsupported(infini::rt::test::TestContext* context) { + std::array input{1}; + std::array output{}; + + context->Expect(CpuRuntime::MemcpyAsync(output.data(), input.data(), + input.size(), + CpuRuntime::kMemcpyHostToHost, + nullptr) != CpuRuntime::kSuccess, + "CPU runtime should not report async memcpy success."); +} + void TestMemset(infini::rt::test::TestContext* context) { std::array output{}; void* ptr = nullptr; @@ -53,7 +64,7 @@ void TestMemset(infini::rt::test::TestContext* context) { CpuRuntime::Memset(ptr, 0x5A, output.size()); CpuRuntime::Memcpy(output.data(), ptr, output.size(), - CpuRuntime::MemcpyDeviceToHost); + CpuRuntime::kMemcpyDeviceToHost); CpuRuntime::Free(ptr); for (const auto value : output) { @@ -69,6 +80,7 @@ int main() { TestMallocAndFree(&context); TestMemcpyRoundTrip(&context); + TestMemcpyAsyncUnsupported(&context); TestMemset(&context); return context.ExitCode(); diff --git a/tests/test_nvidia_runtime.cc b/tests/test_nvidia_runtime.cc index 2d3dac2..df5a038 100644 --- a/tests/test_nvidia_runtime.cc +++ b/tests/test_nvidia_runtime.cc @@ -41,11 +41,11 @@ void TestMemcpyRoundTrip(infini::rt::test::TestContext* context) { ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(ptr, input.data(), input.size(), - NvidiaRuntime::MemcpyHostToDevice), + NvidiaRuntime::kMemcpyHostToDevice), "NVIDIA runtime should copy host data to device memory."); ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(output.data(), ptr, output.size(), - NvidiaRuntime::MemcpyDeviceToHost), + NvidiaRuntime::kMemcpyDeviceToHost), "NVIDIA runtime should copy device data to host memory."); ExpectCudaSuccess(context, NvidiaRuntime::Free(ptr), "NVIDIA runtime should free copy memory."); @@ -68,7 +68,7 @@ void TestMemset(infini::rt::test::TestContext* context) { "NVIDIA runtime should memset device memory."); ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(output.data(), ptr, output.size(), - NvidiaRuntime::MemcpyDeviceToHost), + NvidiaRuntime::kMemcpyDeviceToHost), "NVIDIA runtime should copy memset data to host memory."); ExpectCudaSuccess(context, NvidiaRuntime::Free(ptr), "NVIDIA runtime should free memset memory."); diff --git a/tests/test_runtime_dispatch.cc b/tests/test_runtime_dispatch.cc deleted file mode 100644 index 9be92a7..0000000 --- a/tests/test_runtime_dispatch.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include - -#include -#include -#include - -#include "test_helper.h" - -namespace { - -infini::rt::Device RuntimeTestDevice() { -#if defined(WITH_NVIDIA) - return infini::rt::Device{infini::rt::Device::Type::kNvidia}; -#else - return infini::rt::Device{infini::rt::Device::Type::kCpu}; -#endif -} - -} // namespace - -int main() { - infini::rt::test::TestContext context; - const infini::rt::Device device = RuntimeTestDevice(); - std::array input{1, 2, 3, 4}; - std::array output{}; - void* ptr = nullptr; - - infini::rt::SetDevice(device); - - infini::rt::Device current_device; - infini::rt::GetDevice(¤t_device); - context.ExpectEqual(current_device, device, - "Runtime dispatch should keep the current device."); - - int device_count = 0; - infini::rt::GetDeviceCount(&device_count, device.type()); - context.Expect(device_count > 0, - "Runtime dispatch should report at least one device."); - - infini::rt::Malloc(&ptr, input.size()); - context.Expect(ptr != nullptr, "Runtime dispatch should allocate memory."); - if (ptr == nullptr) { - return context.ExitCode(); - } - - infini::rt::Memcpy(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kHostToDevice); - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kDeviceToHost); - context.ExpectEqual(output, input, - "Runtime dispatch should copy data through memory."); - - infini::rt::Memset(ptr, 0x5A, output.size()); - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kDeviceToHost); - for (const auto value : output) { - context.ExpectEqual(value, static_cast(0x5A), - "Runtime dispatch should fill memory."); - } - - infini::rt::DeviceSynchronize(); - infini::rt::Free(ptr); - - return context.ExitCode(); -} From 310310100fc29f3ff853e0357dd06c4542f52404 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Wed, 1 Jul 2026 20:39:43 +0800 Subject: [PATCH 02/15] Add default runtime dispatch specialization --- README.md | 17 ++- scripts/generate_public_headers.py | 187 ++++++++++++++++++++++++----- src/runtime.h | 2 +- src/tensor_view.h | 22 +++- tests/CMakeLists.txt | 12 ++ tests/install_consumer_smoke.cc | 14 +++ tests/test_core.cc | 4 + tests/test_default_runtime.cc | 126 +++++++++++++++++++ 8 files changed, 347 insertions(+), 37 deletions(-) create mode 100644 tests/test_default_runtime.cc diff --git a/README.md b/README.md index d8883e3..33fbe90 100644 --- a/README.md +++ b/README.md @@ -79,11 +79,22 @@ int main() { } ``` -When a GPU backend is enabled, the top-level runtime API targets that backend, -matching CUDA Runtime API behavior: +The top-level runtime API dispatches through `infini::rt::Runtime<>`, which is +the `Runtime` default specialization. A GPU backend is +selected initially when one is enabled; otherwise CPU is selected. +`SetDeviceType` accepts only backends enabled in the current build. ```cpp -infini::rt::SetDevice(0); +constexpr std::size_t size = 1024; +void* ptr = nullptr; + +infini::rt::Runtime<>::SetDeviceType(infini::rt::Device::Type::kCpu); +infini::rt::Malloc(&ptr, size); +infini::rt::Free(ptr); + +infini::rt::Runtime<>::SetDeviceType(infini::rt::Device::Type::kNvidia); +infini::rt::Malloc(&ptr, size); +infini::rt::Free(ptr); ``` Use `infini::rt::Runtime` when CPU runtime diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 74dde79..755d098 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -166,12 +166,14 @@ def _write_generated_header(include_root, devices): f"#include {_detail_include('hash.h')}", f"#include {_detail_include('runtime.h')}", f"#include {_detail_include('tensor_view.h')}", - f"#include ", ] for device in devices: includes.append(f"#include ") + for device in devices: + includes.append(f"#include ") + path = include_root / "infini" / "rt" / "generated.h" path.parent.mkdir(parents=True, exist_ok=True) path.write_text( @@ -183,27 +185,83 @@ def _write_generated_header(include_root, devices): namespace infini::rt {{ namespace generated_detail {{ -using DefaultRuntime = Runtime<{default_device_type}>; +using DefaultErrorRuntime = Runtime<{default_device_type}>; + +inline constexpr Device::Type kDefaultDeviceType = {default_device_type}; }} // namespace generated_detail -using Error = typename generated_detail::DefaultRuntime::Error; +using Error = typename generated_detail::DefaultErrorRuntime::Error; -using Stream = typename generated_detail::DefaultRuntime::Stream; +using Stream = typename generated_detail::DefaultErrorRuntime::Stream; -inline constexpr Error kSuccess = generated_detail::DefaultRuntime::kSuccess; +inline constexpr Error kSuccess = generated_detail::DefaultErrorRuntime::kSuccess; enum class MemcpyKind {{ kMemcpyHostToHost = - static_cast(generated_detail::DefaultRuntime::kMemcpyHostToHost), + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost), kMemcpyHostToDevice = - static_cast(generated_detail::DefaultRuntime::kMemcpyHostToDevice), + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice), kMemcpyDeviceToHost = - static_cast(generated_detail::DefaultRuntime::kMemcpyDeviceToHost), + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost), kMemcpyDeviceToDevice = - static_cast(generated_detail::DefaultRuntime::kMemcpyDeviceToDevice), + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice), +}}; + +template <> +struct Runtime + : RuntimeBase> {{ + using Error = infini::rt::Error; + + using Stream = infini::rt::Stream; + + static constexpr Device::Type kDeviceType = Device::Type::kCount; + + static constexpr Error kSuccess = infini::rt::kSuccess; + + static constexpr MemcpyKind kMemcpyHostToHost = + MemcpyKind::kMemcpyHostToHost; + + static constexpr MemcpyKind kMemcpyHostToDevice = + MemcpyKind::kMemcpyHostToDevice; + + static constexpr MemcpyKind kMemcpyDeviceToHost = + MemcpyKind::kMemcpyDeviceToHost; + + static constexpr MemcpyKind kMemcpyDeviceToDevice = + MemcpyKind::kMemcpyDeviceToDevice; + + static Error SetDeviceType(Device::Type device_type); + + static Device::Type GetDeviceType(); + + static Error SetDevice(int device); + + static Error GetDevice(int* device); + + static Error GetDeviceCount(int* count); + + static Error DeviceSynchronize(); + + static Error Malloc(void** ptr, std::size_t size); + + static Error Free(void* ptr); + + static Error Memset(void* ptr, int value, std::size_t count); + + static Error Memcpy(void* dst, const void* src, std::size_t count, + MemcpyKind kind); + + static Error MemcpyAsync(void* dst, const void* src, std::size_t count, + MemcpyKind kind, Stream stream); + + private: + inline static thread_local Device::Type device_type_ = + generated_detail::kDefaultDeviceType; }}; +static_assert(Runtime::Validate()); + Error SetDevice(int device); Error GetDevice(int* device); @@ -301,42 +359,91 @@ def _default_device(devices): raise ValueError("at least one device is required") -def _runtime_arg(param): +def _runtime_arg(param, device): + device_type = _DEVICE_TYPES[device] if param.type == "MemcpyKind": - return f"RuntimeMemcpyKind({param.name})" + return f"RuntimeMemcpyKind<{device_type}>({param.name})" if param.type == "Stream": return ( - f"reinterpret_cast({param.name})" + f"reinterpret_cast::Stream>" + f"({param.name})" ) return param.name -def _runtime_args(function): - args = (_runtime_arg(param) for param in function.params) +def _runtime_args(function, device): + args = (_runtime_arg(param, device) for param in function.params) return ", ".join(arg for arg in args if arg is not None) -def _runtime_call(function): - args = _runtime_args(function) +def _runtime_call(function, device): + device_type = _DEVICE_TYPES[device] + args = _runtime_args(function, device) if args: - return f"DefaultRuntime::{function.name}({args})" + return f"Runtime<{device_type}>::{function.name}({args})" + + return f"Runtime<{device_type}>::{function.name}()" - return f"DefaultRuntime::{function.name}()" +def _call_args(function): + return ", ".join(param.name for param in function.params) + + +def _member_signature(function): + return ( + f"{function.return_type} Runtime::" + f"{function.name}({function.params_decl()})" + ) + + +def _dispatch_cases(devices, function): + return "\n".join( + f""" case {_DEVICE_TYPES[device]}: + return CheckCall([&] {{ return {_runtime_call(function, device)}; }});""" + for device in devices + ) + + +def _write_member_dispatch_function(function, devices): + return f"""{_member_signature(function)} {{ + switch (device_type_) {{ +{_dispatch_cases(devices, function)} + }} + + return InvalidValueError(); +}} +""" + + +def _write_public_dispatch_function(function): + args = _call_args(function) + if args: + call = f"Runtime::{function.name}({args})" + else: + call = f"Runtime::{function.name}()" -def _write_dispatch_function(function): return f"""{function.signature()} {{ - return CheckCall([&] {{ return {_runtime_call(function)}; }}); + return {call}; }} """ -def _write_runtime_dispatch(source_path): +def _write_runtime_dispatch(source_path, devices): functions = _PUBLIC_RUNTIME_FUNCTIONS - dispatch_functions = "\n".join( - _write_dispatch_function(function) for function in functions + member_dispatch_functions = "\n".join( + _write_member_dispatch_function(function, devices=devices) + for function in functions + ) + public_dispatch_functions = "\n".join( + _write_public_dispatch_function(function) for function in functions + ) + set_device_type_cases = "\n".join( + f""" case {_DEVICE_TYPES[device]}: + device_type_ = device_type; + return kSuccess;""" + for device in devices ) source_path.parent.mkdir(parents=True, exist_ok=True) @@ -351,8 +458,6 @@ def _write_runtime_dispatch(source_path): namespace infini::rt {{ namespace {{ -using DefaultRuntime = generated_detail::DefaultRuntime; - template Error CheckCall(Func&& func) {{ using ReturnType = decltype(std::forward(func)()); @@ -365,25 +470,43 @@ def _write_runtime_dispatch(source_path): }} }} +Error InvalidValueError() {{ return static_cast(1); }} + +template auto RuntimeMemcpyKind(MemcpyKind kind) {{ + using DeviceRuntime = Runtime; + switch (kind) {{ case MemcpyKind::kMemcpyHostToHost: - return DefaultRuntime::kMemcpyHostToHost; + return DeviceRuntime::kMemcpyHostToHost; case MemcpyKind::kMemcpyHostToDevice: - return DefaultRuntime::kMemcpyHostToDevice; + return DeviceRuntime::kMemcpyHostToDevice; case MemcpyKind::kMemcpyDeviceToHost: - return DefaultRuntime::kMemcpyDeviceToHost; + return DeviceRuntime::kMemcpyDeviceToHost; case MemcpyKind::kMemcpyDeviceToDevice: - return DefaultRuntime::kMemcpyDeviceToDevice; + return DeviceRuntime::kMemcpyDeviceToDevice; }} assert(false && "unsupported memcpy kind"); - return DefaultRuntime::kMemcpyHostToHost; + return DeviceRuntime::kMemcpyHostToHost; }} }} // namespace -{dispatch_functions} +Error Runtime::SetDeviceType(Device::Type device_type) {{ + switch (device_type) {{ +{set_device_type_cases} + }} + + return InvalidValueError(); +}} + +Device::Type Runtime::GetDeviceType() {{ + return device_type_; +}} + +{member_dispatch_functions} +{public_dispatch_functions} }} // namespace infini::rt """ ) @@ -413,7 +536,7 @@ def main(): _write_wrapper(include_root, wrapper_device, header_name, target) _write_generated_header(include_root, devices) - _write_runtime_dispatch(pathlib.Path(args.source_output)) + _write_runtime_dispatch(pathlib.Path(args.source_output), devices) if __name__ == "__main__": diff --git a/src/runtime.h b/src/runtime.h index 43ba9da..104e160 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -8,7 +8,7 @@ namespace infini::rt { -template +template struct Runtime; /// ## Interface enforcement via CRTP. diff --git a/src/tensor_view.h b/src/tensor_view.h index 0b6fc59..90ed0df 100644 --- a/src/tensor_view.h +++ b/src/tensor_view.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include "data_type.h" @@ -11,6 +13,22 @@ namespace infini::rt { +namespace tensor_view_detail { + +template +struct IsTensorLike : std::false_type {}; + +template +struct IsTensorLike< + T, std::void_t().data()), + decltype(std::declval().shape()), + decltype(std::declval().dtype()), + decltype(std::declval().device()), + decltype(std::declval().strides())>> + : std::true_type {}; + +} // namespace tensor_view_detail + class TensorView { public: using Size = std::size_t; @@ -23,7 +41,9 @@ class TensorView { using Strides = std::vector; - template + template ::value>> TensorView(const TensorLike& tensor) : data_{const_cast(static_cast(tensor.data()))}, shape_{tensor.shape()}, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2bb0812..1aeca74 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,6 +7,18 @@ endfunction() add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) +if(WITH_CPU OR WITH_NVIDIA) + add_infini_rt_test(test_default_runtime test_default_runtime.cc) + if(WITH_CPU) + target_compile_definitions(test_default_runtime + PRIVATE INFINI_RT_TEST_WITH_CPU=1) + endif() + if(WITH_NVIDIA) + target_compile_definitions(test_default_runtime + PRIVATE INFINI_RT_TEST_WITH_NVIDIA=1) + endif() +endif() + if(WITH_CPU) add_infini_rt_test(test_cpu_runtime test_cpu_runtime.cc) endif() diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 9414e71..b4e1ce6 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -21,6 +21,20 @@ int main() { #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) || \ defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) + using DefaultRuntime = infini::rt::Runtime<>; +#if defined(INFINI_RT_CONSUMER_BACKEND_CPU) + constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kCpu; +#else + constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kNvidia; +#endif + if (DefaultRuntime::SetDeviceType(kExpectedDeviceType) != + infini::rt::kSuccess) { + return 1; + } + if (DefaultRuntime::GetDeviceType() != kExpectedDeviceType) { + return 1; + } + std::array input{1, 2, 3, 4}; std::array output{}; void* ptr = nullptr; diff --git a/tests/test_core.cc b/tests/test_core.cc index e8b8071..1c57c04 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include "test_helper.h" @@ -12,6 +13,9 @@ using infini::rt::DataType; using infini::rt::Device; using infini::rt::TensorView; +static_assert(!std::is_constructible_v>, + "TensorView should not treat tensor containers as tensor-like."); + void TestDevice(infini::rt::test::TestContext* context) { const Device cpu{Device::Type::kCpu}; const Device nvidia{Device::Type::kNvidia, 1}; diff --git a/tests/test_default_runtime.cc b/tests/test_default_runtime.cc new file mode 100644 index 0000000..f2c6978 --- /dev/null +++ b/tests/test_default_runtime.cc @@ -0,0 +1,126 @@ +#include + +#include +#include +#include + +#include "test_helper.h" + +namespace { + +using DefaultRuntime = infini::rt::Runtime<>; + +void ExpectSuccess(infini::rt::test::TestContext* context, + infini::rt::Error status, const char* message) { + context->Expect(status == infini::rt::kSuccess, message); +} + +void TestInvalidDeviceType(infini::rt::test::TestContext* context) { + const auto before = DefaultRuntime::GetDeviceType(); + + context->Expect(DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kQy) != + infini::rt::kSuccess, + "Default runtime should reject disabled device types."); + context->Expect(DefaultRuntime::GetDeviceType() == before, + "Rejected device type should not change dispatch target."); +} + +#if defined(INFINI_RT_TEST_WITH_CPU) +void TestCpuDispatch(infini::rt::test::TestContext* context) { + ExpectSuccess(context, + DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kCpu), + "Default runtime should select CPU dispatch."); + context->Expect(DefaultRuntime::GetDeviceType() == + infini::rt::Device::Type::kCpu, + "Default runtime should report CPU dispatch."); + + std::array input{1, 2, 3, 4}; + std::array output{}; + void* ptr = nullptr; + + ExpectSuccess(context, infini::rt::SetDevice(0), + "CPU dispatch should set device 0."); + ExpectSuccess(context, infini::rt::Malloc(&ptr, input.size()), + "CPU dispatch should allocate memory."); + if (ptr == nullptr) { + return; + } + + ExpectSuccess(context, + infini::rt::Memcpy(ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice), + "CPU dispatch should copy host data to runtime memory."); + context->Expect( + infini::rt::MemcpyAsync(ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice, + nullptr) != infini::rt::kSuccess, + "CPU dispatch should not report async memcpy success."); + ExpectSuccess(context, + infini::rt::Memcpy(output.data(), ptr, output.size(), + infini::rt::MemcpyKind::kMemcpyDeviceToHost), + "CPU dispatch should copy runtime memory to host."); + ExpectSuccess(context, infini::rt::Free(ptr), + "CPU dispatch should free memory."); + + context->ExpectEqual(output, input, + "CPU dispatch should preserve copied bytes."); +} +#endif + +#if defined(INFINI_RT_TEST_WITH_NVIDIA) +void TestNvidiaDispatch(infini::rt::test::TestContext* context) { + ExpectSuccess( + context, DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kNvidia), + "Default runtime should select NVIDIA dispatch."); + context->Expect(DefaultRuntime::GetDeviceType() == + infini::rt::Device::Type::kNvidia, + "Default runtime should report NVIDIA dispatch."); + + std::array input{5, 6, 7, 8}; + std::array output{}; + void* ptr = nullptr; + + ExpectSuccess(context, infini::rt::SetDevice(0), + "NVIDIA dispatch should set device 0."); + ExpectSuccess(context, infini::rt::Malloc(&ptr, input.size()), + "NVIDIA dispatch should allocate memory."); + if (ptr == nullptr) { + return; + } + + ExpectSuccess(context, + infini::rt::MemcpyAsync( + ptr, input.data(), input.size(), + infini::rt::MemcpyKind::kMemcpyHostToDevice, nullptr), + "NVIDIA dispatch should support async host-to-device copy."); + ExpectSuccess(context, infini::rt::DeviceSynchronize(), + "NVIDIA dispatch should synchronize the device."); + ExpectSuccess(context, + infini::rt::Memcpy(output.data(), ptr, output.size(), + infini::rt::MemcpyKind::kMemcpyDeviceToHost), + "NVIDIA dispatch should copy device data to host."); + ExpectSuccess(context, infini::rt::Free(ptr), + "NVIDIA dispatch should free memory."); + + context->ExpectEqual(output, input, + "NVIDIA dispatch should preserve copied bytes."); +} +#endif + +} // namespace + +int main() { + infini::rt::test::TestContext context; + + TestInvalidDeviceType(&context); + +#if defined(INFINI_RT_TEST_WITH_CPU) + TestCpuDispatch(&context); +#endif + +#if defined(INFINI_RT_TEST_WITH_NVIDIA) + TestNvidiaDispatch(&context); +#endif + + return context.ExitCode(); +} From 50fca829288405dde5ee1567e9a73947ab6a4e25 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Wed, 1 Jul 2026 23:49:56 +0800 Subject: [PATCH 03/15] Refactor runtime dispatch namespace --- scripts/generate_public_headers.py | 145 ++++++++-------------------- src/native/ascend/runtime_.h | 4 +- src/native/cambricon/runtime_.h | 4 +- src/native/cpu/runtime_.h | 4 +- src/native/cuda/hygon/runtime_.h | 4 +- src/native/cuda/iluvatar/runtime_.h | 4 +- src/native/cuda/metax/runtime_.h | 4 +- src/native/cuda/moore/runtime_.h | 4 +- src/native/cuda/nvidia/runtime_.h | 4 +- src/native/cuda/runtime_.h | 4 +- src/runtime.h | 11 ++- src/tensor_view.h | 12 +-- tests/install_consumer_smoke.cc | 45 ++++----- tests/test_cpu_runtime.cc | 12 +-- tests/test_default_runtime.cc | 83 +++++++--------- tests/test_nvidia_runtime.cc | 3 +- 16 files changed, 132 insertions(+), 215 deletions(-) diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 755d098..957d0d8 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -183,6 +183,12 @@ def _write_generated_header(include_root, devices): {chr(10).join(includes)} namespace infini::rt {{ + +void set_runtime_device_type(Device::Type device_type); + +Device::Type runtime_device_type(); + +namespace runtime {{ namespace generated_detail {{ using DefaultErrorRuntime = Runtime<{default_device_type}>; @@ -208,60 +214,6 @@ def _write_generated_header(include_root, devices): static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice), }}; -template <> -struct Runtime - : RuntimeBase> {{ - using Error = infini::rt::Error; - - using Stream = infini::rt::Stream; - - static constexpr Device::Type kDeviceType = Device::Type::kCount; - - static constexpr Error kSuccess = infini::rt::kSuccess; - - static constexpr MemcpyKind kMemcpyHostToHost = - MemcpyKind::kMemcpyHostToHost; - - static constexpr MemcpyKind kMemcpyHostToDevice = - MemcpyKind::kMemcpyHostToDevice; - - static constexpr MemcpyKind kMemcpyDeviceToHost = - MemcpyKind::kMemcpyDeviceToHost; - - static constexpr MemcpyKind kMemcpyDeviceToDevice = - MemcpyKind::kMemcpyDeviceToDevice; - - static Error SetDeviceType(Device::Type device_type); - - static Device::Type GetDeviceType(); - - static Error SetDevice(int device); - - static Error GetDevice(int* device); - - static Error GetDeviceCount(int* count); - - static Error DeviceSynchronize(); - - static Error Malloc(void** ptr, std::size_t size); - - static Error Free(void* ptr); - - static Error Memset(void* ptr, int value, std::size_t count); - - static Error Memcpy(void* dst, const void* src, std::size_t count, - MemcpyKind kind); - - static Error MemcpyAsync(void* dst, const void* src, std::size_t count, - MemcpyKind kind, Stream stream); - - private: - inline static thread_local Device::Type device_type_ = - generated_detail::kDefaultDeviceType; -}}; - -static_assert(Runtime::Validate()); - Error SetDevice(int device); Error GetDevice(int* device); @@ -281,6 +233,7 @@ def _write_generated_header(include_root, devices): Error MemcpyAsync(void* dst, const void* src, std::size_t count, MemcpyKind kind, Stream stream); +}} // namespace runtime }} // namespace infini::rt #endif @@ -365,8 +318,7 @@ def _runtime_arg(param, device): return f"RuntimeMemcpyKind<{device_type}>({param.name})" if param.type == "Stream": return ( - f"reinterpret_cast::Stream>" - f"({param.name})" + f"reinterpret_cast::Stream>({param.name})" ) return param.name @@ -387,17 +339,6 @@ def _runtime_call(function, device): return f"Runtime<{device_type}>::{function.name}()" -def _call_args(function): - return ", ".join(param.name for param in function.params) - - -def _member_signature(function): - return ( - f"{function.return_type} Runtime::" - f"{function.name}({function.params_decl()})" - ) - - def _dispatch_cases(devices, function): return "\n".join( f""" case {_DEVICE_TYPES[device]}: @@ -406,43 +347,28 @@ def _dispatch_cases(devices, function): ) -def _write_member_dispatch_function(function, devices): - return f"""{_member_signature(function)} {{ - switch (device_type_) {{ +def _write_runtime_dispatch_function(function, devices): + return f"""{function.signature()} {{ + switch (infini::rt::runtime_device_type()) {{ {_dispatch_cases(devices, function)} }} + assert(false && "unsupported runtime device type"); return InvalidValueError(); }} """ -def _write_public_dispatch_function(function): - args = _call_args(function) - if args: - call = f"Runtime::{function.name}({args})" - else: - call = f"Runtime::{function.name}()" - - return f"""{function.signature()} {{ - return {call}; -}} -""" - - def _write_runtime_dispatch(source_path, devices): functions = _PUBLIC_RUNTIME_FUNCTIONS - member_dispatch_functions = "\n".join( - _write_member_dispatch_function(function, devices=devices) + dispatch_functions = "\n".join( + _write_runtime_dispatch_function(function, devices=devices) for function in functions ) - public_dispatch_functions = "\n".join( - _write_public_dispatch_function(function) for function in functions - ) set_device_type_cases = "\n".join( f""" case {_DEVICE_TYPES[device]}: - device_type_ = device_type; - return kSuccess;""" + runtime_device_type_ = device_type; + return;""" for device in devices ) @@ -458,6 +384,28 @@ def _write_runtime_dispatch(source_path, devices): namespace infini::rt {{ namespace {{ +thread_local Device::Type runtime_device_type_ = + runtime::generated_detail::kDefaultDeviceType; + +}} // namespace + +void set_runtime_device_type(Device::Type device_type) {{ + switch (device_type) {{ +{set_device_type_cases} + }} + + assert(false && "unsupported runtime device type"); +}} + +Device::Type runtime_device_type() {{ + return runtime_device_type_; +}} + +}} // namespace infini::rt + +namespace infini::rt::runtime {{ +namespace {{ + template Error CheckCall(Func&& func) {{ using ReturnType = decltype(std::forward(func)()); @@ -493,21 +441,8 @@ def _write_runtime_dispatch(source_path, devices): }} // namespace -Error Runtime::SetDeviceType(Device::Type device_type) {{ - switch (device_type) {{ -{set_device_type_cases} - }} - - return InvalidValueError(); -}} - -Device::Type Runtime::GetDeviceType() {{ - return device_type_; -}} - -{member_dispatch_functions} -{public_dispatch_functions} -}} // namespace infini::rt +{dispatch_functions} +}} // namespace infini::rt::runtime """ ) diff --git a/src/native/ascend/runtime_.h b/src/native/ascend/runtime_.h index 191beef..065a9d3 100644 --- a/src/native/ascend/runtime_.h +++ b/src/native/ascend/runtime_.h @@ -11,7 +11,7 @@ #include "native/ascend/device_.h" #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -70,6 +70,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cambricon/runtime_.h b/src/native/cambricon/runtime_.h index d892df8..927e2c5 100644 --- a/src/native/cambricon/runtime_.h +++ b/src/native/cambricon/runtime_.h @@ -9,7 +9,7 @@ #include "native/cambricon/device_.h" #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -64,6 +64,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cpu/runtime_.h b/src/native/cpu/runtime_.h index c2bcf4f..f4b18bf 100644 --- a/src/native/cpu/runtime_.h +++ b/src/native/cpu/runtime_.h @@ -6,7 +6,7 @@ #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : RuntimeBase> { @@ -108,6 +108,6 @@ struct Runtime : RuntimeBase> { static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/hygon/runtime_.h b/src/native/cuda/hygon/runtime_.h index 520e8f4..52c47eb 100644 --- a/src/native/cuda/hygon/runtime_.h +++ b/src/native/cuda/hygon/runtime_.h @@ -10,7 +10,7 @@ #include "native/cuda/hygon/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -56,6 +56,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/iluvatar/runtime_.h b/src/native/cuda/iluvatar/runtime_.h index 87558bf..f49db23 100644 --- a/src/native/cuda/iluvatar/runtime_.h +++ b/src/native/cuda/iluvatar/runtime_.h @@ -10,7 +10,7 @@ #include "native/cuda/iluvatar/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -54,6 +54,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/metax/runtime_.h b/src/native/cuda/metax/runtime_.h index 14ed18b..c1f19c0 100644 --- a/src/native/cuda/metax/runtime_.h +++ b/src/native/cuda/metax/runtime_.h @@ -8,7 +8,7 @@ #include "native/cuda/metax/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -58,6 +58,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/moore/runtime_.h b/src/native/cuda/moore/runtime_.h index fd0a215..5beffcf 100644 --- a/src/native/cuda/moore/runtime_.h +++ b/src/native/cuda/moore/runtime_.h @@ -8,7 +8,7 @@ #include "native/cuda/moore/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -64,6 +64,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index c910198..1786e08 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -10,7 +10,7 @@ #include "native/cuda/nvidia/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime @@ -54,6 +54,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/runtime_.h b/src/native/cuda/runtime_.h index 7d1899d..59dd6bc 100644 --- a/src/native/cuda/runtime_.h +++ b/src/native/cuda/runtime_.h @@ -5,7 +5,7 @@ #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { /// ## CUDA-like runtime interface enforcement via CRTP. /// @@ -30,6 +30,6 @@ struct CudaRuntime : DeviceRuntime { } }; -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/runtime.h b/src/runtime.h index 104e160..c3e7640 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -6,16 +6,17 @@ #include "device.h" -namespace infini::rt { +namespace infini::rt::runtime { -template +template struct Runtime; /// ## Interface enforcement via CRTP. /// /// Inherit from the appropriate base to declare which interface level a -/// `Runtime` specialization implements. After the struct is fully defined, call -/// `static_assert(Runtime<...>::Validate())`. The chained `Validate()` checks +/// `runtime::Runtime` specialization implements. After the struct is fully +/// defined, call `static_assert(Runtime<...>::Validate())`. The chained +/// `Validate()` checks /// every required member's existence and signature at compile time, analogous /// to how `override` catches signature mismatches for virtual functions. /// @@ -56,6 +57,6 @@ struct DeviceRuntime : RuntimeBase { } }; -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/tensor_view.h b/src/tensor_view.h index 90ed0df..dcf7cc9 100644 --- a/src/tensor_view.h +++ b/src/tensor_view.h @@ -19,12 +19,12 @@ template struct IsTensorLike : std::false_type {}; template -struct IsTensorLike< - T, std::void_t().data()), - decltype(std::declval().shape()), - decltype(std::declval().dtype()), - decltype(std::declval().device()), - decltype(std::declval().strides())>> +struct IsTensorLike().data()), + decltype(std::declval().shape()), + decltype(std::declval().dtype()), + decltype(std::declval().device()), + decltype(std::declval().strides())>> : std::true_type {}; } // namespace tensor_view_detail diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index b4e1ce6..14e6c0d 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -21,76 +21,73 @@ int main() { #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) || \ defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) - using DefaultRuntime = infini::rt::Runtime<>; + namespace runtime = infini::rt::runtime; #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kCpu; #else constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kNvidia; #endif - if (DefaultRuntime::SetDeviceType(kExpectedDeviceType) != - infini::rt::kSuccess) { - return 1; - } - if (DefaultRuntime::GetDeviceType() != kExpectedDeviceType) { + infini::rt::set_runtime_device_type(kExpectedDeviceType); + if (infini::rt::runtime_device_type() != kExpectedDeviceType) { return 1; } std::array input{1, 2, 3, 4}; std::array output{}; void* ptr = nullptr; - if (infini::rt::SetDevice(0) != infini::rt::kSuccess) { + if (runtime::SetDevice(0) != runtime::kSuccess) { return 1; } int current_device = -1; - if (infini::rt::GetDevice(¤t_device) != infini::rt::kSuccess) { + if (runtime::GetDevice(¤t_device) != runtime::kSuccess) { return 1; } if (current_device != 0) { return 1; } int device_count = 0; - if (infini::rt::GetDeviceCount(&device_count) != infini::rt::kSuccess) { + if (runtime::GetDeviceCount(&device_count) != runtime::kSuccess) { return 1; } if (device_count <= 0) { return 1; } - if (infini::rt::Malloc(&ptr, input.size()) != infini::rt::kSuccess) { + if (runtime::Malloc(&ptr, input.size()) != runtime::kSuccess) { return 1; } if (ptr == nullptr) { return 1; } - if (infini::rt::Memcpy(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice) != - infini::rt::kSuccess) { + if (runtime::Memcpy(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice) != + runtime::kSuccess) { return 1; } #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) - if (infini::rt::MemcpyAsync(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice, - nullptr) == infini::rt::kSuccess) { + if (runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice, + nullptr) == runtime::kSuccess) { return 1; } #else - if (infini::rt::MemcpyAsync(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice, - nullptr) != infini::rt::kSuccess) { + if (runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice, + nullptr) != runtime::kSuccess) { return 1; } #endif - if (infini::rt::DeviceSynchronize() != infini::rt::kSuccess) { + if (runtime::DeviceSynchronize() != runtime::kSuccess) { return 1; } - if (infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kMemcpyDeviceToHost) != - infini::rt::kSuccess) { + if (runtime::Memcpy(output.data(), ptr, output.size(), + runtime::MemcpyKind::kMemcpyDeviceToHost) != + runtime::kSuccess) { return 1; } if (output != input) { return 1; } - if (infini::rt::Free(ptr) != infini::rt::kSuccess) { + if (runtime::Free(ptr) != runtime::kSuccess) { return 1; } #endif diff --git a/tests/test_cpu_runtime.cc b/tests/test_cpu_runtime.cc index d307f3c..e066c85 100644 --- a/tests/test_cpu_runtime.cc +++ b/tests/test_cpu_runtime.cc @@ -9,7 +9,7 @@ namespace { -using CpuRuntime = infini::rt::Runtime; +using CpuRuntime = infini::rt::runtime::Runtime; void TestMallocAndFree(infini::rt::test::TestContext* context) { void* ptr = nullptr; @@ -45,11 +45,11 @@ void TestMemcpyAsyncUnsupported(infini::rt::test::TestContext* context) { std::array input{1}; std::array output{}; - context->Expect(CpuRuntime::MemcpyAsync(output.data(), input.data(), - input.size(), - CpuRuntime::kMemcpyHostToHost, - nullptr) != CpuRuntime::kSuccess, - "CPU runtime should not report async memcpy success."); + context->Expect( + CpuRuntime::MemcpyAsync(output.data(), input.data(), input.size(), + CpuRuntime::kMemcpyHostToHost, + nullptr) != CpuRuntime::kSuccess, + "CPU runtime should not report async memcpy success."); } void TestMemset(infini::rt::test::TestContext* context) { diff --git a/tests/test_default_runtime.cc b/tests/test_default_runtime.cc index f2c6978..bf76a4b 100644 --- a/tests/test_default_runtime.cc +++ b/tests/test_default_runtime.cc @@ -8,58 +8,45 @@ namespace { -using DefaultRuntime = infini::rt::Runtime<>; +namespace runtime = infini::rt::runtime; void ExpectSuccess(infini::rt::test::TestContext* context, - infini::rt::Error status, const char* message) { - context->Expect(status == infini::rt::kSuccess, message); -} - -void TestInvalidDeviceType(infini::rt::test::TestContext* context) { - const auto before = DefaultRuntime::GetDeviceType(); - - context->Expect(DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kQy) != - infini::rt::kSuccess, - "Default runtime should reject disabled device types."); - context->Expect(DefaultRuntime::GetDeviceType() == before, - "Rejected device type should not change dispatch target."); + runtime::Error status, const char* message) { + context->Expect(status == runtime::kSuccess, message); } #if defined(INFINI_RT_TEST_WITH_CPU) void TestCpuDispatch(infini::rt::test::TestContext* context) { - ExpectSuccess(context, - DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kCpu), - "Default runtime should select CPU dispatch."); - context->Expect(DefaultRuntime::GetDeviceType() == - infini::rt::Device::Type::kCpu, - "Default runtime should report CPU dispatch."); + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); + context->Expect( + infini::rt::runtime_device_type() == infini::rt::Device::Type::kCpu, + "Default runtime should report CPU dispatch."); std::array input{1, 2, 3, 4}; std::array output{}; void* ptr = nullptr; - ExpectSuccess(context, infini::rt::SetDevice(0), + ExpectSuccess(context, runtime::SetDevice(0), "CPU dispatch should set device 0."); - ExpectSuccess(context, infini::rt::Malloc(&ptr, input.size()), + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), "CPU dispatch should allocate memory."); if (ptr == nullptr) { return; } ExpectSuccess(context, - infini::rt::Memcpy(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice), + runtime::Memcpy(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice), "CPU dispatch should copy host data to runtime memory."); - context->Expect( - infini::rt::MemcpyAsync(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice, - nullptr) != infini::rt::kSuccess, - "CPU dispatch should not report async memcpy success."); + context->Expect(runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice, + nullptr) != runtime::kSuccess, + "CPU dispatch should not report async memcpy success."); ExpectSuccess(context, - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kMemcpyDeviceToHost), + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::MemcpyKind::kMemcpyDeviceToHost), "CPU dispatch should copy runtime memory to host."); - ExpectSuccess(context, infini::rt::Free(ptr), + ExpectSuccess(context, runtime::Free(ptr), "CPU dispatch should free memory."); context->ExpectEqual(output, input, @@ -69,37 +56,35 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { #if defined(INFINI_RT_TEST_WITH_NVIDIA) void TestNvidiaDispatch(infini::rt::test::TestContext* context) { - ExpectSuccess( - context, DefaultRuntime::SetDeviceType(infini::rt::Device::Type::kNvidia), - "Default runtime should select NVIDIA dispatch."); - context->Expect(DefaultRuntime::GetDeviceType() == - infini::rt::Device::Type::kNvidia, - "Default runtime should report NVIDIA dispatch."); + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); + context->Expect( + infini::rt::runtime_device_type() == infini::rt::Device::Type::kNvidia, + "Default runtime should report NVIDIA dispatch."); std::array input{5, 6, 7, 8}; std::array output{}; void* ptr = nullptr; - ExpectSuccess(context, infini::rt::SetDevice(0), + ExpectSuccess(context, runtime::SetDevice(0), "NVIDIA dispatch should set device 0."); - ExpectSuccess(context, infini::rt::Malloc(&ptr, input.size()), + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), "NVIDIA dispatch should allocate memory."); if (ptr == nullptr) { return; } - ExpectSuccess(context, - infini::rt::MemcpyAsync( - ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kMemcpyHostToDevice, nullptr), - "NVIDIA dispatch should support async host-to-device copy."); - ExpectSuccess(context, infini::rt::DeviceSynchronize(), + ExpectSuccess( + context, + runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::MemcpyKind::kMemcpyHostToDevice, nullptr), + "NVIDIA dispatch should support async host-to-device copy."); + ExpectSuccess(context, runtime::DeviceSynchronize(), "NVIDIA dispatch should synchronize the device."); ExpectSuccess(context, - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kMemcpyDeviceToHost), + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::MemcpyKind::kMemcpyDeviceToHost), "NVIDIA dispatch should copy device data to host."); - ExpectSuccess(context, infini::rt::Free(ptr), + ExpectSuccess(context, runtime::Free(ptr), "NVIDIA dispatch should free memory."); context->ExpectEqual(output, input, @@ -112,8 +97,6 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { int main() { infini::rt::test::TestContext context; - TestInvalidDeviceType(&context); - #if defined(INFINI_RT_TEST_WITH_CPU) TestCpuDispatch(&context); #endif diff --git a/tests/test_nvidia_runtime.cc b/tests/test_nvidia_runtime.cc index df5a038..fa10858 100644 --- a/tests/test_nvidia_runtime.cc +++ b/tests/test_nvidia_runtime.cc @@ -9,7 +9,8 @@ namespace { -using NvidiaRuntime = infini::rt::Runtime; +using NvidiaRuntime = + infini::rt::runtime::Runtime; void ExpectCudaSuccess(infini::rt::test::TestContext* context, cudaError_t status, const char* message) { From a26ddffad65819bc0fd2f7aa38bb8cf8a2c8667d Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 2 Jul 2026 09:22:10 +0800 Subject: [PATCH 04/15] Use Abseil status for runtime device API --- CMakeLists.txt | 22 ++++++ scripts/generate_public_headers.py | 24 +++--- src/CMakeLists.txt | 2 + src/status.h | 21 +++++ tests/compile_install_consumer.cmake | 111 ++++++++++++++++++++------- tests/install_consumer_smoke.cc | 8 +- tests/test_core.cc | 14 ++++ tests/test_default_runtime.cc | 42 +++++++++- 8 files changed, 203 insertions(+), 41 deletions(-) create mode 100644 src/status.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 38b8d72..525df0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ project(InfiniRT LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") @@ -33,6 +34,27 @@ option(WITH_ASCEND "Enable Ascend backend" OFF) option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(INFINI_RT_BUILD_TESTING "Build InfiniRT tests" OFF) +option(INFINI_RT_FETCH_ABSEIL "Fetch Abseil when no package is found" ON) + +find_package(absl CONFIG QUIET) +if(NOT absl_FOUND) + if(NOT INFINI_RT_FETCH_ABSEIL) + message(FATAL_ERROR "Abseil was not found. Set absl_DIR or enable INFINI_RT_FETCH_ABSEIL.") + endif() + + include(FetchContent) + set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE) + set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE) + set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE) + FetchContent_Declare( + abseil + GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git + GIT_TAG 20260526.0 + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + ) + FetchContent_MakeAvailable(abseil) +endif() if(AUTO_DETECT_DEVICES) message(STATUS "Auto-detecting available devices...") diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 957d0d8..9910d94 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -103,7 +103,7 @@ def _rewrite_detail_include(match): _DETAIL_INCLUDE_PATTERN = re.compile( - r'#include "((?:common|native)/[^"]+|data_type\.h|device\.h|dispatcher\.h|hash\.h|runtime\.h|tensor_view\.h)"' + r'#include "((?:common|native)/[^"]+|data_type\.h|device\.h|dispatcher\.h|hash\.h|runtime\.h|status\.h|tensor_view\.h)"' ) @@ -137,6 +137,7 @@ def _write_detail_headers(include_root, source_root, devices): "dispatcher.h", "hash.h", "runtime.h", + "status.h", "tensor_view.h", } @@ -165,6 +166,7 @@ def _write_generated_header(include_root, devices): f"#include {_detail_include('device.h')}", f"#include {_detail_include('hash.h')}", f"#include {_detail_include('runtime.h')}", + f"#include {_detail_include('status.h')}", f"#include {_detail_include('tensor_view.h')}", ] @@ -184,9 +186,9 @@ def _write_generated_header(include_root, devices): namespace infini::rt {{ -void set_runtime_device_type(Device::Type device_type); +Status set_runtime_device_type(Device::Type device_type); -Device::Type runtime_device_type(); +StatusOr runtime_device_type(); namespace runtime {{ namespace generated_detail {{ @@ -349,11 +351,15 @@ def _dispatch_cases(devices, function): def _write_runtime_dispatch_function(function, devices): return f"""{function.signature()} {{ - switch (infini::rt::runtime_device_type()) {{ + auto device_type = infini::rt::runtime_device_type(); + if (!device_type.ok()) {{ + return InvalidValueError(); + }} + + switch (*device_type) {{ {_dispatch_cases(devices, function)} }} - assert(false && "unsupported runtime device type"); return InvalidValueError(); }} """ @@ -368,7 +374,7 @@ def _write_runtime_dispatch(source_path, devices): set_device_type_cases = "\n".join( f""" case {_DEVICE_TYPES[device]}: runtime_device_type_ = device_type; - return;""" + return OkStatus();""" for device in devices ) @@ -389,15 +395,15 @@ def _write_runtime_dispatch(source_path, devices): }} // namespace -void set_runtime_device_type(Device::Type device_type) {{ +Status set_runtime_device_type(Device::Type device_type) {{ switch (device_type) {{ {set_device_type_cases} }} - assert(false && "unsupported runtime device type"); + return InvalidArgumentError("unsupported runtime device type"); }} -Device::Type runtime_device_type() {{ +StatusOr runtime_device_type() {{ return runtime_device_type_; }} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dd9c99f..8c0c856 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,8 @@ target_sources(infinirt PRIVATE ${BASE_SRCS} ${PROJECT_SOURCE_DIR}/generated/src/runtime_dispatch.cc) +target_link_libraries(infinirt PUBLIC absl::status absl::statusor) + if(WITH_CPU) target_compile_definitions(infinirt PUBLIC WITH_CPU=1) diff --git a/src/status.h b/src/status.h new file mode 100644 index 0000000..3ca670a --- /dev/null +++ b/src/status.h @@ -0,0 +1,21 @@ +#ifndef INFINI_RT_STATUS_H_ +#define INFINI_RT_STATUS_H_ + +#include +#include + +namespace infini::rt { + +using Status = absl::Status; + +using StatusCode = absl::StatusCode; + +using absl::InvalidArgumentError; +using absl::OkStatus; + +template +using StatusOr = absl::StatusOr; + +} // namespace infini::rt + +#endif diff --git a/tests/compile_install_consumer.cmake b/tests/compile_install_consumer.cmake index 1bb96d1..9f4107b 100644 --- a/tests/compile_install_consumer.cmake +++ b/tests/compile_install_consumer.cmake @@ -13,51 +13,110 @@ if(NOT EXISTS "${INFINI_RT_LIBRARY_DIR}/libinfinirt.so") message(FATAL_ERROR "The installed InfiniRT library was not found.") endif() -set(INFINI_RT_EXTRA_LINK_ARGS "") -set(INFINI_RT_EXTRA_COMPILE_ARGS "") set(INFINI_RT_LD_LIBRARY_PATH "${INFINI_RT_LIBRARY_DIR}") +if(INFINI_RT_EXTRA_LIBRARY_PATHS) + string(REPLACE ":" ";" INFINI_RT_EXTRA_LIBRARY_DIRS + "${INFINI_RT_EXTRA_LIBRARY_PATHS}") + foreach(INFINI_RT_EXTRA_LIBRARY_DIR ${INFINI_RT_EXTRA_LIBRARY_DIRS}) + if(EXISTS "${INFINI_RT_EXTRA_LIBRARY_DIR}") + set(INFINI_RT_LD_LIBRARY_PATH + "${INFINI_RT_LD_LIBRARY_PATH}:${INFINI_RT_EXTRA_LIBRARY_DIR}") + endif() + endforeach() +endif() + +get_filename_component(INFINI_RT_CONSUMER_OUTPUT_DIR + "${INFINI_RT_CONSUMER_BINARY}" DIRECTORY) +get_filename_component(INFINI_RT_CONSUMER_OUTPUT_NAME + "${INFINI_RT_CONSUMER_BINARY}" NAME) +set(INFINI_RT_CONSUMER_PROJECT_DIR + "${INFINI_RT_CONSUMER_BINARY}.project") +set(INFINI_RT_CONSUMER_BUILD_DIR + "${INFINI_RT_CONSUMER_BINARY}.build") + +file(MAKE_DIRECTORY "${INFINI_RT_CONSUMER_PROJECT_DIR}") +file(WRITE "${INFINI_RT_CONSUMER_PROJECT_DIR}/CMakeLists.txt" [=[ +cmake_minimum_required(VERSION 3.18) +project(InfiniRTInstallConsumer LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(absl CONFIG REQUIRED) + +add_library(infinirt SHARED IMPORTED GLOBAL) +set_target_properties(infinirt PROPERTIES + IMPORTED_LOCATION "${INFINI_RT_LIBRARY_DIR}/libinfinirt.so") + +add_executable(install_consumer "${INFINI_RT_CONSUMER_SOURCE}") +target_include_directories(install_consumer PRIVATE "${INFINI_RT_INCLUDE_DIR}") +target_compile_options(install_consumer PRIVATE -Werror) +target_link_libraries(install_consumer + PRIVATE + infinirt + absl::status + absl::statusor) +set_target_properties(install_consumer PROPERTIES + OUTPUT_NAME "${INFINI_RT_CONSUMER_OUTPUT_NAME}" + RUNTIME_OUTPUT_DIRECTORY "${INFINI_RT_CONSUMER_OUTPUT_DIR}" + BUILD_RPATH "${INFINI_RT_LIBRARY_DIR}") + +if(INFINI_RT_CONSUMER_BACKEND AND + NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") + target_compile_definitions(install_consumer + PRIVATE INFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1) +endif() + if(INFINI_RT_EXTRA_INCLUDE_PATHS) string(REPLACE ":" ";" INFINI_RT_EXTRA_INCLUDE_DIRS "${INFINI_RT_EXTRA_INCLUDE_PATHS}") - foreach(INFINI_RT_EXTRA_INCLUDE_DIR ${INFINI_RT_EXTRA_INCLUDE_DIRS}) + foreach(INFINI_RT_EXTRA_INCLUDE_DIR IN LISTS INFINI_RT_EXTRA_INCLUDE_DIRS) if(EXISTS "${INFINI_RT_EXTRA_INCLUDE_DIR}") - list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS - "-I${INFINI_RT_EXTRA_INCLUDE_DIR}") + target_include_directories(install_consumer + PRIVATE "${INFINI_RT_EXTRA_INCLUDE_DIR}") endif() endforeach() endif() -if(INFINI_RT_CONSUMER_BACKEND AND NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") - list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS - "-DINFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1") -endif() - if(INFINI_RT_EXTRA_LIBRARY_PATHS) string(REPLACE ":" ";" INFINI_RT_EXTRA_LIBRARY_DIRS "${INFINI_RT_EXTRA_LIBRARY_PATHS}") - foreach(INFINI_RT_EXTRA_LIBRARY_DIR ${INFINI_RT_EXTRA_LIBRARY_DIRS}) + foreach(INFINI_RT_EXTRA_LIBRARY_DIR IN LISTS INFINI_RT_EXTRA_LIBRARY_DIRS) if(EXISTS "${INFINI_RT_EXTRA_LIBRARY_DIR}") - list(APPEND INFINI_RT_EXTRA_LINK_ARGS - "-Wl,-rpath-link,${INFINI_RT_EXTRA_LIBRARY_DIR}") - set(INFINI_RT_LD_LIBRARY_PATH - "${INFINI_RT_LD_LIBRARY_PATH}:${INFINI_RT_EXTRA_LIBRARY_DIR}") + target_link_options(install_consumer + PRIVATE "-Wl,-rpath-link,${INFINI_RT_EXTRA_LIBRARY_DIR}") endif() endforeach() endif() +]=]) + +execute_process( + COMMAND "${CMAKE_COMMAND}" + -S "${INFINI_RT_CONSUMER_PROJECT_DIR}" + -B "${INFINI_RT_CONSUMER_BUILD_DIR}" + "-DCMAKE_CXX_COMPILER=${INFINI_RT_CXX_COMPILER}" + "-DCMAKE_PREFIX_PATH=${INFINI_RT_INSTALL_PREFIX}" + "-DINFINI_RT_INCLUDE_DIR=${INFINI_RT_INCLUDE_DIR}" + "-DINFINI_RT_LIBRARY_DIR=${INFINI_RT_LIBRARY_DIR}" + "-DINFINI_RT_CONSUMER_SOURCE=${INFINI_RT_CONSUMER_SOURCE}" + "-DINFINI_RT_CONSUMER_OUTPUT_DIR=${INFINI_RT_CONSUMER_OUTPUT_DIR}" + "-DINFINI_RT_CONSUMER_OUTPUT_NAME=${INFINI_RT_CONSUMER_OUTPUT_NAME}" + "-DINFINI_RT_EXTRA_INCLUDE_PATHS=${INFINI_RT_EXTRA_INCLUDE_PATHS}" + "-DINFINI_RT_EXTRA_LIBRARY_PATHS=${INFINI_RT_EXTRA_LIBRARY_PATHS}" + "-DINFINI_RT_CONSUMER_BACKEND=${INFINI_RT_CONSUMER_BACKEND}" + RESULT_VARIABLE INFINI_RT_COMPILE_RESULT + OUTPUT_VARIABLE INFINI_RT_COMPILE_OUTPUT + ERROR_VARIABLE INFINI_RT_COMPILE_ERROR) + +if(NOT INFINI_RT_COMPILE_RESULT EQUAL 0) + message(FATAL_ERROR + "Configuring the install consumer failed.\n" + "${INFINI_RT_COMPILE_OUTPUT}\n${INFINI_RT_COMPILE_ERROR}") +endif() execute_process( - COMMAND "${INFINI_RT_CXX_COMPILER}" - -std=c++17 - -Werror - "-I${INFINI_RT_INCLUDE_DIR}" - ${INFINI_RT_EXTRA_COMPILE_ARGS} - "${INFINI_RT_CONSUMER_SOURCE}" - "-L${INFINI_RT_LIBRARY_DIR}" - -linfinirt - "-Wl,-rpath,${INFINI_RT_LIBRARY_DIR}" - ${INFINI_RT_EXTRA_LINK_ARGS} - -o "${INFINI_RT_CONSUMER_BINARY}" + COMMAND "${CMAKE_COMMAND}" --build "${INFINI_RT_CONSUMER_BUILD_DIR}" RESULT_VARIABLE INFINI_RT_COMPILE_RESULT OUTPUT_VARIABLE INFINI_RT_COMPILE_OUTPUT ERROR_VARIABLE INFINI_RT_COMPILE_ERROR) diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 14e6c0d..697cff0 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -27,8 +27,12 @@ int main() { #else constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kNvidia; #endif - infini::rt::set_runtime_device_type(kExpectedDeviceType); - if (infini::rt::runtime_device_type() != kExpectedDeviceType) { + if (!infini::rt::set_runtime_device_type(kExpectedDeviceType).ok()) { + return 1; + } + const auto runtime_device_type = infini::rt::runtime_device_type(); + if (!runtime_device_type.ok() || + *runtime_device_type != kExpectedDeviceType) { return 1; } diff --git a/tests/test_core.cc b/tests/test_core.cc index 1c57c04..68092e1 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -45,6 +45,19 @@ void TestDataType(infini::rt::test::TestContext* context) { DataType::kUInt16, "uint16 should parse by name."); } +void TestStatus(infini::rt::test::TestContext* context) { + const infini::rt::Status ok_status = infini::rt::OkStatus(); + context->Expect(ok_status.ok(), "OkStatus should report success."); + + const infini::rt::Status invalid = + infini::rt::InvalidArgumentError("invalid argument"); + context->Expect(!invalid.ok(), "InvalidArgumentError should report failure."); + + const infini::rt::StatusOr value{1}; + context->Expect(value.ok(), "StatusOr should hold a value."); + context->ExpectEqual(*value, 1, "StatusOr should expose its value."); +} + void TestTensorView(infini::rt::test::TestContext* context) { std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; TensorView tensor{data.data(), std::vector{2, 3}, @@ -89,6 +102,7 @@ int main() { TestDevice(&context); TestDataType(&context); + TestStatus(&context); TestTensorView(&context); return context.ExitCode(); diff --git a/tests/test_default_runtime.cc b/tests/test_default_runtime.cc index bf76a4b..26e62cc 100644 --- a/tests/test_default_runtime.cc +++ b/tests/test_default_runtime.cc @@ -15,11 +15,39 @@ void ExpectSuccess(infini::rt::test::TestContext* context, context->Expect(status == runtime::kSuccess, message); } +void ExpectStatusOk(infini::rt::test::TestContext* context, + const infini::rt::Status& status, const char* message) { + context->Expect(status.ok(), message); +} + +void TestInvalidDeviceType(infini::rt::test::TestContext* context) { + const auto before = infini::rt::runtime_device_type(); + context->Expect(before.ok(), + "Default runtime should report its initial device type."); + + const auto status = + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kQy); + context->Expect(!status.ok(), + "Default runtime should reject disabled device types."); + + const auto after = infini::rt::runtime_device_type(); + context->Expect(after.ok(), + "Rejected device type should keep runtime state valid."); + if (before.ok() && after.ok()) { + context->Expect(*after == *before, + "Rejected device type should not change dispatch target."); + } +} + #if defined(INFINI_RT_TEST_WITH_CPU) void TestCpuDispatch(infini::rt::test::TestContext* context) { - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); + ExpectStatusOk( + context, + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu), + "Default runtime should select CPU dispatch."); + const auto device_type = infini::rt::runtime_device_type(); context->Expect( - infini::rt::runtime_device_type() == infini::rt::Device::Type::kCpu, + device_type.ok() && *device_type == infini::rt::Device::Type::kCpu, "Default runtime should report CPU dispatch."); std::array input{1, 2, 3, 4}; @@ -56,9 +84,13 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { #if defined(INFINI_RT_TEST_WITH_NVIDIA) void TestNvidiaDispatch(infini::rt::test::TestContext* context) { - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); + ExpectStatusOk( + context, + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia), + "Default runtime should select NVIDIA dispatch."); + const auto device_type = infini::rt::runtime_device_type(); context->Expect( - infini::rt::runtime_device_type() == infini::rt::Device::Type::kNvidia, + device_type.ok() && *device_type == infini::rt::Device::Type::kNvidia, "Default runtime should report NVIDIA dispatch."); std::array input{5, 6, 7, 8}; @@ -97,6 +129,8 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { int main() { infini::rt::test::TestContext context; + TestInvalidDeviceType(&context); + #if defined(INFINI_RT_TEST_WITH_CPU) TestCpuDispatch(&context); #endif From 1965f98db146772bcc747fed42e6af6b2b04efb4 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 2 Jul 2026 09:29:35 +0800 Subject: [PATCH 05/15] Revert "Use Abseil status for runtime device API" This reverts commit a26ddffad65819bc0fd2f7aa38bb8cf8a2c8667d. --- CMakeLists.txt | 22 ------ scripts/generate_public_headers.py | 24 +++--- src/CMakeLists.txt | 2 - src/status.h | 21 ----- tests/compile_install_consumer.cmake | 111 +++++++-------------------- tests/install_consumer_smoke.cc | 8 +- tests/test_core.cc | 14 ---- tests/test_default_runtime.cc | 42 +--------- 8 files changed, 41 insertions(+), 203 deletions(-) delete mode 100644 src/status.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 525df0d..38b8d72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,6 @@ project(InfiniRT LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") @@ -34,27 +33,6 @@ option(WITH_ASCEND "Enable Ascend backend" OFF) option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(INFINI_RT_BUILD_TESTING "Build InfiniRT tests" OFF) -option(INFINI_RT_FETCH_ABSEIL "Fetch Abseil when no package is found" ON) - -find_package(absl CONFIG QUIET) -if(NOT absl_FOUND) - if(NOT INFINI_RT_FETCH_ABSEIL) - message(FATAL_ERROR "Abseil was not found. Set absl_DIR or enable INFINI_RT_FETCH_ABSEIL.") - endif() - - include(FetchContent) - set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE) - set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE) - set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE) - FetchContent_Declare( - abseil - GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git - GIT_TAG 20260526.0 - GIT_SHALLOW TRUE - GIT_PROGRESS TRUE - ) - FetchContent_MakeAvailable(abseil) -endif() if(AUTO_DETECT_DEVICES) message(STATUS "Auto-detecting available devices...") diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 9910d94..957d0d8 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -103,7 +103,7 @@ def _rewrite_detail_include(match): _DETAIL_INCLUDE_PATTERN = re.compile( - r'#include "((?:common|native)/[^"]+|data_type\.h|device\.h|dispatcher\.h|hash\.h|runtime\.h|status\.h|tensor_view\.h)"' + r'#include "((?:common|native)/[^"]+|data_type\.h|device\.h|dispatcher\.h|hash\.h|runtime\.h|tensor_view\.h)"' ) @@ -137,7 +137,6 @@ def _write_detail_headers(include_root, source_root, devices): "dispatcher.h", "hash.h", "runtime.h", - "status.h", "tensor_view.h", } @@ -166,7 +165,6 @@ def _write_generated_header(include_root, devices): f"#include {_detail_include('device.h')}", f"#include {_detail_include('hash.h')}", f"#include {_detail_include('runtime.h')}", - f"#include {_detail_include('status.h')}", f"#include {_detail_include('tensor_view.h')}", ] @@ -186,9 +184,9 @@ def _write_generated_header(include_root, devices): namespace infini::rt {{ -Status set_runtime_device_type(Device::Type device_type); +void set_runtime_device_type(Device::Type device_type); -StatusOr runtime_device_type(); +Device::Type runtime_device_type(); namespace runtime {{ namespace generated_detail {{ @@ -351,15 +349,11 @@ def _dispatch_cases(devices, function): def _write_runtime_dispatch_function(function, devices): return f"""{function.signature()} {{ - auto device_type = infini::rt::runtime_device_type(); - if (!device_type.ok()) {{ - return InvalidValueError(); - }} - - switch (*device_type) {{ + switch (infini::rt::runtime_device_type()) {{ {_dispatch_cases(devices, function)} }} + assert(false && "unsupported runtime device type"); return InvalidValueError(); }} """ @@ -374,7 +368,7 @@ def _write_runtime_dispatch(source_path, devices): set_device_type_cases = "\n".join( f""" case {_DEVICE_TYPES[device]}: runtime_device_type_ = device_type; - return OkStatus();""" + return;""" for device in devices ) @@ -395,15 +389,15 @@ def _write_runtime_dispatch(source_path, devices): }} // namespace -Status set_runtime_device_type(Device::Type device_type) {{ +void set_runtime_device_type(Device::Type device_type) {{ switch (device_type) {{ {set_device_type_cases} }} - return InvalidArgumentError("unsupported runtime device type"); + assert(false && "unsupported runtime device type"); }} -StatusOr runtime_device_type() {{ +Device::Type runtime_device_type() {{ return runtime_device_type_; }} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0c856..dd9c99f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,8 +7,6 @@ target_sources(infinirt PRIVATE ${BASE_SRCS} ${PROJECT_SOURCE_DIR}/generated/src/runtime_dispatch.cc) -target_link_libraries(infinirt PUBLIC absl::status absl::statusor) - if(WITH_CPU) target_compile_definitions(infinirt PUBLIC WITH_CPU=1) diff --git a/src/status.h b/src/status.h deleted file mode 100644 index 3ca670a..0000000 --- a/src/status.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef INFINI_RT_STATUS_H_ -#define INFINI_RT_STATUS_H_ - -#include -#include - -namespace infini::rt { - -using Status = absl::Status; - -using StatusCode = absl::StatusCode; - -using absl::InvalidArgumentError; -using absl::OkStatus; - -template -using StatusOr = absl::StatusOr; - -} // namespace infini::rt - -#endif diff --git a/tests/compile_install_consumer.cmake b/tests/compile_install_consumer.cmake index 9f4107b..1bb96d1 100644 --- a/tests/compile_install_consumer.cmake +++ b/tests/compile_install_consumer.cmake @@ -13,110 +13,51 @@ if(NOT EXISTS "${INFINI_RT_LIBRARY_DIR}/libinfinirt.so") message(FATAL_ERROR "The installed InfiniRT library was not found.") endif() +set(INFINI_RT_EXTRA_LINK_ARGS "") +set(INFINI_RT_EXTRA_COMPILE_ARGS "") set(INFINI_RT_LD_LIBRARY_PATH "${INFINI_RT_LIBRARY_DIR}") -if(INFINI_RT_EXTRA_LIBRARY_PATHS) - string(REPLACE ":" ";" INFINI_RT_EXTRA_LIBRARY_DIRS - "${INFINI_RT_EXTRA_LIBRARY_PATHS}") - foreach(INFINI_RT_EXTRA_LIBRARY_DIR ${INFINI_RT_EXTRA_LIBRARY_DIRS}) - if(EXISTS "${INFINI_RT_EXTRA_LIBRARY_DIR}") - set(INFINI_RT_LD_LIBRARY_PATH - "${INFINI_RT_LD_LIBRARY_PATH}:${INFINI_RT_EXTRA_LIBRARY_DIR}") - endif() - endforeach() -endif() - -get_filename_component(INFINI_RT_CONSUMER_OUTPUT_DIR - "${INFINI_RT_CONSUMER_BINARY}" DIRECTORY) -get_filename_component(INFINI_RT_CONSUMER_OUTPUT_NAME - "${INFINI_RT_CONSUMER_BINARY}" NAME) -set(INFINI_RT_CONSUMER_PROJECT_DIR - "${INFINI_RT_CONSUMER_BINARY}.project") -set(INFINI_RT_CONSUMER_BUILD_DIR - "${INFINI_RT_CONSUMER_BINARY}.build") - -file(MAKE_DIRECTORY "${INFINI_RT_CONSUMER_PROJECT_DIR}") -file(WRITE "${INFINI_RT_CONSUMER_PROJECT_DIR}/CMakeLists.txt" [=[ -cmake_minimum_required(VERSION 3.18) -project(InfiniRTInstallConsumer LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -find_package(absl CONFIG REQUIRED) - -add_library(infinirt SHARED IMPORTED GLOBAL) -set_target_properties(infinirt PROPERTIES - IMPORTED_LOCATION "${INFINI_RT_LIBRARY_DIR}/libinfinirt.so") - -add_executable(install_consumer "${INFINI_RT_CONSUMER_SOURCE}") -target_include_directories(install_consumer PRIVATE "${INFINI_RT_INCLUDE_DIR}") -target_compile_options(install_consumer PRIVATE -Werror) -target_link_libraries(install_consumer - PRIVATE - infinirt - absl::status - absl::statusor) -set_target_properties(install_consumer PROPERTIES - OUTPUT_NAME "${INFINI_RT_CONSUMER_OUTPUT_NAME}" - RUNTIME_OUTPUT_DIRECTORY "${INFINI_RT_CONSUMER_OUTPUT_DIR}" - BUILD_RPATH "${INFINI_RT_LIBRARY_DIR}") - -if(INFINI_RT_CONSUMER_BACKEND AND - NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") - target_compile_definitions(install_consumer - PRIVATE INFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1) -endif() - if(INFINI_RT_EXTRA_INCLUDE_PATHS) string(REPLACE ":" ";" INFINI_RT_EXTRA_INCLUDE_DIRS "${INFINI_RT_EXTRA_INCLUDE_PATHS}") - foreach(INFINI_RT_EXTRA_INCLUDE_DIR IN LISTS INFINI_RT_EXTRA_INCLUDE_DIRS) + foreach(INFINI_RT_EXTRA_INCLUDE_DIR ${INFINI_RT_EXTRA_INCLUDE_DIRS}) if(EXISTS "${INFINI_RT_EXTRA_INCLUDE_DIR}") - target_include_directories(install_consumer - PRIVATE "${INFINI_RT_EXTRA_INCLUDE_DIR}") + list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS + "-I${INFINI_RT_EXTRA_INCLUDE_DIR}") endif() endforeach() endif() +if(INFINI_RT_CONSUMER_BACKEND AND NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") + list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS + "-DINFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1") +endif() + if(INFINI_RT_EXTRA_LIBRARY_PATHS) string(REPLACE ":" ";" INFINI_RT_EXTRA_LIBRARY_DIRS "${INFINI_RT_EXTRA_LIBRARY_PATHS}") - foreach(INFINI_RT_EXTRA_LIBRARY_DIR IN LISTS INFINI_RT_EXTRA_LIBRARY_DIRS) + foreach(INFINI_RT_EXTRA_LIBRARY_DIR ${INFINI_RT_EXTRA_LIBRARY_DIRS}) if(EXISTS "${INFINI_RT_EXTRA_LIBRARY_DIR}") - target_link_options(install_consumer - PRIVATE "-Wl,-rpath-link,${INFINI_RT_EXTRA_LIBRARY_DIR}") + list(APPEND INFINI_RT_EXTRA_LINK_ARGS + "-Wl,-rpath-link,${INFINI_RT_EXTRA_LIBRARY_DIR}") + set(INFINI_RT_LD_LIBRARY_PATH + "${INFINI_RT_LD_LIBRARY_PATH}:${INFINI_RT_EXTRA_LIBRARY_DIR}") endif() endforeach() endif() -]=]) - -execute_process( - COMMAND "${CMAKE_COMMAND}" - -S "${INFINI_RT_CONSUMER_PROJECT_DIR}" - -B "${INFINI_RT_CONSUMER_BUILD_DIR}" - "-DCMAKE_CXX_COMPILER=${INFINI_RT_CXX_COMPILER}" - "-DCMAKE_PREFIX_PATH=${INFINI_RT_INSTALL_PREFIX}" - "-DINFINI_RT_INCLUDE_DIR=${INFINI_RT_INCLUDE_DIR}" - "-DINFINI_RT_LIBRARY_DIR=${INFINI_RT_LIBRARY_DIR}" - "-DINFINI_RT_CONSUMER_SOURCE=${INFINI_RT_CONSUMER_SOURCE}" - "-DINFINI_RT_CONSUMER_OUTPUT_DIR=${INFINI_RT_CONSUMER_OUTPUT_DIR}" - "-DINFINI_RT_CONSUMER_OUTPUT_NAME=${INFINI_RT_CONSUMER_OUTPUT_NAME}" - "-DINFINI_RT_EXTRA_INCLUDE_PATHS=${INFINI_RT_EXTRA_INCLUDE_PATHS}" - "-DINFINI_RT_EXTRA_LIBRARY_PATHS=${INFINI_RT_EXTRA_LIBRARY_PATHS}" - "-DINFINI_RT_CONSUMER_BACKEND=${INFINI_RT_CONSUMER_BACKEND}" - RESULT_VARIABLE INFINI_RT_COMPILE_RESULT - OUTPUT_VARIABLE INFINI_RT_COMPILE_OUTPUT - ERROR_VARIABLE INFINI_RT_COMPILE_ERROR) - -if(NOT INFINI_RT_COMPILE_RESULT EQUAL 0) - message(FATAL_ERROR - "Configuring the install consumer failed.\n" - "${INFINI_RT_COMPILE_OUTPUT}\n${INFINI_RT_COMPILE_ERROR}") -endif() execute_process( - COMMAND "${CMAKE_COMMAND}" --build "${INFINI_RT_CONSUMER_BUILD_DIR}" + COMMAND "${INFINI_RT_CXX_COMPILER}" + -std=c++17 + -Werror + "-I${INFINI_RT_INCLUDE_DIR}" + ${INFINI_RT_EXTRA_COMPILE_ARGS} + "${INFINI_RT_CONSUMER_SOURCE}" + "-L${INFINI_RT_LIBRARY_DIR}" + -linfinirt + "-Wl,-rpath,${INFINI_RT_LIBRARY_DIR}" + ${INFINI_RT_EXTRA_LINK_ARGS} + -o "${INFINI_RT_CONSUMER_BINARY}" RESULT_VARIABLE INFINI_RT_COMPILE_RESULT OUTPUT_VARIABLE INFINI_RT_COMPILE_OUTPUT ERROR_VARIABLE INFINI_RT_COMPILE_ERROR) diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 697cff0..14e6c0d 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -27,12 +27,8 @@ int main() { #else constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kNvidia; #endif - if (!infini::rt::set_runtime_device_type(kExpectedDeviceType).ok()) { - return 1; - } - const auto runtime_device_type = infini::rt::runtime_device_type(); - if (!runtime_device_type.ok() || - *runtime_device_type != kExpectedDeviceType) { + infini::rt::set_runtime_device_type(kExpectedDeviceType); + if (infini::rt::runtime_device_type() != kExpectedDeviceType) { return 1; } diff --git a/tests/test_core.cc b/tests/test_core.cc index 68092e1..1c57c04 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -45,19 +45,6 @@ void TestDataType(infini::rt::test::TestContext* context) { DataType::kUInt16, "uint16 should parse by name."); } -void TestStatus(infini::rt::test::TestContext* context) { - const infini::rt::Status ok_status = infini::rt::OkStatus(); - context->Expect(ok_status.ok(), "OkStatus should report success."); - - const infini::rt::Status invalid = - infini::rt::InvalidArgumentError("invalid argument"); - context->Expect(!invalid.ok(), "InvalidArgumentError should report failure."); - - const infini::rt::StatusOr value{1}; - context->Expect(value.ok(), "StatusOr should hold a value."); - context->ExpectEqual(*value, 1, "StatusOr should expose its value."); -} - void TestTensorView(infini::rt::test::TestContext* context) { std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; TensorView tensor{data.data(), std::vector{2, 3}, @@ -102,7 +89,6 @@ int main() { TestDevice(&context); TestDataType(&context); - TestStatus(&context); TestTensorView(&context); return context.ExitCode(); diff --git a/tests/test_default_runtime.cc b/tests/test_default_runtime.cc index 26e62cc..bf76a4b 100644 --- a/tests/test_default_runtime.cc +++ b/tests/test_default_runtime.cc @@ -15,39 +15,11 @@ void ExpectSuccess(infini::rt::test::TestContext* context, context->Expect(status == runtime::kSuccess, message); } -void ExpectStatusOk(infini::rt::test::TestContext* context, - const infini::rt::Status& status, const char* message) { - context->Expect(status.ok(), message); -} - -void TestInvalidDeviceType(infini::rt::test::TestContext* context) { - const auto before = infini::rt::runtime_device_type(); - context->Expect(before.ok(), - "Default runtime should report its initial device type."); - - const auto status = - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kQy); - context->Expect(!status.ok(), - "Default runtime should reject disabled device types."); - - const auto after = infini::rt::runtime_device_type(); - context->Expect(after.ok(), - "Rejected device type should keep runtime state valid."); - if (before.ok() && after.ok()) { - context->Expect(*after == *before, - "Rejected device type should not change dispatch target."); - } -} - #if defined(INFINI_RT_TEST_WITH_CPU) void TestCpuDispatch(infini::rt::test::TestContext* context) { - ExpectStatusOk( - context, - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu), - "Default runtime should select CPU dispatch."); - const auto device_type = infini::rt::runtime_device_type(); + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); context->Expect( - device_type.ok() && *device_type == infini::rt::Device::Type::kCpu, + infini::rt::runtime_device_type() == infini::rt::Device::Type::kCpu, "Default runtime should report CPU dispatch."); std::array input{1, 2, 3, 4}; @@ -84,13 +56,9 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { #if defined(INFINI_RT_TEST_WITH_NVIDIA) void TestNvidiaDispatch(infini::rt::test::TestContext* context) { - ExpectStatusOk( - context, - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia), - "Default runtime should select NVIDIA dispatch."); - const auto device_type = infini::rt::runtime_device_type(); + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); context->Expect( - device_type.ok() && *device_type == infini::rt::Device::Type::kNvidia, + infini::rt::runtime_device_type() == infini::rt::Device::Type::kNvidia, "Default runtime should report NVIDIA dispatch."); std::array input{5, 6, 7, 8}; @@ -129,8 +97,6 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { int main() { infini::rt::test::TestContext context; - TestInvalidDeviceType(&context); - #if defined(INFINI_RT_TEST_WITH_CPU) TestCpuDispatch(&context); #endif From 1fd4ef4b7c674f774729753b0dc83c314dd9643a Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 2 Jul 2026 10:13:52 +0800 Subject: [PATCH 06/15] Address runtime dispatch review feedback --- README.md | 38 ++-- scripts/generate_public_headers.py | 176 ++++++++++-------- src/native/cuda/nvidia/runtime_.h | 32 ++-- src/tensor_view.h | 22 +-- tests/CMakeLists.txt | 6 +- tests/test_core.cc | 4 - ...lt_runtime.cc => test_runtime_dispatch.cc} | 62 +++++- 7 files changed, 200 insertions(+), 140 deletions(-) rename tests/{test_default_runtime.cc => test_runtime_dispatch.cc} (60%) diff --git a/README.md b/README.md index 33fbe90..51099fc 100644 --- a/README.md +++ b/README.md @@ -66,39 +66,45 @@ cmake --install build #include int main() { - infini::rt::SetDevice(0); + namespace runtime = infini::rt::runtime; + + runtime::SetDevice(0); constexpr std::size_t size = 1024; void* ptr = nullptr; - infini::rt::Malloc(&ptr, size); - infini::rt::Memset(ptr, 0, size); - infini::rt::Free(ptr); + runtime::Malloc(&ptr, size); + runtime::Memset(ptr, 0, size); + runtime::Free(ptr); return 0; } ``` -The top-level runtime API dispatches through `infini::rt::Runtime<>`, which is -the `Runtime` default specialization. A GPU backend is -selected initially when one is enabled; otherwise CPU is selected. -`SetDeviceType` accepts only backends enabled in the current build. +The CUDA Runtime API-aligned layer lives under `infini::rt::runtime`. The +top-level `infini::rt::set_runtime_device_type` and +`infini::rt::runtime_device_type` APIs select which enabled backend receives +those runtime calls. A GPU backend is selected initially when one is enabled; +otherwise CPU is selected. ```cpp +namespace runtime = infini::rt::runtime; + constexpr std::size_t size = 1024; void* ptr = nullptr; -infini::rt::Runtime<>::SetDeviceType(infini::rt::Device::Type::kCpu); -infini::rt::Malloc(&ptr, size); -infini::rt::Free(ptr); +infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); +runtime::Malloc(&ptr, size); +runtime::Free(ptr); -infini::rt::Runtime<>::SetDeviceType(infini::rt::Device::Type::kNvidia); -infini::rt::Malloc(&ptr, size); -infini::rt::Free(ptr); +infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); +runtime::Malloc(&ptr, size); +runtime::Free(ptr); ``` -Use `infini::rt::Runtime` when CPU runtime -calls are needed explicitly in a build that also enables an accelerator backend. +Use `infini::rt::runtime::Runtime` when CPU +runtime calls are needed explicitly in a build that also enables an accelerator +backend. ## Using Installed InfiniRT From Another Project diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 957d0d8..d2eeca9 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -71,6 +71,13 @@ "cpu", ) +_NVIDIA_RUNTIME_HEADER = "native/cuda/nvidia/runtime_.h" + +_PUBLIC_TYPE_NAMES = { + "cudaMemcpyKind": "MemcpyKind", + "size_t": "std::size_t", +} + def _guard(path): token = "_".join(path.parts).replace(".", "_").upper() @@ -156,7 +163,8 @@ def _write_detail_headers(include_root, source_root, devices): _write_detail_header(include_root, source_root, relative_path) -def _write_generated_header(include_root, devices): +def _write_generated_header(include_root, source_root, devices): + runtime_api = _parse_nvidia_runtime_api(source_root) default_device = _default_device(devices) default_device_type = _DEVICE_TYPES[default_device] includes = [ @@ -174,6 +182,19 @@ def _write_generated_header(include_root, devices): for device in devices: includes.append(f"#include ") + runtime_aliases = "\n\n".join( + f"using {alias} = typename generated_detail::DefaultRuntime::{alias};" + for alias in runtime_api.aliases + ) + memcpy_kind_values = "\n".join( + f""" {constant} = + static_cast(generated_detail::DefaultRuntime::{constant}),""" + for constant in runtime_api.memcpy_kind_constants + ) + runtime_declarations = "\n\n".join( + f"{function.signature()};" for function in runtime_api.functions + ) + path = include_root / "infini" / "rt" / "generated.h" path.parent.mkdir(parents=True, exist_ok=True) path.write_text( @@ -191,47 +212,21 @@ def _write_generated_header(include_root, devices): namespace runtime {{ namespace generated_detail {{ -using DefaultErrorRuntime = Runtime<{default_device_type}>; +using DefaultRuntime = Runtime<{default_device_type}>; inline constexpr Device::Type kDefaultDeviceType = {default_device_type}; }} // namespace generated_detail -using Error = typename generated_detail::DefaultErrorRuntime::Error; - -using Stream = typename generated_detail::DefaultErrorRuntime::Stream; +{runtime_aliases} -inline constexpr Error kSuccess = generated_detail::DefaultErrorRuntime::kSuccess; +inline constexpr Error kSuccess = generated_detail::DefaultRuntime::kSuccess; enum class MemcpyKind {{ - kMemcpyHostToHost = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost), - kMemcpyHostToDevice = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice), - kMemcpyDeviceToHost = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost), - kMemcpyDeviceToDevice = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice), +{memcpy_kind_values} }}; -Error SetDevice(int device); - -Error GetDevice(int* device); - -Error GetDeviceCount(int* count); - -Error DeviceSynchronize(); - -Error Malloc(void** ptr, std::size_t size); - -Error Free(void* ptr); - -Error Memset(void* ptr, int value, std::size_t count); - -Error Memcpy(void* dst, const void* src, std::size_t count, MemcpyKind kind); - -Error MemcpyAsync(void* dst, const void* src, std::size_t count, - MemcpyKind kind, Stream stream); +{runtime_declarations} }} // namespace runtime }} // namespace infini::rt @@ -260,48 +255,73 @@ def params_decl(self): return ", ".join(f"{param.type} {param.name}" for param in self.params) -_PUBLIC_RUNTIME_FUNCTIONS = ( - _Function("Error", "SetDevice", (_Param("int", "device"),)), - _Function("Error", "GetDevice", (_Param("int*", "device"),)), - _Function("Error", "GetDeviceCount", (_Param("int*", "count"),)), - _Function("Error", "DeviceSynchronize", ()), - _Function( - "Error", - "Malloc", - (_Param("void**", "ptr"), _Param("std::size_t", "size")), - ), - _Function("Error", "Free", (_Param("void*", "ptr"),)), - _Function( - "Error", - "Memset", - ( - _Param("void*", "ptr"), - _Param("int", "value"), - _Param("std::size_t", "count"), - ), - ), - _Function( - "Error", - "Memcpy", - ( - _Param("void*", "dst"), - _Param("const void*", "src"), - _Param("std::size_t", "count"), - _Param("MemcpyKind", "kind"), - ), - ), - _Function( - "Error", - "MemcpyAsync", - ( - _Param("void*", "dst"), - _Param("const void*", "src"), - _Param("std::size_t", "count"), - _Param("MemcpyKind", "kind"), - _Param("Stream", "stream"), - ), - ), -) +@dataclasses.dataclass(frozen=True) +class _RuntimeApi: + aliases: tuple[str, ...] + memcpy_kind_constants: tuple[str, ...] + functions: tuple[_Function, ...] + + +def _runtime_struct_body(source_root): + text = (source_root / _NVIDIA_RUNTIME_HEADER).read_text() + match = re.search( + r"struct Runtime.*?\{\n(?P.*?)\n\};", + text, + flags=re.DOTALL, + ) + if match is None: + raise ValueError("could not find NVIDIA Runtime specialization") + + return match.group("body") + + +def _public_type_name(name): + return _PUBLIC_TYPE_NAMES.get(name, name) + + +def _parse_param(param): + param_type, param_name = " ".join(param.strip().split()).rsplit(" ", 1) + + return _Param(_public_type_name(param_type), param_name) + + +def _parse_function(match): + params = match.group("params").strip() + return _Function( + _public_type_name(match.group("return_type")), + match.group("name"), + tuple(_parse_param(param) for param in re.split(r"\s*,\s*", params) if param), + ) + + +def _parse_nvidia_runtime_api(source_root): + body = _runtime_struct_body(source_root) + aliases = tuple( + alias + for alias in re.findall(r"^\s+using\s+(\w+)\s*=", body, flags=re.MULTILINE) + if alias in {"Error", "Stream"} + ) + memcpy_kind_constants = tuple( + re.findall( + r"^\s+static constexpr auto (kMemcpy\w+)\s*=", + body, + flags=re.MULTILINE, + ) + ) + functions = tuple( + _parse_function(match) + for match in re.finditer( + r"^\s+static\s+(?P\w+)\s+" + r"(?P[A-Z]\w*)\((?P[^)]*)\)\s*\{", + body, + flags=re.MULTILINE, + ) + ) + + if not aliases or not memcpy_kind_constants or not functions: + raise ValueError("NVIDIA Runtime specialization has no public API") + + return _RuntimeApi(aliases, memcpy_kind_constants, functions) def _default_device(devices): @@ -359,8 +379,8 @@ def _write_runtime_dispatch_function(function, devices): """ -def _write_runtime_dispatch(source_path, devices): - functions = _PUBLIC_RUNTIME_FUNCTIONS +def _write_runtime_dispatch(source_path, source_root, devices): + functions = _parse_nvidia_runtime_api(source_root).functions dispatch_functions = "\n".join( _write_runtime_dispatch_function(function, devices=devices) for function in functions @@ -470,8 +490,8 @@ def main(): for wrapper_device, header_name, target in _DEVICE_HEADERS[device]: _write_wrapper(include_root, wrapper_device, header_name, target) - _write_generated_header(include_root, devices) - _write_runtime_dispatch(pathlib.Path(args.source_output), devices) + _write_generated_header(include_root, source_root, devices) + _write_runtime_dispatch(pathlib.Path(args.source_output), source_root, devices) if __name__ == "__main__": diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index 1786e08..1ce61f7 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -1,7 +1,7 @@ #ifndef INFINI_RT_NVIDIA_RUNTIME__H_ #define INFINI_RT_NVIDIA_RUNTIME__H_ -#include +#include // clang-format off #include @@ -23,23 +23,29 @@ struct Runtime static constexpr Error kSuccess = cudaSuccess; - static constexpr auto SetDevice = cudaSetDevice; + static Error SetDevice(int device) { return cudaSetDevice(device); } - static constexpr auto GetDevice = cudaGetDevice; + static Error GetDevice(int* device) { return cudaGetDevice(device); } - static constexpr auto GetDeviceCount = cudaGetDeviceCount; + static Error GetDeviceCount(int* count) { return cudaGetDeviceCount(count); } - static constexpr auto DeviceSynchronize = cudaDeviceSynchronize; + static Error DeviceSynchronize() { return cudaDeviceSynchronize(); } - static constexpr auto Malloc = [](auto&&... args) { - return cudaMalloc(std::forward(args)...); - }; + static Error Malloc(void** ptr, std::size_t size) { + return cudaMalloc(ptr, size); + } - static constexpr auto Memcpy = cudaMemcpy; + static Error Memcpy(void* dst, const void* src, std::size_t count, + cudaMemcpyKind kind) { + return cudaMemcpy(dst, src, count, kind); + } - static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static Error MemcpyAsync(void* dst, const void* src, std::size_t count, + cudaMemcpyKind kind, Stream stream) { + return cudaMemcpyAsync(dst, src, count, kind, stream); + } - static constexpr auto Free = cudaFree; + static Error Free(void* ptr) { return cudaFree(ptr); } static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; @@ -49,7 +55,9 @@ struct Runtime static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; - static constexpr auto Memset = cudaMemset; + static Error Memset(void* ptr, int value, std::size_t count) { + return cudaMemset(ptr, value, count); + } }; static_assert(Runtime::Validate()); diff --git a/src/tensor_view.h b/src/tensor_view.h index dcf7cc9..0b6fc59 100644 --- a/src/tensor_view.h +++ b/src/tensor_view.h @@ -3,8 +3,6 @@ #include #include -#include -#include #include #include "data_type.h" @@ -13,22 +11,6 @@ namespace infini::rt { -namespace tensor_view_detail { - -template -struct IsTensorLike : std::false_type {}; - -template -struct IsTensorLike().data()), - decltype(std::declval().shape()), - decltype(std::declval().dtype()), - decltype(std::declval().device()), - decltype(std::declval().strides())>> - : std::true_type {}; - -} // namespace tensor_view_detail - class TensorView { public: using Size = std::size_t; @@ -41,9 +23,7 @@ class TensorView { using Strides = std::vector; - template ::value>> + template TensorView(const TensorLike& tensor) : data_{const_cast(static_cast(tensor.data()))}, shape_{tensor.shape()}, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1aeca74..f24954c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,13 +8,13 @@ add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) if(WITH_CPU OR WITH_NVIDIA) - add_infini_rt_test(test_default_runtime test_default_runtime.cc) + add_infini_rt_test(test_runtime_dispatch test_runtime_dispatch.cc) if(WITH_CPU) - target_compile_definitions(test_default_runtime + target_compile_definitions(test_runtime_dispatch PRIVATE INFINI_RT_TEST_WITH_CPU=1) endif() if(WITH_NVIDIA) - target_compile_definitions(test_default_runtime + target_compile_definitions(test_runtime_dispatch PRIVATE INFINI_RT_TEST_WITH_NVIDIA=1) endif() endif() diff --git a/tests/test_core.cc b/tests/test_core.cc index 1c57c04..e8b8071 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -2,7 +2,6 @@ #include #include -#include #include #include "test_helper.h" @@ -13,9 +12,6 @@ using infini::rt::DataType; using infini::rt::Device; using infini::rt::TensorView; -static_assert(!std::is_constructible_v>, - "TensorView should not treat tensor containers as tensor-like."); - void TestDevice(infini::rt::test::TestContext* context) { const Device cpu{Device::Type::kCpu}; const Device nvidia{Device::Type::kNvidia, 1}; diff --git a/tests/test_default_runtime.cc b/tests/test_runtime_dispatch.cc similarity index 60% rename from tests/test_default_runtime.cc rename to tests/test_runtime_dispatch.cc index bf76a4b..e48a276 100644 --- a/tests/test_default_runtime.cc +++ b/tests/test_runtime_dispatch.cc @@ -20,7 +20,7 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); context->Expect( infini::rt::runtime_device_type() == infini::rt::Device::Type::kCpu, - "Default runtime should report CPU dispatch."); + "Runtime dispatch should report CPU dispatch."); std::array input{1, 2, 3, 4}; std::array output{}; @@ -28,6 +28,18 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { ExpectSuccess(context, runtime::SetDevice(0), "CPU dispatch should set device 0."); + int current_device = -1; + ExpectSuccess(context, runtime::GetDevice(¤t_device), + "CPU dispatch should get the current device."); + context->ExpectEqual(current_device, 0, + "CPU dispatch should keep the current device."); + + int device_count = 0; + ExpectSuccess(context, runtime::GetDeviceCount(&device_count), + "CPU dispatch should get the device count."); + context->Expect(device_count > 0, + "CPU dispatch should report at least one device."); + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), "CPU dispatch should allocate memory."); if (ptr == nullptr) { @@ -46,11 +58,23 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { runtime::Memcpy(output.data(), ptr, output.size(), runtime::MemcpyKind::kMemcpyDeviceToHost), "CPU dispatch should copy runtime memory to host."); - ExpectSuccess(context, runtime::Free(ptr), - "CPU dispatch should free memory."); context->ExpectEqual(output, input, "CPU dispatch should preserve copied bytes."); + + ExpectSuccess(context, runtime::Memset(ptr, 0x5A, output.size()), + "CPU dispatch should fill runtime memory."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::MemcpyKind::kMemcpyDeviceToHost), + "CPU dispatch should copy filled memory to host."); + for (const auto value : output) { + context->ExpectEqual(value, static_cast(0x5A), + "CPU dispatch should preserve filled bytes."); + } + + ExpectSuccess(context, runtime::Free(ptr), + "CPU dispatch should free memory."); } #endif @@ -59,7 +83,7 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); context->Expect( infini::rt::runtime_device_type() == infini::rt::Device::Type::kNvidia, - "Default runtime should report NVIDIA dispatch."); + "Runtime dispatch should report NVIDIA dispatch."); std::array input{5, 6, 7, 8}; std::array output{}; @@ -67,6 +91,18 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { ExpectSuccess(context, runtime::SetDevice(0), "NVIDIA dispatch should set device 0."); + int current_device = -1; + ExpectSuccess(context, runtime::GetDevice(¤t_device), + "NVIDIA dispatch should get the current device."); + context->ExpectEqual(current_device, 0, + "NVIDIA dispatch should keep the current device."); + + int device_count = 0; + ExpectSuccess(context, runtime::GetDeviceCount(&device_count), + "NVIDIA dispatch should get the device count."); + context->Expect(device_count > 0, + "NVIDIA dispatch should report at least one device."); + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), "NVIDIA dispatch should allocate memory."); if (ptr == nullptr) { @@ -84,11 +120,25 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { runtime::Memcpy(output.data(), ptr, output.size(), runtime::MemcpyKind::kMemcpyDeviceToHost), "NVIDIA dispatch should copy device data to host."); - ExpectSuccess(context, runtime::Free(ptr), - "NVIDIA dispatch should free memory."); context->ExpectEqual(output, input, "NVIDIA dispatch should preserve copied bytes."); + + ExpectSuccess(context, runtime::Memset(ptr, 0x5A, output.size()), + "NVIDIA dispatch should fill runtime memory."); + ExpectSuccess(context, runtime::DeviceSynchronize(), + "NVIDIA dispatch should synchronize filled memory."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::MemcpyKind::kMemcpyDeviceToHost), + "NVIDIA dispatch should copy filled memory to host."); + for (const auto value : output) { + context->ExpectEqual(value, static_cast(0x5A), + "NVIDIA dispatch should preserve filled bytes."); + } + + ExpectSuccess(context, runtime::Free(ptr), + "NVIDIA dispatch should free memory."); } #endif From d854b8639ecb62403a1ada47abe0aab3f0db4ec8 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 2 Jul 2026 10:45:54 +0800 Subject: [PATCH 07/15] Keep runtime API list in generator --- scripts/generate_public_headers.py | 155 ++++++++++++----------------- src/native/cuda/nvidia/runtime_.h | 32 +++--- 2 files changed, 73 insertions(+), 114 deletions(-) diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index d2eeca9..b10e7f4 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -71,13 +71,6 @@ "cpu", ) -_NVIDIA_RUNTIME_HEADER = "native/cuda/nvidia/runtime_.h" - -_PUBLIC_TYPE_NAMES = { - "cudaMemcpyKind": "MemcpyKind", - "size_t": "std::size_t", -} - def _guard(path): token = "_".join(path.parts).replace(".", "_").upper() @@ -163,8 +156,7 @@ def _write_detail_headers(include_root, source_root, devices): _write_detail_header(include_root, source_root, relative_path) -def _write_generated_header(include_root, source_root, devices): - runtime_api = _parse_nvidia_runtime_api(source_root) +def _write_generated_header(include_root, devices): default_device = _default_device(devices) default_device_type = _DEVICE_TYPES[default_device] includes = [ @@ -182,17 +174,8 @@ def _write_generated_header(include_root, source_root, devices): for device in devices: includes.append(f"#include ") - runtime_aliases = "\n\n".join( - f"using {alias} = typename generated_detail::DefaultRuntime::{alias};" - for alias in runtime_api.aliases - ) - memcpy_kind_values = "\n".join( - f""" {constant} = - static_cast(generated_detail::DefaultRuntime::{constant}),""" - for constant in runtime_api.memcpy_kind_constants - ) runtime_declarations = "\n\n".join( - f"{function.signature()};" for function in runtime_api.functions + f"{function.signature()};" for function in _PUBLIC_RUNTIME_FUNCTIONS ) path = include_root / "infini" / "rt" / "generated.h" @@ -212,18 +195,27 @@ def _write_generated_header(include_root, source_root, devices): namespace runtime {{ namespace generated_detail {{ -using DefaultRuntime = Runtime<{default_device_type}>; +using DefaultErrorRuntime = Runtime<{default_device_type}>; inline constexpr Device::Type kDefaultDeviceType = {default_device_type}; }} // namespace generated_detail -{runtime_aliases} +using Error = typename generated_detail::DefaultErrorRuntime::Error; + +using Stream = typename generated_detail::DefaultErrorRuntime::Stream; -inline constexpr Error kSuccess = generated_detail::DefaultRuntime::kSuccess; +inline constexpr Error kSuccess = generated_detail::DefaultErrorRuntime::kSuccess; enum class MemcpyKind {{ -{memcpy_kind_values} + kMemcpyHostToHost = + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost), + kMemcpyHostToDevice = + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice), + kMemcpyDeviceToHost = + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost), + kMemcpyDeviceToDevice = + static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice), }}; {runtime_declarations} @@ -255,73 +247,48 @@ def params_decl(self): return ", ".join(f"{param.type} {param.name}" for param in self.params) -@dataclasses.dataclass(frozen=True) -class _RuntimeApi: - aliases: tuple[str, ...] - memcpy_kind_constants: tuple[str, ...] - functions: tuple[_Function, ...] - - -def _runtime_struct_body(source_root): - text = (source_root / _NVIDIA_RUNTIME_HEADER).read_text() - match = re.search( - r"struct Runtime.*?\{\n(?P.*?)\n\};", - text, - flags=re.DOTALL, - ) - if match is None: - raise ValueError("could not find NVIDIA Runtime specialization") - - return match.group("body") - - -def _public_type_name(name): - return _PUBLIC_TYPE_NAMES.get(name, name) - - -def _parse_param(param): - param_type, param_name = " ".join(param.strip().split()).rsplit(" ", 1) - - return _Param(_public_type_name(param_type), param_name) - - -def _parse_function(match): - params = match.group("params").strip() - return _Function( - _public_type_name(match.group("return_type")), - match.group("name"), - tuple(_parse_param(param) for param in re.split(r"\s*,\s*", params) if param), - ) - - -def _parse_nvidia_runtime_api(source_root): - body = _runtime_struct_body(source_root) - aliases = tuple( - alias - for alias in re.findall(r"^\s+using\s+(\w+)\s*=", body, flags=re.MULTILINE) - if alias in {"Error", "Stream"} - ) - memcpy_kind_constants = tuple( - re.findall( - r"^\s+static constexpr auto (kMemcpy\w+)\s*=", - body, - flags=re.MULTILINE, - ) - ) - functions = tuple( - _parse_function(match) - for match in re.finditer( - r"^\s+static\s+(?P\w+)\s+" - r"(?P[A-Z]\w*)\((?P[^)]*)\)\s*\{", - body, - flags=re.MULTILINE, - ) - ) - - if not aliases or not memcpy_kind_constants or not functions: - raise ValueError("NVIDIA Runtime specialization has no public API") - - return _RuntimeApi(aliases, memcpy_kind_constants, functions) +_PUBLIC_RUNTIME_FUNCTIONS = ( + _Function("Error", "SetDevice", (_Param("int", "device"),)), + _Function("Error", "GetDevice", (_Param("int*", "device"),)), + _Function("Error", "GetDeviceCount", (_Param("int*", "count"),)), + _Function("Error", "DeviceSynchronize", ()), + _Function( + "Error", + "Malloc", + (_Param("void**", "ptr"), _Param("std::size_t", "size")), + ), + _Function("Error", "Free", (_Param("void*", "ptr"),)), + _Function( + "Error", + "Memset", + ( + _Param("void*", "ptr"), + _Param("int", "value"), + _Param("std::size_t", "count"), + ), + ), + _Function( + "Error", + "Memcpy", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + ), + ), + _Function( + "Error", + "MemcpyAsync", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + _Param("Stream", "stream"), + ), + ), +) def _default_device(devices): @@ -379,8 +346,8 @@ def _write_runtime_dispatch_function(function, devices): """ -def _write_runtime_dispatch(source_path, source_root, devices): - functions = _parse_nvidia_runtime_api(source_root).functions +def _write_runtime_dispatch(source_path, devices): + functions = _PUBLIC_RUNTIME_FUNCTIONS dispatch_functions = "\n".join( _write_runtime_dispatch_function(function, devices=devices) for function in functions @@ -490,8 +457,8 @@ def main(): for wrapper_device, header_name, target in _DEVICE_HEADERS[device]: _write_wrapper(include_root, wrapper_device, header_name, target) - _write_generated_header(include_root, source_root, devices) - _write_runtime_dispatch(pathlib.Path(args.source_output), source_root, devices) + _write_generated_header(include_root, devices) + _write_runtime_dispatch(pathlib.Path(args.source_output), devices) if __name__ == "__main__": diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index 1ce61f7..1786e08 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -1,7 +1,7 @@ #ifndef INFINI_RT_NVIDIA_RUNTIME__H_ #define INFINI_RT_NVIDIA_RUNTIME__H_ -#include +#include // clang-format off #include @@ -23,29 +23,23 @@ struct Runtime static constexpr Error kSuccess = cudaSuccess; - static Error SetDevice(int device) { return cudaSetDevice(device); } + static constexpr auto SetDevice = cudaSetDevice; - static Error GetDevice(int* device) { return cudaGetDevice(device); } + static constexpr auto GetDevice = cudaGetDevice; - static Error GetDeviceCount(int* count) { return cudaGetDeviceCount(count); } + static constexpr auto GetDeviceCount = cudaGetDeviceCount; - static Error DeviceSynchronize() { return cudaDeviceSynchronize(); } + static constexpr auto DeviceSynchronize = cudaDeviceSynchronize; - static Error Malloc(void** ptr, std::size_t size) { - return cudaMalloc(ptr, size); - } + static constexpr auto Malloc = [](auto&&... args) { + return cudaMalloc(std::forward(args)...); + }; - static Error Memcpy(void* dst, const void* src, std::size_t count, - cudaMemcpyKind kind) { - return cudaMemcpy(dst, src, count, kind); - } + static constexpr auto Memcpy = cudaMemcpy; - static Error MemcpyAsync(void* dst, const void* src, std::size_t count, - cudaMemcpyKind kind, Stream stream) { - return cudaMemcpyAsync(dst, src, count, kind, stream); - } + static constexpr auto MemcpyAsync = cudaMemcpyAsync; - static Error Free(void* ptr) { return cudaFree(ptr); } + static constexpr auto Free = cudaFree; static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; @@ -55,9 +49,7 @@ struct Runtime static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; - static Error Memset(void* ptr, int value, std::size_t count) { - return cudaMemset(ptr, value, count); - } + static constexpr auto Memset = cudaMemset; }; static_assert(Runtime::Validate()); From 091d22bb7df85f248c311a6f1724716e9fbc4558 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 2 Jul 2026 12:46:23 +0800 Subject: [PATCH 08/15] Add TensorView constructor guard test --- src/tensor_view.h | 22 +++++++++++++- tests/CMakeLists.txt | 1 + tests/test_core.cc | 40 ------------------------- tests/test_tensor_view.cc | 63 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 41 deletions(-) create mode 100644 tests/test_tensor_view.cc diff --git a/src/tensor_view.h b/src/tensor_view.h index 0b6fc59..dcf7cc9 100644 --- a/src/tensor_view.h +++ b/src/tensor_view.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include "data_type.h" @@ -11,6 +13,22 @@ namespace infini::rt { +namespace tensor_view_detail { + +template +struct IsTensorLike : std::false_type {}; + +template +struct IsTensorLike().data()), + decltype(std::declval().shape()), + decltype(std::declval().dtype()), + decltype(std::declval().device()), + decltype(std::declval().strides())>> + : std::true_type {}; + +} // namespace tensor_view_detail + class TensorView { public: using Size = std::size_t; @@ -23,7 +41,9 @@ class TensorView { using Strides = std::vector; - template + template ::value>> TensorView(const TensorLike& tensor) : data_{const_cast(static_cast(tensor.data()))}, shape_{tensor.shape()}, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f24954c..672d3b6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ endfunction() add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) +add_infini_rt_test(test_tensor_view test_tensor_view.cc) if(WITH_CPU OR WITH_NVIDIA) add_infini_rt_test(test_runtime_dispatch test_runtime_dispatch.cc) diff --git a/tests/test_core.cc b/tests/test_core.cc index e8b8071..20be207 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -2,7 +2,6 @@ #include #include -#include #include "test_helper.h" @@ -10,7 +9,6 @@ namespace { using infini::rt::DataType; using infini::rt::Device; -using infini::rt::TensorView; void TestDevice(infini::rt::test::TestContext* context) { const Device cpu{Device::Type::kCpu}; @@ -41,43 +39,6 @@ void TestDataType(infini::rt::test::TestContext* context) { DataType::kUInt16, "uint16 should parse by name."); } -void TestTensorView(infini::rt::test::TestContext* context) { - std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorView tensor{data.data(), std::vector{2, 3}, - DataType::kFloat32, Device{Device::Type::kCpu}}; - - context->ExpectEqual(tensor.ndim(), std::size_t{2}, - "TensorView should keep its rank."); - context->ExpectEqual(tensor.numel(), std::size_t{6}, - "TensorView should compute element count."); - context->ExpectEqual(tensor.element_size(), std::size_t{4}, - "TensorView should compute element size."); - context->ExpectEqual(tensor.size(0), std::size_t{2}, - "TensorView should expose dimension sizes."); - context->ExpectEqual(tensor.size(-1), std::size_t{3}, - "TensorView should support negative dimension sizes."); - context->ExpectEqual(tensor.stride(0), std::ptrdiff_t{3}, - "TensorView should compute default row-major strides."); - context->ExpectEqual(tensor.stride(1), std::ptrdiff_t{1}, - "TensorView should compute default innermost stride."); - context->Expect(tensor.IsContiguous(), - "Default TensorView strides should be contiguous."); - - TensorView transposed = tensor.T(); - context->ExpectEqual(transposed.shape(), TensorView::Shape({3, 2}), - "Transposed TensorView should swap shape."); - context->ExpectEqual(transposed.strides(), TensorView::Strides({1, 3}), - "Transposed TensorView should swap strides."); - context->Expect(!transposed.IsContiguous(), - "Transposed TensorView should not be contiguous."); - - TensorView strided{data.data(), std::vector{2, 3}, - DataType::kFloat32, Device{Device::Type::kCpu}, - std::vector{4, 1}}; - context->Expect(!strided.IsContiguous(), - "TensorView with row padding should not be contiguous."); -} - } // namespace int main() { @@ -85,7 +46,6 @@ int main() { TestDevice(&context); TestDataType(&context); - TestTensorView(&context); return context.ExitCode(); } diff --git a/tests/test_tensor_view.cc b/tests/test_tensor_view.cc new file mode 100644 index 0000000..37b2c8d --- /dev/null +++ b/tests/test_tensor_view.cc @@ -0,0 +1,63 @@ +#include + +#include +#include +#include + +#include "test_helper.h" + +namespace { + +using infini::rt::DataType; +using infini::rt::Device; +using infini::rt::TensorView; + +static_assert(!std::is_constructible_v>, + "TensorView should not treat tensor containers as tensor-like."); + +void TestTensorView(infini::rt::test::TestContext* context) { + std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TensorView tensor{data.data(), std::vector{2, 3}, + DataType::kFloat32, Device{Device::Type::kCpu}}; + + context->ExpectEqual(tensor.ndim(), std::size_t{2}, + "TensorView should keep its rank."); + context->ExpectEqual(tensor.numel(), std::size_t{6}, + "TensorView should compute element count."); + context->ExpectEqual(tensor.element_size(), std::size_t{4}, + "TensorView should compute element size."); + context->ExpectEqual(tensor.size(0), std::size_t{2}, + "TensorView should expose dimension sizes."); + context->ExpectEqual(tensor.size(-1), std::size_t{3}, + "TensorView should support negative dimension sizes."); + context->ExpectEqual(tensor.stride(0), std::ptrdiff_t{3}, + "TensorView should compute default row-major strides."); + context->ExpectEqual(tensor.stride(1), std::ptrdiff_t{1}, + "TensorView should compute default innermost stride."); + context->Expect(tensor.IsContiguous(), + "Default TensorView strides should be contiguous."); + + TensorView transposed = tensor.T(); + context->ExpectEqual(transposed.shape(), TensorView::Shape({3, 2}), + "Transposed TensorView should swap shape."); + context->ExpectEqual(transposed.strides(), TensorView::Strides({1, 3}), + "Transposed TensorView should swap strides."); + context->Expect(!transposed.IsContiguous(), + "Transposed TensorView should not be contiguous."); + + TensorView strided{data.data(), std::vector{2, 3}, + DataType::kFloat32, Device{Device::Type::kCpu}, + std::vector{4, 1}}; + context->Expect(!strided.IsContiguous(), + "TensorView with row padding should not be contiguous."); +} + +} // namespace + +int main() { + infini::rt::test::TestContext context; + + TestTensorView(&context); + + return context.ExitCode(); +} From 8189784c52f8502c17a74a170ac3270854741e41 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:25:49 +0800 Subject: [PATCH 09/15] Align runtime memcpy kind constants with CUDA API --- scripts/generate_public_headers.py | 33 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index b10e7f4..e929682 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -161,6 +161,7 @@ def _write_generated_header(include_root, devices): default_device_type = _DEVICE_TYPES[default_device] includes = [ "#include ", + "#include ", f"#include {_detail_include('data_type.h')}", f"#include {_detail_include('device.h')}", f"#include {_detail_include('hash.h')}", @@ -205,18 +206,22 @@ def _write_generated_header(include_root, devices): using Stream = typename generated_detail::DefaultErrorRuntime::Stream; +using MemcpyKind = std::remove_cv_t< + decltype(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost)>; + inline constexpr Error kSuccess = generated_detail::DefaultErrorRuntime::kSuccess; -enum class MemcpyKind {{ - kMemcpyHostToHost = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost), - kMemcpyHostToDevice = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice), - kMemcpyDeviceToHost = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost), - kMemcpyDeviceToDevice = - static_cast(generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice), -}}; +inline constexpr MemcpyKind kMemcpyHostToHost = + generated_detail::DefaultErrorRuntime::kMemcpyHostToHost; + +inline constexpr MemcpyKind kMemcpyHostToDevice = + generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice; + +inline constexpr MemcpyKind kMemcpyDeviceToHost = + generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost; + +inline constexpr MemcpyKind kMemcpyDeviceToDevice = + generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice; {runtime_declarations} @@ -412,13 +417,13 @@ def _write_runtime_dispatch(source_path, devices): using DeviceRuntime = Runtime; switch (kind) {{ - case MemcpyKind::kMemcpyHostToHost: + case kMemcpyHostToHost: return DeviceRuntime::kMemcpyHostToHost; - case MemcpyKind::kMemcpyHostToDevice: + case kMemcpyHostToDevice: return DeviceRuntime::kMemcpyHostToDevice; - case MemcpyKind::kMemcpyDeviceToHost: + case kMemcpyDeviceToHost: return DeviceRuntime::kMemcpyDeviceToHost; - case MemcpyKind::kMemcpyDeviceToDevice: + case kMemcpyDeviceToDevice: return DeviceRuntime::kMemcpyDeviceToDevice; }} From c35cd9f18e5f31ccbfd5713099f3a3e23ea31561 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:26:40 +0800 Subject: [PATCH 10/15] Use CUDA-style runtime memcpy constants --- tests/test_runtime_dispatch.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_runtime_dispatch.cc b/tests/test_runtime_dispatch.cc index e48a276..347d6e8 100644 --- a/tests/test_runtime_dispatch.cc +++ b/tests/test_runtime_dispatch.cc @@ -48,15 +48,15 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { ExpectSuccess(context, runtime::Memcpy(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice), + runtime::kMemcpyHostToDevice), "CPU dispatch should copy host data to runtime memory."); context->Expect(runtime::MemcpyAsync(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice, + runtime::kMemcpyHostToDevice, nullptr) != runtime::kSuccess, "CPU dispatch should not report async memcpy success."); ExpectSuccess(context, runtime::Memcpy(output.data(), ptr, output.size(), - runtime::MemcpyKind::kMemcpyDeviceToHost), + runtime::kMemcpyDeviceToHost), "CPU dispatch should copy runtime memory to host."); context->ExpectEqual(output, input, @@ -66,7 +66,7 @@ void TestCpuDispatch(infini::rt::test::TestContext* context) { "CPU dispatch should fill runtime memory."); ExpectSuccess(context, runtime::Memcpy(output.data(), ptr, output.size(), - runtime::MemcpyKind::kMemcpyDeviceToHost), + runtime::kMemcpyDeviceToHost), "CPU dispatch should copy filled memory to host."); for (const auto value : output) { context->ExpectEqual(value, static_cast(0x5A), @@ -112,13 +112,13 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { ExpectSuccess( context, runtime::MemcpyAsync(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice, nullptr), + runtime::kMemcpyHostToDevice, nullptr), "NVIDIA dispatch should support async host-to-device copy."); ExpectSuccess(context, runtime::DeviceSynchronize(), "NVIDIA dispatch should synchronize the device."); ExpectSuccess(context, runtime::Memcpy(output.data(), ptr, output.size(), - runtime::MemcpyKind::kMemcpyDeviceToHost), + runtime::kMemcpyDeviceToHost), "NVIDIA dispatch should copy device data to host."); context->ExpectEqual(output, input, @@ -130,7 +130,7 @@ void TestNvidiaDispatch(infini::rt::test::TestContext* context) { "NVIDIA dispatch should synchronize filled memory."); ExpectSuccess(context, runtime::Memcpy(output.data(), ptr, output.size(), - runtime::MemcpyKind::kMemcpyDeviceToHost), + runtime::kMemcpyDeviceToHost), "NVIDIA dispatch should copy filled memory to host."); for (const auto value : output) { context->ExpectEqual(value, static_cast(0x5A), From 7b5b7e34293f9f728692f5585d82bde411add2aa Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:27:02 +0800 Subject: [PATCH 11/15] Use CUDA-style runtime memcpy constants --- tests/install_consumer_smoke.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 14e6c0d..0684763 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -59,19 +59,18 @@ int main() { return 1; } if (runtime::Memcpy(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice) != - runtime::kSuccess) { + runtime::kMemcpyHostToDevice) != runtime::kSuccess) { return 1; } #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) if (runtime::MemcpyAsync(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice, + runtime::kMemcpyHostToDevice, nullptr) == runtime::kSuccess) { return 1; } #else if (runtime::MemcpyAsync(ptr, input.data(), input.size(), - runtime::MemcpyKind::kMemcpyHostToDevice, + runtime::kMemcpyHostToDevice, nullptr) != runtime::kSuccess) { return 1; } @@ -80,8 +79,7 @@ int main() { return 1; } if (runtime::Memcpy(output.data(), ptr, output.size(), - runtime::MemcpyKind::kMemcpyDeviceToHost) != - runtime::kSuccess) { + runtime::kMemcpyDeviceToHost) != runtime::kSuccess) { return 1; } if (output != input) { From 58e0c854d1c88af4937e04ec38c15f29c6c41570 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:36:09 +0800 Subject: [PATCH 12/15] Move TensorView tests back into core test --- tests/test_core.cc | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_core.cc b/tests/test_core.cc index 20be207..273ecef 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -1,7 +1,10 @@ #include +#include #include #include +#include +#include #include "test_helper.h" @@ -9,6 +12,10 @@ namespace { using infini::rt::DataType; using infini::rt::Device; +using infini::rt::TensorView; + +static_assert(!std::is_constructible_v>, + "TensorView should not treat tensor containers as tensor-like."); void TestDevice(infini::rt::test::TestContext* context) { const Device cpu{Device::Type::kCpu}; @@ -39,6 +46,43 @@ void TestDataType(infini::rt::test::TestContext* context) { DataType::kUInt16, "uint16 should parse by name."); } +void TestTensorView(infini::rt::test::TestContext* context) { + std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TensorView tensor{data.data(), std::vector{2, 3}, + DataType::kFloat32, Device{Device::Type::kCpu}}; + + context->ExpectEqual(tensor.ndim(), std::size_t{2}, + "TensorView should keep its rank."); + context->ExpectEqual(tensor.numel(), std::size_t{6}, + "TensorView should compute element count."); + context->ExpectEqual(tensor.element_size(), std::size_t{4}, + "TensorView should compute element size."); + context->ExpectEqual(tensor.size(0), std::size_t{2}, + "TensorView should expose dimension sizes."); + context->ExpectEqual(tensor.size(-1), std::size_t{3}, + "TensorView should support negative dimension sizes."); + context->ExpectEqual(tensor.stride(0), std::ptrdiff_t{3}, + "TensorView should compute default row-major strides."); + context->ExpectEqual(tensor.stride(1), std::ptrdiff_t{1}, + "TensorView should compute default innermost stride."); + context->Expect(tensor.IsContiguous(), + "Default TensorView strides should be contiguous."); + + TensorView transposed = tensor.T(); + context->ExpectEqual(transposed.shape(), TensorView::Shape({3, 2}), + "Transposed TensorView should swap shape."); + context->ExpectEqual(transposed.strides(), TensorView::Strides({1, 3}), + "Transposed TensorView should swap strides."); + context->Expect(!transposed.IsContiguous(), + "Transposed TensorView should not be contiguous."); + + TensorView strided{data.data(), std::vector{2, 3}, + DataType::kFloat32, Device{Device::Type::kCpu}, + std::vector{4, 1}}; + context->Expect(!strided.IsContiguous(), + "TensorView with row padding should not be contiguous."); +} + } // namespace int main() { @@ -46,6 +90,7 @@ int main() { TestDevice(&context); TestDataType(&context); + TestTensorView(&context); return context.ExitCode(); } From 4cdda6d183a3d024da15c8b6e02c2151f1709b42 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:37:09 +0800 Subject: [PATCH 13/15] Remove standalone TensorView test target --- tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 672d3b6..f24954c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,6 @@ endfunction() add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) -add_infini_rt_test(test_tensor_view test_tensor_view.cc) if(WITH_CPU OR WITH_NVIDIA) add_infini_rt_test(test_runtime_dispatch test_runtime_dispatch.cc) From a0679a71fa35f329480a6d9c3be3ab7944058c0b Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:37:40 +0800 Subject: [PATCH 14/15] Remove standalone TensorView test file --- tests/test_tensor_view.cc | 63 --------------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 tests/test_tensor_view.cc diff --git a/tests/test_tensor_view.cc b/tests/test_tensor_view.cc deleted file mode 100644 index 37b2c8d..0000000 --- a/tests/test_tensor_view.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include - -#include -#include -#include - -#include "test_helper.h" - -namespace { - -using infini::rt::DataType; -using infini::rt::Device; -using infini::rt::TensorView; - -static_assert(!std::is_constructible_v>, - "TensorView should not treat tensor containers as tensor-like."); - -void TestTensorView(infini::rt::test::TestContext* context) { - std::vector data{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorView tensor{data.data(), std::vector{2, 3}, - DataType::kFloat32, Device{Device::Type::kCpu}}; - - context->ExpectEqual(tensor.ndim(), std::size_t{2}, - "TensorView should keep its rank."); - context->ExpectEqual(tensor.numel(), std::size_t{6}, - "TensorView should compute element count."); - context->ExpectEqual(tensor.element_size(), std::size_t{4}, - "TensorView should compute element size."); - context->ExpectEqual(tensor.size(0), std::size_t{2}, - "TensorView should expose dimension sizes."); - context->ExpectEqual(tensor.size(-1), std::size_t{3}, - "TensorView should support negative dimension sizes."); - context->ExpectEqual(tensor.stride(0), std::ptrdiff_t{3}, - "TensorView should compute default row-major strides."); - context->ExpectEqual(tensor.stride(1), std::ptrdiff_t{1}, - "TensorView should compute default innermost stride."); - context->Expect(tensor.IsContiguous(), - "Default TensorView strides should be contiguous."); - - TensorView transposed = tensor.T(); - context->ExpectEqual(transposed.shape(), TensorView::Shape({3, 2}), - "Transposed TensorView should swap shape."); - context->ExpectEqual(transposed.strides(), TensorView::Strides({1, 3}), - "Transposed TensorView should swap strides."); - context->Expect(!transposed.IsContiguous(), - "Transposed TensorView should not be contiguous."); - - TensorView strided{data.data(), std::vector{2, 3}, - DataType::kFloat32, Device{Device::Type::kCpu}, - std::vector{4, 1}}; - context->Expect(!strided.IsContiguous(), - "TensorView with row padding should not be contiguous."); -} - -} // namespace - -int main() { - infini::rt::test::TestContext context; - - TestTensorView(&context); - - return context.ExitCode(); -} From 04c13489456aa62c2734ced4353d8237d961785b Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:38:06 +0800 Subject: [PATCH 15/15] Use fully qualified runtime API names in README --- README.md | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 51099fc..5d41768 100644 --- a/README.md +++ b/README.md @@ -66,16 +66,14 @@ cmake --install build #include int main() { - namespace runtime = infini::rt::runtime; - - runtime::SetDevice(0); + infini::rt::runtime::SetDevice(0); constexpr std::size_t size = 1024; void* ptr = nullptr; - runtime::Malloc(&ptr, size); - runtime::Memset(ptr, 0, size); - runtime::Free(ptr); + infini::rt::runtime::Malloc(&ptr, size); + infini::rt::runtime::Memset(ptr, 0, size); + infini::rt::runtime::Free(ptr); return 0; } @@ -88,18 +86,16 @@ those runtime calls. A GPU backend is selected initially when one is enabled; otherwise CPU is selected. ```cpp -namespace runtime = infini::rt::runtime; - constexpr std::size_t size = 1024; void* ptr = nullptr; infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); -runtime::Malloc(&ptr, size); -runtime::Free(ptr); +infini::rt::runtime::Malloc(&ptr, size); +infini::rt::runtime::Free(ptr); infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); -runtime::Malloc(&ptr, size); -runtime::Free(ptr); +infini::rt::runtime::Malloc(&ptr, size); +infini::rt::runtime::Free(ptr); ``` Use `infini::rt::runtime::Runtime` when CPU