From a10f9527be813c43623e12bc8d4ac38c03b51397 Mon Sep 17 00:00:00 2001 From: gongchensu Date: Tue, 23 Jun 2026 08:18:58 +0000 Subject: [PATCH 1/5] feat: add graph runtime api --- .gitignore | 1 + scripts/generate_public_headers.py | 110 +++++++++++++++++++++- src/native/cuda/iluvatar/runtime_.h | 39 +++++++- src/native/cuda/metax/runtime_.h | 27 ++++++ src/native/cuda/moore/runtime_.h | 26 ++++++ src/native/cuda/nvidia/runtime_.h | 79 +++++++++++++++- tests/CMakeLists.txt | 5 + tests/test_nvidia_graph.cc | 140 ++++++++++++++++++++++++++++ 8 files changed, 421 insertions(+), 6 deletions(-) create mode 100644 tests/test_nvidia_graph.cc diff --git a/.gitignore b/.gitignore index 99bca7d..48539b4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Generated files build/ +build-*/ generated/ # Prerequisites diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 6620b42..3363741 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -156,9 +156,31 @@ def _write_detail_headers(include_root, source_root, devices): _write_detail_header(include_root, source_root, relative_path) -def _write_generated_header(include_root, devices): +def _write_generated_header(include_root, source_root, devices): default_device = _default_device(devices) default_device_type = _DEVICE_TYPES[default_device] + public_runtime_functions = _public_runtime_functions_for_devices( + devices, source_root + ) + has_graph_api = any( + function.name in {"StreamBeginCapture", "GraphLaunch"} + for function in public_runtime_functions + ) + graph_declarations = ( + """ +using Graph = void*; + +using GraphExec = void*; + +enum class StreamCaptureMode { + kStreamCaptureModeGlobal = 0, + kStreamCaptureModeThreadLocal = 1, + kStreamCaptureModeRelaxed = 2, +}; +""" + if has_graph_api + else "" + ) includes = [ "#include ", "#include ", @@ -177,7 +199,7 @@ def _write_generated_header(include_root, devices): includes.append(f"#include ") runtime_declarations = "\n\n".join( - f"{function.signature()};" for function in _PUBLIC_RUNTIME_FUNCTIONS + f"{function.signature()};" for function in public_runtime_functions ) path = include_root / "infini" / "rt" / "generated.h" @@ -209,6 +231,7 @@ def _write_generated_header(include_root, devices): using Event = void*; +{graph_declarations} using MemcpyKind = std::remove_cv_t< decltype(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost)>; @@ -366,6 +389,32 @@ def params_decl(self): _Param("Event", "end"), ), ), + _Function( + "Error", + "StreamBeginCapture", + (_Param("Stream", "stream"), _Param("StreamCaptureMode", "mode")), + ), + _Function( + "Error", + "StreamEndCapture", + (_Param("Stream", "stream"), _Param("Graph*", "graph")), + ), + _Function("Error", "GraphDestroy", (_Param("Graph", "graph"),)), + _Function( + "Error", + "GraphInstantiate", + (_Param("GraphExec*", "graph_exec"), _Param("Graph", "graph")), + ), + _Function( + "Error", + "GraphExecDestroy", + (_Param("GraphExec", "graph_exec"),), + ), + _Function( + "Error", + "GraphLaunch", + (_Param("GraphExec", "graph_exec"), _Param("Stream", "stream")), + ), ) @@ -395,6 +444,24 @@ def _runtime_arg(param, device): return ( f"reinterpret_cast::Event*>({param.name})" ) + if param.type == "Graph": + return f"reinterpret_cast::Graph>({param.name})" + if param.type == "Graph*": + return ( + f"reinterpret_cast::Graph*>({param.name})" + ) + if param.type == "GraphExec": + return ( + f"reinterpret_cast::GraphExec>" + f"({param.name})" + ) + if param.type == "GraphExec*": + return ( + f"reinterpret_cast::GraphExec*>" + f"({param.name})" + ) + if param.type == "StreamCaptureMode": + return f"RuntimeStreamCaptureMode<{device_type}>({param.name})" return param.name @@ -452,8 +519,42 @@ def _devices_for_function(function, devices, source_root): ) +def _public_runtime_functions_for_devices(devices, source_root): + return tuple( + function + for function in _PUBLIC_RUNTIME_FUNCTIONS + if _devices_for_function(function, devices, source_root) + ) + + def _write_runtime_dispatch(source_path, source_root, devices): - functions = _PUBLIC_RUNTIME_FUNCTIONS + functions = _public_runtime_functions_for_devices(devices, source_root) + stream_capture_mode_helper = ( + """ +template +auto RuntimeStreamCaptureMode(StreamCaptureMode mode) { + using DeviceRuntime = Runtime; + + switch (mode) { + case StreamCaptureMode::kStreamCaptureModeGlobal: + return DeviceRuntime::kStreamCaptureModeGlobal; + case StreamCaptureMode::kStreamCaptureModeThreadLocal: + return DeviceRuntime::kStreamCaptureModeThreadLocal; + case StreamCaptureMode::kStreamCaptureModeRelaxed: + return DeviceRuntime::kStreamCaptureModeRelaxed; + } + + assert(false && "unsupported stream capture mode"); + return DeviceRuntime::kStreamCaptureModeRelaxed; +} +""" + if any( + param.type == "StreamCaptureMode" + for function in functions + for param in function.params + ) + else "" + ) dispatch_functions = "\n".join( _write_runtime_dispatch_function( function, @@ -535,6 +636,7 @@ def _write_runtime_dispatch(source_path, source_root, devices): return DeviceRuntime::kMemcpyHostToHost; }} +{stream_capture_mode_helper} }} // namespace {dispatch_functions} @@ -566,7 +668,7 @@ def main(): for wrapper_device, header_name, target in _DEVICE_HEADERS[device]: _write_wrapper(include_root, wrapper_device, header_name, target) - _write_generated_header(include_root, devices) + _write_generated_header(include_root, source_root, devices) _write_runtime_dispatch(pathlib.Path(args.source_output), source_root, devices) diff --git a/src/native/cuda/iluvatar/runtime_.h b/src/native/cuda/iluvatar/runtime_.h index 9425559..09feb65 100644 --- a/src/native/cuda/iluvatar/runtime_.h +++ b/src/native/cuda/iluvatar/runtime_.h @@ -19,6 +19,10 @@ struct Runtime using Stream = cudaStream_t; + using Graph = cudaGraph_t; + + using GraphExec = cudaGraphExec_t; + using Event = cudaEvent_t; static constexpr Device::Type kDeviceType = Device::Type::kIluvatar; @@ -78,7 +82,8 @@ struct Runtime }; static constexpr auto StreamCreate = [](auto&&... args) { - return cudaStreamCreate(std::forward(args)...); + return cudaStreamCreateWithFlags(std::forward(args)..., + cudaStreamNonBlocking); }; static constexpr auto StreamDestroy = [](auto&&... args) { @@ -120,6 +125,38 @@ struct Runtime static constexpr auto EventElapsedTime = [](auto&&... args) { return cudaEventElapsedTime(std::forward(args)...); }; + + static constexpr auto kStreamCaptureModeGlobal = cudaStreamCaptureModeGlobal; + + static constexpr auto kStreamCaptureModeThreadLocal = + cudaStreamCaptureModeThreadLocal; + + static constexpr auto kStreamCaptureModeRelaxed = + cudaStreamCaptureModeRelaxed; + + static constexpr auto StreamBeginCapture = [](auto&&... args) { + return cudaStreamBeginCapture(std::forward(args)...); + }; + + static constexpr auto StreamEndCapture = [](auto&&... args) { + return cudaStreamEndCapture(std::forward(args)...); + }; + + static constexpr auto GraphDestroy = [](auto&&... args) { + return cudaGraphDestroy(std::forward(args)...); + }; + + static constexpr auto GraphInstantiate = [](auto&&... args) { + return cudaGraphInstantiate(std::forward(args)...); + }; + + static constexpr auto GraphExecDestroy = [](auto&&... args) { + return cudaGraphExecDestroy(std::forward(args)...); + }; + + static constexpr auto GraphLaunch = [](auto&&... args) { + return cudaGraphLaunch(std::forward(args)...); + }; }; static_assert(Runtime::Validate()); diff --git a/src/native/cuda/metax/runtime_.h b/src/native/cuda/metax/runtime_.h index 462f5f1..201f9b0 100644 --- a/src/native/cuda/metax/runtime_.h +++ b/src/native/cuda/metax/runtime_.h @@ -3,6 +3,7 @@ #include +#include #include #include "native/cuda/metax/device_.h" @@ -17,6 +18,10 @@ struct Runtime using Stream = mcStream_t; + using Graph = void*; + + using GraphExec = void*; + using Event = mcEvent_t; static constexpr Device::Type kDeviceType = Device::Type::kMetax; @@ -124,6 +129,28 @@ struct Runtime static constexpr auto EventElapsedTime = [](auto&&... args) { return mcEventElapsedTime(std::forward(args)...); }; + + static constexpr int kStreamCaptureModeGlobal = 0; + + static constexpr int kStreamCaptureModeThreadLocal = 1; + + static constexpr int kStreamCaptureModeRelaxed = 2; + + static Error StreamBeginCapture(Stream, int) { return static_cast(1); } + + static Error StreamEndCapture(Stream, Graph*) { + return static_cast(1); + } + + static Error GraphDestroy(Graph) { return static_cast(1); } + + static Error GraphInstantiate(GraphExec*, Graph) { + return static_cast(1); + } + + static Error GraphExecDestroy(GraphExec) { return static_cast(1); } + + static Error GraphLaunch(GraphExec, Stream) { return static_cast(1); } }; static_assert(Runtime::Validate()); diff --git a/src/native/cuda/moore/runtime_.h b/src/native/cuda/moore/runtime_.h index 81ccd15..37eab28 100644 --- a/src/native/cuda/moore/runtime_.h +++ b/src/native/cuda/moore/runtime_.h @@ -18,6 +18,10 @@ struct Runtime using Stream = musaStream_t; + using Graph = void*; + + using GraphExec = void*; + using Event = musaEvent_t; static constexpr Device::Type kDeviceType = Device::Type::kMoore; @@ -131,6 +135,28 @@ struct Runtime static constexpr auto EventElapsedTime = [](auto&&... args) { return musaEventElapsedTime(std::forward(args)...); }; + + static constexpr int kStreamCaptureModeGlobal = 0; + + static constexpr int kStreamCaptureModeThreadLocal = 1; + + static constexpr int kStreamCaptureModeRelaxed = 2; + + static Error StreamBeginCapture(Stream, int) { return static_cast(1); } + + static Error StreamEndCapture(Stream, Graph*) { + return static_cast(1); + } + + static Error GraphDestroy(Graph) { return static_cast(1); } + + static Error GraphInstantiate(GraphExec*, Graph) { + return static_cast(1); + } + + static Error GraphExecDestroy(GraphExec) { return static_cast(1); } + + static Error GraphLaunch(GraphExec, Stream) { return static_cast(1); } }; static_assert(Runtime::Validate()); diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index 840f0bd..c9d2649 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -19,6 +19,10 @@ struct Runtime using Stream = cudaStream_t; + using Graph = cudaGraph_t; + + using GraphExec = cudaGraphExec_t; + using Event = cudaEvent_t; static constexpr Device::Type kDeviceType = Device::Type::kNvidia; @@ -78,7 +82,8 @@ struct Runtime }; static constexpr auto StreamCreate = [](auto&&... args) { - return cudaStreamCreate(std::forward(args)...); + return cudaStreamCreateWithFlags(std::forward(args)..., + cudaStreamNonBlocking); }; static constexpr auto StreamDestroy = [](auto&&... args) { @@ -120,6 +125,78 @@ struct Runtime static constexpr auto EventElapsedTime = [](auto&&... args) { return cudaEventElapsedTime(std::forward(args)...); }; + + static constexpr auto kStreamCaptureModeGlobal = cudaStreamCaptureModeGlobal; + + static constexpr auto kStreamCaptureModeThreadLocal = + cudaStreamCaptureModeThreadLocal; + + static constexpr auto kStreamCaptureModeRelaxed = + cudaStreamCaptureModeRelaxed; + + static constexpr auto StreamBeginCapture = [](auto&&... args) { + return cudaStreamBeginCapture(std::forward(args)...); + }; + + static constexpr auto StreamEndCapture = [](auto&&... args) { + return cudaStreamEndCapture(std::forward(args)...); + }; + + static constexpr auto GraphDestroy = [](auto&&... args) { + return cudaGraphDestroy(std::forward(args)...); + }; + + static constexpr auto GraphInstantiate = [](auto&&... args) { + return cudaGraphInstantiate(std::forward(args)...); + }; + + static constexpr auto GraphExecDestroy = [](auto&&... args) { + return cudaGraphExecDestroy(std::forward(args)...); + }; + + static constexpr auto GraphLaunch = [](auto&&... args) { + return cudaGraphLaunch(std::forward(args)...); + }; + + static constexpr bool Validate() { + CudaRuntime>::Validate(); + static_assert(sizeof(Graph) > 0, + "`Runtime` must define a `Graph` type alias."); + static_assert(sizeof(GraphExec) > 0, + "`Runtime` must define a `GraphExec` type alias."); + static_assert(std::is_invocable_v, + "`Runtime::StreamCreate` must be callable with `(Stream*)`."); + static_assert(std::is_invocable_v, + "`Runtime::StreamDestroy` must be callable with `(Stream)`."); + static_assert( + std::is_invocable_v, + "`Runtime::StreamSynchronize` must be callable with `(Stream)`."); + static_assert(std::is_invocable_v, + "`Runtime::MemcpyAsync` must be callable with " + "`(void*, const void*, size_t, cudaMemcpyKind, Stream)`."); + static_assert(std::is_invocable_v, + "`Runtime::StreamBeginCapture` must be callable with " + "`(Stream, cudaStreamCaptureMode)`."); + static_assert( + std::is_invocable_v, + "`Runtime::StreamEndCapture` must be callable with " + "`(Stream, Graph*)`."); + static_assert(std::is_invocable_v, + "`Runtime::GraphDestroy` must be callable with `(Graph)`."); + static_assert( + std::is_invocable_v, + "`Runtime::GraphInstantiate` must be callable with " + "`(GraphExec*, Graph)`."); + static_assert( + std::is_invocable_v, + "`Runtime::GraphExecDestroy` must be callable with `(GraphExec)`."); + static_assert( + std::is_invocable_v, + "`Runtime::GraphLaunch` must be callable with `(GraphExec, Stream)`."); + return true; + } }; static_assert(Runtime::Validate()); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2fcf5b4..b801026 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -131,6 +131,11 @@ if(INFINI_RT_TEST_HAS_RUNTIME_BACKEND) endif() endif() +if(WITH_NVIDIA) + add_infini_rt_test(test_nvidia_graph test_nvidia_graph.cc) + add_infini_rt_test(test_nvidia_graph_c_api test_nvidia_graph_c_api.cc) +endif() + set(INFINI_RT_TEST_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/install_consumer_prefix") set(INFINI_RT_TEST_CONSUMER_BINARY diff --git a/tests/test_nvidia_graph.cc b/tests/test_nvidia_graph.cc new file mode 100644 index 0000000..291c46b --- /dev/null +++ b/tests/test_nvidia_graph.cc @@ -0,0 +1,140 @@ +#include + +#include +#include +#include + +#include "test_helper.h" + +namespace { + +namespace runtime = infini::rt::runtime; + +void ExpectSuccess(infini::rt::test::TestContext* context, + runtime::Error status, const char* message) { + context->Expect(status == runtime::kSuccess, message); +} + +void FillPattern(std::array* input, std::uint8_t salt) { + for (std::size_t i = 0; i < input->size(); ++i) { + (*input)[i] = static_cast(i * 13 + salt); + } +} + +bool CopyDeviceToHostAndValidate(infini::rt::test::TestContext* context, + void* device_ptr, + const std::array& expected, + std::string_view message) { + std::array output{}; + runtime::Memcpy(output.data(), device_ptr, output.size(), + runtime::kMemcpyDeviceToHost); + return context->ExpectEqual(output, expected, message); +} + +} // namespace + +int main() { + infini::rt::test::TestContext context; + + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); + ExpectSuccess(&context, runtime::SetDevice(0), + "Failed to set NVIDIA runtime device."); + + void* src = nullptr; + void* dst = nullptr; + runtime::Stream stream = nullptr; + runtime::Graph graph = nullptr; + runtime::GraphExec graph_exec = nullptr; + + std::array capture_input{}; + FillPattern(&capture_input, 7); + + ExpectSuccess(&context, runtime::Malloc(&src, capture_input.size()), + "Failed to allocate source buffer."); + ExpectSuccess(&context, runtime::Malloc(&dst, capture_input.size()), + "Failed to allocate destination buffer."); + ExpectSuccess(&context, runtime::StreamCreate(&stream), + "Failed to create stream."); + + ExpectSuccess(&context, + runtime::Memcpy(src, capture_input.data(), capture_input.size(), + runtime::kMemcpyHostToDevice), + "Failed to initialize source buffer."); + ExpectSuccess(&context, runtime::Memset(dst, 0, capture_input.size()), + "Failed to initialize destination buffer."); + + ExpectSuccess( + &context, + runtime::StreamBeginCapture( + stream, runtime::StreamCaptureMode::kStreamCaptureModeRelaxed), + "Failed to begin stream capture."); + ExpectSuccess(&context, + runtime::MemcpyAsync(dst, src, capture_input.size(), + runtime::kMemcpyDeviceToDevice, stream), + "Failed to record device-to-device copy."); + ExpectSuccess(&context, runtime::StreamEndCapture(stream, &graph), + "Failed to end stream capture."); + + ExpectSuccess(&context, runtime::GraphInstantiate(&graph_exec, graph), + "Failed to instantiate graph."); + + std::array replay_input_1{}; + std::array replay_input_2{}; + FillPattern(&replay_input_1, 31); + FillPattern(&replay_input_2, 53); + + ExpectSuccess( + &context, + runtime::Memcpy(src, replay_input_1.data(), replay_input_1.size(), + runtime::kMemcpyHostToDevice), + "Failed to refresh first source buffer."); + ExpectSuccess(&context, runtime::Memset(dst, 0, replay_input_1.size()), + "Failed to clear first destination buffer."); + ExpectSuccess(&context, runtime::DeviceSynchronize(), + "Failed to synchronize first replay inputs."); + ExpectSuccess(&context, runtime::GraphLaunch(graph_exec, stream), + "Failed to launch first graph replay."); + ExpectSuccess(&context, runtime::StreamSynchronize(stream), + "Failed to synchronize first graph replay."); + CopyDeviceToHostAndValidate(&context, dst, replay_input_1, + "First graph replay should copy D2D data."); + + ExpectSuccess( + &context, + runtime::Memcpy(src, replay_input_2.data(), replay_input_2.size(), + runtime::kMemcpyHostToDevice), + "Failed to refresh second source buffer."); + ExpectSuccess(&context, runtime::Memset(dst, 0, replay_input_2.size()), + "Failed to clear second destination buffer."); + ExpectSuccess(&context, runtime::DeviceSynchronize(), + "Failed to synchronize second replay inputs."); + ExpectSuccess(&context, runtime::GraphLaunch(graph_exec, stream), + "Failed to launch second graph replay."); + ExpectSuccess(&context, runtime::StreamSynchronize(stream), + "Failed to synchronize second graph replay."); + CopyDeviceToHostAndValidate(&context, dst, replay_input_2, + "Second graph replay should copy D2D data."); + + if (graph_exec != nullptr) { + ExpectSuccess(&context, runtime::GraphExecDestroy(graph_exec), + "Failed to destroy graph exec."); + } + if (graph != nullptr) { + ExpectSuccess(&context, runtime::GraphDestroy(graph), + "Failed to destroy graph."); + } + if (stream != nullptr) { + ExpectSuccess(&context, runtime::StreamDestroy(stream), + "Failed to destroy stream."); + } + if (dst != nullptr) { + ExpectSuccess(&context, runtime::Free(dst), + "Failed to free destination buffer."); + } + if (src != nullptr) { + ExpectSuccess(&context, runtime::Free(src), + "Failed to free source buffer."); + } + + return context.ExitCode(); +} From 2cb2cd23a76cf0625e3edd964e24b72707829175 Mon Sep 17 00:00:00 2001 From: gongchensu Date: Wed, 24 Jun 2026 09:06:10 +0000 Subject: [PATCH 2/5] feat: add graph c api --- include/infini/rt.h | 1 + include/infini/rt/c_api.h | 78 ++++++++ src/c_api.cc | 306 +++++++++++++++++++++++++++++++ tests/test_nvidia_graph_c_api.cc | 133 ++++++++++++++ 4 files changed, 518 insertions(+) create mode 100644 include/infini/rt/c_api.h create mode 100644 src/c_api.cc create mode 100644 tests/test_nvidia_graph_c_api.cc diff --git a/include/infini/rt.h b/include/infini/rt.h index 3adb6eb..e687976 100644 --- a/include/infini/rt.h +++ b/include/infini/rt.h @@ -1,6 +1,7 @@ #ifndef INFINI_RT_PUBLIC_H_ #define INFINI_RT_PUBLIC_H_ +#include #include #endif diff --git a/include/infini/rt/c_api.h b/include/infini/rt/c_api.h new file mode 100644 index 0000000..c5ad65f --- /dev/null +++ b/include/infini/rt/c_api.h @@ -0,0 +1,78 @@ +#ifndef INFINI_RT_C_API_H_ +#define INFINI_RT_C_API_H_ + +#if defined(_WIN32) +#define INFINI_RT_EXPORT __declspec(dllexport) +#elif defined(__GNUC__) && \ + ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) +#define INFINI_RT_EXPORT __attribute__((visibility("default"))) +#else +#define INFINI_RT_EXPORT +#endif + +#ifdef __cplusplus +#define INFINI_RT_EXTERN_C extern "C" +#else +#define INFINI_RT_EXTERN_C +#endif + +typedef enum { + INFINI_RT_STATUS_SUCCESS = 0, + INFINI_RT_STATUS_INVALID_ARGUMENT = 1, + INFINI_RT_STATUS_UNSUPPORTED_DEVICE = 2, + INFINI_RT_STATUS_RUNTIME_ERROR = 3, +} infiniRtStatus_t; + +typedef enum { + INFINI_RT_DEVICE_CPU = 0, + INFINI_RT_DEVICE_NVIDIA = 1, + INFINI_RT_DEVICE_CAMBRICON = 2, + INFINI_RT_DEVICE_ASCEND = 3, + INFINI_RT_DEVICE_METAX = 4, + INFINI_RT_DEVICE_MOORE = 5, + INFINI_RT_DEVICE_ILUVATAR = 6, + INFINI_RT_DEVICE_KUNLUN = 7, + INFINI_RT_DEVICE_HYGON = 8, + INFINI_RT_DEVICE_QY = 9, +} infiniRtDeviceType_t; + +typedef struct { + infiniRtDeviceType_t type; + int index; +} infiniRtDevice_t; + +typedef enum { + INFINI_RT_STREAM_CAPTURE_MODE_GLOBAL = 0, + INFINI_RT_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1, + INFINI_RT_STREAM_CAPTURE_MODE_RELAXED = 2, +} infiniRtStreamCaptureMode_t; + +typedef void* infiniRtStream_t; +typedef void* infiniRtGraph_t; +typedef void* infiniRtGraphExec_t; + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtStreamWrap( + infiniRtDevice_t device, void* native_stream, infiniRtStream_t* stream); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t +infiniRtStreamDestroy(infiniRtStream_t stream); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtStreamBeginCapture( + infiniRtStream_t stream, infiniRtStreamCaptureMode_t mode); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t +infiniRtStreamEndCapture(infiniRtStream_t stream, infiniRtGraph_t* graph); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t +infiniRtGraphDestroy(infiniRtGraph_t graph); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtGraphInstantiate( + infiniRtGraphExec_t* graph_exec, infiniRtGraph_t graph); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t +infiniRtGraphExecDestroy(infiniRtGraphExec_t graph_exec); + +INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t +infiniRtGraphLaunch(infiniRtGraphExec_t graph_exec, infiniRtStream_t stream); + +#endif diff --git a/src/c_api.cc b/src/c_api.cc new file mode 100644 index 0000000..178514a --- /dev/null +++ b/src/c_api.cc @@ -0,0 +1,306 @@ +#include + +#include +#include +#include + +#include "runtime.h" + +#if defined(WITH_NVIDIA) +#include "native/cuda/nvidia/runtime_.h" +#endif + +namespace { + +using infini::rt::Device; +using infini::rt::Graph; +using infini::rt::GraphExec; +using infini::rt::Runtime; +using infini::rt::Stream; + +struct CStream { + Stream stream; +}; + +struct CGraph { + Graph graph; +}; + +struct CGraphExec { + GraphExec graph_exec; +}; + +template +infiniRtStatus_t Guard(Func&& func) { + try { + return std::forward(func)(); + } catch (const std::bad_alloc&) { + return INFINI_RT_STATUS_RUNTIME_ERROR; + } catch (...) { + return INFINI_RT_STATUS_RUNTIME_ERROR; + } +} + +template +infiniRtStatus_t CheckBackendCall(Func&& func) { + using ReturnType = decltype(std::forward(func)()); + if constexpr (std::is_void_v) { + std::forward(func)(); + return INFINI_RT_STATUS_SUCCESS; + } else { + return std::forward(func)() == ReturnType{} + ? INFINI_RT_STATUS_SUCCESS + : INFINI_RT_STATUS_RUNTIME_ERROR; + } +} + +Device::Type ToCppDeviceType(infiniRtDeviceType_t type) { + switch (type) { + case INFINI_RT_DEVICE_CPU: + return Device::Type::kCpu; + case INFINI_RT_DEVICE_NVIDIA: + return Device::Type::kNvidia; + case INFINI_RT_DEVICE_CAMBRICON: + return Device::Type::kCambricon; + case INFINI_RT_DEVICE_ASCEND: + return Device::Type::kAscend; + case INFINI_RT_DEVICE_METAX: + return Device::Type::kMetax; + case INFINI_RT_DEVICE_MOORE: + return Device::Type::kMoore; + case INFINI_RT_DEVICE_ILUVATAR: + return Device::Type::kIluvatar; + case INFINI_RT_DEVICE_KUNLUN: + return Device::Type::kKunlun; + case INFINI_RT_DEVICE_HYGON: + return Device::Type::kHygon; + case INFINI_RT_DEVICE_QY: + return Device::Type::kQy; + } + return Device::Type::kCount; +} + +#if defined(WITH_NVIDIA) +auto ToNvidiaCaptureMode(infiniRtStreamCaptureMode_t mode) { + switch (mode) { + case INFINI_RT_STREAM_CAPTURE_MODE_GLOBAL: + return Runtime::StreamCaptureModeGlobal; + case INFINI_RT_STREAM_CAPTURE_MODE_THREAD_LOCAL: + return Runtime::StreamCaptureModeThreadLocal; + case INFINI_RT_STREAM_CAPTURE_MODE_RELAXED: + return Runtime::StreamCaptureModeRelaxed; + } + return Runtime::StreamCaptureModeRelaxed; +} + +auto RawNvidiaStream(Stream stream) { + return static_cast::Stream>( + stream.raw()); +} + +auto RawNvidiaGraph(Graph graph) { + return static_cast::Graph>( + graph.raw()); +} + +auto RawNvidiaGraphExec(GraphExec graph_exec) { + return static_cast::GraphExec>( + graph_exec.raw()); +} +#endif + +CStream* AsStream(infiniRtStream_t stream) { + return static_cast(stream); +} + +CGraph* AsGraph(infiniRtGraph_t graph) { return static_cast(graph); } + +CGraphExec* AsGraphExec(infiniRtGraphExec_t graph_exec) { + return static_cast(graph_exec); +} + +infiniRtStatus_t Unsupported() { return INFINI_RT_STATUS_UNSUPPORTED_DEVICE; } + +} // namespace + +infiniRtStatus_t infiniRtStreamWrap(infiniRtDevice_t device, + void* native_stream, + infiniRtStream_t* stream) { + if (native_stream == nullptr || stream == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + const auto device_type = ToCppDeviceType(device.type); + if (device_type == Device::Type::kCount) { + return INFINI_RT_STATUS_UNSUPPORTED_DEVICE; + } + *stream = new CStream{Stream{device_type, native_stream}}; + return INFINI_RT_STATUS_SUCCESS; + }); +} + +infiniRtStatus_t infiniRtStreamDestroy(infiniRtStream_t stream) { + if (stream == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + delete AsStream(stream); + return INFINI_RT_STATUS_SUCCESS; +} + +infiniRtStatus_t infiniRtStreamBeginCapture(infiniRtStream_t stream, + infiniRtStreamCaptureMode_t mode) { + if (stream == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* wrapped = AsStream(stream); + switch (wrapped->stream.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: + return CheckBackendCall([&] { + return Runtime::StreamBeginCapture( + RawNvidiaStream(wrapped->stream), ToNvidiaCaptureMode(mode)); + }); +#endif + default: + return Unsupported(); + } + }); +} + +infiniRtStatus_t infiniRtStreamEndCapture(infiniRtStream_t stream, + infiniRtGraph_t* graph) { + if (stream == nullptr || graph == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* wrapped = AsStream(stream); + switch (wrapped->stream.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: { + typename Runtime::Graph raw_graph = {}; + const auto status = CheckBackendCall([&] { + return Runtime::StreamEndCapture( + RawNvidiaStream(wrapped->stream), &raw_graph); + }); + if (status != INFINI_RT_STATUS_SUCCESS) { + return status; + } + *graph = new CGraph{ + Graph{Device::Type::kNvidia, static_cast(raw_graph)}}; + return INFINI_RT_STATUS_SUCCESS; + } +#endif + default: + return Unsupported(); + } + }); +} + +infiniRtStatus_t infiniRtGraphDestroy(infiniRtGraph_t graph) { + if (graph == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* wrapped = AsGraph(graph); + switch (wrapped->graph.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: { + const auto status = CheckBackendCall([&] { + return Runtime::GraphDestroy( + RawNvidiaGraph(wrapped->graph)); + }); + // The C wrapper owns only the wrapper object. The backend destroy call + // above owns the native graph handle. + delete wrapped; + return status; + } +#endif + default: + delete wrapped; + return Unsupported(); + } + }); +} + +infiniRtStatus_t infiniRtGraphInstantiate(infiniRtGraphExec_t* graph_exec, + infiniRtGraph_t graph) { + if (graph_exec == nullptr || graph == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* wrapped = AsGraph(graph); + switch (wrapped->graph.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: { + typename Runtime::GraphExec raw_exec = {}; + const auto status = CheckBackendCall([&] { + return Runtime::GraphInstantiate( + &raw_exec, RawNvidiaGraph(wrapped->graph)); + }); + if (status != INFINI_RT_STATUS_SUCCESS) { + return status; + } + *graph_exec = new CGraphExec{ + GraphExec{Device::Type::kNvidia, static_cast(raw_exec)}}; + return INFINI_RT_STATUS_SUCCESS; + } +#endif + default: + return Unsupported(); + } + }); +} + +infiniRtStatus_t infiniRtGraphExecDestroy(infiniRtGraphExec_t graph_exec) { + if (graph_exec == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* wrapped = AsGraphExec(graph_exec); + switch (wrapped->graph_exec.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: { + const auto status = CheckBackendCall([&] { + return Runtime::GraphExecDestroy( + RawNvidiaGraphExec(wrapped->graph_exec)); + }); + // The C wrapper owns only the wrapper object. The backend destroy call + // above owns the native executable graph handle. + delete wrapped; + return status; + } +#endif + default: + delete wrapped; + return Unsupported(); + } + }); +} + +infiniRtStatus_t infiniRtGraphLaunch(infiniRtGraphExec_t graph_exec, + infiniRtStream_t stream) { + if (graph_exec == nullptr || stream == nullptr) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + return Guard([&] { + auto* exec = AsGraphExec(graph_exec); + auto* wrapped_stream = AsStream(stream); + if (exec->graph_exec.device_type() != + wrapped_stream->stream.device_type()) { + return INFINI_RT_STATUS_INVALID_ARGUMENT; + } + switch (exec->graph_exec.device_type()) { +#if defined(WITH_NVIDIA) + case Device::Type::kNvidia: + return CheckBackendCall([&] { + return Runtime::GraphLaunch( + RawNvidiaGraphExec(exec->graph_exec), + RawNvidiaStream(wrapped_stream->stream)); + }); +#endif + default: + return Unsupported(); + } + }); +} diff --git a/tests/test_nvidia_graph_c_api.cc b/tests/test_nvidia_graph_c_api.cc new file mode 100644 index 0000000..1a1b2cd --- /dev/null +++ b/tests/test_nvidia_graph_c_api.cc @@ -0,0 +1,133 @@ +#include +#include + +#include +#include +#include + +#include "test_helper.h" + +namespace { + +bool ExpectCudaSuccess(infini::rt::test::TestContext* context, + cudaError_t status, std::string_view message) { + return context->Expect(status == cudaSuccess, message); +} + +bool ExpectRtSuccess(infini::rt::test::TestContext* context, + infiniRtStatus_t status, std::string_view message) { + return context->Expect(status == INFINI_RT_STATUS_SUCCESS, message); +} + +void FillPattern(std::array* input, std::uint8_t salt) { + for (std::size_t i = 0; i < input->size(); ++i) { + (*input)[i] = static_cast(i * 13 + salt); + } +} + +bool CopyDeviceToHostAndValidate(infini::rt::test::TestContext* context, + void* device_ptr, + const std::array& expected, + std::string_view message) { + std::array output{}; + if (!ExpectCudaSuccess(context, + cudaMemcpy(output.data(), device_ptr, output.size(), + cudaMemcpyDeviceToHost), + "Failed to copy device output to host.")) { + return false; + } + return context->ExpectEqual(output, expected, message); +} + +} // namespace + +int main() { + infini::rt::test::TestContext context; + + cudaStream_t native_stream = nullptr; + void* src = nullptr; + void* dst = nullptr; + infiniRtStream_t stream = nullptr; + infiniRtGraph_t graph = nullptr; + infiniRtGraphExec_t graph_exec = nullptr; + + const auto device = infiniRtDevice_t{INFINI_RT_DEVICE_NVIDIA, 0}; + + ExpectCudaSuccess(&context, cudaSetDevice(device.index), + "Failed to set CUDA device."); + ExpectCudaSuccess( + &context, + cudaStreamCreateWithFlags(&native_stream, cudaStreamNonBlocking), + "Failed to create CUDA stream."); + + std::array capture_input{}; + FillPattern(&capture_input, 7); + + ExpectCudaSuccess(&context, cudaMalloc(&src, capture_input.size()), + "Failed to allocate source buffer."); + ExpectCudaSuccess(&context, cudaMalloc(&dst, capture_input.size()), + "Failed to allocate destination buffer."); + ExpectCudaSuccess(&context, + cudaMemcpy(src, capture_input.data(), capture_input.size(), + cudaMemcpyHostToDevice), + "Failed to initialize source buffer."); + ExpectCudaSuccess(&context, cudaMemset(dst, 0, capture_input.size()), + "Failed to initialize destination buffer."); + + ExpectRtSuccess(&context, infiniRtStreamWrap(device, native_stream, &stream), + "Failed to wrap native CUDA stream."); + ExpectRtSuccess( + &context, + infiniRtStreamBeginCapture(stream, INFINI_RT_STREAM_CAPTURE_MODE_RELAXED), + "Failed to begin graph capture through C API."); + ExpectCudaSuccess(&context, + cudaMemcpyAsync(dst, src, capture_input.size(), + cudaMemcpyDeviceToDevice, native_stream), + "Failed to record device-to-device copy."); + ExpectRtSuccess(&context, infiniRtStreamEndCapture(stream, &graph), + "Failed to end graph capture through C API."); + ExpectRtSuccess(&context, infiniRtGraphInstantiate(&graph_exec, graph), + "Failed to instantiate graph through C API."); + + std::array replay_input{}; + FillPattern(&replay_input, 31); + + ExpectCudaSuccess(&context, + cudaMemcpy(src, replay_input.data(), replay_input.size(), + cudaMemcpyHostToDevice), + "Failed to refresh source buffer."); + ExpectCudaSuccess(&context, cudaMemset(dst, 0, replay_input.size()), + "Failed to clear destination buffer."); + ExpectRtSuccess(&context, infiniRtGraphLaunch(graph_exec, stream), + "Failed to launch graph through C API."); + ExpectCudaSuccess(&context, cudaStreamSynchronize(native_stream), + "Failed to synchronize CUDA stream."); + CopyDeviceToHostAndValidate(&context, dst, replay_input, + "C API graph replay should copy D2D data."); + + if (graph_exec != nullptr) { + ExpectRtSuccess(&context, infiniRtGraphExecDestroy(graph_exec), + "Failed to destroy graph exec through C API."); + } + if (graph != nullptr) { + ExpectRtSuccess(&context, infiniRtGraphDestroy(graph), + "Failed to destroy graph through C API."); + } + if (stream != nullptr) { + ExpectRtSuccess(&context, infiniRtStreamDestroy(stream), + "Failed to destroy wrapped stream through C API."); + } + if (dst != nullptr) { + ExpectCudaSuccess(&context, cudaFree(dst), + "Failed to free destination buffer."); + } + if (src != nullptr) { + ExpectCudaSuccess(&context, cudaFree(src), "Failed to free source buffer."); + } + if (native_stream != nullptr) { + ExpectCudaSuccess(&context, cudaStreamDestroy(native_stream), + "Failed to destroy CUDA stream."); + } + + return context.ExitCode(); +} From 81823a4465081d14d30263b9fffefb97305164af Mon Sep 17 00:00:00 2001 From: Jiacheng Huang <45955067+voltjia@users.noreply.github.com> Date: Fri, 3 Jul 2026 16:01:02 +0800 Subject: [PATCH 3/5] fix: align graph runtime API with runtime namespace * feat!: align runtime API and add runtime dispatch (#11) * Align runtime API with generated wrappers * Add default runtime dispatch specialization * Refactor runtime dispatch namespace * Use Abseil status for runtime device API * Revert "Use Abseil status for runtime device API" This reverts commit a26ddffad65819bc0fd2f7aa38bb8cf8a2c8667d. * Address runtime dispatch review feedback * Keep runtime API list in generator * Add TensorView constructor guard test * Align runtime memcpy kind constants with CUDA API * Use CUDA-style runtime memcpy constants * Use CUDA-style runtime memcpy constants * Move TensorView tests back into core test * Remove standalone TensorView test target * Remove standalone TensorView test file * Use fully qualified runtime API names in README * style: format runtime dispatch test * feat: refactor InfiniCore CPU runtime to InfiniRT (#8) Co-authored-by: Jiacheng Huang * feat: add platform-adaptive runtime tests (#15) * feat: add runtime backend API foundation (#14) --------- Co-authored-by: spike-zhu <74974704+spike-zhu@users.noreply.github.com> --- src/c_api.cc | 95 ++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/src/c_api.cc b/src/c_api.cc index 178514a..61b40b3 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -13,32 +13,26 @@ namespace { using infini::rt::Device; -using infini::rt::Graph; -using infini::rt::GraphExec; -using infini::rt::Runtime; -using infini::rt::Stream; +using infini::rt::runtime::Runtime; struct CStream { - Stream stream; + Device::Type device_type; + void* raw; }; struct CGraph { - Graph graph; + Device::Type device_type; + void* raw; }; struct CGraphExec { - GraphExec graph_exec; + Device::Type device_type; + void* raw; }; template infiniRtStatus_t Guard(Func&& func) { - try { - return std::forward(func)(); - } catch (const std::bad_alloc&) { - return INFINI_RT_STATUS_RUNTIME_ERROR; - } catch (...) { - return INFINI_RT_STATUS_RUNTIME_ERROR; - } + return std::forward(func)(); } template @@ -84,28 +78,28 @@ Device::Type ToCppDeviceType(infiniRtDeviceType_t type) { auto ToNvidiaCaptureMode(infiniRtStreamCaptureMode_t mode) { switch (mode) { case INFINI_RT_STREAM_CAPTURE_MODE_GLOBAL: - return Runtime::StreamCaptureModeGlobal; + return Runtime::kStreamCaptureModeGlobal; case INFINI_RT_STREAM_CAPTURE_MODE_THREAD_LOCAL: - return Runtime::StreamCaptureModeThreadLocal; + return Runtime::kStreamCaptureModeThreadLocal; case INFINI_RT_STREAM_CAPTURE_MODE_RELAXED: - return Runtime::StreamCaptureModeRelaxed; + return Runtime::kStreamCaptureModeRelaxed; } - return Runtime::StreamCaptureModeRelaxed; + return Runtime::kStreamCaptureModeRelaxed; } -auto RawNvidiaStream(Stream stream) { +auto RawNvidiaStream(const CStream* stream) { return static_cast::Stream>( - stream.raw()); + stream->raw); } -auto RawNvidiaGraph(Graph graph) { +auto RawNvidiaGraph(const CGraph* graph) { return static_cast::Graph>( - graph.raw()); + graph->raw); } -auto RawNvidiaGraphExec(GraphExec graph_exec) { +auto RawNvidiaGraphExec(const CGraphExec* graph_exec) { return static_cast::GraphExec>( - graph_exec.raw()); + graph_exec->raw); } #endif @@ -134,7 +128,12 @@ infiniRtStatus_t infiniRtStreamWrap(infiniRtDevice_t device, if (device_type == Device::Type::kCount) { return INFINI_RT_STATUS_UNSUPPORTED_DEVICE; } - *stream = new CStream{Stream{device_type, native_stream}}; + auto* wrapped = new (std::nothrow) CStream{device_type, native_stream}; + if (wrapped == nullptr) { + return INFINI_RT_STATUS_RUNTIME_ERROR; + } + *stream = wrapped; + return INFINI_RT_STATUS_SUCCESS; }); } @@ -154,12 +153,12 @@ infiniRtStatus_t infiniRtStreamBeginCapture(infiniRtStream_t stream, } return Guard([&] { auto* wrapped = AsStream(stream); - switch (wrapped->stream.device_type()) { + switch (wrapped->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: return CheckBackendCall([&] { return Runtime::StreamBeginCapture( - RawNvidiaStream(wrapped->stream), ToNvidiaCaptureMode(mode)); + RawNvidiaStream(wrapped), ToNvidiaCaptureMode(mode)); }); #endif default: @@ -175,19 +174,24 @@ infiniRtStatus_t infiniRtStreamEndCapture(infiniRtStream_t stream, } return Guard([&] { auto* wrapped = AsStream(stream); - switch (wrapped->stream.device_type()) { + switch (wrapped->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: { typename Runtime::Graph raw_graph = {}; const auto status = CheckBackendCall([&] { return Runtime::StreamEndCapture( - RawNvidiaStream(wrapped->stream), &raw_graph); + RawNvidiaStream(wrapped), &raw_graph); }); if (status != INFINI_RT_STATUS_SUCCESS) { return status; } - *graph = new CGraph{ - Graph{Device::Type::kNvidia, static_cast(raw_graph)}}; + auto* wrapped_graph = new (std::nothrow) + CGraph{Device::Type::kNvidia, static_cast(raw_graph)}; + if (wrapped_graph == nullptr) { + return INFINI_RT_STATUS_RUNTIME_ERROR; + } + *graph = wrapped_graph; + return INFINI_RT_STATUS_SUCCESS; } #endif @@ -203,12 +207,12 @@ infiniRtStatus_t infiniRtGraphDestroy(infiniRtGraph_t graph) { } return Guard([&] { auto* wrapped = AsGraph(graph); - switch (wrapped->graph.device_type()) { + switch (wrapped->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: { const auto status = CheckBackendCall([&] { return Runtime::GraphDestroy( - RawNvidiaGraph(wrapped->graph)); + RawNvidiaGraph(wrapped)); }); // The C wrapper owns only the wrapper object. The backend destroy call // above owns the native graph handle. @@ -230,19 +234,24 @@ infiniRtStatus_t infiniRtGraphInstantiate(infiniRtGraphExec_t* graph_exec, } return Guard([&] { auto* wrapped = AsGraph(graph); - switch (wrapped->graph.device_type()) { + switch (wrapped->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: { typename Runtime::GraphExec raw_exec = {}; const auto status = CheckBackendCall([&] { return Runtime::GraphInstantiate( - &raw_exec, RawNvidiaGraph(wrapped->graph)); + &raw_exec, RawNvidiaGraph(wrapped)); }); if (status != INFINI_RT_STATUS_SUCCESS) { return status; } - *graph_exec = new CGraphExec{ - GraphExec{Device::Type::kNvidia, static_cast(raw_exec)}}; + auto* wrapped_exec = new (std::nothrow) + CGraphExec{Device::Type::kNvidia, static_cast(raw_exec)}; + if (wrapped_exec == nullptr) { + return INFINI_RT_STATUS_RUNTIME_ERROR; + } + *graph_exec = wrapped_exec; + return INFINI_RT_STATUS_SUCCESS; } #endif @@ -258,12 +267,12 @@ infiniRtStatus_t infiniRtGraphExecDestroy(infiniRtGraphExec_t graph_exec) { } return Guard([&] { auto* wrapped = AsGraphExec(graph_exec); - switch (wrapped->graph_exec.device_type()) { + switch (wrapped->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: { const auto status = CheckBackendCall([&] { return Runtime::GraphExecDestroy( - RawNvidiaGraphExec(wrapped->graph_exec)); + RawNvidiaGraphExec(wrapped)); }); // The C wrapper owns only the wrapper object. The backend destroy call // above owns the native executable graph handle. @@ -286,17 +295,15 @@ infiniRtStatus_t infiniRtGraphLaunch(infiniRtGraphExec_t graph_exec, return Guard([&] { auto* exec = AsGraphExec(graph_exec); auto* wrapped_stream = AsStream(stream); - if (exec->graph_exec.device_type() != - wrapped_stream->stream.device_type()) { + if (exec->device_type != wrapped_stream->device_type) { return INFINI_RT_STATUS_INVALID_ARGUMENT; } - switch (exec->graph_exec.device_type()) { + switch (exec->device_type) { #if defined(WITH_NVIDIA) case Device::Type::kNvidia: return CheckBackendCall([&] { return Runtime::GraphLaunch( - RawNvidiaGraphExec(exec->graph_exec), - RawNvidiaStream(wrapped_stream->stream)); + RawNvidiaGraphExec(exec), RawNvidiaStream(wrapped_stream)); }); #endif default: From 9f82a06d831c8ffe19d93c96c62dbff641945278 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Fri, 3 Jul 2026 16:49:12 +0800 Subject: [PATCH 4/5] refactor: use generated C++ graph runtime API --- include/infini/rt.h | 1 - include/infini/rt/c_api.h | 78 -------- src/c_api.cc | 313 ------------------------------- tests/CMakeLists.txt | 24 ++- tests/test_native_graph.cc | 196 +++++++++++++++++++ tests/test_nvidia_graph.cc | 140 -------------- tests/test_nvidia_graph_c_api.cc | 133 ------------- 7 files changed, 215 insertions(+), 670 deletions(-) delete mode 100644 include/infini/rt/c_api.h delete mode 100644 src/c_api.cc create mode 100644 tests/test_native_graph.cc delete mode 100644 tests/test_nvidia_graph.cc delete mode 100644 tests/test_nvidia_graph_c_api.cc diff --git a/include/infini/rt.h b/include/infini/rt.h index e687976..3adb6eb 100644 --- a/include/infini/rt.h +++ b/include/infini/rt.h @@ -1,7 +1,6 @@ #ifndef INFINI_RT_PUBLIC_H_ #define INFINI_RT_PUBLIC_H_ -#include #include #endif diff --git a/include/infini/rt/c_api.h b/include/infini/rt/c_api.h deleted file mode 100644 index c5ad65f..0000000 --- a/include/infini/rt/c_api.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef INFINI_RT_C_API_H_ -#define INFINI_RT_C_API_H_ - -#if defined(_WIN32) -#define INFINI_RT_EXPORT __declspec(dllexport) -#elif defined(__GNUC__) && \ - ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) -#define INFINI_RT_EXPORT __attribute__((visibility("default"))) -#else -#define INFINI_RT_EXPORT -#endif - -#ifdef __cplusplus -#define INFINI_RT_EXTERN_C extern "C" -#else -#define INFINI_RT_EXTERN_C -#endif - -typedef enum { - INFINI_RT_STATUS_SUCCESS = 0, - INFINI_RT_STATUS_INVALID_ARGUMENT = 1, - INFINI_RT_STATUS_UNSUPPORTED_DEVICE = 2, - INFINI_RT_STATUS_RUNTIME_ERROR = 3, -} infiniRtStatus_t; - -typedef enum { - INFINI_RT_DEVICE_CPU = 0, - INFINI_RT_DEVICE_NVIDIA = 1, - INFINI_RT_DEVICE_CAMBRICON = 2, - INFINI_RT_DEVICE_ASCEND = 3, - INFINI_RT_DEVICE_METAX = 4, - INFINI_RT_DEVICE_MOORE = 5, - INFINI_RT_DEVICE_ILUVATAR = 6, - INFINI_RT_DEVICE_KUNLUN = 7, - INFINI_RT_DEVICE_HYGON = 8, - INFINI_RT_DEVICE_QY = 9, -} infiniRtDeviceType_t; - -typedef struct { - infiniRtDeviceType_t type; - int index; -} infiniRtDevice_t; - -typedef enum { - INFINI_RT_STREAM_CAPTURE_MODE_GLOBAL = 0, - INFINI_RT_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1, - INFINI_RT_STREAM_CAPTURE_MODE_RELAXED = 2, -} infiniRtStreamCaptureMode_t; - -typedef void* infiniRtStream_t; -typedef void* infiniRtGraph_t; -typedef void* infiniRtGraphExec_t; - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtStreamWrap( - infiniRtDevice_t device, void* native_stream, infiniRtStream_t* stream); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t -infiniRtStreamDestroy(infiniRtStream_t stream); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtStreamBeginCapture( - infiniRtStream_t stream, infiniRtStreamCaptureMode_t mode); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t -infiniRtStreamEndCapture(infiniRtStream_t stream, infiniRtGraph_t* graph); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t -infiniRtGraphDestroy(infiniRtGraph_t graph); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t infiniRtGraphInstantiate( - infiniRtGraphExec_t* graph_exec, infiniRtGraph_t graph); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t -infiniRtGraphExecDestroy(infiniRtGraphExec_t graph_exec); - -INFINI_RT_EXTERN_C INFINI_RT_EXPORT infiniRtStatus_t -infiniRtGraphLaunch(infiniRtGraphExec_t graph_exec, infiniRtStream_t stream); - -#endif diff --git a/src/c_api.cc b/src/c_api.cc deleted file mode 100644 index 61b40b3..0000000 --- a/src/c_api.cc +++ /dev/null @@ -1,313 +0,0 @@ -#include - -#include -#include -#include - -#include "runtime.h" - -#if defined(WITH_NVIDIA) -#include "native/cuda/nvidia/runtime_.h" -#endif - -namespace { - -using infini::rt::Device; -using infini::rt::runtime::Runtime; - -struct CStream { - Device::Type device_type; - void* raw; -}; - -struct CGraph { - Device::Type device_type; - void* raw; -}; - -struct CGraphExec { - Device::Type device_type; - void* raw; -}; - -template -infiniRtStatus_t Guard(Func&& func) { - return std::forward(func)(); -} - -template -infiniRtStatus_t CheckBackendCall(Func&& func) { - using ReturnType = decltype(std::forward(func)()); - if constexpr (std::is_void_v) { - std::forward(func)(); - return INFINI_RT_STATUS_SUCCESS; - } else { - return std::forward(func)() == ReturnType{} - ? INFINI_RT_STATUS_SUCCESS - : INFINI_RT_STATUS_RUNTIME_ERROR; - } -} - -Device::Type ToCppDeviceType(infiniRtDeviceType_t type) { - switch (type) { - case INFINI_RT_DEVICE_CPU: - return Device::Type::kCpu; - case INFINI_RT_DEVICE_NVIDIA: - return Device::Type::kNvidia; - case INFINI_RT_DEVICE_CAMBRICON: - return Device::Type::kCambricon; - case INFINI_RT_DEVICE_ASCEND: - return Device::Type::kAscend; - case INFINI_RT_DEVICE_METAX: - return Device::Type::kMetax; - case INFINI_RT_DEVICE_MOORE: - return Device::Type::kMoore; - case INFINI_RT_DEVICE_ILUVATAR: - return Device::Type::kIluvatar; - case INFINI_RT_DEVICE_KUNLUN: - return Device::Type::kKunlun; - case INFINI_RT_DEVICE_HYGON: - return Device::Type::kHygon; - case INFINI_RT_DEVICE_QY: - return Device::Type::kQy; - } - return Device::Type::kCount; -} - -#if defined(WITH_NVIDIA) -auto ToNvidiaCaptureMode(infiniRtStreamCaptureMode_t mode) { - switch (mode) { - case INFINI_RT_STREAM_CAPTURE_MODE_GLOBAL: - return Runtime::kStreamCaptureModeGlobal; - case INFINI_RT_STREAM_CAPTURE_MODE_THREAD_LOCAL: - return Runtime::kStreamCaptureModeThreadLocal; - case INFINI_RT_STREAM_CAPTURE_MODE_RELAXED: - return Runtime::kStreamCaptureModeRelaxed; - } - return Runtime::kStreamCaptureModeRelaxed; -} - -auto RawNvidiaStream(const CStream* stream) { - return static_cast::Stream>( - stream->raw); -} - -auto RawNvidiaGraph(const CGraph* graph) { - return static_cast::Graph>( - graph->raw); -} - -auto RawNvidiaGraphExec(const CGraphExec* graph_exec) { - return static_cast::GraphExec>( - graph_exec->raw); -} -#endif - -CStream* AsStream(infiniRtStream_t stream) { - return static_cast(stream); -} - -CGraph* AsGraph(infiniRtGraph_t graph) { return static_cast(graph); } - -CGraphExec* AsGraphExec(infiniRtGraphExec_t graph_exec) { - return static_cast(graph_exec); -} - -infiniRtStatus_t Unsupported() { return INFINI_RT_STATUS_UNSUPPORTED_DEVICE; } - -} // namespace - -infiniRtStatus_t infiniRtStreamWrap(infiniRtDevice_t device, - void* native_stream, - infiniRtStream_t* stream) { - if (native_stream == nullptr || stream == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - const auto device_type = ToCppDeviceType(device.type); - if (device_type == Device::Type::kCount) { - return INFINI_RT_STATUS_UNSUPPORTED_DEVICE; - } - auto* wrapped = new (std::nothrow) CStream{device_type, native_stream}; - if (wrapped == nullptr) { - return INFINI_RT_STATUS_RUNTIME_ERROR; - } - *stream = wrapped; - - return INFINI_RT_STATUS_SUCCESS; - }); -} - -infiniRtStatus_t infiniRtStreamDestroy(infiniRtStream_t stream) { - if (stream == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - delete AsStream(stream); - return INFINI_RT_STATUS_SUCCESS; -} - -infiniRtStatus_t infiniRtStreamBeginCapture(infiniRtStream_t stream, - infiniRtStreamCaptureMode_t mode) { - if (stream == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* wrapped = AsStream(stream); - switch (wrapped->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: - return CheckBackendCall([&] { - return Runtime::StreamBeginCapture( - RawNvidiaStream(wrapped), ToNvidiaCaptureMode(mode)); - }); -#endif - default: - return Unsupported(); - } - }); -} - -infiniRtStatus_t infiniRtStreamEndCapture(infiniRtStream_t stream, - infiniRtGraph_t* graph) { - if (stream == nullptr || graph == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* wrapped = AsStream(stream); - switch (wrapped->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: { - typename Runtime::Graph raw_graph = {}; - const auto status = CheckBackendCall([&] { - return Runtime::StreamEndCapture( - RawNvidiaStream(wrapped), &raw_graph); - }); - if (status != INFINI_RT_STATUS_SUCCESS) { - return status; - } - auto* wrapped_graph = new (std::nothrow) - CGraph{Device::Type::kNvidia, static_cast(raw_graph)}; - if (wrapped_graph == nullptr) { - return INFINI_RT_STATUS_RUNTIME_ERROR; - } - *graph = wrapped_graph; - - return INFINI_RT_STATUS_SUCCESS; - } -#endif - default: - return Unsupported(); - } - }); -} - -infiniRtStatus_t infiniRtGraphDestroy(infiniRtGraph_t graph) { - if (graph == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* wrapped = AsGraph(graph); - switch (wrapped->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: { - const auto status = CheckBackendCall([&] { - return Runtime::GraphDestroy( - RawNvidiaGraph(wrapped)); - }); - // The C wrapper owns only the wrapper object. The backend destroy call - // above owns the native graph handle. - delete wrapped; - return status; - } -#endif - default: - delete wrapped; - return Unsupported(); - } - }); -} - -infiniRtStatus_t infiniRtGraphInstantiate(infiniRtGraphExec_t* graph_exec, - infiniRtGraph_t graph) { - if (graph_exec == nullptr || graph == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* wrapped = AsGraph(graph); - switch (wrapped->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: { - typename Runtime::GraphExec raw_exec = {}; - const auto status = CheckBackendCall([&] { - return Runtime::GraphInstantiate( - &raw_exec, RawNvidiaGraph(wrapped)); - }); - if (status != INFINI_RT_STATUS_SUCCESS) { - return status; - } - auto* wrapped_exec = new (std::nothrow) - CGraphExec{Device::Type::kNvidia, static_cast(raw_exec)}; - if (wrapped_exec == nullptr) { - return INFINI_RT_STATUS_RUNTIME_ERROR; - } - *graph_exec = wrapped_exec; - - return INFINI_RT_STATUS_SUCCESS; - } -#endif - default: - return Unsupported(); - } - }); -} - -infiniRtStatus_t infiniRtGraphExecDestroy(infiniRtGraphExec_t graph_exec) { - if (graph_exec == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* wrapped = AsGraphExec(graph_exec); - switch (wrapped->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: { - const auto status = CheckBackendCall([&] { - return Runtime::GraphExecDestroy( - RawNvidiaGraphExec(wrapped)); - }); - // The C wrapper owns only the wrapper object. The backend destroy call - // above owns the native executable graph handle. - delete wrapped; - return status; - } -#endif - default: - delete wrapped; - return Unsupported(); - } - }); -} - -infiniRtStatus_t infiniRtGraphLaunch(infiniRtGraphExec_t graph_exec, - infiniRtStream_t stream) { - if (graph_exec == nullptr || stream == nullptr) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - return Guard([&] { - auto* exec = AsGraphExec(graph_exec); - auto* wrapped_stream = AsStream(stream); - if (exec->device_type != wrapped_stream->device_type) { - return INFINI_RT_STATUS_INVALID_ARGUMENT; - } - switch (exec->device_type) { -#if defined(WITH_NVIDIA) - case Device::Type::kNvidia: - return CheckBackendCall([&] { - return Runtime::GraphLaunch( - RawNvidiaGraphExec(exec), RawNvidiaStream(wrapped_stream)); - }); -#endif - default: - return Unsupported(); - } - }); -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b801026..4da61b3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,6 +26,17 @@ function(add_infini_rt_backend_runtime_test backend device_type runtime_header "INFINI_RT_TEST_SUPPORTS_EVENT_ELAPSED_TIME=${supports_event_elapsed_time}") endfunction() +function(add_infini_rt_backend_graph_test backend device_type supports_graph_capture) + string(TOLOWER "${backend}" backend_lower) + set(target "test_${backend_lower}_graph") + add_infini_rt_test(${target} test_native_graph.cc) + target_compile_definitions(${target} + PRIVATE + "INFINI_RT_TEST_BACKEND_NAME=\"${backend}\"" + "INFINI_RT_TEST_DEVICE_TYPE=${device_type}" + "INFINI_RT_TEST_SUPPORTS_GRAPH_CAPTURE=${supports_graph_capture}") +endfunction() + add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) @@ -44,6 +55,8 @@ if(WITH_NVIDIA) NVIDIA infini::rt::Device::Type::kNvidia infini/rt/nvidia/runtime_.h 1 1 1 1 1 1 1 1) + add_infini_rt_backend_graph_test( + NVIDIA infini::rt::Device::Type::kNvidia 1) endif() if(WITH_ILUVATAR) @@ -52,6 +65,8 @@ if(WITH_ILUVATAR) ILUVATAR infini::rt::Device::Type::kIluvatar infini/rt/iluvatar/runtime_.h 1 1 1 1 1 1 1 1) + add_infini_rt_backend_graph_test( + ILUVATAR infini::rt::Device::Type::kIluvatar 1) endif() if(WITH_HYGON) @@ -68,6 +83,8 @@ if(WITH_METAX) METAX infini::rt::Device::Type::kMetax infini/rt/metax/runtime_.h 1 1 1 1 1 1 1 1) + add_infini_rt_backend_graph_test( + METAX infini::rt::Device::Type::kMetax 0) endif() if(WITH_MOORE) @@ -76,6 +93,8 @@ if(WITH_MOORE) MOORE infini::rt::Device::Type::kMoore infini/rt/moore/runtime_.h 1 1 0 1 1 1 1 1) + add_infini_rt_backend_graph_test( + MOORE infini::rt::Device::Type::kMoore 0) endif() if(WITH_CAMBRICON) @@ -131,11 +150,6 @@ if(INFINI_RT_TEST_HAS_RUNTIME_BACKEND) endif() endif() -if(WITH_NVIDIA) - add_infini_rt_test(test_nvidia_graph test_nvidia_graph.cc) - add_infini_rt_test(test_nvidia_graph_c_api test_nvidia_graph_c_api.cc) -endif() - set(INFINI_RT_TEST_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/install_consumer_prefix") set(INFINI_RT_TEST_CONSUMER_BINARY diff --git a/tests/test_native_graph.cc b/tests/test_native_graph.cc new file mode 100644 index 0000000..17a51c8 --- /dev/null +++ b/tests/test_native_graph.cc @@ -0,0 +1,196 @@ +#include + +#include +#include +#include +#include +#include + +#include "test_helper.h" + +namespace { + +namespace runtime = infini::rt::runtime; + +void ExpectSuccess(infini::rt::test::TestContext* context, + runtime::Error status, std::string_view message) { + context->Expect(status == runtime::kSuccess, message); +} + +void FillPattern(std::array* input, std::uint8_t salt) { + for (std::size_t i = 0; i < input->size(); ++i) { + (*input)[i] = static_cast(i * 13 + salt); + } +} + +bool CopyDeviceToHostAndValidate(infini::rt::test::TestContext* context, + void* device_ptr, + const std::array& expected, + std::string_view message) { + std::array output{}; + ExpectSuccess(context, + runtime::Memcpy(output.data(), device_ptr, output.size(), + runtime::kMemcpyDeviceToHost), + "Failed to copy graph output to host."); + return context->ExpectEqual(output, expected, message); +} + +bool SelectTestDevice(infini::rt::test::TestContext* context) { + infini::rt::set_runtime_device_type(INFINI_RT_TEST_DEVICE_TYPE); + + int count = 0; + ExpectSuccess(context, runtime::GetDeviceCount(&count), + "Failed to query device count."); + if (context->ExitCode() != 0) { + return false; + } + + if (count == 0) { + std::cout << "Skipping " << INFINI_RT_TEST_BACKEND_NAME + << " graph test because no device is available." << std::endl; + return false; + } + + ExpectSuccess(context, runtime::SetDevice(0), + "Failed to set graph test device."); + return context->ExitCode() == 0; +} + +void RunUnsupportedGraphSmoke(infini::rt::test::TestContext* context) { + runtime::Stream stream = nullptr; + ExpectSuccess(context, runtime::StreamCreate(&stream), + "Failed to create stream for unsupported graph smoke."); + if (context->ExitCode() != 0) { + return; + } + + const auto status = runtime::StreamBeginCapture( + stream, runtime::StreamCaptureMode::kStreamCaptureModeRelaxed); + context->Expect(status != runtime::kSuccess, + "Unsupported graph capture should return an error."); + + ExpectSuccess(context, runtime::StreamDestroy(stream), + "Failed to destroy stream for unsupported graph smoke."); +} + +void RunGraphReplayTest(infini::rt::test::TestContext* context) { + void* src = nullptr; + void* dst = nullptr; + runtime::Stream stream = nullptr; + runtime::Graph graph = nullptr; + runtime::GraphExec graph_exec = nullptr; + + std::array capture_input{}; + FillPattern(&capture_input, 7); + + ExpectSuccess(context, runtime::Malloc(&src, capture_input.size()), + "Failed to allocate source buffer."); + ExpectSuccess(context, runtime::Malloc(&dst, capture_input.size()), + "Failed to allocate destination buffer."); + ExpectSuccess(context, runtime::StreamCreate(&stream), + "Failed to create stream."); + + if (context->ExitCode() == 0) { + ExpectSuccess( + context, + runtime::Memcpy(src, capture_input.data(), capture_input.size(), + runtime::kMemcpyHostToDevice), + "Failed to initialize source buffer."); + ExpectSuccess(context, runtime::Memset(dst, 0, capture_input.size()), + "Failed to initialize destination buffer."); + + ExpectSuccess( + context, + runtime::StreamBeginCapture( + stream, runtime::StreamCaptureMode::kStreamCaptureModeRelaxed), + "Failed to begin stream capture."); + ExpectSuccess(context, + runtime::MemcpyAsync(dst, src, capture_input.size(), + runtime::kMemcpyDeviceToDevice, stream), + "Failed to record device-to-device copy."); + ExpectSuccess(context, runtime::StreamEndCapture(stream, &graph), + "Failed to end stream capture."); + + ExpectSuccess(context, runtime::GraphInstantiate(&graph_exec, graph), + "Failed to instantiate graph."); + } + + std::array replay_input_1{}; + std::array replay_input_2{}; + FillPattern(&replay_input_1, 31); + FillPattern(&replay_input_2, 53); + + if (context->ExitCode() == 0) { + ExpectSuccess( + context, + runtime::Memcpy(src, replay_input_1.data(), replay_input_1.size(), + runtime::kMemcpyHostToDevice), + "Failed to refresh first source buffer."); + ExpectSuccess(context, runtime::Memset(dst, 0, replay_input_1.size()), + "Failed to clear first destination buffer."); + ExpectSuccess(context, runtime::DeviceSynchronize(), + "Failed to synchronize first replay inputs."); + ExpectSuccess(context, runtime::GraphLaunch(graph_exec, stream), + "Failed to launch first graph replay."); + ExpectSuccess(context, runtime::StreamSynchronize(stream), + "Failed to synchronize first graph replay."); + CopyDeviceToHostAndValidate(context, dst, replay_input_1, + "First graph replay should copy D2D data."); + } + + if (context->ExitCode() == 0) { + ExpectSuccess( + context, + runtime::Memcpy(src, replay_input_2.data(), replay_input_2.size(), + runtime::kMemcpyHostToDevice), + "Failed to refresh second source buffer."); + ExpectSuccess(context, runtime::Memset(dst, 0, replay_input_2.size()), + "Failed to clear second destination buffer."); + ExpectSuccess(context, runtime::DeviceSynchronize(), + "Failed to synchronize second replay inputs."); + ExpectSuccess(context, runtime::GraphLaunch(graph_exec, stream), + "Failed to launch second graph replay."); + ExpectSuccess(context, runtime::StreamSynchronize(stream), + "Failed to synchronize second graph replay."); + CopyDeviceToHostAndValidate(context, dst, replay_input_2, + "Second graph replay should copy D2D data."); + } + + if (graph_exec != nullptr) { + ExpectSuccess(context, runtime::GraphExecDestroy(graph_exec), + "Failed to destroy graph exec."); + } + if (graph != nullptr) { + ExpectSuccess(context, runtime::GraphDestroy(graph), + "Failed to destroy graph."); + } + if (stream != nullptr) { + ExpectSuccess(context, runtime::StreamDestroy(stream), + "Failed to destroy stream."); + } + if (dst != nullptr) { + ExpectSuccess(context, runtime::Free(dst), + "Failed to free destination buffer."); + } + if (src != nullptr) { + ExpectSuccess(context, runtime::Free(src), "Failed to free source buffer."); + } +} + +} // namespace + +int main() { + infini::rt::test::TestContext context; + + if (!SelectTestDevice(&context)) { + return context.ExitCode(); + } + + if constexpr (INFINI_RT_TEST_SUPPORTS_GRAPH_CAPTURE) { + RunGraphReplayTest(&context); + } else { + RunUnsupportedGraphSmoke(&context); + } + + return context.ExitCode(); +} diff --git a/tests/test_nvidia_graph.cc b/tests/test_nvidia_graph.cc deleted file mode 100644 index 291c46b..0000000 --- a/tests/test_nvidia_graph.cc +++ /dev/null @@ -1,140 +0,0 @@ -#include - -#include -#include -#include - -#include "test_helper.h" - -namespace { - -namespace runtime = infini::rt::runtime; - -void ExpectSuccess(infini::rt::test::TestContext* context, - runtime::Error status, const char* message) { - context->Expect(status == runtime::kSuccess, message); -} - -void FillPattern(std::array* input, std::uint8_t salt) { - for (std::size_t i = 0; i < input->size(); ++i) { - (*input)[i] = static_cast(i * 13 + salt); - } -} - -bool CopyDeviceToHostAndValidate(infini::rt::test::TestContext* context, - void* device_ptr, - const std::array& expected, - std::string_view message) { - std::array output{}; - runtime::Memcpy(output.data(), device_ptr, output.size(), - runtime::kMemcpyDeviceToHost); - return context->ExpectEqual(output, expected, message); -} - -} // namespace - -int main() { - infini::rt::test::TestContext context; - - infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); - ExpectSuccess(&context, runtime::SetDevice(0), - "Failed to set NVIDIA runtime device."); - - void* src = nullptr; - void* dst = nullptr; - runtime::Stream stream = nullptr; - runtime::Graph graph = nullptr; - runtime::GraphExec graph_exec = nullptr; - - std::array capture_input{}; - FillPattern(&capture_input, 7); - - ExpectSuccess(&context, runtime::Malloc(&src, capture_input.size()), - "Failed to allocate source buffer."); - ExpectSuccess(&context, runtime::Malloc(&dst, capture_input.size()), - "Failed to allocate destination buffer."); - ExpectSuccess(&context, runtime::StreamCreate(&stream), - "Failed to create stream."); - - ExpectSuccess(&context, - runtime::Memcpy(src, capture_input.data(), capture_input.size(), - runtime::kMemcpyHostToDevice), - "Failed to initialize source buffer."); - ExpectSuccess(&context, runtime::Memset(dst, 0, capture_input.size()), - "Failed to initialize destination buffer."); - - ExpectSuccess( - &context, - runtime::StreamBeginCapture( - stream, runtime::StreamCaptureMode::kStreamCaptureModeRelaxed), - "Failed to begin stream capture."); - ExpectSuccess(&context, - runtime::MemcpyAsync(dst, src, capture_input.size(), - runtime::kMemcpyDeviceToDevice, stream), - "Failed to record device-to-device copy."); - ExpectSuccess(&context, runtime::StreamEndCapture(stream, &graph), - "Failed to end stream capture."); - - ExpectSuccess(&context, runtime::GraphInstantiate(&graph_exec, graph), - "Failed to instantiate graph."); - - std::array replay_input_1{}; - std::array replay_input_2{}; - FillPattern(&replay_input_1, 31); - FillPattern(&replay_input_2, 53); - - ExpectSuccess( - &context, - runtime::Memcpy(src, replay_input_1.data(), replay_input_1.size(), - runtime::kMemcpyHostToDevice), - "Failed to refresh first source buffer."); - ExpectSuccess(&context, runtime::Memset(dst, 0, replay_input_1.size()), - "Failed to clear first destination buffer."); - ExpectSuccess(&context, runtime::DeviceSynchronize(), - "Failed to synchronize first replay inputs."); - ExpectSuccess(&context, runtime::GraphLaunch(graph_exec, stream), - "Failed to launch first graph replay."); - ExpectSuccess(&context, runtime::StreamSynchronize(stream), - "Failed to synchronize first graph replay."); - CopyDeviceToHostAndValidate(&context, dst, replay_input_1, - "First graph replay should copy D2D data."); - - ExpectSuccess( - &context, - runtime::Memcpy(src, replay_input_2.data(), replay_input_2.size(), - runtime::kMemcpyHostToDevice), - "Failed to refresh second source buffer."); - ExpectSuccess(&context, runtime::Memset(dst, 0, replay_input_2.size()), - "Failed to clear second destination buffer."); - ExpectSuccess(&context, runtime::DeviceSynchronize(), - "Failed to synchronize second replay inputs."); - ExpectSuccess(&context, runtime::GraphLaunch(graph_exec, stream), - "Failed to launch second graph replay."); - ExpectSuccess(&context, runtime::StreamSynchronize(stream), - "Failed to synchronize second graph replay."); - CopyDeviceToHostAndValidate(&context, dst, replay_input_2, - "Second graph replay should copy D2D data."); - - if (graph_exec != nullptr) { - ExpectSuccess(&context, runtime::GraphExecDestroy(graph_exec), - "Failed to destroy graph exec."); - } - if (graph != nullptr) { - ExpectSuccess(&context, runtime::GraphDestroy(graph), - "Failed to destroy graph."); - } - if (stream != nullptr) { - ExpectSuccess(&context, runtime::StreamDestroy(stream), - "Failed to destroy stream."); - } - if (dst != nullptr) { - ExpectSuccess(&context, runtime::Free(dst), - "Failed to free destination buffer."); - } - if (src != nullptr) { - ExpectSuccess(&context, runtime::Free(src), - "Failed to free source buffer."); - } - - return context.ExitCode(); -} diff --git a/tests/test_nvidia_graph_c_api.cc b/tests/test_nvidia_graph_c_api.cc deleted file mode 100644 index 1a1b2cd..0000000 --- a/tests/test_nvidia_graph_c_api.cc +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include - -#include -#include -#include - -#include "test_helper.h" - -namespace { - -bool ExpectCudaSuccess(infini::rt::test::TestContext* context, - cudaError_t status, std::string_view message) { - return context->Expect(status == cudaSuccess, message); -} - -bool ExpectRtSuccess(infini::rt::test::TestContext* context, - infiniRtStatus_t status, std::string_view message) { - return context->Expect(status == INFINI_RT_STATUS_SUCCESS, message); -} - -void FillPattern(std::array* input, std::uint8_t salt) { - for (std::size_t i = 0; i < input->size(); ++i) { - (*input)[i] = static_cast(i * 13 + salt); - } -} - -bool CopyDeviceToHostAndValidate(infini::rt::test::TestContext* context, - void* device_ptr, - const std::array& expected, - std::string_view message) { - std::array output{}; - if (!ExpectCudaSuccess(context, - cudaMemcpy(output.data(), device_ptr, output.size(), - cudaMemcpyDeviceToHost), - "Failed to copy device output to host.")) { - return false; - } - return context->ExpectEqual(output, expected, message); -} - -} // namespace - -int main() { - infini::rt::test::TestContext context; - - cudaStream_t native_stream = nullptr; - void* src = nullptr; - void* dst = nullptr; - infiniRtStream_t stream = nullptr; - infiniRtGraph_t graph = nullptr; - infiniRtGraphExec_t graph_exec = nullptr; - - const auto device = infiniRtDevice_t{INFINI_RT_DEVICE_NVIDIA, 0}; - - ExpectCudaSuccess(&context, cudaSetDevice(device.index), - "Failed to set CUDA device."); - ExpectCudaSuccess( - &context, - cudaStreamCreateWithFlags(&native_stream, cudaStreamNonBlocking), - "Failed to create CUDA stream."); - - std::array capture_input{}; - FillPattern(&capture_input, 7); - - ExpectCudaSuccess(&context, cudaMalloc(&src, capture_input.size()), - "Failed to allocate source buffer."); - ExpectCudaSuccess(&context, cudaMalloc(&dst, capture_input.size()), - "Failed to allocate destination buffer."); - ExpectCudaSuccess(&context, - cudaMemcpy(src, capture_input.data(), capture_input.size(), - cudaMemcpyHostToDevice), - "Failed to initialize source buffer."); - ExpectCudaSuccess(&context, cudaMemset(dst, 0, capture_input.size()), - "Failed to initialize destination buffer."); - - ExpectRtSuccess(&context, infiniRtStreamWrap(device, native_stream, &stream), - "Failed to wrap native CUDA stream."); - ExpectRtSuccess( - &context, - infiniRtStreamBeginCapture(stream, INFINI_RT_STREAM_CAPTURE_MODE_RELAXED), - "Failed to begin graph capture through C API."); - ExpectCudaSuccess(&context, - cudaMemcpyAsync(dst, src, capture_input.size(), - cudaMemcpyDeviceToDevice, native_stream), - "Failed to record device-to-device copy."); - ExpectRtSuccess(&context, infiniRtStreamEndCapture(stream, &graph), - "Failed to end graph capture through C API."); - ExpectRtSuccess(&context, infiniRtGraphInstantiate(&graph_exec, graph), - "Failed to instantiate graph through C API."); - - std::array replay_input{}; - FillPattern(&replay_input, 31); - - ExpectCudaSuccess(&context, - cudaMemcpy(src, replay_input.data(), replay_input.size(), - cudaMemcpyHostToDevice), - "Failed to refresh source buffer."); - ExpectCudaSuccess(&context, cudaMemset(dst, 0, replay_input.size()), - "Failed to clear destination buffer."); - ExpectRtSuccess(&context, infiniRtGraphLaunch(graph_exec, stream), - "Failed to launch graph through C API."); - ExpectCudaSuccess(&context, cudaStreamSynchronize(native_stream), - "Failed to synchronize CUDA stream."); - CopyDeviceToHostAndValidate(&context, dst, replay_input, - "C API graph replay should copy D2D data."); - - if (graph_exec != nullptr) { - ExpectRtSuccess(&context, infiniRtGraphExecDestroy(graph_exec), - "Failed to destroy graph exec through C API."); - } - if (graph != nullptr) { - ExpectRtSuccess(&context, infiniRtGraphDestroy(graph), - "Failed to destroy graph through C API."); - } - if (stream != nullptr) { - ExpectRtSuccess(&context, infiniRtStreamDestroy(stream), - "Failed to destroy wrapped stream through C API."); - } - if (dst != nullptr) { - ExpectCudaSuccess(&context, cudaFree(dst), - "Failed to free destination buffer."); - } - if (src != nullptr) { - ExpectCudaSuccess(&context, cudaFree(src), "Failed to free source buffer."); - } - if (native_stream != nullptr) { - ExpectCudaSuccess(&context, cudaStreamDestroy(native_stream), - "Failed to destroy CUDA stream."); - } - - return context.ExitCode(); -} From 2fc8a16a29d4ee9c4feff654ac1691fd5e510904 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Fri, 3 Jul 2026 17:59:09 +0800 Subject: [PATCH 5/5] chore: drop unrelated gitignore change --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 48539b4..99bca7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ # Generated files build/ -build-*/ generated/ # Prerequisites