diff --git a/README.md b/README.md new file mode 100644 index 0000000..5d41768 --- /dev/null +++ b/README.md @@ -0,0 +1,158 @@ +# InfiniRT + +InfiniRT is the runtime library of the InfiniCore project. It provides a stable runtime API for device management and memory operations across supported backends such as CPU and NVIDIA. + +The recommended public entry is: + +```cpp +#include +``` + +## Build and Install + +Configure, build, and install InfiniRT with CMake: + +```bash +cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/path/to/infini-rt-prefix +cmake --build build -j +cmake --install build +``` + +`infini-rt-prefix` means the installation prefix used by CMake. After installation, headers and libraries are placed under this prefix, for example: + +```text +/path/to/infini-rt-prefix/include +/path/to/infini-rt-prefix/lib +``` + +## Build Options + +Common options include: + +```bash +-DWITH_CPU=ON +-DWITH_NVIDIA=ON +-DINFINI_RT_BUILD_TESTING=ON +``` + +Example CPU build: + +```bash +cmake -S . -B build \ + -DCMAKE_INSTALL_PREFIX=/path/to/infini-rt-prefix \ + -DWITH_CPU=ON \ + -DINFINI_RT_BUILD_TESTING=ON +cmake --build build -j +ctest --test-dir build --output-on-failure +cmake --install build +``` + +Example NVIDIA build: + +```bash +cmake -S . -B build \ + -DCMAKE_INSTALL_PREFIX=/path/to/infini-rt-prefix \ + -DWITH_NVIDIA=ON \ + -DINFINI_RT_BUILD_TESTING=ON +cmake --build build -j +ctest --test-dir build --output-on-failure +cmake --install build +``` + +## Basic Usage + +```cpp +#include +#include + +int main() { + infini::rt::runtime::SetDevice(0); + + constexpr std::size_t size = 1024; + void* ptr = nullptr; + + infini::rt::runtime::Malloc(&ptr, size); + infini::rt::runtime::Memset(ptr, 0, size); + infini::rt::runtime::Free(ptr); + + return 0; +} +``` + +The CUDA Runtime API-aligned layer lives under `infini::rt::runtime`. The +top-level `infini::rt::set_runtime_device_type` and +`infini::rt::runtime_device_type` APIs select which enabled backend receives +those runtime calls. A GPU backend is selected initially when one is enabled; +otherwise CPU is selected. + +```cpp +constexpr std::size_t size = 1024; +void* ptr = nullptr; + +infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); +infini::rt::runtime::Malloc(&ptr, size); +infini::rt::runtime::Free(ptr); + +infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); +infini::rt::runtime::Malloc(&ptr, size); +infini::rt::runtime::Free(ptr); +``` + +Use `infini::rt::runtime::Runtime` when CPU +runtime calls are needed explicitly in a build that also enables an accelerator +backend. + +## Using Installed InfiniRT From Another Project + +Downstream projects should consume the installed headers and libraries instead of depending on the InfiniRT source tree. + +Typical include and library paths are: + +```text +/path/to/infini-rt-prefix/include +/path/to/infini-rt-prefix/lib +``` + +A simple compile command may look like: + +```bash +c++ main.cc \ + -I/path/to/infini-rt-prefix/include \ + -L/path/to/infini-rt-prefix/lib \ + -linfinirt +``` + +At runtime, make sure the dynamic linker can find `libinfinirt.so`, for example: + +```bash +export LD_LIBRARY_PATH=/path/to/infini-rt-prefix/lib:$LD_LIBRARY_PATH +``` + +## Tests + +Enable C++ tests with: + +```bash +-DINFINI_RT_BUILD_TESTING=ON +``` + +Then run: + +```bash +ctest --test-dir build --output-on-failure +``` + +## Formatting + +C++ code should be formatted with `clang-format`. + +Python scripts should be checked and formatted with Ruff: + +```bash +ruff format --check . +ruff check . +``` + +## Contributing + +Please follow the repository's `CONTRIBUTING.md`. diff --git a/scripts/generate_public_headers.py b/scripts/generate_public_headers.py index 71b2c4f..6620b42 100644 --- a/scripts/generate_public_headers.py +++ b/scripts/generate_public_headers.py @@ -53,21 +53,23 @@ "cpu": "Device::Type::kCpu", "nvidia": "Device::Type::kNvidia", "iluvatar": "Device::Type::kIluvatar", + "hygon": "Device::Type::kHygon", "metax": "Device::Type::kMetax", "moore": "Device::Type::kMoore", "cambricon": "Device::Type::kCambricon", "ascend": "Device::Type::kAscend", } -_RUNTIME_HEADERS = { - "cpu": "native/cpu/runtime_.h", - "nvidia": "native/cuda/nvidia/runtime_.h", - "iluvatar": "native/cuda/iluvatar/runtime_.h", - "metax": "native/cuda/metax/runtime_.h", - "moore": "native/cuda/moore/runtime_.h", - "cambricon": "native/cambricon/runtime_.h", - "ascend": "native/ascend/runtime_.h", -} +_DEFAULT_DEVICE_PRIORITY = ( + "nvidia", + "iluvatar", + "hygon", + "metax", + "moore", + "cambricon", + "ascend", + "cpu", +) def _guard(path): @@ -155,7 +157,12 @@ def _write_detail_headers(include_root, source_root, devices): def _write_generated_header(include_root, devices): + default_device = _default_device(devices) + default_device_type = _DEVICE_TYPES[default_device] includes = [ + "#include ", + "#include ", + "#include ", f"#include {_detail_include('data_type.h')}", f"#include {_detail_include('device.h')}", f"#include {_detail_include('hash.h')}", @@ -166,6 +173,13 @@ def _write_generated_header(include_root, devices): for device in devices: includes.append(f"#include ") + for device in devices: + includes.append(f"#include ") + + runtime_declarations = "\n\n".join( + f"{function.signature()};" for function in _PUBLIC_RUNTIME_FUNCTIONS + ) + path = include_root / "infini" / "rt" / "generated.h" path.parent.mkdir(parents=True, exist_ok=True) path.write_text( @@ -174,6 +188,49 @@ def _write_generated_header(include_root, devices): {chr(10).join(includes)} +namespace infini::rt {{ + +void set_runtime_device_type(Device::Type device_type); + +Device::Type runtime_device_type(); + +namespace runtime {{ +namespace generated_detail {{ + +using DefaultErrorRuntime = Runtime<{default_device_type}>; + +inline constexpr Device::Type kDefaultDeviceType = {default_device_type}; + +}} // namespace generated_detail + +using Error = typename generated_detail::DefaultErrorRuntime::Error; + +using Stream = typename generated_detail::DefaultErrorRuntime::Stream; + +using Event = void*; + +using MemcpyKind = std::remove_cv_t< + decltype(generated_detail::DefaultErrorRuntime::kMemcpyHostToHost)>; + +inline constexpr Error kSuccess = generated_detail::DefaultErrorRuntime::kSuccess; + +inline constexpr MemcpyKind kMemcpyHostToHost = + generated_detail::DefaultErrorRuntime::kMemcpyHostToHost; + +inline constexpr MemcpyKind kMemcpyHostToDevice = + generated_detail::DefaultErrorRuntime::kMemcpyHostToDevice; + +inline constexpr MemcpyKind kMemcpyDeviceToHost = + generated_detail::DefaultErrorRuntime::kMemcpyDeviceToHost; + +inline constexpr MemcpyKind kMemcpyDeviceToDevice = + generated_detail::DefaultErrorRuntime::kMemcpyDeviceToDevice; + +{runtime_declarations} + +}} // namespace runtime +}} // namespace infini::rt + #endif """ ) @@ -198,237 +255,290 @@ def params_decl(self): return ", ".join(f"{param.type} {param.name}" for param in self.params) -def _parse_param(param): - param_type, param_name = param.strip().rsplit(" ", 1) - - return _Param(param_type, param_name) - - -def _parse_runtime_functions(runtime_header): - text = pathlib.Path(runtime_header).read_text() - return tuple( - _Function( - return_type, - name, - tuple( - _parse_param(param) - for param in re.split(r",\s*", params.strip()) - if param - ), - ) - for return_type, name, params in re.findall( - r"^(infini::rt::Error|Error|void) ([A-Z]\w*)\(([^()]*)\);$", - text, - re.MULTILINE, - ) - ) - - -def _unsupported_statement(): - return " return infini::rt::kUnSuccess;" - - -def _dispatch_cases(devices, statements): - return "\n".join( - f""" case {_DEVICE_TYPES[device]}: {{ -{statements.replace("__DEVICE_TYPE__", _DEVICE_TYPES[device])} - }}""" - for device in devices - ) +_PUBLIC_RUNTIME_FUNCTIONS = ( + _Function("Error", "SetDevice", (_Param("int", "device"),)), + _Function("Error", "GetDevice", (_Param("int*", "device"),)), + _Function("Error", "GetDeviceCount", (_Param("int*", "count"),)), + _Function("Error", "DeviceSynchronize", ()), + _Function( + "Error", + "Malloc", + (_Param("void**", "ptr"), _Param("std::size_t", "size")), + ), + _Function( + "Error", + "MallocHost", + (_Param("void**", "ptr"), _Param("std::size_t", "size")), + ), + _Function( + "Error", + "MallocAsync", + ( + _Param("void**", "ptr"), + _Param("std::size_t", "size"), + _Param("Stream", "stream"), + ), + ), + _Function("Error", "Free", (_Param("void*", "ptr"),)), + _Function("Error", "FreeHost", (_Param("void*", "ptr"),)), + _Function( + "Error", + "FreeAsync", + (_Param("void*", "ptr"), _Param("Stream", "stream")), + ), + _Function( + "Error", + "MemGetInfo", + (_Param("std::size_t*", "free"), _Param("std::size_t*", "total")), + ), + _Function( + "Error", + "Memcpy", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + ), + ), + _Function( + "Error", + "MemcpyAsync", + ( + _Param("void*", "dst"), + _Param("const void*", "src"), + _Param("std::size_t", "count"), + _Param("MemcpyKind", "kind"), + _Param("Stream", "stream"), + ), + ), + _Function( + "Error", + "Memset", + ( + _Param("void*", "ptr"), + _Param("int", "value"), + _Param("std::size_t", "count"), + ), + ), + _Function( + "Error", + "MemsetAsync", + ( + _Param("void*", "ptr"), + _Param("int", "value"), + _Param("std::size_t", "count"), + _Param("Stream", "stream"), + ), + ), + _Function("Error", "StreamCreate", (_Param("Stream*", "stream"),)), + _Function("Error", "StreamDestroy", (_Param("Stream", "stream"),)), + _Function("Error", "StreamSynchronize", (_Param("Stream", "stream"),)), + _Function( + "Error", + "StreamWaitEvent", + ( + _Param("Stream", "stream"), + _Param("Event", "event"), + _Param("unsigned int", "flags"), + ), + ), + _Function("Error", "EventCreate", (_Param("Event*", "event"),)), + _Function( + "Error", + "EventCreateWithFlags", + (_Param("Event*", "event"), _Param("unsigned int", "flags")), + ), + _Function( + "Error", + "EventRecord", + (_Param("Event", "event"), _Param("Stream", "stream")), + ), + _Function("Error", "EventQuery", (_Param("Event", "event"),)), + _Function("Error", "EventSynchronize", (_Param("Event", "event"),)), + _Function("Error", "EventDestroy", (_Param("Event", "event"),)), + _Function( + "Error", + "EventElapsedTime", + ( + _Param("float*", "ms"), + _Param("Event", "start"), + _Param("Event", "end"), + ), + ), +) -def _selector(function): - for param in function.params: - if param.type == "Device": - return f"{param.name}.type()" - if param.type == "Device::Type": - return param.name +def _default_device(devices): + for device in _DEFAULT_DEVICE_PRIORITY: + if device in devices: + return device - return "current_device.type()" + raise ValueError("at least one device is required") -def _runtime_arg(function, param): - if param.type == "Device": - if function.name in {"SetDevice", "GetDeviceResourceSnapshot"}: - return f"{param.name}.index()" - return None - if param.type == "Device::Type": - return None - if param.type in {"MemcpyKind", "infini::rt::MemcpyKind"}: - return f"RuntimeMemcpyKind<__DEVICE_TYPE__>({param.name})" +def _runtime_arg(param, device): + device_type = _DEVICE_TYPES[device] + if param.type == "MemcpyKind": + return f"RuntimeMemcpyKind<{device_type}>({param.name})" + if param.type == "Stream": + return ( + f"reinterpret_cast::Stream>({param.name})" + ) + if param.type == "Stream*": + return ( + f"reinterpret_cast::Stream*>({param.name})" + ) + if param.type == "Event": + return f"reinterpret_cast::Event>({param.name})" + if param.type == "Event*": + return ( + f"reinterpret_cast::Event*>({param.name})" + ) return param.name -def _runtime_args(function): - args = (_runtime_arg(function, param) for param in function.params) +def _runtime_args(function, device): + args = (_runtime_arg(param, device) for param in function.params) return ", ".join(arg for arg in args if arg is not None) -def _preconditions(function): - required_pointer_names = { - "GetDevice": {"device"}, - "GetDeviceCount": {"count"}, - } - checks = [] - for param in function.params: - if ( - param.type.endswith("**") - or param.type.endswith("*") - or param.name in required_pointer_names.get(function.name, set()) - ): - checks.append(f" if ({param.name} == nullptr) {{") - checks.append(" return infini::rt::kUnSuccess;") - checks.append(" }") - - return "\n".join(checks) - - -def _post_dispatch(function): - if function.name == "SetDevice": - return "\n if (rt_status == infini::rt::kSuccess) {\n current_device = Device{current_device.type(), device};\n }" - - return "" - - -def _runtime_call(function): - args = _runtime_args(function) +def _runtime_call(function, device): + device_type = _DEVICE_TYPES[device] + args = _runtime_args(function, device) if args: - return f"Runtime<__DEVICE_TYPE__>::{function.name}({args})" + return f"Runtime<{device_type}>::{function.name}({args})" - return f"Runtime<__DEVICE_TYPE__>::{function.name}()" + return f"Runtime<{device_type}>::{function.name}()" -def _write_get_device(function, devices): - device_param = function.params[0].name - cases = _dispatch_cases( - devices, - f""" infini::rt::Error status = CheckCall([&] {{ return Runtime<__DEVICE_TYPE__>::GetDevice({device_param}); }}); - if (status != infini::rt::kSuccess) {{ - return status; - }} - current_device = Device{{current_device.type(), *{device_param}}}; - return infini::rt::kSuccess;""", +def _dispatch_cases(devices, function): + return "\n".join( + f""" case {_DEVICE_TYPES[device]}: + return CheckCall([&] {{ return {_runtime_call(function, device)}; }});""" + for device in devices ) - return f"""{function.return_type} GetDevice(int* {device_param}) {{ - if ({device_param} == nullptr) {{ - return infini::rt::kUnSuccess; - }} - - switch (current_device.type()) {{ -{cases} - default: -{_unsupported_statement()} - }} -}} -""" - - -def _write_dispatch_function(function, devices): - if function.name == "GetDevice": - return _write_get_device(function, devices) - - cases = _dispatch_cases( - devices, - f""" infini::rt::Error rt_status = CheckCall([&] {{ return {_runtime_call(function)}; }});{_post_dispatch(function)} - return rt_status;""", - ) - preconditions = _preconditions(function) - if preconditions: - preconditions = f"{preconditions}\n\n" +def _write_runtime_dispatch_function(function, devices): return f"""{function.signature()} {{ -{preconditions} switch ({_selector(function)}) {{ -{cases} - default: -{_unsupported_statement()} + switch (infini::rt::runtime_device_type()) {{ +{_dispatch_cases(devices, function)} }} + + assert(false && "unsupported runtime device type"); + return InvalidValueError(); }} """ def _runtime_header_for_device(source_root, device): - return source_root / _RUNTIME_HEADERS[device] + for _, header_name, target in _DEVICE_HEADERS[device]: + if header_name == "runtime_.h": + return source_root / target + + raise ValueError(f"device {device!r} does not have a runtime header") def _devices_for_function(function, devices, source_root): - enabled = [] pattern = re.compile(r"\b" + re.escape(function.name) + r"\b") - for device in devices: - runtime_header = _runtime_header_for_device(source_root, device) - if runtime_header.exists() and pattern.search(runtime_header.read_text()): - enabled.append(device) - return enabled - - -def _write_runtime_dispatch(source_path, runtime_header, devices): - first_device_type = _DEVICE_TYPES[devices[0]] - includes = ['#include "runtime.h"'] - includes.extend(f'#include "{_RUNTIME_HEADERS[device]}"' for device in devices) - functions = _parse_runtime_functions(runtime_header) - source_root = pathlib.Path(runtime_header).parent + + return tuple( + device + for device in devices + if pattern.search(_runtime_header_for_device(source_root, device).read_text()) + ) + + +def _write_runtime_dispatch(source_path, source_root, devices): + functions = _PUBLIC_RUNTIME_FUNCTIONS dispatch_functions = "\n".join( - _write_dispatch_function( - function, _devices_for_function(function, devices, source_root) + _write_runtime_dispatch_function( + function, + devices=_devices_for_function(function, devices, source_root), ) for function in functions ) + set_device_type_cases = "\n".join( + f""" case {_DEVICE_TYPES[device]}: + runtime_device_type_ = device_type; + return;""" + for device in devices + ) source_path.parent.mkdir(parents=True, exist_ok=True) source_path.write_text( f"""#include +#include #include #include -{chr(10).join(includes)} +#include namespace infini::rt {{ namespace {{ -thread_local Device current_device{{{first_device_type}, 0}}; +thread_local Device::Type runtime_device_type_ = + runtime::generated_detail::kDefaultDeviceType; + +}} // namespace + +void set_runtime_device_type(Device::Type device_type) {{ + switch (device_type) {{ +{set_device_type_cases} + }} + + assert(false && "unsupported runtime device type"); +}} + +Device::Type runtime_device_type() {{ + return runtime_device_type_; +}} + +}} // namespace infini::rt + +namespace infini::rt::runtime {{ +namespace {{ template -infini::rt::Error CheckCall(Func&& func) {{ +Error CheckCall(Func&& func) {{ using ReturnType = decltype(std::forward(func)()); if constexpr (std::is_void_v) {{ std::forward(func)(); - return infini::rt::kSuccess; + return kSuccess; }} else {{ - ReturnType status = std::forward(func)(); - if constexpr (std::is_same_v) {{ - return status == infini::rt::kSuccess ? infini::rt::kSuccess - : infini::rt::kUnSuccess; - }} else {{ - return status == ReturnType{{}} ? infini::rt::kSuccess - : infini::rt::kUnSuccess; - }} + return static_cast(std::forward(func)()); }} }} -template -auto RuntimeMemcpyKind(infini::rt::MemcpyKind kind) {{ +Error InvalidValueError() {{ return static_cast(1); }} + +template +auto RuntimeMemcpyKind(MemcpyKind kind) {{ + using DeviceRuntime = Runtime; + switch (kind) {{ - case infini::rt::MemcpyKind::kMemcpyHostToHost: - return Runtime::kMemcpyHostToHost; - case infini::rt::MemcpyKind::kMemcpyHostToDevice: - return Runtime::kMemcpyHostToDevice; - case infini::rt::MemcpyKind::kMemcpyDeviceToHost: - return Runtime::kMemcpyDeviceToHost; - case infini::rt::MemcpyKind::kMemcpyDeviceToDevice: - return Runtime::kMemcpyDeviceToDevice; + case kMemcpyHostToHost: + return DeviceRuntime::kMemcpyHostToHost; + case kMemcpyHostToDevice: + return DeviceRuntime::kMemcpyHostToDevice; + case kMemcpyDeviceToHost: + return DeviceRuntime::kMemcpyDeviceToHost; + case kMemcpyDeviceToDevice: + return DeviceRuntime::kMemcpyDeviceToDevice; }} assert(false && "unsupported memcpy kind"); - return Runtime::kMemcpyHostToHost; + return DeviceRuntime::kMemcpyHostToHost; }} }} // namespace {dispatch_functions} -}} // namespace infini::rt +}} // namespace infini::rt::runtime """ ) @@ -457,9 +567,7 @@ def main(): _write_wrapper(include_root, wrapper_device, header_name, target) _write_generated_header(include_root, devices) - _write_runtime_dispatch( - pathlib.Path(args.source_output), args.runtime_header, devices - ) + _write_runtime_dispatch(pathlib.Path(args.source_output), source_root, devices) if __name__ == "__main__": diff --git a/src/native/ascend/runtime_.h b/src/native/ascend/runtime_.h index 8b33e54..065a9d3 100644 --- a/src/native/ascend/runtime_.h +++ b/src/native/ascend/runtime_.h @@ -11,15 +11,19 @@ #include "native/ascend/device_.h" #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : DeviceRuntime> { + using Error = aclError; + using Stream = aclrtStream; static constexpr Device::Type kDeviceType = Device::Type::kAscend; + static constexpr Error kSuccess = ACL_SUCCESS; + static constexpr auto SetDevice = aclrtSetDevice; static constexpr auto GetDevice = aclrtGetDevice; @@ -45,13 +49,19 @@ struct Runtime return aclrtMemcpy(dst, count, src, count, kind); }; - static constexpr auto MemcpyHostToHost = ACL_MEMCPY_HOST_TO_HOST; + static constexpr auto MemcpyAsync = [](void* dst, const void* src, + size_t count, aclrtMemcpyKind kind, + Stream stream) { + return aclrtMemcpyAsync(dst, count, src, count, kind, stream); + }; + + static constexpr auto kMemcpyHostToHost = ACL_MEMCPY_HOST_TO_HOST; - static constexpr auto MemcpyHostToDevice = ACL_MEMCPY_HOST_TO_DEVICE; + static constexpr auto kMemcpyHostToDevice = ACL_MEMCPY_HOST_TO_DEVICE; - static constexpr auto MemcpyDeviceToHost = ACL_MEMCPY_DEVICE_TO_HOST; + static constexpr auto kMemcpyDeviceToHost = ACL_MEMCPY_DEVICE_TO_HOST; - static constexpr auto MemcpyDeviceToDevice = ACL_MEMCPY_DEVICE_TO_DEVICE; + static constexpr auto kMemcpyDeviceToDevice = ACL_MEMCPY_DEVICE_TO_DEVICE; static constexpr auto Memset = [](void* ptr, int value, size_t count) { return aclrtMemset(ptr, count, value, count); @@ -60,6 +70,6 @@ struct Runtime static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cambricon/runtime_.h b/src/native/cambricon/runtime_.h index 4db4920..927e2c5 100644 --- a/src/native/cambricon/runtime_.h +++ b/src/native/cambricon/runtime_.h @@ -9,15 +9,19 @@ #include "native/cambricon/device_.h" #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : DeviceRuntime> { + using Error = cnrtRet_t; + using Stream = cnrtQueue_t; static constexpr Device::Type kDeviceType = Device::Type::kCambricon; + static constexpr Error kSuccess = CNRT_RET_SUCCESS; + static constexpr auto SetDevice = cnrtSetDevice; static constexpr auto GetDevice = cnrtGetDevice; @@ -41,19 +45,25 @@ struct Runtime return cnrtMemcpy(dst, const_cast(src), size, kind); }; - static constexpr auto MemcpyHostToHost = cnrtMemcpyHostToHost; + static constexpr auto MemcpyAsync = [](void* dst, const void* src, + std::size_t size, auto kind, + Stream stream) { + return cnrtMemcpyAsync(dst, const_cast(src), size, kind, stream); + }; + + static constexpr auto kMemcpyHostToHost = cnrtMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cnrtMemcpyHostToDev; + static constexpr auto kMemcpyHostToDevice = cnrtMemcpyHostToDev; - static constexpr auto MemcpyDeviceToHost = cnrtMemcpyDevToHost; + static constexpr auto kMemcpyDeviceToHost = cnrtMemcpyDevToHost; - static constexpr auto MemcpyDeviceToDevice = cnrtMemcpyDevToDev; + static constexpr auto kMemcpyDeviceToDevice = cnrtMemcpyDevToDev; static constexpr auto Memset = cnrtMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cpu/runtime_.h b/src/native/cpu/runtime_.h index d757672..c928e1f 100644 --- a/src/native/cpu/runtime_.h +++ b/src/native/cpu/runtime_.h @@ -2,7 +2,6 @@ #define INFINI_RT_CPU_RUNTIME__H_ #include -#include #include #include #include @@ -10,209 +9,225 @@ #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : RuntimeBase> { static constexpr Device::Type kDeviceType = Device::Type::kCpu; - static infini::rt::Error SetDevice(int index) { - return index == 0 ? infini::rt::kSuccess : infini::rt::kUnSuccess; + using Error = int; + + using Stream = void*; + + using Event = void*; + + static constexpr Error kSuccess = 0; + + static constexpr Error kErrorInvalidValue = 1; + + static constexpr Error kErrorMemoryAllocation = 2; + + static Error SetDevice(int device) { + if (device != 0) { + return kErrorInvalidValue; + } + + return kSuccess; } - static infini::rt::Error GetDevice(int* index) { - if (index == nullptr) { - return infini::rt::kUnSuccess; + static Error GetDevice(int* device) { + if (device == nullptr) { + return kErrorInvalidValue; } - *index = 0; - return infini::rt::kSuccess; + + *device = 0; + + return kSuccess; } - static infini::rt::Error GetDeviceCount(int* count) { + static Error GetDeviceCount(int* count) { if (count == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } + *count = 1; - return infini::rt::kSuccess; + + return kSuccess; } - static infini::rt::Error DeviceSynchronize() { return infini::rt::kSuccess; } + static Error DeviceSynchronize() { return kSuccess; } - static infini::rt::Error Malloc(void** ptr, std::size_t size) { + static Error Malloc(void** ptr, std::size_t size) { if (ptr == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } + *ptr = std::malloc(size); - return size != 0 && *ptr == nullptr ? infini::rt::kUnSuccess - : infini::rt::kSuccess; + + if (size != 0 && *ptr == nullptr) { + return kErrorMemoryAllocation; + } + + return kSuccess; } - static infini::rt::Error Free(void* ptr) { - std::free(ptr); - return infini::rt::kSuccess; + static Error MallocHost(void** ptr, std::size_t size) { + return Malloc(ptr, size); } - static infini::rt::Error Memcpy(void* dst, const void* src, std::size_t size, - infini::rt::MemcpyKind) { - if ((dst == nullptr || src == nullptr) && size != 0) { - return infini::rt::kUnSuccess; - } - std::memcpy(dst, src, size); - return infini::rt::kSuccess; + static Error MallocAsync(void** ptr, std::size_t size, Stream) { + return kErrorInvalidValue; } - static infini::rt::Error Memset(void* ptr, int value, std::size_t count) { - if (ptr == nullptr && count != 0) { - return infini::rt::kUnSuccess; - } - std::memset(ptr, value, count); - return infini::rt::kSuccess; + static Error Free(void* ptr) { + std::free(ptr); + + return kSuccess; } - static infini::rt::Error MemGetInfo(std::size_t* free_bytes, - std::size_t* total_bytes) { - if (free_bytes == nullptr || total_bytes == nullptr) { - return infini::rt::kUnSuccess; + static Error FreeHost(void* ptr) { return Free(ptr); } + + static Error FreeAsync(void* ptr, Stream) { return kErrorInvalidValue; } + + static Error MemGetInfo(std::size_t* free, std::size_t* total) { + if (free == nullptr || total == nullptr) { + return kErrorInvalidValue; } - *free_bytes = 0; - *total_bytes = 0; + + *free = 0; + *total = 0; #ifndef _WIN32 FILE* fp = std::fopen("/proc/meminfo", "r"); if (fp == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } char label[64]; std::size_t value = 0; while (std::fscanf(fp, "%63s %zu %*s", label, &value) == 2) { if (std::strcmp(label, "MemTotal:") == 0) { - *total_bytes = value * 1024; + *total = value * 1024; } else if (std::strcmp(label, "MemAvailable:") == 0) { - *free_bytes = value * 1024; + *free = value * 1024; } } std::fclose(fp); #endif - if (*total_bytes == 0) { - return infini::rt::kUnSuccess; - } - return infini::rt::kSuccess; + + return *total == 0 ? kErrorInvalidValue : kSuccess; } - static infini::rt::Error StreamCreate(infini::rt::Stream* stream) { - if (stream == nullptr) { - return infini::rt::kUnSuccess; + static Error Memcpy(void* dst, const void* src, std::size_t size, int) { + if ((dst == nullptr || src == nullptr) && size != 0) { + return kErrorInvalidValue; } - *stream = nullptr; - return infini::rt::kSuccess; + + std::memcpy(dst, src, size); + + return kSuccess; } - static infini::rt::Error StreamDestroy(infini::rt::Stream) { - return infini::rt::kSuccess; + static Error MemcpyAsync(void* dst, const void* src, std::size_t size, + int kind, Stream) { + return kErrorInvalidValue; + } + + static Error Memset(void* ptr, int value, std::size_t count) { + if (ptr == nullptr && count != 0) { + return kErrorInvalidValue; + } + + std::memset(ptr, value, count); + + return kSuccess; } - static infini::rt::Error StreamSynchronize(infini::rt::Stream) { - return infini::rt::kSuccess; + static Error MemsetAsync(void* ptr, int value, std::size_t count, Stream) { + return kErrorInvalidValue; } - static infini::rt::Error StreamWaitEvent(infini::rt::Stream, - infini::rt::Event, std::uint32_t) { - return infini::rt::kSuccess; + static Error StreamCreate(Stream* stream) { + if (stream == nullptr) { + return kErrorInvalidValue; + } + + *stream = nullptr; + + return kSuccess; } + static Error StreamDestroy(Stream) { return kSuccess; } + + static Error StreamSynchronize(Stream) { return kSuccess; } + + static Error StreamWaitEvent(Stream, Event, unsigned int) { return kSuccess; } + using CpuEvent = std::chrono::steady_clock::time_point; - static infini::rt::Error EventCreate(infini::rt::Event* event) { + static Error EventCreate(Event* event) { if (event == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } + *event = new (std::nothrow) CpuEvent(std::chrono::steady_clock::now()); - return *event == nullptr ? infini::rt::kUnSuccess : infini::rt::kSuccess; + + return *event == nullptr ? kErrorMemoryAllocation : kSuccess; } - static infini::rt::Error EventCreateWithFlags(infini::rt::Event* event, - std::uint32_t) { + static Error EventCreateWithFlags(Event* event, unsigned int) { return EventCreate(event); } - static infini::rt::Error EventRecord(infini::rt::Event event, - infini::rt::Stream) { + static Error EventRecord(Event event, Stream) { if (event == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } + *static_cast(event) = std::chrono::steady_clock::now(); - return infini::rt::kSuccess; + + return kSuccess; } - static infini::rt::Error EventQuery(infini::rt::Event event) { - return event == nullptr ? infini::rt::kUnSuccess : infini::rt::kSuccess; + static Error EventQuery(Event event) { + return event == nullptr ? kErrorInvalidValue : kSuccess; } - static infini::rt::Error EventSynchronize(infini::rt::Event event) { - return event == nullptr ? infini::rt::kUnSuccess : infini::rt::kSuccess; + static Error EventSynchronize(Event event) { + return event == nullptr ? kErrorInvalidValue : kSuccess; } - static infini::rt::Error EventDestroy(infini::rt::Event event) { + static Error EventDestroy(Event event) { delete static_cast(event); - return infini::rt::kSuccess; + + return kSuccess; } - static infini::rt::Error EventElapsedTime(float* ms, infini::rt::Event start, - infini::rt::Event end) { + static Error EventElapsedTime(float* ms, Event start, Event end) { if (ms == nullptr || start == nullptr || end == nullptr) { - return infini::rt::kUnSuccess; + return kErrorInvalidValue; } + const auto* start_time = static_cast(start); const auto* end_time = static_cast(end); const auto duration = std::chrono::duration_cast( *end_time - *start_time); *ms = static_cast(duration.count()) / 1000.0f; - return infini::rt::kSuccess; - } - - static infini::rt::Error MallocHost(void** ptr, std::size_t size) { - return Malloc(ptr, size); - } - - static infini::rt::Error FreeHost(void* ptr) { return Free(ptr); } - - static infini::rt::Error MemcpyAsync(void* dst, const void* src, - std::size_t size, - infini::rt::MemcpyKind kind, - infini::rt::Stream) { - return Memcpy(dst, src, size, kind); - } - - static infini::rt::Error MallocAsync(void** ptr, std::size_t size, - infini::rt::Stream) { - return Malloc(ptr, size); - } - - static infini::rt::Error FreeAsync(void* ptr, infini::rt::Stream) { - return Free(ptr); - } - static infini::rt::Error MemsetAsync(void* ptr, int value, std::size_t count, - infini::rt::Stream) { - return Memset(ptr, value, count); + return kSuccess; } - static constexpr auto kMemcpyHostToHost = - infini::rt::MemcpyKind::kMemcpyHostToHost; + static constexpr int kMemcpyHostToHost = 0; - static constexpr auto kMemcpyHostToDevice = - infini::rt::MemcpyKind::kMemcpyHostToDevice; + static constexpr int kMemcpyHostToDevice = 1; - static constexpr auto kMemcpyDeviceToHost = - infini::rt::MemcpyKind::kMemcpyDeviceToHost; + static constexpr int kMemcpyDeviceToHost = 2; - static constexpr auto kMemcpyDeviceToDevice = - infini::rt::MemcpyKind::kMemcpyDeviceToDevice; + static constexpr int kMemcpyDeviceToDevice = 3; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/hygon/runtime_.h b/src/native/cuda/hygon/runtime_.h index a9f41ec..52c47eb 100644 --- a/src/native/cuda/hygon/runtime_.h +++ b/src/native/cuda/hygon/runtime_.h @@ -10,34 +10,52 @@ #include "native/cuda/hygon/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kHygon; + static constexpr Error kSuccess = cudaSuccess; + + static constexpr auto SetDevice = cudaSetDevice; + + static constexpr auto GetDevice = cudaGetDevice; + + static constexpr auto GetDeviceCount = cudaGetDeviceCount; + + static constexpr auto DeviceSynchronize = cudaDeviceSynchronize; + static constexpr auto Malloc = [](auto&&... args) { return cudaMalloc(std::forward(args)...); }; static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = [](auto&&... args) { return cudaFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; + + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; + + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/iluvatar/runtime_.h b/src/native/cuda/iluvatar/runtime_.h index 8a1b649..f49db23 100644 --- a/src/native/cuda/iluvatar/runtime_.h +++ b/src/native/cuda/iluvatar/runtime_.h @@ -10,15 +10,19 @@ #include "native/cuda/iluvatar/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kIluvatar; + static constexpr Error kSuccess = cudaSuccess; + static constexpr auto SetDevice = cudaSetDevice; static constexpr auto GetDevice = cudaGetDevice; @@ -33,21 +37,23 @@ struct Runtime static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = cudaFree; - static constexpr auto MemcpyHostToHost = cudaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/metax/runtime_.h b/src/native/cuda/metax/runtime_.h index 5785a51..c1f19c0 100644 --- a/src/native/cuda/metax/runtime_.h +++ b/src/native/cuda/metax/runtime_.h @@ -8,15 +8,19 @@ #include "native/cuda/metax/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : CudaRuntime> { + using Error = mcError_t; + using Stream = mcStream_t; static constexpr Device::Type kDeviceType = Device::Type::kMetax; + static constexpr Error kSuccess = mcSuccess; + static constexpr auto SetDevice = mcSetDevice; static constexpr auto GetDevice = mcGetDevice; @@ -33,23 +37,27 @@ struct Runtime return mcMemcpy(std::forward(args)...); }; + static constexpr auto MemcpyAsync = [](auto&&... args) { + return mcMemcpyAsync(std::forward(args)...); + }; + static constexpr auto Free = [](auto&&... args) { return mcFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToHost = mcMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = mcMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = mcMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = mcMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = mcMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = mcMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = mcMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = mcMemcpyDeviceToDevice; static constexpr auto Memset = mcMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/moore/runtime_.h b/src/native/cuda/moore/runtime_.h index 8ced2ed..5beffcf 100644 --- a/src/native/cuda/moore/runtime_.h +++ b/src/native/cuda/moore/runtime_.h @@ -8,15 +8,19 @@ #include "native/cuda/moore/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : CudaRuntime> { + using Error = musaError_t; + using Stream = musaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kMoore; + static constexpr Error kSuccess = musaSuccess; + static constexpr auto SetDevice = musaSetDevice; static constexpr auto GetDevice = [](auto&&... args) { @@ -39,23 +43,27 @@ struct Runtime return musaMemcpy(std::forward(args)...); }; + static constexpr auto MemcpyAsync = [](auto&&... args) { + return musaMemcpyAsync(std::forward(args)...); + }; + static constexpr auto Free = [](auto&&... args) { return musaFree(std::forward(args)...); }; - static constexpr auto MemcpyHostToHost = musaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = musaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = musaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = musaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = musaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = musaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = musaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = musaMemcpyDeviceToDevice; static constexpr auto Memset = musaMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/nvidia/runtime_.h b/src/native/cuda/nvidia/runtime_.h index f6a9f2d..1786e08 100644 --- a/src/native/cuda/nvidia/runtime_.h +++ b/src/native/cuda/nvidia/runtime_.h @@ -10,15 +10,19 @@ #include "native/cuda/nvidia/device_.h" #include "native/cuda/runtime_.h" -namespace infini::rt { +namespace infini::rt::runtime { template <> struct Runtime : CudaRuntime> { + using Error = cudaError_t; + using Stream = cudaStream_t; static constexpr Device::Type kDeviceType = Device::Type::kNvidia; + static constexpr Error kSuccess = cudaSuccess; + static constexpr auto SetDevice = cudaSetDevice; static constexpr auto GetDevice = cudaGetDevice; @@ -33,21 +37,23 @@ struct Runtime static constexpr auto Memcpy = cudaMemcpy; + static constexpr auto MemcpyAsync = cudaMemcpyAsync; + static constexpr auto Free = cudaFree; - static constexpr auto MemcpyHostToHost = cudaMemcpyHostToHost; + static constexpr auto kMemcpyHostToHost = cudaMemcpyHostToHost; - static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + static constexpr auto kMemcpyHostToDevice = cudaMemcpyHostToDevice; - static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + static constexpr auto kMemcpyDeviceToHost = cudaMemcpyDeviceToHost; - static constexpr auto MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; + static constexpr auto kMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice; static constexpr auto Memset = cudaMemset; }; static_assert(Runtime::Validate()); -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/native/cuda/runtime_.h b/src/native/cuda/runtime_.h index 8765a05..59dd6bc 100644 --- a/src/native/cuda/runtime_.h +++ b/src/native/cuda/runtime_.h @@ -5,7 +5,7 @@ #include "runtime.h" -namespace infini::rt { +namespace infini::rt::runtime { /// ## CUDA-like runtime interface enforcement via CRTP. /// @@ -17,13 +17,19 @@ struct CudaRuntime : DeviceRuntime { DeviceRuntime::Validate(); static_assert( std::is_invocable_v, + size_t, decltype(Derived::kMemcpyHostToDevice)>, "`Runtime::Memcpy` must be callable with " - "`(void*, const void*, size_t, MemcpyHostToDevice)`."); + "`(void*, const void*, size_t, kMemcpyHostToDevice)`."); + static_assert( + std::is_invocable_v, + "`Runtime::MemcpyAsync` must be callable with " + "`(void*, const void*, size_t, kMemcpyHostToDevice, Stream)`."); return true; } }; -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/runtime.h b/src/runtime.h index e983dca..c3e7640 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -2,12 +2,11 @@ #define INFINI_RT_RUNTIME_H_ #include -#include #include #include "device.h" -namespace infini::rt { +namespace infini::rt::runtime { template struct Runtime; @@ -15,8 +14,9 @@ struct Runtime; /// ## Interface enforcement via CRTP. /// /// Inherit from the appropriate base to declare which interface level a -/// `Runtime` specialization implements. After the struct is fully defined, call -/// `static_assert(Runtime<...>::Validate())`. The chained `Validate()` checks +/// `runtime::Runtime` specialization implements. After the struct is fully +/// defined, call `static_assert(Runtime<...>::Validate())`. The chained +/// `Validate()` checks /// every required member's existence and signature at compile time, analogous /// to how `override` catches signature mismatches for virtual functions. /// @@ -31,6 +31,11 @@ struct RuntimeBase { std::is_same_v, Device::Type>, "`Runtime` must define `static constexpr Device::Type kDeviceType`."); + static_assert(sizeof(typename Derived::Error) > 0, + "`Runtime` must define an `Error` type alias."); + static_assert(std::is_same_v, + typename Derived::Error>, + "`Runtime` must define `static constexpr Error kSuccess`."); return true; } }; @@ -52,85 +57,6 @@ struct DeviceRuntime : RuntimeBase { } }; -enum class Error { - kSuccess = 0, - kUnSuccess = -1, -}; - -inline constexpr infini::rt::Error kSuccess = infini::rt::Error::kSuccess; -inline constexpr infini::rt::Error kUnSuccess = infini::rt::Error::kUnSuccess; - -enum class MemcpyKind { - kMemcpyHostToHost, - kMemcpyHostToDevice, - kMemcpyDeviceToHost, - kMemcpyDeviceToDevice, -}; - -using Stream = void*; -using Event = void*; - -infini::rt::Error SetDevice(int device); - -infini::rt::Error GetDevice(int* device); - -infini::rt::Error GetDeviceCount(int* count); - -infini::rt::Error DeviceSynchronize(); - -infini::rt::Error Malloc(void** ptr, std::size_t size); - -infini::rt::Error Free(void* ptr); - -infini::rt::Error Memset(void* ptr, int value, std::size_t count); - -infini::rt::Error Memcpy(void* dst, const void* src, std::size_t count, - infini::rt::MemcpyKind kind); - -infini::rt::Error MallocHost(void** ptr, std::size_t size); - -infini::rt::Error FreeHost(void* ptr); - -infini::rt::Error MemcpyAsync(void* dst, const void* src, std::size_t count, - infini::rt::MemcpyKind kind, - infini::rt::Stream stream); - -infini::rt::Error MallocAsync(void** ptr, std::size_t size, - infini::rt::Stream stream); - -infini::rt::Error FreeAsync(void* ptr, infini::rt::Stream stream); - -infini::rt::Error MemsetAsync(void* ptr, int value, std::size_t count, - infini::rt::Stream stream); - -infini::rt::Error MemGetInfo(std::size_t* free_bytes, std::size_t* total_bytes); - -infini::rt::Error StreamCreate(infini::rt::Stream* stream); - -infini::rt::Error StreamDestroy(infini::rt::Stream stream); - -infini::rt::Error StreamSynchronize(infini::rt::Stream stream); - -infini::rt::Error StreamWaitEvent(infini::rt::Stream stream, - infini::rt::Event event, std::uint32_t flags); - -infini::rt::Error EventCreate(infini::rt::Event* event); - -infini::rt::Error EventCreateWithFlags(infini::rt::Event* event, - std::uint32_t flags); - -infini::rt::Error EventRecord(infini::rt::Event event, - infini::rt::Stream stream); - -infini::rt::Error EventQuery(infini::rt::Event event); - -infini::rt::Error EventSynchronize(infini::rt::Event event); - -infini::rt::Error EventDestroy(infini::rt::Event event); - -infini::rt::Error EventElapsedTime(float* ms, infini::rt::Event start, - infini::rt::Event end); - -} // namespace infini::rt +} // namespace infini::rt::runtime #endif diff --git a/src/tensor_view.h b/src/tensor_view.h index e747d19..dcf7cc9 100644 --- a/src/tensor_view.h +++ b/src/tensor_view.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include "data_type.h" @@ -11,6 +13,22 @@ namespace infini::rt { +namespace tensor_view_detail { + +template +struct IsTensorLike : std::false_type {}; + +template +struct IsTensorLike().data()), + decltype(std::declval().shape()), + decltype(std::declval().dtype()), + decltype(std::declval().device()), + decltype(std::declval().strides())>> + : std::true_type {}; + +} // namespace tensor_view_detail + class TensorView { public: using Size = std::size_t; @@ -23,6 +41,16 @@ class TensorView { using Strides = std::vector; + template ::value>> + TensorView(const TensorLike& tensor) + : data_{const_cast(static_cast(tensor.data()))}, + shape_{tensor.shape()}, + dtype_{tensor.dtype()}, + device_{tensor.device()}, + strides_{tensor.strides()} {} + template TensorView(void* data, const Shape& shape) : data_{data}, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ab54530..f24954c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,12 +7,20 @@ endfunction() add_infini_rt_test(test_smoke test_smoke.cc) add_infini_rt_test(test_core test_core.cc) -if(WITH_CPU) - add_infini_rt_test(test_cpu_runtime test_cpu_runtime.cc) -endif() - if(WITH_CPU OR WITH_NVIDIA) add_infini_rt_test(test_runtime_dispatch test_runtime_dispatch.cc) + if(WITH_CPU) + target_compile_definitions(test_runtime_dispatch + PRIVATE INFINI_RT_TEST_WITH_CPU=1) + endif() + if(WITH_NVIDIA) + target_compile_definitions(test_runtime_dispatch + PRIVATE INFINI_RT_TEST_WITH_NVIDIA=1) + endif() +endif() + +if(WITH_CPU) + add_infini_rt_test(test_cpu_runtime test_cpu_runtime.cc) endif() if(WITH_NVIDIA) @@ -24,12 +32,20 @@ set(INFINI_RT_TEST_INSTALL_PREFIX set(INFINI_RT_TEST_CONSUMER_BINARY "${CMAKE_CURRENT_BINARY_DIR}/install_consumer_smoke") set(INFINI_RT_TEST_EXTRA_LIBRARY_DIRS "") +set(INFINI_RT_TEST_EXTRA_INCLUDE_DIRS "") set(INFINI_RT_TEST_CONSUMER_BACKEND NONE) -if(WITH_CPU) - set(INFINI_RT_TEST_CONSUMER_BACKEND CPU) -elseif(WITH_NVIDIA) +if(WITH_NVIDIA) set(INFINI_RT_TEST_CONSUMER_BACKEND NVIDIA) + if(CUDAToolkit_INCLUDE_DIRS) + list(APPEND INFINI_RT_TEST_EXTRA_INCLUDE_DIRS + ${CUDAToolkit_INCLUDE_DIRS}) + elseif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) + list(APPEND INFINI_RT_TEST_EXTRA_INCLUDE_DIRS + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + endif() +elseif(WITH_CPU) + set(INFINI_RT_TEST_CONSUMER_BACKEND CPU) endif() if(WITH_ASCEND) @@ -44,6 +60,7 @@ if(WITH_ASCEND) endif() list(JOIN INFINI_RT_TEST_EXTRA_LIBRARY_DIRS ":" INFINI_RT_TEST_EXTRA_LIBRARY_PATHS) +list(JOIN INFINI_RT_TEST_EXTRA_INCLUDE_DIRS ":" INFINI_RT_TEST_EXTRA_INCLUDE_PATHS) add_test( NAME test_install @@ -59,6 +76,7 @@ add_test( "-DINFINI_RT_CONSUMER_BINARY=${INFINI_RT_TEST_CONSUMER_BINARY}" "-DINFINI_RT_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DINFINI_RT_EXTRA_LIBRARY_PATHS=${INFINI_RT_TEST_EXTRA_LIBRARY_PATHS}" + "-DINFINI_RT_EXTRA_INCLUDE_PATHS=${INFINI_RT_TEST_EXTRA_INCLUDE_PATHS}" "-DINFINI_RT_CONSUMER_BACKEND=${INFINI_RT_TEST_CONSUMER_BACKEND}" -P "${CMAKE_CURRENT_SOURCE_DIR}/compile_install_consumer.cmake") set_tests_properties(test_install_consumer PROPERTIES diff --git a/tests/compile_install_consumer.cmake b/tests/compile_install_consumer.cmake index 169311e..1bb96d1 100644 --- a/tests/compile_install_consumer.cmake +++ b/tests/compile_install_consumer.cmake @@ -17,6 +17,17 @@ set(INFINI_RT_EXTRA_LINK_ARGS "") set(INFINI_RT_EXTRA_COMPILE_ARGS "") set(INFINI_RT_LD_LIBRARY_PATH "${INFINI_RT_LIBRARY_DIR}") +if(INFINI_RT_EXTRA_INCLUDE_PATHS) + string(REPLACE ":" ";" INFINI_RT_EXTRA_INCLUDE_DIRS + "${INFINI_RT_EXTRA_INCLUDE_PATHS}") + foreach(INFINI_RT_EXTRA_INCLUDE_DIR ${INFINI_RT_EXTRA_INCLUDE_DIRS}) + if(EXISTS "${INFINI_RT_EXTRA_INCLUDE_DIR}") + list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS + "-I${INFINI_RT_EXTRA_INCLUDE_DIR}") + endif() + endforeach() +endif() + if(INFINI_RT_CONSUMER_BACKEND AND NOT INFINI_RT_CONSUMER_BACKEND STREQUAL "NONE") list(APPEND INFINI_RT_EXTRA_COMPILE_ARGS "-DINFINI_RT_CONSUMER_BACKEND_${INFINI_RT_CONSUMER_BACKEND}=1") diff --git a/tests/install_consumer_smoke.cc b/tests/install_consumer_smoke.cc index 97442b5..0684763 100644 --- a/tests/install_consumer_smoke.cc +++ b/tests/install_consumer_smoke.cc @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -20,19 +21,73 @@ int main() { #if defined(INFINI_RT_CONSUMER_BACKEND_CPU) || \ defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) -#if defined(INFINI_RT_CONSUMER_BACKEND_NVIDIA) - const infini::rt::Device runtime_device{infini::rt::Device::Type::kNvidia}; + namespace runtime = infini::rt::runtime; +#if defined(INFINI_RT_CONSUMER_BACKEND_CPU) + constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kCpu; #else - const infini::rt::Device runtime_device{infini::rt::Device::Type::kCpu}; + constexpr auto kExpectedDeviceType = infini::rt::Device::Type::kNvidia; #endif + infini::rt::set_runtime_device_type(kExpectedDeviceType); + if (infini::rt::runtime_device_type() != kExpectedDeviceType) { + return 1; + } + std::array input{1, 2, 3, 4}; + std::array output{}; void* ptr = nullptr; - infini::rt::SetDevice(runtime_device); - infini::rt::Malloc(&ptr, sizeof(std::uint32_t)); + if (runtime::SetDevice(0) != runtime::kSuccess) { + return 1; + } + int current_device = -1; + if (runtime::GetDevice(¤t_device) != runtime::kSuccess) { + return 1; + } + if (current_device != 0) { + return 1; + } + int device_count = 0; + if (runtime::GetDeviceCount(&device_count) != runtime::kSuccess) { + return 1; + } + if (device_count <= 0) { + return 1; + } + if (runtime::Malloc(&ptr, input.size()) != runtime::kSuccess) { + return 1; + } if (ptr == nullptr) { return 1; } - infini::rt::Free(ptr); + if (runtime::Memcpy(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice) != runtime::kSuccess) { + return 1; + } +#if defined(INFINI_RT_CONSUMER_BACKEND_CPU) + if (runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice, + nullptr) == runtime::kSuccess) { + return 1; + } +#else + if (runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice, + nullptr) != runtime::kSuccess) { + return 1; + } +#endif + if (runtime::DeviceSynchronize() != runtime::kSuccess) { + return 1; + } + if (runtime::Memcpy(output.data(), ptr, output.size(), + runtime::kMemcpyDeviceToHost) != runtime::kSuccess) { + return 1; + } + if (output != input) { + return 1; + } + if (runtime::Free(ptr) != runtime::kSuccess) { + return 1; + } #endif return 0; diff --git a/tests/test_core.cc b/tests/test_core.cc index e8b8071..273ecef 100644 --- a/tests/test_core.cc +++ b/tests/test_core.cc @@ -1,7 +1,9 @@ #include +#include #include #include +#include #include #include "test_helper.h" @@ -12,6 +14,9 @@ using infini::rt::DataType; using infini::rt::Device; using infini::rt::TensorView; +static_assert(!std::is_constructible_v>, + "TensorView should not treat tensor containers as tensor-like."); + void TestDevice(infini::rt::test::TestContext* context) { const Device cpu{Device::Type::kCpu}; const Device nvidia{Device::Type::kNvidia, 1}; diff --git a/tests/test_cpu_runtime.cc b/tests/test_cpu_runtime.cc index 20edce9..e066c85 100644 --- a/tests/test_cpu_runtime.cc +++ b/tests/test_cpu_runtime.cc @@ -9,7 +9,7 @@ namespace { -using CpuRuntime = infini::rt::Runtime; +using CpuRuntime = infini::rt::runtime::Runtime; void TestMallocAndFree(infini::rt::test::TestContext* context) { void* ptr = nullptr; @@ -32,15 +32,26 @@ void TestMemcpyRoundTrip(infini::rt::test::TestContext* context) { } CpuRuntime::Memcpy(ptr, input.data(), input.size(), - CpuRuntime::MemcpyHostToDevice); + CpuRuntime::kMemcpyHostToDevice); CpuRuntime::Memcpy(output.data(), ptr, output.size(), - CpuRuntime::MemcpyDeviceToHost); + CpuRuntime::kMemcpyDeviceToHost); CpuRuntime::Free(ptr); context->ExpectEqual(output, input, "CPU runtime should copy data through runtime memory."); } +void TestMemcpyAsyncUnsupported(infini::rt::test::TestContext* context) { + std::array input{1}; + std::array output{}; + + context->Expect( + CpuRuntime::MemcpyAsync(output.data(), input.data(), input.size(), + CpuRuntime::kMemcpyHostToHost, + nullptr) != CpuRuntime::kSuccess, + "CPU runtime should not report async memcpy success."); +} + void TestMemset(infini::rt::test::TestContext* context) { std::array output{}; void* ptr = nullptr; @@ -53,7 +64,7 @@ void TestMemset(infini::rt::test::TestContext* context) { CpuRuntime::Memset(ptr, 0x5A, output.size()); CpuRuntime::Memcpy(output.data(), ptr, output.size(), - CpuRuntime::MemcpyDeviceToHost); + CpuRuntime::kMemcpyDeviceToHost); CpuRuntime::Free(ptr); for (const auto value : output) { @@ -69,6 +80,7 @@ int main() { TestMallocAndFree(&context); TestMemcpyRoundTrip(&context); + TestMemcpyAsyncUnsupported(&context); TestMemset(&context); return context.ExitCode(); diff --git a/tests/test_nvidia_runtime.cc b/tests/test_nvidia_runtime.cc index 2d3dac2..fa10858 100644 --- a/tests/test_nvidia_runtime.cc +++ b/tests/test_nvidia_runtime.cc @@ -9,7 +9,8 @@ namespace { -using NvidiaRuntime = infini::rt::Runtime; +using NvidiaRuntime = + infini::rt::runtime::Runtime; void ExpectCudaSuccess(infini::rt::test::TestContext* context, cudaError_t status, const char* message) { @@ -41,11 +42,11 @@ void TestMemcpyRoundTrip(infini::rt::test::TestContext* context) { ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(ptr, input.data(), input.size(), - NvidiaRuntime::MemcpyHostToDevice), + NvidiaRuntime::kMemcpyHostToDevice), "NVIDIA runtime should copy host data to device memory."); ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(output.data(), ptr, output.size(), - NvidiaRuntime::MemcpyDeviceToHost), + NvidiaRuntime::kMemcpyDeviceToHost), "NVIDIA runtime should copy device data to host memory."); ExpectCudaSuccess(context, NvidiaRuntime::Free(ptr), "NVIDIA runtime should free copy memory."); @@ -68,7 +69,7 @@ void TestMemset(infini::rt::test::TestContext* context) { "NVIDIA runtime should memset device memory."); ExpectCudaSuccess(context, NvidiaRuntime::Memcpy(output.data(), ptr, output.size(), - NvidiaRuntime::MemcpyDeviceToHost), + NvidiaRuntime::kMemcpyDeviceToHost), "NVIDIA runtime should copy memset data to host memory."); ExpectCudaSuccess(context, NvidiaRuntime::Free(ptr), "NVIDIA runtime should free memset memory."); diff --git a/tests/test_runtime_dispatch.cc b/tests/test_runtime_dispatch.cc index 9be92a7..4d6e15f 100644 --- a/tests/test_runtime_dispatch.cc +++ b/tests/test_runtime_dispatch.cc @@ -8,58 +8,151 @@ namespace { -infini::rt::Device RuntimeTestDevice() { -#if defined(WITH_NVIDIA) - return infini::rt::Device{infini::rt::Device::Type::kNvidia}; -#else - return infini::rt::Device{infini::rt::Device::Type::kCpu}; -#endif +namespace runtime = infini::rt::runtime; + +void ExpectSuccess(infini::rt::test::TestContext* context, + runtime::Error status, const char* message) { + context->Expect(status == runtime::kSuccess, message); } -} // namespace +#if defined(INFINI_RT_TEST_WITH_CPU) +void TestCpuDispatch(infini::rt::test::TestContext* context) { + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kCpu); + context->Expect( + infini::rt::runtime_device_type() == infini::rt::Device::Type::kCpu, + "Runtime dispatch should report CPU dispatch."); -int main() { - infini::rt::test::TestContext context; - const infini::rt::Device device = RuntimeTestDevice(); std::array input{1, 2, 3, 4}; std::array output{}; void* ptr = nullptr; - infini::rt::SetDevice(device); + ExpectSuccess(context, runtime::SetDevice(0), + "CPU dispatch should set device 0."); + int current_device = -1; + ExpectSuccess(context, runtime::GetDevice(¤t_device), + "CPU dispatch should get the current device."); + context->ExpectEqual(current_device, 0, + "CPU dispatch should keep the current device."); + + int device_count = 0; + ExpectSuccess(context, runtime::GetDeviceCount(&device_count), + "CPU dispatch should get the device count."); + context->Expect(device_count > 0, + "CPU dispatch should report at least one device."); + + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), + "CPU dispatch should allocate memory."); + if (ptr == nullptr) { + return; + } + + ExpectSuccess(context, + runtime::Memcpy(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice), + "CPU dispatch should copy host data to runtime memory."); + context->Expect(runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice, + nullptr) != runtime::kSuccess, + "CPU dispatch should not report async memcpy success."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::kMemcpyDeviceToHost), + "CPU dispatch should copy runtime memory to host."); + + context->ExpectEqual(output, input, + "CPU dispatch should preserve copied bytes."); + + ExpectSuccess(context, runtime::Memset(ptr, 0x5A, output.size()), + "CPU dispatch should fill runtime memory."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::kMemcpyDeviceToHost), + "CPU dispatch should copy filled memory to host."); + for (const auto value : output) { + context->ExpectEqual(value, static_cast(0x5A), + "CPU dispatch should preserve filled bytes."); + } + + ExpectSuccess(context, runtime::Free(ptr), + "CPU dispatch should free memory."); +} +#endif + +#if defined(INFINI_RT_TEST_WITH_NVIDIA) +void TestNvidiaDispatch(infini::rt::test::TestContext* context) { + infini::rt::set_runtime_device_type(infini::rt::Device::Type::kNvidia); + context->Expect( + infini::rt::runtime_device_type() == infini::rt::Device::Type::kNvidia, + "Runtime dispatch should report NVIDIA dispatch."); + + std::array input{5, 6, 7, 8}; + std::array output{}; + void* ptr = nullptr; - infini::rt::Device current_device; - infini::rt::GetDevice(¤t_device); - context.ExpectEqual(current_device, device, - "Runtime dispatch should keep the current device."); + ExpectSuccess(context, runtime::SetDevice(0), + "NVIDIA dispatch should set device 0."); + int current_device = -1; + ExpectSuccess(context, runtime::GetDevice(¤t_device), + "NVIDIA dispatch should get the current device."); + context->ExpectEqual(current_device, 0, + "NVIDIA dispatch should keep the current device."); int device_count = 0; - infini::rt::GetDeviceCount(&device_count, device.type()); - context.Expect(device_count > 0, - "Runtime dispatch should report at least one device."); + ExpectSuccess(context, runtime::GetDeviceCount(&device_count), + "NVIDIA dispatch should get the device count."); + context->Expect(device_count > 0, + "NVIDIA dispatch should report at least one device."); - infini::rt::Malloc(&ptr, input.size()); - context.Expect(ptr != nullptr, "Runtime dispatch should allocate memory."); + ExpectSuccess(context, runtime::Malloc(&ptr, input.size()), + "NVIDIA dispatch should allocate memory."); if (ptr == nullptr) { - return context.ExitCode(); + return; } - infini::rt::Memcpy(ptr, input.data(), input.size(), - infini::rt::MemcpyKind::kHostToDevice); - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kDeviceToHost); - context.ExpectEqual(output, input, - "Runtime dispatch should copy data through memory."); + ExpectSuccess(context, + runtime::MemcpyAsync(ptr, input.data(), input.size(), + runtime::kMemcpyHostToDevice, nullptr), + "NVIDIA dispatch should support async host-to-device copy."); + ExpectSuccess(context, runtime::DeviceSynchronize(), + "NVIDIA dispatch should synchronize the device."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::kMemcpyDeviceToHost), + "NVIDIA dispatch should copy device data to host."); - infini::rt::Memset(ptr, 0x5A, output.size()); - infini::rt::Memcpy(output.data(), ptr, output.size(), - infini::rt::MemcpyKind::kDeviceToHost); + context->ExpectEqual(output, input, + "NVIDIA dispatch should preserve copied bytes."); + + ExpectSuccess(context, runtime::Memset(ptr, 0x5A, output.size()), + "NVIDIA dispatch should fill runtime memory."); + ExpectSuccess(context, runtime::DeviceSynchronize(), + "NVIDIA dispatch should synchronize filled memory."); + ExpectSuccess(context, + runtime::Memcpy(output.data(), ptr, output.size(), + runtime::kMemcpyDeviceToHost), + "NVIDIA dispatch should copy filled memory to host."); for (const auto value : output) { - context.ExpectEqual(value, static_cast(0x5A), - "Runtime dispatch should fill memory."); + context->ExpectEqual(value, static_cast(0x5A), + "NVIDIA dispatch should preserve filled bytes."); } - infini::rt::DeviceSynchronize(); - infini::rt::Free(ptr); + ExpectSuccess(context, runtime::Free(ptr), + "NVIDIA dispatch should free memory."); +} +#endif + +} // namespace + +int main() { + infini::rt::test::TestContext context; + +#if defined(INFINI_RT_TEST_WITH_CPU) + TestCpuDispatch(&context); +#endif + +#if defined(INFINI_RT_TEST_WITH_NVIDIA) + TestNvidiaDispatch(&context); +#endif return context.ExitCode(); }