From d8195e9bc069e82ea66133f0d5b1ae40ad3d0b6c Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Sun, 27 Aug 2023 17:43:05 +0000 Subject: [PATCH 1/5] [CMake] Add NCCL to TVM and TVM Runtime (#15605) This PR introduces NCCL in the cmake system. NCCL is NVIDIA's library for distributed communication. --- CMakeLists.txt | 15 ++++++++++ cmake/config.cmake | 6 ++++ cmake/modules/LibInfo.cmake | 1 + cmake/utils/FindNCCL.cmake | 56 +++++++++++++++++++++++++++++++++++++ src/support/libinfo.cc | 5 ++++ 5 files changed, 83 insertions(+) create mode 100644 cmake/utils/FindNCCL.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index f7c34fa22bf7..7c40a08b9be5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ include(cmake/utils/Utils.cmake) include(cmake/utils/Summary.cmake) include(cmake/utils/Linker.cmake) include(cmake/utils/FindCUDA.cmake) +include(cmake/utils/FindNCCL.cmake) include(cmake/utils/FindOpenCL.cmake) include(cmake/utils/FindVulkan.cmake) include(cmake/utils/FindLLVM.cmake) @@ -25,6 +26,7 @@ endif() # and add set(OPTION VALUE) to override these build options. # Alernatively, use cmake -DOPTION=VALUE through command-line. tvm_option(USE_CUDA "Build with CUDA" OFF) +tvm_option(USE_NCCL "Build with NCCL" OFF) tvm_option(USE_OPENCL "Build with OpenCL" OFF) tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF) tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." 
/path/to/opencl/gtest) @@ -350,6 +352,7 @@ list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc") tvm_file_glob(GLOB RUNTIME_SRCS src/runtime/*.cc src/runtime/vm/*.cc + src/runtime/disco/*.cc src/runtime/minrpc/*.cc src/runtime/relax_vm/*.cc ) @@ -434,6 +437,13 @@ if(USE_PROFILER) list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS}) endif(USE_PROFILER) +if(USE_CUDA AND USE_NCCL) + message(STATUS "Build with NCCL...") + find_nccl(${USE_NCCL}) + tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC}) +endif() + if(USE_AOT_EXECUTOR) message(STATUS "Build with AOT Executor support...") file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc) @@ -850,3 +860,8 @@ if(USE_CUDA AND USE_CUTLASS) target_link_libraries(tvm PRIVATE -Wl,--no-as-needed flash_attn) target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed flash_attn) endif() + +if(USE_CUDA AND USE_NCCL) + target_link_libraries(tvm_runtime PRIVATE nccl) + target_link_libraries(tvm PRIVATE nccl) +endif() diff --git a/cmake/config.cmake b/cmake/config.cmake index 4990e52d634f..1fa1765da5d6 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -48,6 +48,12 @@ # - /path/to/cuda: use specific path to cuda toolkit set(USE_CUDA OFF) +# Whether to enable NCCL support: +# - ON: enable NCCL with cmake's auto search +# - OFF: disable NCCL +# - /path/to/nccl: use specific path to nccl +set(USE_NCCL OFF) + # Whether enable ROCM runtime # # Possible values: diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 9e1f71c72938..bb283912af98 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -64,6 +64,7 @@ function(add_lib_info src_file) TVM_INFO_USE_CPP_RTVM="${USE_CPP_RTVM}" TVM_INFO_USE_CUBLAS="${USE_CUBLAS}" TVM_INFO_USE_CUDA="${USE_CUDA}" + TVM_INFO_USE_NCCL="${USE_NCCL}" TVM_INFO_USE_CUDNN="${USE_CUDNN}" TVM_INFO_USE_CUSTOM_LOGGING="${USE_CUSTOM_LOGGING}" 
TVM_INFO_USE_CUTLASS="${USE_CUTLASS}" diff --git a/cmake/utils/FindNCCL.cmake b/cmake/utils/FindNCCL.cmake new file mode 100644 index 000000000000..0cabaf74f879 --- /dev/null +++ b/cmake/utils/FindNCCL.cmake @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# NCCL_ROOT - When set, this path is inspected instead of standard library +# locations as the root of the NCCL installation. +# The environment variable NCCL_ROOT overrides this variable. 
+# +# This module defines +# Nccl_FOUND, whether nccl has been found +# NCCL_INCLUDE_DIR, directory containing header +# NCCL_LIBRARY, directory containing nccl library +# This module assumes that the user has already called find_package(CUDA) + +macro(find_nccl use_nccl) + if(${use_nccl} MATCHES ${IS_FALSE_PATTERN}) + return() + endif() + if(${use_nccl} MATCHES ${IS_TRUE_PATTERN}) + find_path(NCCL_INCLUDE_DIR NAMES nccl.h) + find_library(NCCL_LIBRARY NAMES nccl) + else() + find_path(NCCL_INCLUDE_DIR NAMES nccl.h HINTS ${use_nccl} ${use_nccl}/include) + find_library(NCCL_LIBRARY NAMES nccl HINTS ${use_nccl} ${use_nccl}/lib) + endif() + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Nccl DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARY) + if (Nccl_FOUND) + message(STATUS "Found NCCL_LIBRARY: ${NCCL_LIBRARY}") + message(STATUS "Found NCCL_INCLUDE_DIR: ${NCCL_INCLUDE_DIR}") + add_library(nccl SHARED IMPORTED) + set_target_properties(nccl + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIR}" + IMPORTED_LOCATION "${NCCL_LIBRARY}") + else() + message(STATUS "NCCL not found") + endif() + mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARY) +endmacro(find_nccl) diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index 3f028ba65657..68cf771bf006 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -35,6 +35,10 @@ #define TVM_INFO_USE_CUDA "NOT-FOUND" #endif +#ifndef TVM_INFO_USE_NCCL +#define TVM_INFO_USE_NCCL "NOT-FOUND" +#endif + #ifndef TVM_INFO_CUDA_VERSION #define TVM_INFO_CUDA_VERSION "NOT-FOUND" #endif @@ -281,6 +285,7 @@ TVM_DLL Map GetLibInfo() { {"USE_CPP_RTVM", TVM_INFO_USE_CPP_RTVM}, {"USE_CUBLAS", TVM_INFO_USE_CUBLAS}, {"USE_CUDA", TVM_INFO_USE_CUDA}, + {"USE_NCCL", TVM_INFO_USE_NCCL}, {"USE_CUDNN", TVM_INFO_USE_CUDNN}, {"USE_CUSTOM_LOGGING", TVM_INFO_USE_CUSTOM_LOGGING}, {"USE_CUTLASS", TVM_INFO_USE_CUTLASS}, From bebf5902aeb0075ff7406a9d2c410206d2bd7dbb Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Sun, 
27 Aug 2023 17:43:49 +0000 Subject: [PATCH 2/5] [Runtime] Expose ModuleGetFunction as PackedFunc (#15623) This PR exposes `Module.GetFunction` as a global PackedFunc. Previously, the only way to access this method was via TVM's C API, and the C++ PackedFunc API was missing. This PR patches this issue. --- src/runtime/module.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/runtime/module.cc b/src/runtime/module.cc index cf7c5d921f2f..de372e5de053 100644 --- a/src/runtime/module.cc +++ b/src/runtime/module.cc @@ -198,6 +198,10 @@ TVM_REGISTER_GLOBAL("runtime.ModuleGetFormat").set_body_typed([](Module mod) { }); TVM_REGISTER_GLOBAL("runtime.ModuleLoadFromFile").set_body_typed(Module::LoadFromFile); +TVM_REGISTER_GLOBAL("runtime.ModuleGetFunction") + .set_body_typed([](Module mod, String name, bool query_imports) { + return mod->GetFunction(name, query_imports); + }); TVM_REGISTER_GLOBAL("runtime.ModuleSaveToFile") .set_body_typed([](Module mod, String name, tvm::String fmt) { mod->SaveToFile(name, fmt); }); From d3f9d3dfd76f032baf3cd70ef0281f7fdb176240 Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Sun, 27 Aug 2023 17:52:42 +0000 Subject: [PATCH 3/5] [Runtime] Utils to Stringify Device (#15630) There exists some basic functionality to convert Device and DLDeviceType to std::string, but it does not follow the common naming convention in TVM and is thus less discoverable. 
This commit makes changes accordingly: - `runtime::DeviceName` to `runtime::DLDeviceType2Str` - move declaration of `operator << (std::ostream&, Device)` from `runtime/device_api.h` to `runtime/packed_func.h` --- include/tvm/runtime/data_type.h | 1 + include/tvm/runtime/device_api.h | 50 +---------- include/tvm/runtime/packed_func.h | 107 +++++++++++++++++++++--- include/tvm/tir/op.h | 1 + src/runtime/c_runtime_api.cc | 2 +- src/runtime/profiling.cc | 6 +- src/runtime/relax_vm/memory_manager.cc | 18 ++-- src/runtime/rpc/rpc_module.cc | 1 + src/runtime/vm/memory_manager.cc | 15 ++-- src/tir/transforms/lower_tvm_builtin.cc | 4 +- 10 files changed, 120 insertions(+), 85 deletions(-) diff --git a/include/tvm/runtime/data_type.h b/include/tvm/runtime/data_type.h index 9fb113f56b2c..ac7e879a644d 100644 --- a/include/tvm/runtime/data_type.h +++ b/include/tvm/runtime/data_type.h @@ -339,6 +339,7 @@ inline const char* DLDataTypeCode2Str(DLDataTypeCode type_code) { default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); } + throw; } inline std::ostream& operator<<(std::ostream& os, DLDataType t) { // NOLINT(*) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 654018565716..cb0eb7c21f11 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -245,54 +245,6 @@ class TVM_DLL DeviceAPI { constexpr int kRPCSessMask = 128; static_assert(kRPCSessMask >= TVMDeviceExtType_End); -/*! - * \brief The name of Device API factory. - * \param type The device type. - * \return the device name. 
- */ -inline const char* DeviceName(int type) { - switch (type) { - case kDLCPU: - return "cpu"; - case kDLCUDA: - return "cuda"; - case kDLCUDAHost: - return "cuda_host"; - case kDLCUDAManaged: - return "cuda_managed"; - case kDLOpenCL: - return "opencl"; - case kDLSDAccel: - return "sdaccel"; - case kDLAOCL: - return "aocl"; - case kDLVulkan: - return "vulkan"; - case kDLMetal: - return "metal"; - case kDLVPI: - return "vpi"; - case kDLROCM: - return "rocm"; - case kDLROCMHost: - return "rocm_host"; - case kDLExtDev: - return "ext_dev"; - case kDLOneAPI: - return "oneapi"; - case kDLWebGPU: - return "webgpu"; - case kDLHexagon: - return "hexagon"; - case kOpenGL: - return "opengl"; - case kDLMicroDev: - return "microdev"; - default: - LOG(FATAL) << "unknown type =" << type; - } -} - /*! * \brief Return true if a Device is owned by an RPC session. */ @@ -324,7 +276,7 @@ inline std::ostream& operator<<(std::ostream& os, DLDevice dev) { // NOLINT(*) os << "remote[" << tvm::runtime::GetRPCSessionIndex(dev) << "]-"; dev = tvm::runtime::RemoveRPCSessionMask(dev); } - os << tvm::runtime::DeviceName(static_cast(dev.device_type)) << "(" << dev.device_id << ")"; + os << tvm::runtime::DLDeviceType2Str(static_cast(dev.device_type)) << ":" << dev.device_id; return os; } diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 660c24284b8d..e63e92835cc5 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -418,6 +418,8 @@ class TVMArgs { */ inline const char* ArgTypeCode2Str(int type_code); +inline std::ostream& operator<<(std::ostream& os, DLDevice dev); // NOLINT(*) + // macro to check type code. #define TVM_CHECK_TYPE_CODE(CODE, T) \ ICHECK_EQ(CODE, T) << "expected " << ArgTypeCode2Str(T) << " but got " << ArgTypeCode2Str(CODE) @@ -1257,6 +1259,56 @@ inline const char* ArgTypeCode2Str(int type_code) { default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); } + throw; +} + +/*! 
+ * \brief The name of DLDeviceType. + * \param type The device type. + * \return the device name. + */ +inline const char* DLDeviceType2Str(int type) { + switch (type) { + case kDLCPU: + return "cpu"; + case kDLCUDA: + return "cuda"; + case kDLCUDAHost: + return "cuda_host"; + case kDLCUDAManaged: + return "cuda_managed"; + case kDLOpenCL: + return "opencl"; + case kDLSDAccel: + return "sdaccel"; + case kDLAOCL: + return "aocl"; + case kDLVulkan: + return "vulkan"; + case kDLMetal: + return "metal"; + case kDLVPI: + return "vpi"; + case kDLROCM: + return "rocm"; + case kDLROCMHost: + return "rocm_host"; + case kDLExtDev: + return "ext_dev"; + case kDLOneAPI: + return "oneapi"; + case kDLWebGPU: + return "webgpu"; + case kDLHexagon: + return "hexagon"; + case kOpenGL: + return "opengl"; + case kDLMicroDev: + return "microdev"; + default: + LOG(FATAL) << "unknown type = " << type; + } + throw; } namespace detail { @@ -1284,13 +1336,27 @@ namespace parameter_pack { template struct EnumeratedParamPack { - struct Invoke { - template