diff --git a/.clang-format b/.clang-format index 4a88069..23b99fb 100644 --- a/.clang-format +++ b/.clang-format @@ -46,7 +46,7 @@ BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' +#CommentPragmas: '^ IWYU pragma:' CommentPragmas: '^\\.+' CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 0736f43..b341304 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,15 +4,18 @@ project(jitify LANGUAGES CXX CUDA) option(ASAN "Enable address sanitizer in debug build" ON) # C++ compiler options. -set (CMAKE_CXX_STANDARD 11) -set (CMAKE_CUDA_STANDARD 11) # Doesn't work? +set (CMAKE_CXX_STANDARD 17) +set (CMAKE_CUDA_STANDARD 17) # Doesn't work? set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ") if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2") else() set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wconversion -Wshadow -fmessage-length=80") + "${CMAKE_CXX_FLAGS} -O3 \ + -Wall -Wextra -Wconversion -Wshadow -fmessage-length=80 \ + -D_FILE_OFFSET_BITS=64 \ + ") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") if (ASAN) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} \ @@ -32,7 +35,11 @@ if (MSVC) "${CMAKE_CUDA_FLAGS_RELEASE} -O3 -Xcompiler=\"/O2\"") else() set(CMAKE_CUDA_FLAGS - "${CMAKE_CUDA_FLAGS} -Xcompiler=\"-Wall -Wextra -Wconversion -Wshadow\" -O3 -rdc=true") + "${CMAKE_CUDA_FLAGS} -O3 \ + -Xcompiler=\"-Wall -Wextra -Wconversion -Wshadow -fmessage-length=80 \" \ + -D_FILE_OFFSET_BITS=64 \ + -rdc=true \ + ") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g") endif() @@ -76,6 +83,15 @@ add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src ${CMAKE_CURRENT_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL) +# ---- +# NVTX +# ---- +# Download and unpack nvtx at configure time. +configure_file(CMakeLists.txt.in nvtx-download/CMakeLists.txt) +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/nvtx-src/c + ${CMAKE_CURRENT_BINARY_DIR}/nvtx-build + EXCLUDE_FROM_ALL) + # ---- # Executable utilities # ---- @@ -135,8 +151,10 @@ foreach(test ${TESTS}) target_include_directories(${test} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) # Ensure the main jitify header can be found. target_include_directories(${test} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - target_link_libraries(${test} gtest_main) + target_link_libraries(${test} gtest_main nvtx3-cpp) set_property(TARGET ${test} PROPERTY CUDA_ARCHITECTURES OFF) + target_compile_definitions(${test} + PUBLIC JITIFY_ENABLE_NVTX=1) if (${test} MATCHES "_static$") target_compile_definitions(${test} PUBLIC JITIFY_LINK_CUDA_STATIC=1 diff --git a/CMakeLists.txt.in b/CMakeLists.txt.in index 764f48d..f37e590 100644 --- a/CMakeLists.txt.in +++ b/CMakeLists.txt.in @@ -13,3 +13,13 @@ ExternalProject_Add(googletest INSTALL_COMMAND "" TEST_COMMAND "" ) +ExternalProject_Add(nvtx3 + GIT_REPOSITORY https://github.com/NVIDIA/NVTX.git + GIT_TAG release-v3 + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/nvtx-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/nvtx-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/LICENSE b/LICENSE index b678a46..a4d873b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2017-2024, NVIDIA Corporation +Copyright (c) 2017-2025, NVIDIA Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/example_headers/class_arg_kernel.cuh b/example_headers/class_arg_kernel.cuh index 19dd48a..b452ba3 100644 --- a/example_headers/class_arg_kernel.cuh +++ b/example_headers/class_arg_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/constant_header.cuh b/example_headers/constant_header.cuh index f3f1cc9..0eaf9bf 100644 --- a/example_headers/constant_header.cuh +++ b/example_headers/constant_header.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header1.cuh b/example_headers/my_header1.cuh index 7f07df7..38027c9 100644 --- a/example_headers/my_header1.cuh +++ b/example_headers/my_header1.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header2.cuh b/example_headers/my_header2.cuh index f5a90c2..c776fae 100644 --- a/example_headers/my_header2.cuh +++ b/example_headers/my_header2.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header3.cuh b/example_headers/my_header3.cuh index 4933de5..e5f3cc7 100644 --- a/example_headers/my_header3.cuh +++ b/example_headers/my_header3.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/jitify2.hpp b/jitify2.hpp index d5d379d..740a086 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -42,21 +42,31 @@ #define JITIFY2_HPP_INCLUDE_GUARD #include +#include #include #include #include #include +#include #include #include #include #include #include +#ifdef _MSC_VER // MSVC compiler +// In MSVC, __cplusplus is always 199711L unless the `/Zc:__cplusplus` option is +// specified, so need to use _MSVC_LANG instead. +#define JITIFY_CPLUSPLUS _MSVC_LANG +#else +#define JITIFY_CPLUSPLUS __cplusplus +#endif + // This macro is used by source files generated by jitify_preprocess to avoid // unnecessary dependencies. #ifdef JITIFY_SERIALIZATION_ONLY -#if __cplusplus >= 201703L +#if JITIFY_CPLUSPLUS >= 201703L #include #endif @@ -94,10 +104,21 @@ #define JITIFY_FAIL_IMMEDIATELY 0 #endif +// Adds options and headers logging to compilation error messages. +#ifndef JITIFY_VERBOSE_ERRORS +#define JITIFY_VERBOSE_ERRORS 0 +#endif + #ifndef JITIFY_USE_LIBCUFILT #define JITIFY_USE_LIBCUFILT 0 // Use Jitify's builtin demangler by default #endif +// Users can enable this to disable the is_trivially_copyable assertion on +// kernel args. +#ifndef JITIFY_IGNORE_NOT_TRIVIALLY_COPYABLE_ARGS +#define JITIFY_IGNORE_NOT_TRIVIALLY_COPYABLE_ARGS 0 +#endif + #if CUDA_VERSION >= 11040 && JITIFY_USE_LIBCUFILT #include // For __cu_demangle (requires linking with libcufilt.a) #endif @@ -110,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +144,7 @@ #define JITIFY_IF_THREAD_SAFE(x) #endif -#if __cplusplus >= 201402L +#if JITIFY_CPLUSPLUS >= 201402L #define JITIFY_DEPRECATED(msg) [[deprecated(msg)]] #else #define JITIFY_DEPRECATED(msg) @@ -147,6 +169,7 @@ #include // For mkdir #include // For open, O_RDWR etc. #include // For _sopen_s +#include // For _fullpath #include // For _LK_LOCK etc. #define JITIFY_PATH_MAX MAX_PATH #else @@ -177,9 +200,11 @@ #else // TODO: Would std::exit or std::abort be better than std::terminate? #include -#define JITIFY_THROW_OR_TERMINATE(msg) \ - std::cerr << "Jitify fatal error: " << (msg) << std::endl; \ - std::terminate() +#define JITIFY_THROW_OR_TERMINATE(msg) \ + do { \ + std::cerr << "Jitify fatal error: " << (msg) << std::endl; \ + std::terminate(); \ + } while (0) #endif #if JITIFY_ENABLE_EXCEPTIONS @@ -201,13 +226,34 @@ #endif // not JITIFY_SERIALIZATION_ONLY +#ifndef JITIFY_ENABLE_NVTX +#define JITIFY_ENABLE_NVTX 0 +#endif + +#if JITIFY_ENABLE_NVTX +#include +#define JITIFY_NVTX_FUNC_RANGE() NVTX3_FUNC_RANGE_IN(JitifyNvtxDomain) +#else +#define JITIFY_NVTX_FUNC_RANGE() +#endif // JITIFY_ENABLE_NVTX + namespace jitify2 { +#if JITIFY_ENABLE_NVTX +struct JitifyNvtxDomain { + static constexpr char const* name{"jitify"}; +}; + +using nvtx_scoped_range = nvtx3::scoped_range_in; +#else +using nvtx_scoped_range = std::string; +#endif + // Convenience aliases. using StringVec = std::vector; using StringMap = std::unordered_map; -#if __cplusplus >= 201703L +#if JITIFY_CPLUSPLUS >= 201703L using StringRef = std::string_view; using StringSlice = std::string_view; #else @@ -215,11 +261,81 @@ using StringRef = const std::string&; using StringSlice = std::string; #endif +namespace detail { + +// Strip whitespace from string in-place. +inline void ltrim(std::string* s) { + s->erase(s->begin(), std::find_if(s->begin(), s->end(), [](unsigned char c) { + return !std::isspace(c); + })); +} +inline void rtrim(std::string* s) { + s->erase(std::find_if(s->rbegin(), s->rend(), + [](unsigned char c) { return !std::isspace(c); }) + .base(), + s->end()); +} +inline void trim(std::string* s) { + ltrim(s); + rtrim(s); +} + +// Strip whitespace from a string view. +inline StringSlice ltrim(StringRef s) { + size_t beg = std::find_if(s.begin(), s.end(), + [](unsigned char c) { return !std::isspace(c); }) - + s.begin(); + return s.substr(beg); +} +inline StringSlice rtrim(StringRef s) { + size_t end = std::find_if(s.rbegin(), s.rend(), + [](unsigned char c) { return !std::isspace(c); }) + .base() - + s.begin(); + return s.substr(0, end); +} +inline StringSlice trim(StringRef s) { return rtrim(ltrim(s)); } + +} // namespace detail + class Option { + void set_key_and_value() { + // TODO: Consider changing key and value to be views into key_and_value to + // avoid double-storage. + if (value_.empty()) { + key_and_value_ = key_; + } else { + key_and_value_.reserve(key_.size() + 1 + value_.size()); + key_and_value_.append(key_); + key_and_value_.append("="); + key_and_value_.append(value_); + } + } + public: Option() = default; - explicit Option(std::string _key, std::string _value = {}, - StringVec _repr = {}) + explicit Option(std::string raw) { + const size_t eql = raw.find('='); + if (eql != std::string::npos) { + // Parse "-key=val". + key_ = raw.substr(0, eql); + value_ = raw.substr(eql + 1); + } else if (raw.size() > 2 && + // HACK: Special case for '-l' linker flag. + (std::isupper(static_cast(raw[1])) || + (raw[1] == 'l' && raw.substr(0, 9) != "-lineinfo"))) { + // Parse "-Kval". + key_ = raw.substr(0, 2); + value_ = raw.substr(2); + } else { + // Parse "-key" (no value). + key_ = raw; + } + detail::trim(&value_); // Strip whitespace + repr_ = {std::move(raw)}; + set_key_and_value(); + } + Option(std::string _key, std::string _value, StringVec _repr = {}) : key_(std::move(_key)), value_(std::move(_value)), repr_(std::move(_repr)) { @@ -229,16 +345,7 @@ class Option { repr_.front() += "=" + value_; } } - // TODO: Consider changing key and value to be views into key_and_value to - // avoid double-storage. - if (value_.empty()) { - key_and_value_ = key_; - } else { - key_and_value_.reserve(key_.size() + 1 + value_.size()); - key_and_value_.append(key_); - key_and_value_.append("="); - key_and_value_.append(value_); - } + set_key_and_value(); } const std::string& key() const { return key_; } @@ -261,6 +368,8 @@ class Option { return !(lhs == rhs); } + explicit operator bool() const { return !key_.empty(); } + private: std::string key_; std::string value_; @@ -268,43 +377,6 @@ class Option { StringVec repr_; }; -namespace detail { - -// Strip whitespace from string in-place. -inline void ltrim(std::string* s) { - s->erase(s->begin(), std::find_if(s->begin(), s->end(), [](unsigned char c) { - return !std::isspace(c); - })); -} -inline void rtrim(std::string* s) { - s->erase(std::find_if(s->rbegin(), s->rend(), - [](unsigned char c) { return !std::isspace(c); }) - .base(), - s->end()); -} -inline void trim(std::string* s) { - ltrim(s); - rtrim(s); -} - -// Strip whitespace from a string view. -inline StringSlice ltrim(StringRef s) { - size_t beg = std::find_if(s.begin(), s.end(), - [](unsigned char c) { return !std::isspace(c); }) - - s.begin(); - return s.substr(beg); -} -inline StringSlice rtrim(StringRef s) { - size_t end = std::find_if(s.rbegin(), s.rend(), - [](unsigned char c) { return !std::isspace(c); }) - .base() - - s.begin(); - return s.substr(0, end); -} -inline StringSlice trim(StringRef s) { return rtrim(ltrim(s)); } - -} // namespace detail - class OptionsVec { using vec_type = std::vector