From 22e7b412ac6ab85767facc782feea68bfb3226c2 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Tue, 17 Oct 2023 17:51:27 +1100 Subject: [PATCH 01/12] Add helpful error if F_OFD_SETLKW is not defined --- jitify2.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jitify2.hpp b/jitify2.hpp index 7e0c434..cb40f42 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -6336,6 +6336,9 @@ class NewFile { #if defined _WIN32 || defined _WIN64 bool success = ::_locking(fd_, _LK_LOCK, 1) == 0; #else +#ifndef F_OFD_SETLKW +#error F_OFD_SETLKW is not defined; try building with -D_FILE_OFFSET_BITS=64 +#endif // F_OFD_SETLKW flock fl = {}; fl.l_type = F_WRLCK; // Exclusive lock for writing fl.l_whence = SEEK_SET; // Start at beginning of file From 712be5b446fd70bab79cc2207530c6258d39485c Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Tue, 17 Oct 2023 17:52:38 +1100 Subject: [PATCH 02/12] Only link tests against nvJitLink when CUDA >= 12 --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7f9ccd..c75c508 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,7 +133,10 @@ foreach(test ${TESTS}) PUBLIC JITIFY_LINK_CUDA_STATIC=1 PUBLIC JITIFY_LINK_NVRTC_STATIC=1 PUBLIC JITIFY_LINK_NVJITLINK_STATIC=1) - target_link_libraries(${test} cuda nvrtc nvJitLink) + target_link_libraries(${test} cuda nvrtc) + if (CUDA_VERSION_MAJOR GREATER_EQUAL 12) + target_link_libraries(${test} nvJitLink) + endif() endif() if (NOT WIN32) target_link_libraries(${test} ${CMAKE_DL_LIBS}) From 0e33846a48274fffe0ffa2fa61fad80cad456c22 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Tue, 17 Oct 2023 17:53:28 +1100 Subject: [PATCH 03/12] Add ASAN to debug build, with option to disable - This can be useful for debugging. --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c75c508..0736f43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.9) project(jitify LANGUAGES CXX CUDA) +option(ASAN "Enable address sanitizer in debug build" ON) + # C++ compiler options. set (CMAKE_CXX_STANDARD 11) set (CMAKE_CUDA_STANDARD 11) # Doesn't work? @@ -12,6 +14,13 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wconversion -Wshadow -fmessage-length=80") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") + if (ASAN) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} \ + -fsanitize=undefined,address") + else() + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} \ + -fsanitize=undefined") + endif() endif() # CUDA compiler options. From 7dbefd8dc75bb0b947d78bacad9db6de5399e28d Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Thu, 19 Oct 2023 11:57:54 +1100 Subject: [PATCH 04/12] Update jitify2 copyright years --- LICENSE | 2 +- example_headers/class_arg_kernel.cuh | 2 +- example_headers/constant_header.cuh | 2 +- example_headers/my_header1.cuh | 2 +- example_headers/my_header2.cuh | 2 +- example_headers/my_header3.cuh | 2 +- jitify2.hpp | 2 +- jitify2_preprocess.cpp | 2 +- jitify2_test.cu | 2 +- jitify2_test_kernels.cu | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/LICENSE b/LICENSE index 76d9aee..b678a46 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2017-2020, NVIDIA Corporation +Copyright (c) 2017-2024, NVIDIA Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/example_headers/class_arg_kernel.cuh b/example_headers/class_arg_kernel.cuh index 318be35..19dd48a 100644 --- a/example_headers/class_arg_kernel.cuh +++ b/example_headers/class_arg_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/constant_header.cuh b/example_headers/constant_header.cuh index 8879cb4..f3f1cc9 100644 --- a/example_headers/constant_header.cuh +++ b/example_headers/constant_header.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header1.cuh b/example_headers/my_header1.cuh index 92deddf..7f07df7 100644 --- a/example_headers/my_header1.cuh +++ b/example_headers/my_header1.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header2.cuh b/example_headers/my_header2.cuh index cd793ae..f5a90c2 100644 --- a/example_headers/my_header2.cuh +++ b/example_headers/my_header2.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/example_headers/my_header3.cuh b/example_headers/my_header3.cuh index 81fd018..4933de5 100644 --- a/example_headers/my_header3.cuh +++ b/example_headers/my_header3.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/jitify2.hpp b/jitify2.hpp index cb40f42..afffb8c 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/jitify2_preprocess.cpp b/jitify2_preprocess.cpp index fc19c90..a211dd3 100644 --- a/jitify2_preprocess.cpp +++ b/jitify2_preprocess.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/jitify2_test.cu b/jitify2_test.cu index 6a1e1cc..816e1e3 100644 --- a/jitify2_test.cu +++ b/jitify2_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/jitify2_test_kernels.cu b/jitify2_test_kernels.cu index 7b6f86f..8dbcbab 100644 --- a/jitify2_test_kernels.cu +++ b/jitify2_test_kernels.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions From 304e1d3ce733b21c63dcbee32f34fd7129bba432 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Fri, 20 Oct 2023 16:01:08 +1100 Subject: [PATCH 05/12] Change [u]intptr_t definitions to match libcudacxx - This avoids "incompatible redefinition" errors. --- jitify2.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index afffb8c..2f41358 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -4126,7 +4126,6 @@ typedef signed short int_least16_t; typedef signed int int_least32_t; typedef signed long long int_least64_t; typedef signed long long intmax_t; -typedef signed long intptr_t; // optional typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; @@ -4140,11 +4139,8 @@ typedef unsigned short uint_least16_t; typedef unsigned int uint_least32_t; typedef unsigned long long uint_least64_t; typedef unsigned long long uintmax_t; -#if defined _WIN32 || defined _WIN64 -typedef unsigned long long uintptr_t; // optional -#else // not Windows -typedef unsigned long uintptr_t; // optional -#endif +typedef int64_t intptr_t; // optional +typedef uint64_t uintptr_t; // optional )"); JITIFY_DEFINE_C_AND_CXX_HEADERS_EX(stdio, "#include ", R"( From 24c72868ae4174f851bccd3fd240a06243f75ac6 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Fri, 20 Oct 2023 16:03:31 +1100 Subject: [PATCH 06/12] Fix warning and tests for CUDA 12.3 --- jitify2.hpp | 3 +++ jitify2_test.cu | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index 2f41358..21ad52d 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -1411,6 +1411,9 @@ class LibNvJitLink case NVJITLINK_ERROR_PTX_COMPILE: return "NVJITLINK_ERROR_PTX_COMPILE"; case NVJITLINK_ERROR_NVVM_COMPILE: return "NVJITLINK_ERROR_NVVM_COMPILE"; case NVJITLINK_ERROR_INTERNAL: return "NVJITLINK_ERROR_INTERNAL"; +#if CUDA_VERSION >= 12030 + case NVJITLINK_ERROR_THREADPOOL: return "NVJITLINK_ERROR_THREADPOOL"; +#endif } // clang-format on return "(unknown nvJitLink error)"; diff --git a/jitify2_test.cu b/jitify2_test.cu index 816e1e3..64a43d0 100644 --- a/jitify2_test.cu +++ b/jitify2_test.cu @@ -1220,10 +1220,12 @@ __global__ void foo_kernel(int* data) { std::string::npos); EXPECT_TRUE(ptx.find(".global .align 4 .u32 used_scalar_init = 3;") != std::string::npos); - EXPECT_TRUE(ptx.find(".global .align 4 .b8 used_array_init[8] = {4, 0, 0, 0, " - "5, 0, 0, 0};") != std::string::npos); - EXPECT_TRUE(ptx.find(".global .align 8 .b8 used_struct_init[16] = {6, 0, 0, " - "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};") != + // Note: Since CUDA 12.3, the array initialization values do not include + // trailing zeros (e.g., "{6};" instead of "{6, 0, 0, 0, ...};". + EXPECT_TRUE( + ptx.find(".global .align 4 .b8 used_array_init[8] = {4, 0, 0, 0, 5") != + std::string::npos); + EXPECT_TRUE(ptx.find(".global .align 8 .b8 used_struct_init[16] = {6") != std::string::npos); EXPECT_FALSE(ptx.find("_ZN3Foo5valueE") != std::string::npos); EXPECT_FALSE(ptx.find("unused_scalar;") != std::string::npos); @@ -1454,11 +1456,16 @@ __global__ void my_kernel() {} } TEST(Jitify2Test, Thrust) { - // TODO: The need to include cstddef here under CUDA 12.0 may be related to - // the local/system include ambiguity problem in Jitify. // clang-format off static const char* const source = R"( -#include // WAR for CUDA 12.0 build +// WAR for header include issue (note: order of includes matters): +// https://github.com/NVIDIA/jitify/issues/107#issuecomment-1225617951 +#include +#include +#include +#include +namespace std { using ::ptrdiff_t; } + #include __global__ void my_kernel(thrust::counting_iterator begin, thrust::counting_iterator end) { @@ -1488,6 +1495,13 @@ TEST(Jitify2Test, CubBlockPrimitives) { #define ProcessFloatMinusZero BaseDigitExtractor::ProcessFloatMinusZero #endif +// WAR for header include issue (note: order of includes matters): +// https://github.com/NVIDIA/jitify/issues/107#issuecomment-1225617951 +#include +#include +#include +#include + #include #include #include @@ -1662,15 +1676,19 @@ hopefully.*/ const char* const foo = R"foo(abc\def ghi"')foo"; // )' - #include // Here's a comment - #include // Here's another comment - const char* const linecont_str = "line1 \ line2"; const char c = '\xff'; -#include #if CUDA_VERSION >= 11000 + +// WAR for header include issue (note: order of includes matters): +// https://github.com/NVIDIA/jitify/issues/107#issuecomment-1225617951 +#include +#include +#include +#include + // CUB headers can be tricky to parse. #include #include @@ -1678,6 +1696,11 @@ const char c = '\xff'; #include #endif // CUDA_VERSION >= 11000 +#include + + #include // Here's a comment + #include // Here's another comment + #include "example_headers/my_header1.cuh" __global__ void my_kernel() {} )"; From e4084e075e6e32abde83c73748a815e532046180 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 23 Oct 2023 10:59:16 +1100 Subject: [PATCH 07/12] Fix some more locale issues --- jitify2.hpp | 6 +++++- jitify2_preprocess.cpp | 13 ++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index 21ad52d..ac32cd2 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -3441,6 +3441,10 @@ inline bool ptx_parse_decl_name(const std::string& line, std::string* name) { return true; } +inline bool is_alpha(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + inline bool ptx_remove_unused_globals(std::string* ptx) { std::istringstream iss(*ptx); StringVec lines; @@ -3468,7 +3472,7 @@ inline bool ptx_remove_unused_globals(std::string* ptx) { const char* token_delims = " \t()[]{},;+-*/~&|^?:=!<>\"'\\"; for (auto token : split_string(terms[i], -1, token_delims)) { if ( // Ignore non-names - !(std::isalpha(token[0]) || token[0] == '_' || token[0] == '$') || + !(is_alpha(token[0]) || token[0] == '_' || token[0] == '$') || token.find('.') != std::string::npos || // Ignore variable/parameter declarations terms[i - 1][0] == '.' || diff --git a/jitify2_preprocess.cpp b/jitify2_preprocess.cpp index a211dd3..36b84d1 100644 --- a/jitify2_preprocess.cpp +++ b/jitify2_preprocess.cpp @@ -104,11 +104,11 @@ void write_serialized_headers_as_cpp_source(std::istream& istream, // begins with a digit. std::string sanitize_varname(const std::string& s) { std::string r = s; - if (std::isdigit(r[0])) { + if (std::isdigit((unsigned char)r[0])) { r = '_' + r; } for (std::string::iterator it = r.begin(); it != r.end(); ++it) { - if (!std::isalnum(*it)) { + if (!std::isalnum((unsigned char)*it)) { *it = '_'; } } @@ -200,7 +200,8 @@ int main(int argc, char* argv[]) { } else if (arg == "-s" || arg == "--shared-headers") { arg_c = *++argv; if (!arg_c) { - std::cerr << "Expected filename after -s" << std::endl; + std::cerr << "Expected filename after -s / --shared-headers" + << std::endl; return EXIT_FAILURE; } shared_headers_filename = arg_c; @@ -279,7 +280,8 @@ int main(int argc, char* argv[]) { std::string output_filename = path_join(output_dir, source_filename + ".jit.hpp"); if (!make_directories_for(output_filename)) return EXIT_FAILURE; - std::ofstream file(output_filename, std::ios::binary); + std::ofstream file(output_filename); + file.imbue(std::locale::classic()); write_serialized_program_as_cpp_header(ss, file, source_varname, shared_headers_varname); if (!file) { @@ -306,7 +308,8 @@ int main(int argc, char* argv[]) { std::string output_filename = path_join(output_dir, shared_headers_filename + ".jit.cpp"); if (!make_directories_for(output_filename)) return EXIT_FAILURE; - std::ofstream file(output_filename, std::ios::binary); + std::ofstream file(output_filename); + file.imbue(std::locale::classic()); write_serialized_headers_as_cpp_source(ss, file, shared_headers_varname); if (!file) { std::cerr << "Error writing output to " << output_filename << std::endl; From 6d892ecf912361f73a433b3d6b940e1b3e2f45af Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 23 Oct 2023 11:03:27 +1100 Subject: [PATCH 08/12] Fix and optimize the way text files are loaded --- jitify2.hpp | 27 +++++++++++++++++++++++---- jitify2_preprocess.cpp | 12 +----------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index ac32cd2..53c393c 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -5531,12 +5531,31 @@ inline std::string path_simplify(StringRef path) { return ss.str(); } +// Reads a whole text file into *content. Returns false on failure. inline bool read_text_file(const std::string& fullpath, std::string* content) { - std::ifstream file(fullpath.c_str()); + FILE* file = ::fopen(fullpath.c_str(), "r"); if (!file) return false; - std::stringstream buf; - buf << file.rdbuf(); - *content = buf.str(); + std::unique_ptr> + unique_file(file); +#ifdef POSIX_FADV_WILLNEED + // Hints to potentially improve read performance. + ::posix_fadvise(::fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL); + ::posix_fadvise(::fileno(file), 0, 0, POSIX_FADV_WILLNEED); +#endif + if (::fseek(file, 0, SEEK_END)) return false; + const long size = ::ftell(file); + if (::fseek(file, 0, SEEK_SET)) return false; + content->resize(size); + // Note: This supports empty (size=0) files. + if ((long)::fread(&(*content)[0], 1, size, file) != size) return false; + // Crop off trailing null characters that may arise due to multi-character + // newline conversions (e.g., on Windows). + const size_t last_char_pos = content->find_last_not_of("\0"); + if (last_char_pos == std::string::npos) { + content->resize(0); + } else { + content->resize(last_char_pos + 1); + } return true; } diff --git a/jitify2_preprocess.cpp b/jitify2_preprocess.cpp index 36b84d1..18ab29d 100644 --- a/jitify2_preprocess.cpp +++ b/jitify2_preprocess.cpp @@ -115,16 +115,6 @@ std::string sanitize_varname(const std::string& s) { return r; } -bool read_file(const std::string& fullpath, std::string* content) { - std::ifstream file(fullpath.c_str(), std::ios::binary | std::ios::ate); - if (!file) return false; - std::streamsize size = file.tellg(); - file.seekg(0, std::ios::beg); - content->resize(size); - file.read(&(*content)[0], size); - return true; -} - bool make_directories_for(const std::string& filename) { using jitify2::detail::make_directories; using jitify2::detail::path_base; @@ -245,7 +235,7 @@ int main(int argc, char* argv[]) { StringMap all_header_sources; for (const std::string& source_filename : source_filenames) { std::string source; - if (!read_file(source_filename, &source)) { + if (!jitify2::detail::read_text_file(source_filename, &source)) { std::cerr << "Error reading source file " << source_filename << std::endl; return EXIT_FAILURE; } From bb4bac945b56973d117f24b26fee98aa68f8d3ec Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Thu, 19 Oct 2023 11:49:17 +1100 Subject: [PATCH 09/12] Add dry-run flag to jitify2_preprocess --- jitify2_preprocess.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/jitify2_preprocess.cpp b/jitify2_preprocess.cpp index 18ab29d..575efe9 100644 --- a/jitify2_preprocess.cpp +++ b/jitify2_preprocess.cpp @@ -139,6 +139,7 @@ jitify2_preprocess \ [-o / --output-directory ] Write output files to the specified dir. [-p / --variable-prefix ] Prefix to add to variable names (see -i). [-s / --shared-headers ] Write headers to a separate file. + [-n / --dry-run] Don't write any output files. [-v / --verbose] Print header locations. [-h / --help] Show this help. @@ -180,6 +181,7 @@ int main(int argc, char* argv[]) { StringVec source_filenames; bool write_as_cpp_headers = false; bool verbose = false; + bool dry_run = false; const char* arg_c; while ((arg_c = *++argv)) { std::string arg = arg_c; @@ -213,6 +215,8 @@ int main(int argc, char* argv[]) { varname_prefix = arg_c; } else if (arg == "-i" || arg == "--include-style") { write_as_cpp_headers = true; + } else if (arg == "-n" || arg == "--dry-run") { + dry_run = true; } else if (arg == "-v" || arg == "--verbose") { verbose = true; } else { @@ -261,6 +265,8 @@ int main(int argc, char* argv[]) { preprocessed->header_sources().end()); } + if (dry_run) continue; // Skip writing output files + if (write_as_cpp_headers) { std::stringstream ss(std::stringstream::in | std::stringstream::out | std::stringstream::binary); @@ -290,6 +296,7 @@ int main(int argc, char* argv[]) { } } } + if (dry_run) return EXIT_SUCCESS; // Skip writing output file if (share_headers) { if (write_as_cpp_headers) { std::stringstream ss(std::stringstream::in | std::stringstream::out | From b034121e3dbe3701b4c13f7f926ed775c2c82178 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 23 Oct 2023 11:38:00 +1100 Subject: [PATCH 10/12] Add [[deprecated]] attribute --- jitify2.hpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index 53c393c..52611af 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -127,6 +127,12 @@ #define JITIFY_IF_THREAD_SAFE(x) #endif +#if __cplusplus >= 201402L +#define JITIFY_DEPRECATED(msg) [[deprecated(msg)]] +#else +#define JITIFY_DEPRECATED(msg) +#endif + #ifdef __linux__ #include // For abi::__cxa_demangle #include // For struct dirent, opendir etc. @@ -2599,6 +2605,7 @@ class CompiledProgramData * "-dlto" compiler option. * \deprecated Use lto_ir() instead. */ + JITIFY_DEPRECATED("Use lto_ir() instead") const std::string& nvvm() const { return nvvm_; } /*! Get the Link-Time Optimization (LTO) IR of the compiled program. * \note The LTO IR is only available here with NVRTC version >= 11.4 and the @@ -2685,25 +2692,25 @@ inline LinkedProgram LinkedProgram::link( program_types.reserve(num_programs); for (size_t i = 0; i < num_programs; ++i) { const CompiledProgramData& compiled_program = *compiled_programs[i]; - if (!compiled_program.nvvm().empty()) { + if (!compiled_program.lto_ir().empty()) { if (!cuda()) return Error(cuda().error()); const int min_cuda_version = std::min(CUDA_VERSION, cuda().get_version()); if (min_cuda_version < 11040) { return Error("Linking LTO IR is not supported with CUDA < 11.4"); } } - const std::string& program = !compiled_program.nvvm().empty() - ? compiled_program.nvvm() + const std::string& program = !compiled_program.lto_ir().empty() + ? compiled_program.lto_ir() : !compiled_program.cubin().empty() ? compiled_program.cubin() : compiled_program.ptx(); CUjitInputType program_type = #if CUDA_VERSION >= 11040 - !compiled_program.nvvm().empty() ? CU_JIT_INPUT_NVVM : + !compiled_program.lto_ir().empty() ? CU_JIT_INPUT_NVVM : #endif - !compiled_program.cubin().empty() - ? CU_JIT_INPUT_CUBIN - : CU_JIT_INPUT_PTX; + !compiled_program.cubin().empty() + ? CU_JIT_INPUT_CUBIN + : CU_JIT_INPUT_PTX; programs.emplace_back(&program); program_types.emplace_back(program_type); } From 68ac99b9419aa356897f366b8b6ec555c5d0507e Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 23 Oct 2023 11:39:49 +1100 Subject: [PATCH 11/12] Fix ambiguous overloads of launch() - Adds launch_raw() methods to replace overloads that take array of arg pointers, which were dangerously ambiguous with the variadic overload. - Adds explicit no-argument overload of launch() to avoid forming zero-sized array. --- jitify2.hpp | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index 52611af..996ab25 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -1796,13 +1796,30 @@ class ConfiguredKernelData { /*! Get the configured CUDA stream. */ CUstream stream() const { return stream_; } - // TODO: Taking void** here is dangerous due to ambiguity with the variadic // overload below. E.g., passing void*const* silently fails. /*! Launch the configured kernel. * \param arg_ptrs Array of pointers to kernel arguments. * \return An empty string on success, otherwise an error message. + * \deprecated Use \p launch_raw instead. */ - ErrorMsg launch(void** arg_ptrs) const { + JITIFY_DEPRECATED("Use launch_raw instead") + ErrorMsg launch(void** arg_ptrs) const { return launch_raw(arg_ptrs); } + + /*! Launch the configured kernel. + * \param arg_ptrs Vector of pointers to kernel arguments. + * \return An empty string on success, otherwise an error message. + * \deprecated Use \p launch_raw instead. + */ + JITIFY_DEPRECATED("Use launch_raw instead") + ErrorMsg launch(const std::vector& arg_ptrs) const { + return launch_raw(arg_ptrs); + } + + /*! Launch the configured kernel. + * \param arg_ptrs Array of pointers to kernel arguments. + * \return An empty string on success, otherwise an error message. + */ + ErrorMsg launch_raw(void** arg_ptrs) const { if (!cuda()) JITIFY_THROW_OR_RETURN(cuda().error()); JITIFY_THROW_OR_RETURN_IF_CUDA_ERROR(cuda().LaunchKernel()( kernel_.function(), grid_.x, grid_.y, grid_.z, block_.x, block_.y, @@ -1814,8 +1831,8 @@ class ConfiguredKernelData { * \param arg_ptrs Vector of pointers to kernel arguments. * \return An empty string on success, otherwise an error message. */ - ErrorMsg launch(const std::vector& arg_ptrs = {}) const { - return launch(const_cast(arg_ptrs.data())); + ErrorMsg launch_raw(const std::vector& arg_ptrs) const { + return launch_raw(const_cast(arg_ptrs.data())); } /*! Launch the configured kernel. @@ -1823,10 +1840,17 @@ class ConfiguredKernelData { * be passed as pointers. * \return An empty string on success, otherwise an error message. */ - template - ErrorMsg launch(const Args&... args) const { - void* arg_ptrs[] = {(void*)&args...}; - return this->launch(arg_ptrs); + template + ErrorMsg launch(const Arg& arg, const Args&... args) const { + void* arg_ptrs[] = {(void*)&arg, (void*)&args...}; + return this->launch_raw(arg_ptrs); + } + + /*! Launch the configured kernel. + * \return An empty string on success, otherwise an error message. + */ + ErrorMsg launch() const { + return this->launch_raw(nullptr); } }; From 0d4224855638c2e98dc0e30bb404a3974e20b658 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 6 Nov 2023 13:44:16 +1100 Subject: [PATCH 12/12] Define CHAR_MIN/MAX as macros - This matches the definitions in libcucxx. --- jitify2.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/jitify2.hpp b/jitify2.hpp index 996ab25..879e14b 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -3947,11 +3947,14 @@ JITIFY_DEFINE_C_AND_CXX_HEADERS(limits, R"( #define SCHAR_MIN (-128) #define SCHAR_MAX 127 #define UCHAR_MAX 255 -enum { - _JITIFY_CHAR_IS_UNSIGNED = ((char)-1 >= 0), - CHAR_MIN = (_JITIFY_CHAR_IS_UNSIGNED ? 0 : SCHAR_MIN), - CHAR_MAX = (_JITIFY_CHAR_IS_UNSIGNED ? UCHAR_MAX : SCHAR_MAX), -}; +#define _JITIFY_CHAR_IS_UNSIGNED ('\xff' > 0) +#if _JITIFY_CHAR_IS_UNSIGNED +#define CHAR_MIN 0 +#define CHAR_MAX UCHAR_MAX +#else +#define CHAR_MIN SCHAR_MIN +#define CHAR_MAX SCHAR_MAX +#endif #define SHRT_MIN (-SHRT_MAX - 1) #define SHRT_MAX 0x7fff #define USHRT_MAX 0xffff