Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Minimum CMake version and project declaration; both C++ and CUDA are
# enabled as project languages.
cmake_minimum_required(VERSION 3.9)
project(jitify LANGUAGES CXX CUDA)

# User-facing switch (default ON) for the address sanitizer in debug builds.
option(ASAN "Enable address sanitizer in debug build" ON)

# C++ compiler options.
# Request the C++11 standard for both host C++ and CUDA sources.
set (CMAKE_CXX_STANDARD 11)
# NOTE(review): the trailing remark below records doubt about whether this
# setting takes effect; the reason is not visible here - TODO confirm.
set (CMAKE_CUDA_STANDARD 11) # Doesn't work?
Expand All @@ -12,6 +14,13 @@ else()
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wconversion -Wshadow -fmessage-length=80")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
# The undefined-behaviour sanitizer is always added to the debug flags; the
# ASAN option additionally enables the address sanitizer.
if (ASAN)
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fsanitize=undefined,address")
else()
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fsanitize=undefined")
endif()
endif()

# CUDA compiler options.
Expand Down Expand Up @@ -133,7 +142,10 @@ foreach(test ${TESTS})
PUBLIC JITIFY_LINK_CUDA_STATIC=1
PUBLIC JITIFY_LINK_NVRTC_STATIC=1
PUBLIC JITIFY_LINK_NVJITLINK_STATIC=1)
# cuda and nvrtc are always linked. nvJitLink is linked only when
# CUDA_VERSION_MAJOR is at least 12; linking it unconditionally as well would
# defeat this version guard.
# NOTE(review): assumes CUDA_VERSION_MAJOR is set earlier in this file -
# TODO confirm.
target_link_libraries(${test} cuda nvrtc)
if (CUDA_VERSION_MAJOR GREATER_EQUAL 12)
  target_link_libraries(${test} nvJitLink)
endif()
endif()
if (NOT WIN32)
target_link_libraries(${test} ${CMAKE_DL_LIBS})
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2017-2020, NVIDIA Corporation
Copyright (c) 2017-2024, NVIDIA Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
2 changes: 1 addition & 1 deletion example_headers/class_arg_kernel.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion example_headers/constant_header.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion example_headers/my_header1.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion example_headers/my_header2.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion example_headers/my_header3.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down
123 changes: 91 additions & 32 deletions jitify2.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -127,6 +127,12 @@
#define JITIFY_IF_THREAD_SAFE(x)
#endif

// JITIFY_DEPRECATED(msg): marks the declaration that follows as deprecated,
// with `msg` as the diagnostic text, when compiling as C++14 or newer.
// For older language modes it expands to nothing.
// MSVC reports __cplusplus as 199711L unless /Zc:__cplusplus is given, so
// _MSVC_LANG is consulted as well to detect the real language level there.
#if __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
#define JITIFY_DEPRECATED(msg) [[deprecated(msg)]]
#else
#define JITIFY_DEPRECATED(msg)
#endif

#ifdef __linux__
#include <cxxabi.h> // For abi::__cxa_demangle
#include <dirent.h> // For struct dirent, opendir etc.
Expand Down Expand Up @@ -1411,6 +1417,9 @@ class LibNvJitLink
case NVJITLINK_ERROR_PTX_COMPILE: return "NVJITLINK_ERROR_PTX_COMPILE";
case NVJITLINK_ERROR_NVVM_COMPILE: return "NVJITLINK_ERROR_NVVM_COMPILE";
case NVJITLINK_ERROR_INTERNAL: return "NVJITLINK_ERROR_INTERNAL";
#if CUDA_VERSION >= 12030
case NVJITLINK_ERROR_THREADPOOL: return "NVJITLINK_ERROR_THREADPOOL";
#endif
}
// clang-format on
return "(unknown nvJitLink error)";
Expand Down Expand Up @@ -1787,13 +1796,30 @@ class ConfiguredKernelData {
/*! Get the configured CUDA stream. */
CUstream stream() const { return stream_; }

// TODO: Taking void** here is dangerous due to ambiguity with the variadic
// overload below. E.g., passing void*const* silently fails.
/*! Launch the configured kernel from a raw array of argument pointers.
 *  This overload only forwards to \p launch_raw.
 *  \param arg_ptrs Array of pointers to kernel arguments.
 *  \return An empty string on success, otherwise an error message.
 *  \deprecated Use \p launch_raw instead.
 */
JITIFY_DEPRECATED("Use launch_raw instead")
ErrorMsg launch(void** arg_ptrs) const {
  return this->launch_raw(arg_ptrs);
}

/*! Launch the configured kernel from a vector of argument pointers.
 *  This overload only forwards to \p launch_raw.
 *  \param arg_ptrs Vector of pointers to kernel arguments.
 *  \return An empty string on success, otherwise an error message.
 *  \deprecated Use \p launch_raw instead.
 */
JITIFY_DEPRECATED("Use launch_raw instead")
ErrorMsg launch(const std::vector<void*>& arg_ptrs) const {
  return this->launch_raw(arg_ptrs);
}

/*! Launch the configured kernel.
* \param arg_ptrs Array of pointers to kernel arguments.
* \return An empty string on success, otherwise an error message.
*/
ErrorMsg launch(void** arg_ptrs) const {
ErrorMsg launch_raw(void** arg_ptrs) const {
if (!cuda()) JITIFY_THROW_OR_RETURN(cuda().error());
JITIFY_THROW_OR_RETURN_IF_CUDA_ERROR(cuda().LaunchKernel()(
kernel_.function(), grid_.x, grid_.y, grid_.z, block_.x, block_.y,
Expand All @@ -1805,19 +1831,26 @@ class ConfiguredKernelData {
* \param arg_ptrs Vector of pointers to kernel arguments.
* \return An empty string on success, otherwise an error message.
*/
// Forwards to the void** overload of launch_raw using the vector's storage.
// The const_cast is needed because that overload takes a non-const void**.
ErrorMsg launch_raw(const std::vector<void*>& arg_ptrs) const {
  return launch_raw(const_cast<void**>(arg_ptrs.data()));
}

/*! Launch the configured kernel with one or more arguments.
 * \param arg First argument for the kernel.
 * \param args Remaining arguments for the kernel. Note that reference
 * arguments must be passed as pointers.
 * \return An empty string on success, otherwise an error message.
 */
template <typename Arg, typename... Args>
ErrorMsg launch(const Arg& arg, const Args&... args) const {
  // Collect the address of every argument. Requiring at least one argument
  // keeps this array non-empty.
  void* arg_ptrs[] = {(void*)&arg, (void*)&args...};
  return this->launch_raw(arg_ptrs);
}

/*! Launch the configured kernel without passing any kernel arguments.
 *  A null argument-pointer array is handed to \p launch_raw.
 *  \return An empty string on success, otherwise an error message.
 */
ErrorMsg launch() const { return this->launch_raw(nullptr); }
};

Expand Down Expand Up @@ -2596,6 +2629,7 @@ class CompiledProgramData
* "-dlto" compiler option.
* \deprecated Use lto_ir() instead.
*/
JITIFY_DEPRECATED("Use lto_ir() instead")
const std::string& nvvm() const { return nvvm_; }
/*! Get the Link-Time Optimization (LTO) IR of the compiled program.
* \note The LTO IR is only available here with NVRTC version >= 11.4 and the
Expand Down Expand Up @@ -2682,25 +2716,25 @@ inline LinkedProgram LinkedProgram::link(
program_types.reserve(num_programs);
for (size_t i = 0; i < num_programs; ++i) {
const CompiledProgramData& compiled_program = *compiled_programs[i];
if (!compiled_program.nvvm().empty()) {
if (!compiled_program.lto_ir().empty()) {
if (!cuda()) return Error(cuda().error());
const int min_cuda_version = std::min(CUDA_VERSION, cuda().get_version());
if (min_cuda_version < 11040) {
return Error("Linking LTO IR is not supported with CUDA < 11.4");
}
}
const std::string& program = !compiled_program.nvvm().empty()
? compiled_program.nvvm()
const std::string& program = !compiled_program.lto_ir().empty()
? compiled_program.lto_ir()
: !compiled_program.cubin().empty()
? compiled_program.cubin()
: compiled_program.ptx();
CUjitInputType program_type =
#if CUDA_VERSION >= 11040
!compiled_program.nvvm().empty() ? CU_JIT_INPUT_NVVM :
!compiled_program.lto_ir().empty() ? CU_JIT_INPUT_NVVM :
#endif
!compiled_program.cubin().empty()
? CU_JIT_INPUT_CUBIN
: CU_JIT_INPUT_PTX;
!compiled_program.cubin().empty()
? CU_JIT_INPUT_CUBIN
: CU_JIT_INPUT_PTX;
programs.emplace_back(&program);
program_types.emplace_back(program_type);
}
Expand Down Expand Up @@ -3438,6 +3472,10 @@ inline bool ptx_parse_decl_name(const std::string& line, std::string* name) {
return true;
}

// Returns true iff c is one of the letters 'A'-'Z' or 'a'-'z'. Only plain
// character comparisons are used.
inline bool is_alpha(char c) {
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}

inline bool ptx_remove_unused_globals(std::string* ptx) {
std::istringstream iss(*ptx);
StringVec lines;
Expand Down Expand Up @@ -3465,7 +3503,7 @@ inline bool ptx_remove_unused_globals(std::string* ptx) {
const char* token_delims = " \t()[]{},;+-*/~&|^?:=!<>\"'\\";
for (auto token : split_string(terms[i], -1, token_delims)) {
if ( // Ignore non-names
!(std::isalpha(token[0]) || token[0] == '_' || token[0] == '$') ||
!(is_alpha(token[0]) || token[0] == '_' || token[0] == '$') ||
token.find('.') != std::string::npos ||
// Ignore variable/parameter declarations
terms[i - 1][0] == '.' ||
Expand Down Expand Up @@ -3909,11 +3947,14 @@ JITIFY_DEFINE_C_AND_CXX_HEADERS(limits, R"(
#define SCHAR_MIN (-128)
#define SCHAR_MAX 127
#define UCHAR_MAX 255
enum {
_JITIFY_CHAR_IS_UNSIGNED = ((char)-1 >= 0),
CHAR_MIN = (_JITIFY_CHAR_IS_UNSIGNED ? 0 : SCHAR_MIN),
CHAR_MAX = (_JITIFY_CHAR_IS_UNSIGNED ? UCHAR_MAX : SCHAR_MAX),
};
#define _JITIFY_CHAR_IS_UNSIGNED ('\xff' > 0)
#if _JITIFY_CHAR_IS_UNSIGNED
#define CHAR_MIN 0
#define CHAR_MAX UCHAR_MAX
#else
#define CHAR_MIN SCHAR_MIN
#define CHAR_MAX SCHAR_MAX
#endif
#define SHRT_MIN (-SHRT_MAX - 1)
#define SHRT_MAX 0x7fff
#define USHRT_MAX 0xffff
Expand Down Expand Up @@ -4126,7 +4167,6 @@ typedef signed short int_least16_t;
typedef signed int int_least32_t;
typedef signed long long int_least64_t;
typedef signed long long intmax_t;
typedef signed long intptr_t; // optional
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
Expand All @@ -4140,11 +4180,8 @@ typedef unsigned short uint_least16_t;
typedef unsigned int uint_least32_t;
typedef unsigned long long uint_least64_t;
typedef unsigned long long uintmax_t;
#if defined _WIN32 || defined _WIN64
typedef unsigned long long uintptr_t; // optional
#else // not Windows
typedef unsigned long uintptr_t; // optional
#endif
typedef int64_t intptr_t; // optional
typedef uint64_t uintptr_t; // optional
)");

JITIFY_DEFINE_C_AND_CXX_HEADERS_EX(stdio, "#include <cstddef>", R"(
Expand Down Expand Up @@ -5528,12 +5565,31 @@ inline std::string path_simplify(StringRef path) {
return ss.str();
}

// Reads a whole text file into *content. Returns false on failure.
// The file is opened in text mode ("r"). Its size is found by seeking to the
// end, the whole file is read in one call, and trailing null characters are
// then cropped from the result.
inline bool read_text_file(const std::string& fullpath, std::string* content) {
  FILE* file = ::fopen(fullpath.c_str(), "r");
  if (!file) return false;
  // Owns the handle so that it is closed on every return path below.
  std::unique_ptr<FILE, std::integral_constant<decltype(::fclose)*, ::fclose>>
      unique_file(file);
#ifdef POSIX_FADV_WILLNEED
  // Hints to potentially improve read performance.
  ::posix_fadvise(::fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL);
  ::posix_fadvise(::fileno(file), 0, 0, POSIX_FADV_WILLNEED);
#endif
  if (::fseek(file, 0, SEEK_END)) return false;
  const long size = ::ftell(file);
  // ftell reports failure as -1; resizing the string to that value would
  // request an enormous buffer.
  if (size < 0) return false;
  if (::fseek(file, 0, SEEK_SET)) return false;
  const size_t num_bytes = static_cast<size_t>(size);
  content->resize(num_bytes);
  // Note: This supports empty (size=0) files.
  // NOTE(review): a short read is treated as failure; confirm this is the
  // intended behaviour where text-mode newline conversion shrinks the data.
  if ((long)::fread(&(*content)[0], 1, num_bytes, file) != size) return false;
  // Crop off trailing null characters that may arise due to multi-character
  // newline conversions (e.g., on Windows).
  // The single-character overload is required here: the C-string "\0" is an
  // empty set of characters, which would never crop anything.
  const size_t last_char_pos = content->find_last_not_of('\0');
  content->resize(last_char_pos == std::string::npos ? 0 : last_char_pos + 1);
  return true;
}

Expand Down Expand Up @@ -6336,6 +6392,9 @@ class NewFile {
#if defined _WIN32 || defined _WIN64
bool success = ::_locking(fd_, _LK_LOCK, 1) == 0;
#else
#ifndef F_OFD_SETLKW
#error F_OFD_SETLKW is not defined; try building with -D_FILE_OFFSET_BITS=64
#endif // F_OFD_SETLKW
flock fl = {};
fl.l_type = F_WRLCK; // Exclusive lock for writing
fl.l_whence = SEEK_SET; // Start at beginning of file
Expand Down
Loading