Skip to content
9 changes: 6 additions & 3 deletions sycl/source/detail/buffer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ void *buffer_impl::allocateMem(context_impl *Context, bool InitFromUserData,
ur_event_handle_t &OutEventToWait) {
bool HostPtrReadOnly = false;
BaseT::determineHostPtr(Context, InitFromUserData, HostPtr, HostPtrReadOnly);
const bool BackendOwnedWriteBack = HostPtr != nullptr &&
HostPtr == BaseT::getUserPtr() &&
BaseT::backendOwnsWriteBack();
assert(!(nullptr == HostPtr && BaseT::useHostPtr() && !Context) &&
"Internal error. Allocating memory on the host "
"while having use_host_ptr property");
return MemoryManager::allocateMemBuffer(
Context, this, HostPtr, HostPtrReadOnly, BaseT::getSizeInBytes(),
BaseT::MInteropEvent, BaseT::MInteropContext.get(), MProps,
OutEventToWait);
Context, this, HostPtr, HostPtrReadOnly, BackendOwnedWriteBack,
BaseT::getSizeInBytes(), BaseT::MInteropEvent,
BaseT::MInteropContext.get(), MProps, OutEventToWait);
}
void buffer_impl::constructorNotification(const detail::code_location &CodeLoc,
void *UserObj, const void *HostObj,
Expand Down
38 changes: 21 additions & 17 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
waitForEvents(DepEvents);
OutEvent = nullptr;

MemObj->prepareForAllocation(TargetContext);

return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
OutEvent);
}
Expand Down Expand Up @@ -322,13 +324,17 @@ void *MemoryManager::allocateInteropMemObject(
return UserPtr;
}

static ur_mem_flags_t getMemObjCreationFlags(void *UserPtr,
bool HostPtrReadOnly) {
static ur_mem_flags_t
getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly,
bool BackendOwnedWriteBack = false) {
// Create read_write mem object to handle arbitrary uses.
ur_mem_flags_t Result =
HostPtrReadOnly ? UR_MEM_FLAG_READ_ONLY : UR_MEM_FLAG_READ_WRITE;
if (UserPtr)
if (UserPtr) {
Result |= UR_MEM_FLAG_USE_HOST_POINTER;
if (BackendOwnedWriteBack)
Result |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER;
}
return Result;
}

Expand All @@ -348,12 +354,12 @@ void *MemoryManager::allocateImageObject(context_impl *TargetContext,
return NewMem;
}

void *
MemoryManager::allocateBufferObject(context_impl *TargetContext, void *UserPtr,
bool HostPtrReadOnly, const size_t Size,
const sycl::property_list &PropsList) {
void *MemoryManager::allocateBufferObject(
context_impl *TargetContext, void *UserPtr, bool HostPtrReadOnly,
bool BackendOwnedWriteBack, const size_t Size,
const sycl::property_list &PropsList) {
ur_mem_flags_t CreationFlags =
getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
getMemObjCreationFlags(UserPtr, HostPtrReadOnly, BackendOwnedWriteBack);
if (PropsList.has_property<
sycl::ext::oneapi::property::buffer::use_pinned_host_memory>())
CreationFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER;
Expand All @@ -369,13 +375,11 @@ MemoryManager::allocateBufferObject(context_impl *TargetContext, void *UserPtr,
return NewMem;
}

void *MemoryManager::allocateMemBuffer(context_impl *TargetContext,
SYCLMemObjI *MemObj, void *UserPtr,
bool HostPtrReadOnly, size_t Size,
const EventImplPtr &InteropEvent,
context_impl *InteropContext,
const sycl::property_list &PropsList,
ur_event_handle_t &OutEventToWait) {
void *MemoryManager::allocateMemBuffer(
context_impl *TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
bool HostPtrReadOnly, bool BackendOwnedWriteBack, size_t Size,
const EventImplPtr &InteropEvent, context_impl *InteropContext,
const sycl::property_list &PropsList, ur_event_handle_t &OutEventToWait) {
void *MemPtr;
if (!TargetContext)
MemPtr =
Expand All @@ -385,8 +389,8 @@ void *MemoryManager::allocateMemBuffer(context_impl *TargetContext,
allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
InteropContext, PropsList, OutEventToWait);
else
MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
PropsList);
MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly,
BackendOwnedWriteBack, Size, PropsList);
XPTIRegistry::bufferAssociateNotification(MemObj, MemPtr);
return MemPtr;
}
Expand Down
7 changes: 5 additions & 2 deletions sycl/source/detail/memory_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class MemoryManager {
// one(not host).
static void *allocateMemBuffer(context_impl *TargetContext,
SYCLMemObjI *MemObj, void *UserPtr,
bool HostPtrReadOnly, size_t Size,
bool HostPtrReadOnly,
bool BackendOwnedWriteBack, size_t Size,
const EventImplPtr &InteropEvent,
context_impl *InteropContext,
const sycl::property_list &PropsList,
Expand Down Expand Up @@ -101,7 +102,9 @@ class MemoryManager {
const sycl::property_list &PropsList);

static void *allocateBufferObject(context_impl *TargetContext, void *UserPtr,
bool HostPtrReadOnly, const size_t Size,
bool HostPtrReadOnly,
bool BackendOwnedWriteBack,
const size_t Size,
const sycl::property_list &PropsList);

// Copies memory between: host and device, host and host,
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/sycl_mem_obj_i.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class SYCLMemObjI {
virtual void *allocateMem(context_impl *Context, bool InitFromUserData,
void *HostPtr, ur_event_handle_t &InteropEvent) = 0;

// Optional hook executed right before allocateMem(). Memory objects can use
// it to resolve context/backend-dependent allocation policy.
virtual void prepareForAllocation(context_impl *Context) {
(void)Context;
}

// Should be used for memory object created without use_host_ptr property.
virtual void *allocateHostMem() = 0;

Expand Down
150 changes: 141 additions & 9 deletions sycl/source/detail/sycl_mem_obj_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,32 @@
#include <detail/scheduler/scheduler.hpp>
#include <detail/sycl_mem_obj_t.hpp>

#include <cstdint>
#include <cstring>
#include <new>

namespace sycl {
inline namespace _V1 {
namespace detail {

namespace {

// Computes the strictest host-pointer alignment, in bytes, demanded by any
// device of the given context. Devices report mem_base_addr_align in BITS,
// so the value is rounded up to whole bytes. Never returns less than 1.
size_t getBackendShadowCopyAlignment(context_impl *Context) {
  size_t Strictest = 1;
  for (const auto &Dev : Context->getDevices()) {
    const uint32_t Bits = Dev.get_info<info::device::mem_base_addr_align>();
    if (Bits == 0)
      continue;

    // UR reports MEM_BASE_ADDR_ALIGN in bits; round up to bytes.
    const size_t Bytes = (static_cast<size_t>(Bits) + 7) / 8;
    if (Strictest < Bytes)
      Strictest = Bytes;
  }
  return Strictest;
}

} // namespace

SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject,
const context &SyclContext, const size_t,
event AvailableEvent,
Expand Down Expand Up @@ -143,7 +165,7 @@ void SYCLMemObjT::updateHostMemory(void *const Ptr) {
void SYCLMemObjT::updateHostMemory() {
// Don't try updating host memory when shutting down.
if ((MUploadDataFunctor != nullptr) && MNeedWriteBack &&
GlobalHandler::instance().isOkToDefer())
!MBackendOwnsWriteBack && GlobalHandler::instance().isOkToDefer())
MUploadDataFunctor();

// If we're attached to a memory record, process the deletion of the memory
Expand All @@ -162,12 +184,69 @@ void SYCLMemObjT::updateHostMemory() {
(Result || !GlobalHandler::instance().isOkToDefer()) &&
"removeMemoryObject should not return false in mem object destructor");
}
releaseHostMem(MShadowCopy);
detail::OSUtil::alignedFree(MShadowCopy);

if (MOpenCLInterop) {
getAdapter().call<UrApiKind::urMemRelease>(MInteropMemObject);
}
}

// Replaces the user-provided host pointer with an internally owned, suitably
// aligned "shadow" allocation. SourcePtr supplies the initial contents when
// no shadow copy exists yet; RequiredAlign is the minimum alignment (in
// bytes) the allocation must satisfy. On success, MUserPtr and MShadowCopy
// both refer to the (possibly reused) aligned allocation.
void SYCLMemObjT::materializeShadowCopy(const void *SourcePtr,
                                        size_t RequiredAlign) {
  // Never weaken an alignment requirement recorded by an earlier call.
  if (MPendingShadowCopyAlignment > RequiredAlign)
    RequiredAlign = MPendingShadowCopyAlignment;

  // Treat 0 as "no particular alignment" so the modulo below is well-defined.
  if (RequiredAlign == 0)
    RequiredAlign = 1;

  MPendingShadowCopyAlignment = RequiredAlign;

  void *OldUserPtr = MUserPtr;
  void *OldShadowCopy = MShadowCopy;
  const void *CopySource = SourcePtr;
  if (OldShadowCopy) {
    // An existing shadow copy that already meets the alignment is reused
    // as-is; only MUserPtr needs to be (re)pointed at it.
    if ((reinterpret_cast<std::uintptr_t>(OldShadowCopy) % RequiredAlign) ==
        0) {
      MUserPtr = OldShadowCopy;
      return;
    }
    // Otherwise the old shadow copy holds the live data, so it - not
    // SourcePtr - must seed the replacement allocation.
    CopySource = OldShadowCopy;
  }

  assert(CopySource != nullptr &&
         "Cannot materialize a shadow copy without source data");

  // Allocate the shadow copy via the platform-aligned allocator directly,
  // bypassing the user-provided allocator. Shadow copies are an internal
  // runtime detail; the user allocator cannot be relied upon to satisfy
  // backend alignment requirements (e.g. CL_DEVICE_MEM_BASE_ADDR_ALIGN).
  // The size is rounded up to a multiple of RequiredAlign, and stays non-zero
  // even for empty objects so a distinct pointer is still produced.
  const size_t AllocBytes =
      MSizeInBytes == 0 ? RequiredAlign
                        : ((MSizeInBytes + RequiredAlign - 1) / RequiredAlign) *
                              RequiredAlign;
  void *NewShadowCopy = detail::OSUtil::alignedAlloc(RequiredAlign, AllocBytes);
  if (!NewShadowCopy)
    throw std::bad_alloc();
  if (MSizeInBytes != 0)
    std::memcpy(NewShadowCopy, CopySource, MSizeInBytes);

  MShadowCopy = NewShadowCopy;
  MUserPtr = NewShadowCopy;
  // Any alloca commands recorded against the old user pointer must now
  // track the fresh allocation.
  updateRecordedMemAllocation(OldUserPtr, NewShadowCopy);

  detail::OSUtil::alignedFree(OldShadowCopy);
}

// Retargets every recorded alloca command that still references OldPtr so it
// points at NewPtr instead. A missing record, a null old pointer, or an
// unchanged pointer makes this a no-op.
void SYCLMemObjT::updateRecordedMemAllocation(void *OldPtr, void *NewPtr) {
  const bool NothingToDo =
      (MRecord == nullptr) || (OldPtr == nullptr) || (OldPtr == NewPtr);
  if (NothingToDo)
    return;

  for (auto *Cmd : MRecord->MAllocaCommands)
    if (Cmd->MMemAllocation == OldPtr)
      Cmd->MMemAllocation = NewPtr;
}

adapter_impl &SYCLMemObjT::getAdapter() const {
assert((MInteropContext != nullptr) &&
"Trying to get Adapter from SYCLMemObjT with nullptr ContextImpl.");
Expand All @@ -176,6 +255,65 @@ adapter_impl &SYCLMemObjT::getAdapter() const {

bool SYCLMemObjT::isInterop() const { return MOpenCLInterop; }

// Resolves, once the target context (and thus backend) is known, how a
// pending aligned shadow copy should be handled: either the SYCL runtime
// materializes its own aligned copy of the user host pointer, or the
// backend (UR) is trusted to stage and write back the pointer itself.
// Invoked right before allocateMem() via the prepareForAllocation() hook.
void SYCLMemObjT::prepareForAllocation(context_impl *Context) {
  // Context may be null for host allocations; nothing backend-specific to do.
  if (!Context)
    return;

  // No decision pending - either there is no user pointer involved, or the
  // policy was already settled by an earlier allocation.
  if (!MHasPendingAlignedShadowCopy)
    return;

  bool SkipShadowCopy = false;
  backend Backend = Context->getPlatformImpl().getBackend();
  // Prefer the backend of an actual device when the context has any;
  // otherwise keep the platform's backend as the fallback.
  auto Devices = Context->getDevices();
  if (Devices.size() != 0)
    Backend = Devices.front().getBackend();

  // Raise the pending alignment to the strictest one any device requires.
  const size_t BackendRequiredAlign = getBackendShadowCopyAlignment(Context);
  if (BackendRequiredAlign > MPendingShadowCopyAlignment)
    MPendingShadowCopyAlignment = BackendRequiredAlign;

  // Per-backend policy: which backends handle a misaligned host pointer via
  // their own internal staging (skip) versus needing a runtime shadow copy.
  switch (Backend) {
  case backend::ext_oneapi_level_zero:
  case backend::ext_oneapi_cuda:
  case backend::ext_oneapi_hip:
    SkipShadowCopy = true;
    break;
  case backend::opencl:
  case backend::ext_oneapi_native_cpu:
  case backend::ext_oneapi_offload:
    SkipShadowCopy = false;
    break;
  case backend::all:
  default:
    assert(false && "Unexpected SYCL backend");
    break;
  }

  // Serialize against concurrent shadow-copy creation (e.g. from a writable
  // host accessor) before mutating the shared shadow-copy state.
  std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
  if (SkipShadowCopy) {
    if (MShadowCopy != nullptr) {
      // A writable host accessor already forced a SYCL shadow copy. Keep using
      // that path so the final copy-back still targets the original user ptr.
      return;
    }

    // Backend (UR) will manage the misaligned host pointer through its own
    // internal staging buffer and owns the final copy-back to the original ptr.
    MCreateShadowCopy = []() -> void {};
    MBackendOwnsWriteBack = true;
    if (!MHostPtrReadOnly)
      MUploadDataFunctor = nullptr;
    MHasPendingAlignedShadowCopy = false;
    return;
  }

  // Runtime-owned path: create the aligned shadow copy now and disarm both
  // the deferred-creation hook and the pending flag.
  materializeShadowCopy(MUserPtr, BackendRequiredAlign);
  MCreateShadowCopy = []() -> void {};
  MBackendOwnsWriteBack = false;
  MHasPendingAlignedShadowCopy = false;
}

void SYCLMemObjT::determineHostPtr(context_impl *Context, bool InitFromUserData,
void *&HostPtr, bool &HostPtrReadOnly) {
// The data for the allocation can be provided via either the user pointer
Expand Down Expand Up @@ -232,13 +370,7 @@ void SYCLMemObjT::handleWriteAccessorCreation() {
MCreateShadowCopy();
MCreateShadowCopy = []() -> void {};
}
if (MRecord != nullptr && MUserPtr != InitialUserPtr) {
for (auto &it : MRecord->MAllocaCommands) {
if (it->MMemAllocation == InitialUserPtr) {
it->MMemAllocation = MUserPtr;
}
}
}
updateRecordedMemAllocation(InitialUserPtr, MUserPtr);
}

} // namespace detail
Expand Down
Loading
Loading