Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/a2a3/platform/onboard/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../c
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/device_comm")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/aicpu_dispatcher")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/platform/onboard/host")
if(DEFINED CUSTOM_INCLUDE_DIRS)
foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS})
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}")
Expand Down Expand Up @@ -65,6 +66,12 @@ list(APPEND HOST_RUNTIME_SOURCES
list(APPEND HOST_RUNTIME_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host/load_aicpu_op.cpp"
)
# Add common/platform/onboard/host sources (shared between a2a3 / a5 onboard).
# Holds the DeviceArgs / KernelArgsHelper code common to both archs; further
# content moves here in follow-up PRs (see .docs/ONBOARD_HOST_COMMON_REFACTOR.md).
list(APPEND HOST_RUNTIME_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/platform/onboard/host/device_runner_helpers.cpp"
)
if(DEFINED CUSTOM_SOURCE_DIRS)
foreach(SRC_DIR ${CUSTOM_SOURCE_DIRS})
file(GLOB_RECURSE DIR_SOURCES "${SRC_DIR}/*.cpp" "${SRC_DIR}/*.c")
Expand Down
103 changes: 4 additions & 99 deletions src/a2a3/platform/onboard/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,113 +92,18 @@ HalHostUnregisterFn get_halHostUnregister() {
} // namespace

// =============================================================================
// KernelArgsHelper Implementation
// a2a3-only KernelArgsHelper extension
// =============================================================================

/**
 * Push a host-side DeviceArgs into device memory and record the device
 * pointer in args.device_args.
 *
 * A device buffer is allocated only when args.device_args is still null;
 * otherwise the existing buffer is overwritten in place.
 *
 * @param host_device_args Host copy of the arguments to upload
 * @param allocator        Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_device_args(const DeviceArgs &host_device_args, MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (args.device_args == nullptr) {
        const uint64_t bytes_needed = sizeof(DeviceArgs);
        void *device_buffer = allocator_->alloc(bytes_needed);
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for device_args failed");
            return -1;
        }
        args.device_args = static_cast<DeviceArgs *>(device_buffer);
    }

    // Host -> device copy of the whole structure.
    const int copy_status =
        rtMemcpy(args.device_args, sizeof(DeviceArgs), &host_device_args, sizeof(DeviceArgs), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: hand the buffer back and forget the pointer.
    LOG_ERROR("rtMemcpy failed: %d", copy_status);
    allocator_->free(args.device_args);
    args.device_args = nullptr;
    return copy_status;
}

/**
 * Release the device buffer referenced by args.device_args, if any.
 *
 * @return 0 when there is nothing to release, otherwise the allocator's free() result
 */
int KernelArgsHelper::finalize_device_args() {
    // No buffer, or no allocator recorded by an init call: nothing to do.
    if (args.device_args == nullptr || allocator_ == nullptr) {
        return 0;
    }

    const int free_status = allocator_->free(args.device_args);
    args.device_args = nullptr;
    return free_status;
}

/**
 * Push a host-side Runtime into device memory and record the device pointer
 * in args.runtime_args.
 *
 * A device buffer is allocated only when args.runtime_args is still null;
 * otherwise the existing buffer is overwritten in place.
 *
 * @param host_runtime Host copy of the runtime to upload
 * @param allocator    Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_runtime_args(const Runtime &host_runtime, MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (args.runtime_args == nullptr) {
        const uint64_t bytes_needed = sizeof(Runtime);
        void *device_buffer = allocator_->alloc(bytes_needed);
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for runtime_args failed");
            return -1;
        }
        args.runtime_args = static_cast<Runtime *>(device_buffer);
    }

    // Host -> device copy of the whole structure.
    const int copy_status =
        rtMemcpy(args.runtime_args, sizeof(Runtime), &host_runtime, sizeof(Runtime), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: hand the buffer back and forget the pointer.
    LOG_ERROR("rtMemcpy for runtime failed: %d", copy_status);
    allocator_->free(args.runtime_args);
    args.runtime_args = nullptr;
    return copy_status;
}

/**
 * Release the device buffer referenced by args.runtime_args, if any.
 *
 * @return 0 when there is nothing to release, otherwise the allocator's free() result
 */
int KernelArgsHelper::finalize_runtime_args() {
    // No buffer, or no allocator recorded by an init call: nothing to do.
    if (args.runtime_args == nullptr || allocator_ == nullptr) {
        return 0;
    }

    const int free_status = allocator_->free(args.runtime_args);
    args.runtime_args = nullptr;
    return free_status;
}

int KernelArgsHelper::init_ffts_base_addr() {
int kernel_args_init_ffts_base_addr(KernelArgsHelper &helper) {
uint64_t ffts_base_addr{0};
uint32_t ffts_len{0};
int rc = rtGetC2cCtrlAddr(&ffts_base_addr, &ffts_len);
if (rc != 0) {
LOG_ERROR("rtGetC2cCtrlAddr failed: %d", rc);
return rc;
}
args.ffts_base_addr = ffts_base_addr;
return 0;
}

/**
 * Mirror the host-side KernelArgs (`args`) into device memory and keep the
 * device pointer in device_k_args_.
 *
 * A device buffer is allocated only when device_k_args_ is still null;
 * otherwise the existing buffer is overwritten in place.
 *
 * @param allocator Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_device_kernel_args(MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (device_k_args_ == nullptr) {
        void *device_buffer = allocator_->alloc(sizeof(KernelArgs));
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for device KernelArgs failed");
            return -1;
        }
        device_k_args_ = static_cast<KernelArgs *>(device_buffer);
    }

    // Host -> device copy of the current contents of `args`.
    const int copy_status =
        rtMemcpy(device_k_args_, sizeof(KernelArgs), &args, sizeof(KernelArgs), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: hand the buffer back and forget the pointer.
    LOG_ERROR("rtMemcpy for KernelArgs failed: %d", copy_status);
    allocator_->free(device_k_args_);
    device_k_args_ = nullptr;
    return copy_status;
}

int KernelArgsHelper::finalize_device_kernel_args() {
if (device_k_args_ != nullptr && allocator_ != nullptr) {
int rc = allocator_->free(device_k_args_);
device_k_args_ = nullptr;
return rc;
}
helper.args.ffts_base_addr = ffts_base_addr;
return 0;
}

Expand Down Expand Up @@ -763,7 +668,7 @@ int DeviceRunner::run(Runtime &runtime, int block_dim, int launch_aicpu_num) {
kernel_args_.args.log_level = static_cast<uint32_t>(HostLogger::get_instance().level());
kernel_args_.args.log_info_v = static_cast<uint32_t>(HostLogger::get_instance().info_v());

rc = kernel_args_.init_ffts_base_addr();
rc = kernel_args_init_ffts_base_addr(kernel_args_);
if (rc != 0) {
LOG_ERROR("init_ffts_base_addr failed: %d", rc);
return rc;
Expand Down
105 changes: 7 additions & 98 deletions src/a2a3/platform/onboard/host/device_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "common/platform_config.h"
#include "common/unified_log.h"
#include "device_arena.h"
#include "device_runner_helpers.h" // common DeviceArgs + KernelArgsHelper
#include "host/function_cache.h"
#include "host/memory_allocator.h"
#include "host/l2_perf_collector.h"
Expand All @@ -54,106 +55,14 @@
#include "runtime.h"

/**
* DeviceArgs structure for AICPU device arguments.
* a2a3-only `KernelArgsHelper` extension: retrieve the FFTS base address via
* `rtGetC2cCtrlAddr` and store it in the wrapped `KernelArgs`. a5's
* `KernelArgs` has no `ffts_base_addr` field, so this helper lives in the
* arch-specific header rather than on the common `KernelArgsHelper` struct.
*
* Layout offsets are still nominally fixed by libaicpu_extend_kernels.so for
* aicpu_so_bin / aicpu_so_len (at offsets 96 / 104), but per-task AICPU
* launches go through rtsLaunchCpuKernel against the cached rtFuncHandle on
* LoadAicpuOp — none of our code reads these fields. The fields are kept
* (zero-initialized, never assigned) so the H2D struct layout matches the
* historical contract on both archs; an earlier "the H2D allocation pointed
* to by aicpu_so_bin is load-bearing on a5 onboard" finding no longer
* reproduces against current HEAD (post #864/#870), so the runner-side
* AicpuSoInfo allocation was removed.
* @return 0 on success, error code on failure.
*/
struct DeviceArgs {
    // Twelve zeroed 8-byte words (96 bytes) that precede aicpu_so_bin.
    uint64_t unused[12] = {0};
    // Kept zero-initialized; sits at byte offset 96.
    uint64_t aicpu_so_bin{0};
    // Kept zero-initialized; sits at byte offset 104.
    uint64_t aicpu_so_len{0};
};

// Lock in the host-to-device layout: fourteen consecutive 8-byte members with
// no padding place aicpu_so_bin / aicpu_so_len at offsets 96 / 104. Adding,
// removing or resizing a member changes the size and fails the build here
// instead of silently shifting those offsets.
static_assert(
    sizeof(DeviceArgs) == 14 * sizeof(uint64_t),
    "DeviceArgs layout changed: aicpu_so_bin/aicpu_so_len must stay at offsets 96/104"
);

/**
 * Host-side companion to KernelArgs.
 *
 * Bundles a KernelArgs value with the host-only bookkeeping needed to place
 * its contents in device memory (the allocator in use and the device copy of
 * the structure), so the KernelArgs layout shared with kernels stays free of
 * host concerns.
 *
 * The helper converts implicitly to KernelArgs*, so it can be handed directly
 * to runtime APIs that expect that pointer.
 */
struct KernelArgsHelper {
    KernelArgs args;
    MemoryAllocator *allocator_ = nullptr;
    KernelArgs *device_k_args_ = nullptr;  // Device copy of KernelArgs for AICore

    /**
     * Allocate device memory for the device arguments and copy them over.
     *
     * @param host_device_args Host-side device arguments to copy
     * @param allocator Memory allocator to use
     * @return 0 on success, error code on failure
     */
    int init_device_args(const DeviceArgs &host_device_args, MemoryAllocator &allocator);

    /**
     * Release the device memory holding the device arguments.
     *
     * @return 0 on success, error code on failure
     */
    int finalize_device_args();

    /**
     * Allocate device memory for the runtime and copy it over.
     *
     * @param host_runtime Host-side runtime to copy to device
     * @param allocator Memory allocator to use
     * @return 0 on success, error code on failure
     */
    int init_runtime_args(const Runtime &host_runtime, MemoryAllocator &allocator);

    /**
     * Release the device memory holding the runtime.
     *
     * @return 0 on success, error code on failure
     */
    int finalize_runtime_args();

    /**
     * Query the FFTS base address through rtGetC2cCtrlAddr and record it in
     * KernelArgs.
     *
     * @return 0 on success, error code on failure
     */
    int init_ffts_base_addr();

    /**
     * Copy KernelArgs into device memory so it can be passed to AICore
     * kernels as a parameter.
     *
     * Call this only after init_runtime_args and init_ffts_base_addr.
     *
     * @param allocator Memory allocator to use
     * @return 0 on success, error code on failure
     */
    int init_device_kernel_args(MemoryAllocator &allocator);

    /**
     * Release the device memory holding the KernelArgs copy.
     *
     * @return 0 on success, error code on failure
     */
    int finalize_device_kernel_args();

    /**
     * Conversions that let a KernelArgsHelper stand in wherever a
     * KernelArgs* is expected.
     *
     * Note that unary `&` is overloaded: `&helper` yields a pointer to the
     * wrapped `args`, not to the helper object itself.
     */
    operator KernelArgs *() { return &args; }
    KernelArgs *operator&() { return &args; }
};
int kernel_args_init_ffts_base_addr(KernelArgsHelper &helper);

/**
* Device runner for kernel execution
Expand Down
7 changes: 7 additions & 0 deletions src/a5/platform/onboard/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../c
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/device_comm")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/aicpu_dispatcher")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/platform/onboard/host")
if(DEFINED CUSTOM_INCLUDE_DIRS)
foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS})
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}")
Expand All @@ -50,6 +51,12 @@ list(APPEND HOST_RUNTIME_SOURCES
list(APPEND HOST_RUNTIME_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host/load_aicpu_op.cpp"
)
# Add common/platform/onboard/host sources (shared between a2a3 / a5 onboard).
# Holds the DeviceArgs / KernelArgsHelper code common to both archs; further
# content moves here in follow-up PRs (see .docs/ONBOARD_HOST_COMMON_REFACTOR.md).
list(APPEND HOST_RUNTIME_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/platform/onboard/host/device_runner_helpers.cpp"
)
if(DEFINED CUSTOM_SOURCE_DIRS)
foreach(SRC_DIR ${CUSTOM_SOURCE_DIRS})
file(GLOB_RECURSE DIR_SOURCES "${SRC_DIR}/*.cpp" "${SRC_DIR}/*.c")
Expand Down
98 changes: 0 additions & 98 deletions src/a5/platform/onboard/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,104 +38,6 @@
#include "host/host_regs.h" // Register address retrieval
#include "host/raii_scope_guard.h"

// =============================================================================
// KernelArgsHelper Implementation
// =============================================================================

/**
 * Upload a host-side DeviceArgs to the device and store the resulting device
 * pointer in args.device_args.
 *
 * Allocation happens only while args.device_args is null; an existing buffer
 * is reused and overwritten.
 *
 * @param host_device_args Host copy of the arguments to upload
 * @param allocator        Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_device_args(const DeviceArgs &host_device_args, MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (args.device_args == nullptr) {
        const uint64_t bytes_needed = sizeof(DeviceArgs);
        void *device_buffer = allocator_->alloc(bytes_needed);
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for device_args failed");
            return -1;
        }
        args.device_args = static_cast<DeviceArgs *>(device_buffer);
    }

    // Host -> device copy of the whole structure.
    const int copy_status =
        rtMemcpy(args.device_args, sizeof(DeviceArgs), &host_device_args, sizeof(DeviceArgs), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: return the buffer to the allocator and clear the pointer.
    LOG_ERROR("rtMemcpy failed: %d", copy_status);
    allocator_->free(args.device_args);
    args.device_args = nullptr;
    return copy_status;
}

/**
 * Free the device buffer behind args.device_args when one exists.
 *
 * @return 0 when there is nothing to free, otherwise the allocator's free() result
 */
int KernelArgsHelper::finalize_device_args() {
    // Without both a buffer and a recorded allocator there is nothing to free.
    if (args.device_args == nullptr || allocator_ == nullptr) {
        return 0;
    }

    const int free_status = allocator_->free(args.device_args);
    args.device_args = nullptr;
    return free_status;
}

/**
 * Upload a host-side Runtime to the device and store the resulting device
 * pointer in args.runtime_args.
 *
 * Allocation happens only while args.runtime_args is null; an existing buffer
 * is reused and overwritten.
 *
 * @param host_runtime Host copy of the runtime to upload
 * @param allocator    Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_runtime_args(const Runtime &host_runtime, MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (args.runtime_args == nullptr) {
        const uint64_t bytes_needed = sizeof(Runtime);
        void *device_buffer = allocator_->alloc(bytes_needed);
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for runtime_args failed");
            return -1;
        }
        args.runtime_args = static_cast<Runtime *>(device_buffer);
    }

    // Host -> device copy of the whole structure.
    const int copy_status =
        rtMemcpy(args.runtime_args, sizeof(Runtime), &host_runtime, sizeof(Runtime), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: return the buffer to the allocator and clear the pointer.
    LOG_ERROR("rtMemcpy for runtime failed: %d", copy_status);
    allocator_->free(args.runtime_args);
    args.runtime_args = nullptr;
    return copy_status;
}

/**
 * Free the device buffer behind args.runtime_args when one exists.
 *
 * @return 0 when there is nothing to free, otherwise the allocator's free() result
 */
int KernelArgsHelper::finalize_runtime_args() {
    // Without both a buffer and a recorded allocator there is nothing to free.
    if (args.runtime_args == nullptr || allocator_ == nullptr) {
        return 0;
    }

    const int free_status = allocator_->free(args.runtime_args);
    args.runtime_args = nullptr;
    return free_status;
}

/**
 * Copy the host-side KernelArgs (`args`) to the device and keep the device
 * pointer in device_k_args_.
 *
 * Allocation happens only while device_k_args_ is null; an existing buffer is
 * reused and overwritten.
 *
 * @param allocator Allocator remembered for later alloc/free calls
 * @return 0 on success, -1 if allocation fails, otherwise the rtMemcpy error code
 */
int KernelArgsHelper::init_device_kernel_args(MemoryAllocator &allocator) {
    allocator_ = &allocator;

    if (device_k_args_ == nullptr) {
        void *device_buffer = allocator_->alloc(sizeof(KernelArgs));
        if (device_buffer == nullptr) {
            LOG_ERROR("Alloc for device KernelArgs failed");
            return -1;
        }
        device_k_args_ = static_cast<KernelArgs *>(device_buffer);
    }

    // Host -> device copy of the current contents of `args`.
    const int copy_status =
        rtMemcpy(device_k_args_, sizeof(KernelArgs), &args, sizeof(KernelArgs), RT_MEMCPY_HOST_TO_DEVICE);
    if (copy_status == 0) {
        return 0;
    }

    // Copy failed: return the buffer to the allocator and clear the pointer.
    LOG_ERROR("rtMemcpy for KernelArgs failed: %d", copy_status);
    allocator_->free(device_k_args_);
    device_k_args_ = nullptr;
    return copy_status;
}

/**
 * Free the device copy of KernelArgs (device_k_args_) when one exists.
 *
 * @return 0 when there is nothing to free, otherwise the allocator's free() result
 */
int KernelArgsHelper::finalize_device_kernel_args() {
    // Without both a buffer and a recorded allocator there is nothing to free.
    if (device_k_args_ == nullptr || allocator_ == nullptr) {
        return 0;
    }

    const int free_status = allocator_->free(device_k_args_);
    device_k_args_ = nullptr;
    return free_status;
}

// =============================================================================
// DeviceRunner Implementation
// =============================================================================
Expand Down
Loading