Skip to content

Commit 3fcb515

Browse files
committed
wsl/librocdxg: add more hsakmt api implementation
Adapt to ROCm/rocm-systems#1912.

Signed-off-by: Flora Cui <flora.cui@amd.com>
Reviewed-by: Horatio Zhang <Hongkun.Zhang@amd.com>
Reviewed-by: Longlong Yao <Longlong.Yao@amd.com>
1 parent e42e7d3 commit 3fcb515

File tree

7 files changed

+312
-5
lines changed

7 files changed

+312
-5
lines changed

include/hsakmt/hsakmt.h

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,23 @@ hsaKmtGetNodeProperties(
115115
HsaNodeProperties* NodeProperties //OUT
116116
);
117117

118+
/**
119+
Retrieves the wall clock frequency of a specific HSA node.
120+
121+
The returned frequency is in hertz (Hz), i.e., KHz * 1000.
122+
When possible, prefer using HsaNodeProperties.WallClockKHz from
123+
hsaKmtGetNodeProperties(), as this function is mainly for compatibility
124+
with clients that expect this API to exist.
125+
Not all implementations are required to support this API.
126+
*/
127+
128+
HSAKMT_STATUS
129+
HSAKMTAPI
130+
hsaKmtGetNodeWallclockFrequency(
131+
HSAuint32 NodeId, // IN
132+
uint64_t* Frequency // OUT (Hz)
133+
);
134+
118135
/**
119136
Retrieves the memory properties of a specific HSA node.
120137
the memory pointer passed as MemoryProperties is sized as
@@ -1267,6 +1284,59 @@ hsaKmtModelEnabled(
12671284
bool* enable // OUT
12681285
);
12691286

1287+
1288+
/**
1289+
* Experimental APIs to abstract DRM calls to thunk
1290+
*/
1291+
HSAKMT_STATUS
1292+
HSAKMTAPI
1293+
hsaKmtHandleImport(
1294+
const HsaExternalHandleDesc* ImportDesc,
1295+
HsaHandleImportResult* ImportResult,
1296+
HsaHandleImportFlags* Flags
1297+
);
1298+
1299+
/**
 * Experimental: virtual-address map operation on a memory object.
 *
 * Handle  memory object to operate on
 * offset  presumably the byte offset inside the object -- confirm
 * size    presumably the length of the range in bytes -- confirm
 * addr    presumably the target virtual address -- confirm
 * flags   access permission requested for the mapping (HsaMemoryMapFlags)
 *
 * Returns an HSAKMT_STATUS code.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMemoryVaMap(
    HsaMemoryObjectHandle   Handle,
    HSAuint64               offset,
    HSAuint64               size,
    HSAuint64               addr,
    HsaMemoryMapFlags       flags
    );
1308+
1309+
/**
 * Experimental: virtual-address unmap operation on a memory object.
 * Takes the same Handle/offset/size/addr parameters as hsaKmtMemoryVaMap(),
 * without the access flags.
 *
 * Handle  memory object to operate on
 * offset  presumably the byte offset inside the object -- confirm
 * size    presumably the length of the range in bytes -- confirm
 * addr    presumably the virtual address to unmap -- confirm
 *
 * Returns an HSAKMT_STATUS code.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMemoryVaUnmap(
    HsaMemoryObjectHandle   Handle,
    HSAuint64               offset,
    HSAuint64               size,
    HSAuint64               addr
    );
1317+
1318+
/**
 * Experimental: CPU mapping of a memory object.
 *
 * Handle       memory object to map
 * out_cpu_ptr  going by its name, receives the CPU pointer -- confirm
 *
 * Returns an HSAKMT_STATUS code.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMemoryCpuMap(
    HsaMemoryObjectHandle   Handle,
    void**                  out_cpu_ptr
    );
1324+
1325+
/**
 * Experimental: frees a memory object handle.
 *
 * Handle  memory object handle to free
 *
 * Returns an HSAKMT_STATUS code.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMemHandleFree(
    HsaMemoryObjectHandle   Handle
    );
1330+
1331+
/**
 * Experimental: queries CPU-side access information for a memory object.
 *
 * DeviceHandle  device handle associated with the request
 * MemoryHandle  memory object being queried
 * fd            [OUT] file descriptor
 * cpu_addr      [OUT] CPU address
 *
 * Returns an HSAKMT_STATUS code.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMemoryGetCpuAddr(
    HsaAMDGPUDeviceHandle   DeviceHandle,
    HsaMemoryObjectHandle   MemoryHandle,
    HSAint32*               fd,         // OUT
    HSAuint64*              cpu_addr    // OUT
    );
1339+
12701340
#ifdef __cplusplus
12711341
} //extern "C"
12721342
#endif

include/hsakmt/hsakmttypes.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,6 +1510,7 @@ typedef union
15101510
struct
15111511
{
15121512
unsigned int requiresVAddr : 1; // Requires virtual address
1513+
unsigned int kmtHandle : 1; // Handle is a KMT handle
15131514
} ui32;
15141515
} HSA_REGISTER_MEM_FLAGS;
15151516

@@ -1520,6 +1521,49 @@ typedef enum _HsaAisFlags {
15201521
HSA_AIS_WRITE= 0x2
15211522
} HsaAisFlags;
15221523

1524+
/*
 * Opaque memory object handle used when translating a DRM buffer object (BO).
 * _HsaMemoryObjectHandle is only forward-declared here, so callers treat the
 * handle as an opaque pointer.
 */
typedef struct _HsaMemoryObjectHandle *HsaMemoryObjectHandle;
1526+
1527+
/*
 * Access permissions for a memory mapping.
 * Read-only and write-only occupy separate bits; read-write is both bits set.
 */
typedef enum _HsaMemoryMapFlags {
    HSA_MEMORY_ACCESS_NONE = 0x0,
    HSA_MEMORY_ACCESS_RO   = 0x1,
    HSA_MEMORY_ACCESS_WO   = 0x2,
    HSA_MEMORY_ACCESS_RW   = HSA_MEMORY_ACCESS_RO | HSA_MEMORY_ACCESS_WO
} HsaMemoryMapFlags;
1534+
1535+
/*
 * Kind of external handle supplied to an import.
 * The numeric values are spelled out explicitly and must stay stable.
 * NOTE(review): the ordering looks like it mirrors libdrm's
 * enum amdgpu_bo_handle_type -- confirm before relying on it.
 */
typedef enum _HsaExternalHandleType {
    HSA_EXTERNAL_HANDLE_GEM_FLINK_NAME  = 0,
    HSA_EXTERNAL_HANDLE_KMS             = 1,
    HSA_EXTERNAL_HANDLE_DMA_BUF         = 2
} HsaExternalHandleType;
1541+
1542+
typedef struct _HsaExternalHandleDesc {
1543+
HsaAMDGPUDeviceHandle device_handle; // GPU device handle (used for import only)
1544+
HSAint32 fd; // dmabuf fd
1545+
HsaExternalHandleType type; // handle type
1546+
HSAuint32 metadata; // Used for IPC handles
1547+
} HsaExternalHandleDesc;
1548+
1549+
typedef struct _HsaHandleImportResult {
1550+
HsaMemoryObjectHandle buf_handle; // Thunk buffer object handle
1551+
HSAuint64 alloc_size; // allocation size for import
1552+
HSAuint32 metadata; // Used for IPC handles
1553+
} HsaHandleImportResult;
1554+
1555+
typedef struct _HsaMemoryExportResult {
1556+
HSAint32 fd; // dmabuf fd
1557+
} HsaMemoryExportResult;
1558+
1559+
/*
 * Bit flags that qualify a handle import (see hsaKmtHandleImport()).
 * The three single-bit flags plus the 29 reserved bits total 32 bits.
 */
typedef struct _HsaHandleImportFlags {
    struct {
        unsigned int IPCHandle      : 1;    // handle type is IPC
        unsigned int SysMem         : 1;    // memory type is system memory
        unsigned int UpdateMetadata : 1;    // update metadata with the IPC handle
        unsigned int Reserved       : 29;   // remaining bits
    } ui32;
} HsaHandleImportFlags;
15231567

15241568
#ifdef __cplusplus
15251569
} //extern "C"

src/libdrm.cpp

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,32 +76,45 @@ HSAKMTAPI int amdgpu_query_gpu_info(amdgpu_device_handle dev,
7676
return 0;
7777
}
7878

79-
HSAKMTAPI int amdgpu_device_get_fd(amdgpu_device_handle dev) {
79+
/*
 * Internal helper behind amdgpu_device_get_fd(): reports the file descriptor
 * stored in dxg_runtime.
 *
 * dev is not consulted; dxg_runtime->dxg_fd is returned for any handle.
 */
int amdgpu_device_get_fd_impl(amdgpu_device_handle dev) {
  (void)dev;  // unused: the descriptor comes from dxg_runtime, not from the handle
  const int fd = dxg_runtime->dxg_fd;
  return fd;
}
8282

83-
HSAKMTAPI int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu) {
83+
/*
 * HSAKMTAPI entry point with a libdrm-style signature.
 * Pure forwarder: all work is done by amdgpu_device_get_fd_impl().
 */
HSAKMTAPI int amdgpu_device_get_fd(amdgpu_device_handle dev) {
  const int fd = amdgpu_device_get_fd_impl(dev);
  return fd;
}
86+
87+
/*
 * Internal helper behind amdgpu_bo_cpu_map().
 *
 * bo is reinterpreted as a wsl::thunk::GpuMemory object. When that object
 * reports IsSysMemFd(), its CpuAddress() is stored in *cpu.
 *
 * Always returns 0.
 */
int amdgpu_bo_cpu_map_impl(amdgpu_bo_handle bo, void **cpu) {
  auto *memory = reinterpret_cast<wsl::thunk::GpuMemory *>(bo);

  if (!memory->IsSysMemFd()) {
    // NOTE(review): *cpu is left untouched on this path while success is still
    // reported -- confirm callers pre-initialize *cpu or never hit this case.
    return 0;
  }

  *cpu = memory->CpuAddress();
  return 0;
}
8993

90-
HSAKMTAPI int amdgpu_bo_free(amdgpu_bo_handle buf_handle) {
94+
/*
 * HSAKMTAPI entry point with a libdrm-style signature.
 * Pure forwarder: all work is done by amdgpu_bo_cpu_map_impl().
 */
HSAKMTAPI int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu) {
  const int status = amdgpu_bo_cpu_map_impl(bo, cpu);
  return status;
}
97+
98+
/*
 * Internal helper behind amdgpu_bo_free().
 *
 * buf_handle is reinterpreted as a wsl::thunk::GpuMemory object, which is then
 * released through hsaKmtFreeMemory() using the object's Size(). The address
 * handed to hsaKmtFreeMemory() is GpuAddress() when IsVaAllocated() is true,
 * otherwise HandleApeAddress().
 *
 * Returns 0 when hsaKmtFreeMemory() reports HSAKMT_STATUS_SUCCESS, -1 otherwise.
 */
int amdgpu_bo_free_impl(amdgpu_bo_handle buf_handle) {
  auto *memory = reinterpret_cast<wsl::thunk::GpuMemory *>(buf_handle);

  // C-style casts are kept as in the original: the accessor return types are
  // not visible here, so a named cast could change which conversion is chosen.
  void *address = nullptr;
  if (memory->IsVaAllocated()) {
    address = (void*)memory->GpuAddress();
  } else {
    address = (void*)memory->HandleApeAddress();
  }

  if (hsaKmtFreeMemory(address, memory->Size()) != HSAKMT_STATUS_SUCCESS) {
    return -1;
  }
  return 0;
}
96104

105+
/*
 * HSAKMTAPI entry point with a libdrm-style signature.
 * Pure forwarder: all work is done by amdgpu_bo_free_impl().
 */
HSAKMTAPI int amdgpu_bo_free(amdgpu_bo_handle buf_handle) {
  const int status = amdgpu_bo_free_impl(buf_handle);
  return status;
}
108+
109+
97110
/*
 * Stub export: bo and type are ignored, *shared_handle is set to 0 and
 * 0 is returned unconditionally.
 */
HSAKMTAPI int amdgpu_bo_export(amdgpu_bo_handle bo,
                               enum amdgpu_bo_handle_type type,
                               uint32_t *shared_handle) {
  (void)bo;    // not consulted by this stub
  (void)type;  // not consulted by this stub

  *shared_handle = 0;
  return 0;
}
103116

104-
HSAKMTAPI int amdgpu_bo_import(amdgpu_device_handle dev,
117+
int amdgpu_bo_import_impl(amdgpu_device_handle dev,
105118
enum amdgpu_bo_handle_type type,
106119
uint32_t shared_handle,
107120
struct amdgpu_bo_import_result *output) {
@@ -121,13 +134,21 @@ HSAKMTAPI int amdgpu_bo_import(amdgpu_device_handle dev,
121134
if (ret == HSAKMT_STATUS_SUCCESS) {
122135
//use GpuMemory object handle as drm buf handle
123136
output->buf_handle = reinterpret_cast<amdgpu_bo_handle>(mem_handle);
137+
output->alloc_size = wsl::thunk::GpuMemory::Convert(mem_handle)->ClientSize();
124138
return 0;
125139
} else {
126140
return -1;
127141
}
128142
}
129143

130-
HSAKMTAPI int amdgpu_bo_va_op(amdgpu_bo_handle bo,
144+
/*
 * HSAKMTAPI entry point with a libdrm-style signature.
 * Pure forwarder: every argument is passed unchanged to amdgpu_bo_import_impl()
 * and its return value is handed back to the caller.
 */
HSAKMTAPI int amdgpu_bo_import(amdgpu_device_handle dev,
                               enum amdgpu_bo_handle_type type,
                               uint32_t shared_handle,
                               struct amdgpu_bo_import_result *output) {
  const int status = amdgpu_bo_import_impl(dev, type, shared_handle, output);
  return status;
}
150+
151+
int amdgpu_bo_va_op_impl(amdgpu_bo_handle bo,
131152
uint64_t offset,
132153
uint64_t size,
133154
uint64_t addr,
@@ -168,6 +189,15 @@ HSAKMTAPI int amdgpu_bo_va_op(amdgpu_bo_handle bo,
168189
return 0;
169190
}
170191

192+
/*
 * HSAKMTAPI entry point with a libdrm-style signature.
 * Pure forwarder: every argument is passed unchanged to amdgpu_bo_va_op_impl()
 * and its return value is handed back to the caller.
 */
HSAKMTAPI int amdgpu_bo_va_op(amdgpu_bo_handle bo,
                              uint64_t offset,
                              uint64_t size,
                              uint64_t addr,
                              uint64_t flags,
                              uint32_t ops) {
  const int status = amdgpu_bo_va_op_impl(bo, offset, size, addr, flags, ops);
  return status;
}
200+
171201
/*
 * Stub query: neither bo nor info is touched, and 0 is returned unconditionally.
 * NOTE(review): *info is not filled in -- callers must not rely on its contents.
 */
HSAKMTAPI int amdgpu_bo_query_info(amdgpu_bo_handle bo, struct amdgpu_bo_info* info) {
  (void)bo;    // not consulted by this stub
  (void)info;  // not written by this stub
  return 0;
}

src/librocdxg.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,4 +286,23 @@ HSAKMT_STATUS import_dmabuf_fd(int DMABufFd,
286286
wsl::thunk::GpuMemoryHandle *GpuMemHandle);
287287

288288
bool hsakmt_hsa_loader_init();
289+
290+
int amdgpu_device_get_fd_impl(amdgpu_device_handle dev);
291+
292+
int amdgpu_bo_cpu_map_impl(amdgpu_bo_handle bo, void **cpu);
293+
294+
int amdgpu_bo_free_impl(amdgpu_bo_handle buf_handle);
295+
296+
int amdgpu_bo_import_impl(amdgpu_device_handle dev,
297+
enum amdgpu_bo_handle_type type,
298+
uint32_t shared_handle,
299+
struct amdgpu_bo_import_result *output);
300+
301+
int amdgpu_bo_va_op_impl(amdgpu_bo_handle bo,
302+
uint64_t offset,
303+
uint64_t size,
304+
uint64_t addr,
305+
uint64_t flags,
306+
uint32_t ops);
307+
289308
#endif

src/librocdxg.ver

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ hsaKmtGetVersion;
77
hsaKmtAcquireSystemProperties;
88
hsaKmtReleaseSystemProperties;
99
hsaKmtGetNodeProperties;
10+
hsaKmtGetNodeWallclockFrequency;
1011
hsaKmtGetNodeMemoryProperties;
1112
hsaKmtGetNodeCacheProperties;
1213
hsaKmtGetNodeIoLinkProperties;
@@ -95,6 +96,12 @@ hsaKmtPcSamplingSupport;
9596
hsaKmtAisReadWriteFile;
9697
hsaKmtModelEnabled;
9798
hsaKmtQueueRingDoorbell;
99+
hsaKmtHandleImport;
100+
hsaKmtMemoryVaMap;
101+
hsaKmtMemoryVaUnmap;
102+
hsaKmtMemHandleFree;
103+
hsaKmtMemoryGetCpuAddr;
104+
hsaKmtMemoryCpuMap;
98105
amdgpu_device_initialize;
99106
amdgpu_device_deinitialize;
100107
amdgpu_query_gpu_info;

0 commit comments

Comments
 (0)