Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit f6b49f2

Browse files
authored
Merge pull request #1615 from davidgfnet/memx64
Implement fast memory ops on x64
2 parents a6b773a + f4c90e7 commit f6b49f2

File tree

7 files changed

+389
-240
lines changed

7 files changed

+389
-240
lines changed

core/hw/sh4/dyna/blockmanager.cpp

Lines changed: 20 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -34,53 +34,7 @@ bm_List all_blocks;
3434
bm_List del_blocks;
3535
#include <set>
3636

37-
struct BlockMapCMP
38-
{
39-
static bool is_code(RuntimeBlockInfo* blk)
40-
{
41-
if ((unat)((u8*)blk-CodeCache)<CODE_SIZE)
42-
return true;
43-
else
44-
return false;
45-
}
46-
47-
static unat get_blkstart(RuntimeBlockInfo* blk)
48-
{
49-
if (is_code(blk))
50-
return (unat)blk;
51-
else
52-
return (unat)blk->code;
53-
}
54-
55-
static unat get_blkend(RuntimeBlockInfo* blk)
56-
{
57-
if (is_code(blk))
58-
return (unat)blk;
59-
else
60-
return (unat)blk->code+blk->host_code_size-1;
61-
}
62-
63-
//return true if blkl > blkr
64-
bool operator()(RuntimeBlockInfo* blkl, RuntimeBlockInfo* blkr) const
65-
{
66-
if (!is_code(blkl) && !is_code(blkr))
67-
return (unat)blkl->code<(unat)blkr->code;
68-
69-
unat blkr_start=get_blkstart(blkr),blkl_end=get_blkend(blkl);
70-
71-
if (blkl_end<blkr_start)
72-
{
73-
return true;
74-
}
75-
else
76-
{
77-
return false;
78-
}
79-
}
80-
};
81-
82-
typedef std::set<RuntimeBlockInfo*,BlockMapCMP> blkmap_t;
83-
blkmap_t blkmap;
37+
std::map<void*, RuntimeBlockInfo*> blkmap;
8438
u32 bm_gc_luc,bm_gcf_luc;
8539

8640

@@ -114,18 +68,20 @@ RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr)
11468
// This takes a RX address and returns the info block ptr (RW space)
11569
RuntimeBlockInfo* bm_GetBlock(void* dynarec_code)
11670
{
71+
if (blkmap.empty())
72+
return 0;
73+
11774
void *dynarecrw = CC_RX2RW(dynarec_code);
118-
blkmap_t::iterator iter = blkmap.find((RuntimeBlockInfo*)dynarecrw);
119-
if (iter != blkmap.end())
120-
{
121-
verify((*iter)->contains_code((u8*)dynarecrw));
122-
return *iter;
123-
}
124-
else
125-
{
126-
printf("bm_GetBlock(%p) failed ..\n", dynarec_code);
75+
// Returns the first block whose code addr is greater than dynarec_code (or end)
76+
auto iter = blkmap.upper_bound(dynarecrw);
77+
iter--; // Need to go back to find the potential candidate
78+
79+
// However it might be out of bounds, check for that
80+
if ((char*)iter->second->code + iter->second->host_code_size < dynarec_code)
12781
return 0;
128-
}
82+
83+
verify(iter->second->contains_code((u8*)dynarecrw));
84+
return iter->second;
12985
}
13086

13187
// Takes RX pointer and returns a RW pointer
@@ -143,13 +99,13 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code)
14399

144100
void bm_AddBlock(RuntimeBlockInfo* blk)
145101
{
146-
all_blocks.push_back(blk);
147-
if (blkmap.find(blk)!=blkmap.end())
148-
{
149-
printf("DUP: %08X %p %08X %p\n", (*blkmap.find(blk))->addr,(*blkmap.find(blk))->code,blk->addr,blk->code);
102+
auto iter = blkmap.find((void*)blk->code);
103+
if (iter != blkmap.end()) {
104+
printf("DUP: %08X %p %08X %p\n", iter->second->addr, iter->second->code, blk->addr, blk->code);
150105
verify(false);
151106
}
152-
blkmap.insert(blk);
107+
blkmap[(void*)blk->code] = blk;
108+
all_blocks.push_back(blk);
153109

154110
verify((void*)bm_GetCode(blk->addr)==(void*)ngen_FailedToFindBlock);
155111
FPCA(blk->addr) = (DynarecCodeEntryPtr)CC_RW2RX(blk->code);
@@ -304,6 +260,7 @@ void bm_Periodical_1s()
304260
#endif
305261
}
306262

263+
#if 0
307264
void constprop(RuntimeBlockInfo* blk);
308265
void bm_Rebuild()
309266
{
@@ -343,6 +300,7 @@ void bm_Rebuild()
343300

344301
rebuild_counter=30;
345302
}
303+
#endif
346304

347305
void bm_vmem_pagefill(void** ptr, u32 size_bytes)
348306
{

core/hw/sh4/dyna/blockmanager.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,14 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core
7272

7373
u32 memops;
7474
u32 linkedmemops;
75-
std::map<void*, u32> memory_accesses; // key is host pc when access is made, value is opcode id
75+
// key is a host pc for a load/store in a jit block
76+
// value contains information for mem-op rewriting purposes.
77+
struct memop_info {
78+
uint16_t opid;
79+
uint8_t rewrite_offset;
80+
uint8_t emitted_bytes;
81+
};
82+
std::map<void*, memop_info> memory_accesses;
7683
};
7784

7885
struct CachedBlockInfo: RuntimeBlockInfo_Core

core/linux/common.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,10 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
8282
context_to_segfault(&ctx, segfault_ctx);
8383
}
8484
#elif HOST_CPU == CPU_X64
85-
//x64 has no rewrite support
85+
else if (dyna_cde && ngen_Rewrite((unat&)ctx.pc, 0, 0))
86+
{
87+
context_to_segfault(&ctx, segfault_ctx);
88+
}
8689
#elif HOST_CPU == CPU_ARM64
8790
else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0))
8891
{

core/rec-ARM64/rec_arm64.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,6 @@ class Arm64Assembler : public MacroAssembler
229229
typedef void (MacroAssembler::*Arm64Op_RROF)(const Register&, const Register&, const Operand&, enum FlagsUpdate);
230230

231231
public:
232-
Arm64Assembler() : Arm64Assembler(emit_GetCCPtr())
233-
{
234-
}
235232
Arm64Assembler(void *buffer) : MacroAssembler((u8 *)buffer, 64 * 1024), regalloc(this)
236233
{
237234
call_regs.push_back(&w0);
@@ -1148,15 +1145,13 @@ class Arm64Assembler : public MacroAssembler
11481145
if (!_nvmem_enabled())
11491146
return false;
11501147

1151-
Instruction *start_instruction = GetCursorAddress<Instruction *>();
1152-
11531148
// WARNING: the rewrite code relies on having two ops before the memory access
11541149
// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
1150+
Instruction *start_instruction = GetCursorAddress<Instruction *>();
11551151
Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
11561152
Bfc(w1, 29, 3); // addr &= ~0xE0000000
1157-
1158-
//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
1159-
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
1153+
Instruction *mem_instruction = GetCursorAddress<Instruction *>();
1154+
unsigned prologue_size = (char*)mem_instruction - (char*)start_instruction;
11601155

11611156
u32 size = op.flags & 0x7f;
11621157
switch(size)
@@ -1192,7 +1187,10 @@ class Arm64Assembler : public MacroAssembler
11921187
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
11931188
#endif
11941189
}
1190+
unsigned code_size = (unsigned)(GetCursorAddress<char*>() - (char*)start_instruction);
11951191
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
1192+
verify(code_size < 256 && prologue_size < 256);
1193+
this->block->memory_accesses[mem_instruction] = { (uint16_t)opid, (uint8_t)prologue_size, (uint8_t)code_size };
11961194

11971195
return true;
11981196
}
@@ -1228,15 +1226,13 @@ class Arm64Assembler : public MacroAssembler
12281226
if (!_nvmem_enabled())
12291227
return false;
12301228

1231-
Instruction *start_instruction = GetCursorAddress<Instruction *>();
1232-
12331229
// WARNING: the rewrite code relies on having two ops before the memory access
12341230
// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
1231+
Instruction *start_instruction = GetCursorAddress<Instruction *>();
12351232
Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
12361233
Bfc(w7, 29, 3); // addr &= ~0xE0000000
1237-
1238-
//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
1239-
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
1234+
Instruction *mem_instruction = GetCursorAddress<Instruction *>();
1235+
unsigned prologue_size = (char*)mem_instruction - (char*)start_instruction;
12401236

12411237
u32 size = op.flags & 0x7f;
12421238
switch(size)
@@ -1257,7 +1253,11 @@ class Arm64Assembler : public MacroAssembler
12571253
Str(x1, MemOperand(x28, x7));
12581254
break;
12591255
}
1256+
1257+
unsigned code_size = (unsigned)(GetCursorAddress<char*>() - (char*)start_instruction);
12601258
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
1259+
verify(code_size < 256 && prologue_size < 256);
1260+
this->block->memory_accesses[mem_instruction] = { (uint16_t)opid, (uint8_t)prologue_size, (uint8_t)code_size };
12611261

12621262
return true;
12631263
}
@@ -1411,7 +1411,7 @@ void ngen_Compile(RuntimeBlockInfo* block, SmcCheckEnum smc_checks, bool reset,
14111411
{
14121412
verify(emit_FreeSpace() >= 16 * 1024);
14131413

1414-
compiler = new Arm64Assembler();
1414+
compiler = new Arm64Assembler(emit_GetCCPtr());
14151415

14161416
compiler->ngen_Compile(block, smc_checks, reset, staging, optimise);
14171417

@@ -1456,7 +1456,7 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
14561456
printf("ngen_Rewrite: memory access at %p not found (%lu entries)\n", code_ptr, block->memory_accesses.size());
14571457
return false;
14581458
}
1459-
u32 opid = it->second;
1459+
u32 opid = it->second.opid;
14601460
verify(opid < block->oplist.size());
14611461
const shil_opcode& op = block->oplist[opid];
14621462
Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2); // Skip the 2 preceding ops (bic, add)

0 commit comments

Comments
 (0)