Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions src/mono/wasm/runtime/jiterpreter-trace-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3245,13 +3245,65 @@ function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
builder.appendU8(WasmOpcode.i32_eqz);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
case SimdIntrinsic3.V128_I2_SHUFFLE:
case SimdIntrinsic3.V128_I4_SHUFFLE:
// FIXME: I8
// FIXME: Many uses of these shuffles have constant shuffle indices,
// which we could convert into bytes at compile time for vastly improved performance
return emit_shuffle(builder, ip, index === SimdIntrinsic3.V128_I2_SHUFFLE ? 8 : 4);
default:
return false;
}

return false;
}

/**
 * Implements i16 and i32 element shuffles on top of wasm's only shuffle opcode
 * (i8x16.swizzle, which selects individual bytes) by expanding the per-element
 * shuffle indices into per-byte indices at runtime.
 *
 * Emitted sequence, in order:
 *  1. load the source vector and the index vector from the locals area
 *  2. narrow the indices down to one byte per element (low lanes, rest zero)
 *  3. replicate each index byte across every byte position of its element
 *  4. scale each replicated index from element units to byte units (shift left)
 *  5. OR in the byte's offset within its element (0 .. elementSize - 1)
 *  6. swizzle the source vector by the resulting byte indices and store the result
 *
 * NOTE(review): the scaling in step 4 is an 8-bit shift, so very large (or negative,
 * after the unsigned-saturating narrow) element indices may end up selecting a real
 * lane instead of being treated as out of range - confirm against the shuffle
 * semantics expected by the caller.
 *
 * @param builder      the builder that receives the generated opcodes
 * @param ip           pointer to the interpreter opcode; argument 2 is read as the
 *                     source vector's local offset and argument 3 as the indices' offset
 * @param elementCount number of elements per 128-bit vector: 8 (i16) or 4 (i32)
 * @returns always true once the sequence has been emitted
 */
function emit_shuffle(builder: WasmBuilder, ip: MintOpcodePtr, elementCount: number): boolean {
    const elementSize = 16 / elementCount;
    mono_assert((elementSize === 2) || (elementSize === 4), "Unsupported shuffle element size");

    const isI32 = elementCount === 4;
    // log2(elementSize): shifting by this converts an element index into a byte index
    const shiftBits = isI32 ? 2 : 1;

    // Pushes an all-zero v128; the narrow opcodes consume two vectors, and the
    // second one only supplies the (unused) upper lanes of the result.
    const pushZeroVector = (): void => {
        builder.i52_const(0);
        builder.appendSimd(WasmSimdOpcode.i64x2_splat);
    };

    // Emits a v128.const whose immediate bytes are produced element by element,
    // byte by byte, from the supplied selector.
    const pushBytePattern = (pick: (element: number, byteInElement: number) => number): void => {
        builder.appendSimd(WasmSimdOpcode.v128_const);
        for (let element = 0; element < elementCount; element++) {
            for (let byteInElement = 0; byteInElement < elementSize; byteInElement++)
                builder.appendU8(pick(element, byteInElement));
        }
    };

    builder.local("pLocals");

    // Operand 1: the vector being shuffled
    const vectorOffset = getArgU16(ip, 2);
    append_ldloc(builder, vectorOffset, WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);

    // Operand 2: the shuffle indices, expressed in elements (not bytes)
    const indicesOffset = getArgU16(ip, 3);
    append_ldloc(builder, indicesOffset, WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);

    // There is no single i32 -> i8 narrowing opcode, so i32 indices take an extra hop:
    //  i32{lane0 ... lane3} -> i16{lane0 ... lane3, 0 ...}
    if (isI32) {
        pushZeroVector();
        builder.appendSimd(WasmSimdOpcode.i16x8_narrow_i32x4_u);
    }

    // i16{lane0 ... lane7} -> i8{lane0 ... lane7, 0 ...}
    pushZeroVector();
    builder.appendSimd(WasmSimdOpcode.i8x16_narrow_i16x8_u);

    // Spread each narrowed index over all bytes of its element, e.g. for i16:
    //  i8{0, 1, 2, 3 ...} -> i8{0, 0, 1, 1, 2, 2, 3, 3 ...}
    pushBytePattern((element) => element);
    builder.appendSimd(WasmSimdOpcode.i8x16_swizzle);

    // Scale element indices to byte indices (x2 for i16, x4 for i32)
    builder.i32_const(shiftBits);
    builder.appendSimd(WasmSimdOpcode.i8x16_shl);

    // Add each byte's position inside its element (0, 1 for i16; 0, 1, 2, 3 for i32).
    // A bitwise OR is enough because the shift above cleared the low bits it fills in.
    pushBytePattern((_element, byteInElement) => byteInElement);
    builder.appendSimd(WasmSimdOpcode.v128_or);

    // The stack now holds the source vector and the byte indices: do the real shuffle
    builder.appendSimd(WasmSimdOpcode.i8x16_swizzle);
    append_simd_store(builder, ip);
    return true;
}

function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4): boolean {
const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(3, index);
if (simple) {
Expand Down