Skip to content

Commit 4cb21be

Browse files
bonzini authored and sean-jc committed
KVM: x86: Add AVX support to the emulator's register fetch and writeback
Prepare struct operand for hosting AVX registers. Remove the existing, incomplete code that placed the Avx flag in the operand alignment field, and repurpose the name for a separate bit that indicates: - after decode, whether an instruction supports the VEX prefix; - before writeback, that the instruction did have the VEX prefix and therefore 1) it can have op_bytes == 32; 2) it should clear high bytes of XMM registers. Right now the bit will never be set and the patch has no intended functional change. However, this is actually more vexing than the decoder changes themselves, and therefore worth separating. Co-developed-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Link: https://patch.msgid.link/20251114003633.60689-8-pbonzini@redhat.com [sean: guard ymm[8-15] accesses with #ifdef CONFIG_X86_64] Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent f106797 commit 4cb21be

File tree

3 files changed

+114
-17
lines changed

3 files changed

+114
-17
lines changed

arch/x86/kvm/emulate.c

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@
141141
#define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */
142142
#define PageTable (1 << 29) /* instruction used to write page table */
143143
#define NotImpl (1 << 30) /* instruction is not implemented */
144+
#define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */
144145
#define Src2Shift (32) /* Source 2 operand type at bits 32-36 */
145146
#define Src2None (OpNone << Src2Shift)
146147
#define Src2Mem (OpMem << Src2Shift)
@@ -157,12 +158,11 @@
157158
#define Src2Mask (OpMask << Src2Shift)
158159
/* free: 37-39 */
159160
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
160-
#define AlignMask ((u64)7 << 41) /* Memory alignment requirement at bits 41-43 */
161+
#define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */
161162
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
162163
#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
163-
#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
164-
#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
165-
/* free: 44 */
164+
#define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
165+
/* free: 43-44 */
166166
#define NoWrite ((u64)1 << 45) /* No writeback */
167167
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
168168
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
@@ -618,7 +618,6 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
618618

619619
switch (alignment) {
620620
case Unaligned:
621-
case Avx:
622621
return 1;
623622
case Aligned16:
624623
return 16;
@@ -1075,7 +1074,14 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
10751074
static void __decode_register_operand(struct x86_emulate_ctxt *ctxt,
10761075
struct operand *op, int reg)
10771076
{
1078-
if (ctxt->d & Sse) {
1077+
if ((ctxt->d & Avx) && ctxt->op_bytes == 32) {
1078+
op->type = OP_YMM;
1079+
op->bytes = 32;
1080+
op->addr.xmm = reg;
1081+
kvm_read_avx_reg(reg, &op->vec_val2);
1082+
return;
1083+
}
1084+
if (ctxt->d & (Avx|Sse)) {
10791085
op->type = OP_XMM;
10801086
op->bytes = 16;
10811087
op->addr.xmm = reg;
@@ -1767,7 +1773,15 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
17671773
op->data,
17681774
op->bytes * op->count);
17691775
case OP_XMM:
1770-
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1776+
if (!(ctxt->d & Avx)) {
1777+
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1778+
break;
1779+
}
1780+
/* full YMM write but with high bytes cleared */
1781+
memset(op->valptr + 16, 0, 16);
1782+
fallthrough;
1783+
case OP_YMM:
1784+
kvm_write_avx_reg(op->addr.xmm, &op->vec_val2);
17711785
break;
17721786
case OP_MM:
17731787
kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
@@ -4861,9 +4875,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
48614875
ctxt->op_bytes = 8; /* REX.W */
48624876

48634877
/* Opcode byte(s). */
4864-
opcode = opcode_table[ctxt->b];
4865-
/* Two-byte opcode? */
48664878
if (ctxt->b == 0x0f) {
4879+
/* Two- or three-byte opcode */
48674880
ctxt->opcode_len = 2;
48684881
ctxt->b = insn_fetch(u8, ctxt);
48694882
opcode = twobyte_table[ctxt->b];
@@ -4874,6 +4887,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
48744887
ctxt->b = insn_fetch(u8, ctxt);
48754888
opcode = opcode_map_0f_38[ctxt->b];
48764889
}
4890+
} else {
4891+
/* Opcode byte(s). */
4892+
opcode = opcode_table[ctxt->b];
48774893
}
48784894
ctxt->d = opcode.flags;
48794895

@@ -5022,7 +5038,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
50225038
ctxt->op_bytes = 4;
50235039

50245040
if (ctxt->d & Sse)
5025-
ctxt->op_bytes = 16;
5041+
ctxt->op_bytes = 16, ctxt->d &= ~Avx;
50265042
else if (ctxt->d & Mmx)
50275043
ctxt->op_bytes = 8;
50285044
}
@@ -5154,20 +5170,34 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
51545170
}
51555171

51565172
if (unlikely(ctxt->d &
5157-
(No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5173+
(No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
51585174
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
51595175
(ctxt->d & Undefined)) {
51605176
rc = emulate_ud(ctxt);
51615177
goto done;
51625178
}
51635179

5164-
if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5165-
|| ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5180+
if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) {
51665181
rc = emulate_ud(ctxt);
51675182
goto done;
51685183
}
51695184

5170-
if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5185+
if (ctxt->d & Avx) {
5186+
u64 xcr = 0;
5187+
if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)
5188+
|| ops->get_xcr(ctxt, 0, &xcr)
5189+
|| !(xcr & XFEATURE_MASK_YMM)) {
5190+
rc = emulate_ud(ctxt);
5191+
goto done;
5192+
}
5193+
} else if (ctxt->d & Sse) {
5194+
if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) {
5195+
rc = emulate_ud(ctxt);
5196+
goto done;
5197+
}
5198+
}
5199+
5200+
if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
51715201
rc = emulate_nm(ctxt);
51725202
goto done;
51735203
}

arch/x86/kvm/fpu.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,58 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
1515
#define sse128_l3(x) ({ __sse128_u t; t.vec = x; t.as_u32[3]; })
1616
#define sse128(lo, hi) ({ __sse128_u t; t.as_u64[0] = lo; t.as_u64[1] = hi; t.vec; })
1717

18+
typedef u32 __attribute__((vector_size(32))) avx256_t;
19+
20+
/*
 * Read the full 256-bit YMM register @reg into @*data.
 *
 * The register name must appear literally in the instruction text, so the
 * dispatch is an exhaustive switch over every architectural register.
 * ymm8-ymm15 exist only in 64-bit mode, hence the CONFIG_X86_64 guard;
 * any out-of-range @reg is a caller bug (BUG()).
 *
 * vmovdqa requires a 32-byte-aligned memory operand, so @data must be
 * 32-byte aligned (struct operand's value union is declared __aligned(32)
 * for exactly this reason).
 *
 * Callers must have the relevant FPU state loaded; use kvm_read_avx_reg()
 * for the variant that brackets the access with kvm_fpu_get()/kvm_fpu_put().
 */
static inline void _kvm_read_avx_reg(int reg, avx256_t *data)
{
	switch (reg) {
	case 0: asm("vmovdqa %%ymm0, %0" : "=m"(*data)); break;
	case 1: asm("vmovdqa %%ymm1, %0" : "=m"(*data)); break;
	case 2: asm("vmovdqa %%ymm2, %0" : "=m"(*data)); break;
	case 3: asm("vmovdqa %%ymm3, %0" : "=m"(*data)); break;
	case 4: asm("vmovdqa %%ymm4, %0" : "=m"(*data)); break;
	case 5: asm("vmovdqa %%ymm5, %0" : "=m"(*data)); break;
	case 6: asm("vmovdqa %%ymm6, %0" : "=m"(*data)); break;
	case 7: asm("vmovdqa %%ymm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("vmovdqa %%ymm8, %0" : "=m"(*data)); break;
	case 9: asm("vmovdqa %%ymm9, %0" : "=m"(*data)); break;
	case 10: asm("vmovdqa %%ymm10, %0" : "=m"(*data)); break;
	case 11: asm("vmovdqa %%ymm11, %0" : "=m"(*data)); break;
	case 12: asm("vmovdqa %%ymm12, %0" : "=m"(*data)); break;
	case 13: asm("vmovdqa %%ymm13, %0" : "=m"(*data)); break;
	case 14: asm("vmovdqa %%ymm14, %0" : "=m"(*data)); break;
	case 15: asm("vmovdqa %%ymm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
}
44+
45+
/*
 * Write the full 256-bit YMM register @reg from @*data.
 *
 * Mirror of _kvm_read_avx_reg(): the register name must be a literal in the
 * instruction, so dispatch via an exhaustive switch.  ymm8-ymm15 are only
 * addressable in 64-bit builds (CONFIG_X86_64 guard); an out-of-range @reg
 * is a caller bug (BUG()).
 *
 * vmovdqa requires a 32-byte-aligned memory operand, so @data must be
 * 32-byte aligned.
 *
 * Callers must have the relevant FPU state loaded; use kvm_write_avx_reg()
 * for the variant that brackets the access with kvm_fpu_get()/kvm_fpu_put().
 */
static inline void _kvm_write_avx_reg(int reg, const avx256_t *data)
{
	switch (reg) {
	case 0: asm("vmovdqa %0, %%ymm0" : : "m"(*data)); break;
	case 1: asm("vmovdqa %0, %%ymm1" : : "m"(*data)); break;
	case 2: asm("vmovdqa %0, %%ymm2" : : "m"(*data)); break;
	case 3: asm("vmovdqa %0, %%ymm3" : : "m"(*data)); break;
	case 4: asm("vmovdqa %0, %%ymm4" : : "m"(*data)); break;
	case 5: asm("vmovdqa %0, %%ymm5" : : "m"(*data)); break;
	case 6: asm("vmovdqa %0, %%ymm6" : : "m"(*data)); break;
	case 7: asm("vmovdqa %0, %%ymm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("vmovdqa %0, %%ymm8" : : "m"(*data)); break;
	case 9: asm("vmovdqa %0, %%ymm9" : : "m"(*data)); break;
	case 10: asm("vmovdqa %0, %%ymm10" : : "m"(*data)); break;
	case 11: asm("vmovdqa %0, %%ymm11" : : "m"(*data)); break;
	case 12: asm("vmovdqa %0, %%ymm12" : : "m"(*data)); break;
	case 13: asm("vmovdqa %0, %%ymm13" : : "m"(*data)); break;
	case 14: asm("vmovdqa %0, %%ymm14" : : "m"(*data)); break;
	case 15: asm("vmovdqa %0, %%ymm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
}
69+
1870
static inline void _kvm_read_sse_reg(int reg, sse128_t *data)
1971
{
2072
switch (reg) {
@@ -109,6 +161,20 @@ static inline void kvm_fpu_put(void)
109161
fpregs_unlock();
110162
}
111163

164+
/*
 * Read YMM register @reg into @*data (must be 32-byte aligned).
 *
 * kvm_fpu_get()/kvm_fpu_put() bracket the raw access so the correct FPU
 * context is resident while the register is read, matching the pattern
 * used by kvm_read_sse_reg() and friends below.
 */
static inline void kvm_read_avx_reg(int reg, avx256_t *data)
{
	kvm_fpu_get();
	_kvm_read_avx_reg(reg, data);
	kvm_fpu_put();
}
170+
171+
/*
 * Write YMM register @reg from @*data (must be 32-byte aligned).
 *
 * kvm_fpu_get()/kvm_fpu_put() bracket the raw access so the correct FPU
 * context is resident while the register is written, matching the pattern
 * used by kvm_write_sse_reg() and friends below.
 */
static inline void kvm_write_avx_reg(int reg, const avx256_t *data)
{
	kvm_fpu_get();
	_kvm_write_avx_reg(reg, data);
	kvm_fpu_put();
}
177+
112178
static inline void kvm_read_sse_reg(int reg, sse128_t *data)
113179
{
114180
kvm_fpu_get();

arch/x86/kvm/kvm_emulate.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ struct x86_emulate_ops {
249249

250250
/* Type, address-of, and value of an instruction's operand. */
251251
struct operand {
252-
enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
252+
enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_YMM, OP_MM, OP_NONE } type;
253253
unsigned int bytes;
254254
unsigned int count;
255255
union {
@@ -268,11 +268,12 @@ struct operand {
268268
union {
269269
unsigned long val;
270270
u64 val64;
271-
char valptr[sizeof(sse128_t)];
271+
char valptr[sizeof(avx256_t)];
272272
sse128_t vec_val;
273+
avx256_t vec_val2;
273274
u64 mm_val;
274275
void *data;
275-
};
276+
} __aligned(32);
276277
};
277278

278279
#define X86_MAX_INSTRUCTION_LENGTH 15

0 commit comments

Comments
 (0)