Skip to content

Commit 687603f

Browse files
committed
Merge tag 'kvm-x86-vmx-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.20 - Fix an SGX bug where KVM would incorrectly try to handle EPCM #PFs by always reflecting EPCM #PFs back into the guest. KVM doesn't shadow EPCM entries, and so EPCM violations cannot be due to KVM interference, and can't be resolved by KVM. - Fix a bug where KVM would register its posted interrupt wakeup handler even if loading kvm-intel.ko ultimately failed. - Disallow access to vmcs12 fields that aren't fully supported, mostly to avoid weirdness and complexity for FRED and other features, where KVM wants to enable VMCS shadowing for fields that conditionally exist. - Print out the "bad" offsets and values if kvm-intel.ko refuses to load (or refuses to online a CPU) due to a VMCS config mismatch.
2 parents a0c468e + c0d6b8b commit 687603f

File tree

7 files changed

+171
-41
lines changed

7 files changed

+171
-41
lines changed

arch/x86/kvm/vmx/hyperv_evmcs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "hyperv_evmcs.h"
88

99
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
10-
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
10+
#define EVMCS1_FIELD(number, name, clean_field)[ENC_TO_VMCS12_IDX(number)] = \
1111
{EVMCS1_OFFSET(name), clean_field}
1212

1313
const struct evmcs_field vmcs_field_to_evmcs_1[] = {

arch/x86/kvm/vmx/hyperv_evmcs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ static __always_inline int evmcs_field_offset(unsigned long field,
130130
u16 *clean_field)
131131
{
132132
const struct evmcs_field *evmcs_field;
133-
unsigned int index = ROL16(field, 6);
133+
unsigned int index = ENC_TO_VMCS12_IDX(field);
134134

135135
if (unlikely(index >= nr_evmcs_1_fields))
136136
return -ENOENT;

arch/x86/kvm/vmx/nested.c

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ static void init_vmcs_shadow_fields(void)
8686
pr_err("Missing field from shadow_read_only_field %x\n",
8787
field + 1);
8888

89+
if (get_vmcs12_field_offset(field) < 0)
90+
continue;
91+
8992
clear_bit(field, vmx_vmread_bitmap);
9093
if (field & 1)
9194
#ifdef CONFIG_X86_64
@@ -111,10 +114,14 @@ static void init_vmcs_shadow_fields(void)
111114
field <= GUEST_TR_AR_BYTES,
112115
"Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
113116

117+
if (get_vmcs12_field_offset(field) < 0)
118+
continue;
119+
114120
/*
115-
* PML and the preemption timer can be emulated, but the
116-
* processor cannot vmwrite to fields that don't exist
117-
* on bare metal.
121+
* KVM emulates PML and the VMX preemption timer irrespective
122+
* of hardware support, but shadowing their related VMCS fields
123+
* requires hardware support as the CPU will reject VMWRITEs to
124+
* fields that don't exist.
118125
*/
119126
switch (field) {
120127
case GUEST_PML_INDEX:
@@ -125,10 +132,6 @@ static void init_vmcs_shadow_fields(void)
125132
if (!cpu_has_vmx_preemption_timer())
126133
continue;
127134
break;
128-
case GUEST_INTR_STATUS:
129-
if (!cpu_has_vmx_apicv())
130-
continue;
131-
break;
132135
default:
133136
break;
134137
}
@@ -7074,12 +7077,6 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
70747077
}
70757078
}
70767079

7077-
/*
7078-
* Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo
7079-
* that madness to get the encoding for comparison.
7080-
*/
7081-
#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
7082-
70837080
static u64 nested_vmx_calc_vmcs_enum_msr(void)
70847081
{
70857082
/*
@@ -7407,6 +7404,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
74077404
{
74087405
int i;
74097406

7407+
/*
7408+
* Note! The set of supported vmcs12 fields is consumed by both VMX
7409+
* MSR and shadow VMCS setup.
7410+
*/
7411+
nested_vmx_setup_vmcs12_fields();
7412+
7413+
nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
7414+
74107415
if (!cpu_has_vmx_shadow_vmcs())
74117416
enable_shadow_vmcs = 0;
74127417
if (enable_shadow_vmcs) {

arch/x86/kvm/vmx/vmcs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,16 @@
1111

1212
#include "capabilities.h"
1313

14+
/*
15+
* Indexing into the vmcs12 uses the VMCS encoding rotated left by 6 as a very
16+
* rudimentary compression of the range of indices. The compression ratio is
17+
* good enough to allow KVM to use a (very sparsely populated) array without
18+
* wasting too much memory, while the "algorithm" is fast enough to be used to
19+
* lookup vmcs12 fields on-demand, e.g. for emulation.
20+
*/
1421
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
22+
#define VMCS12_IDX_TO_ENC(idx) ROL16(idx, 10)
23+
#define ENC_TO_VMCS12_IDX(enc) ROL16(enc, 6)
1524

1625
struct vmcs_hdr {
1726
u32 revision_id:31;

arch/x86/kvm/vmx/vmcs12.c

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
#include "vmcs12.h"
55

66
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
7-
#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
7+
#define FIELD(number, name) [ENC_TO_VMCS12_IDX(number)] = VMCS12_OFFSET(name)
88
#define FIELD64(number, name) \
99
FIELD(number, name), \
10-
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
10+
[ENC_TO_VMCS12_IDX(number##_HIGH)] = VMCS12_OFFSET(name) + sizeof(u32)
1111

12-
const unsigned short vmcs12_field_offsets[] = {
12+
static const u16 kvm_supported_vmcs12_field_offsets[] __initconst = {
1313
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
1414
FIELD(POSTED_INTR_NV, posted_intr_nv),
1515
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -158,4 +158,70 @@ const unsigned short vmcs12_field_offsets[] = {
158158
FIELD(HOST_SSP, host_ssp),
159159
FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl),
160160
};
161-
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
161+
162+
u16 vmcs12_field_offsets[ARRAY_SIZE(kvm_supported_vmcs12_field_offsets)] __ro_after_init;
163+
unsigned int nr_vmcs12_fields __ro_after_init;
164+
165+
#define VMCS12_CASE64(enc) case enc##_HIGH: case enc
166+
167+
static __init bool cpu_has_vmcs12_field(unsigned int idx)
168+
{
169+
switch (VMCS12_IDX_TO_ENC(idx)) {
170+
case VIRTUAL_PROCESSOR_ID:
171+
return cpu_has_vmx_vpid();
172+
case POSTED_INTR_NV:
173+
return cpu_has_vmx_posted_intr();
174+
VMCS12_CASE64(TSC_MULTIPLIER):
175+
return cpu_has_vmx_tsc_scaling();
176+
case TPR_THRESHOLD:
177+
VMCS12_CASE64(VIRTUAL_APIC_PAGE_ADDR):
178+
return cpu_has_vmx_tpr_shadow();
179+
VMCS12_CASE64(APIC_ACCESS_ADDR):
180+
return cpu_has_vmx_virtualize_apic_accesses();
181+
VMCS12_CASE64(POSTED_INTR_DESC_ADDR):
182+
return cpu_has_vmx_posted_intr();
183+
case GUEST_INTR_STATUS:
184+
return cpu_has_vmx_virtual_intr_delivery();
185+
VMCS12_CASE64(VM_FUNCTION_CONTROL):
186+
VMCS12_CASE64(EPTP_LIST_ADDRESS):
187+
return cpu_has_vmx_vmfunc();
188+
VMCS12_CASE64(EPT_POINTER):
189+
return cpu_has_vmx_ept();
190+
VMCS12_CASE64(XSS_EXIT_BITMAP):
191+
return cpu_has_vmx_xsaves();
192+
VMCS12_CASE64(ENCLS_EXITING_BITMAP):
193+
return cpu_has_vmx_encls_vmexit();
194+
VMCS12_CASE64(GUEST_IA32_PERF_GLOBAL_CTRL):
195+
VMCS12_CASE64(HOST_IA32_PERF_GLOBAL_CTRL):
196+
return cpu_has_load_perf_global_ctrl();
197+
case SECONDARY_VM_EXEC_CONTROL:
198+
return cpu_has_secondary_exec_ctrls();
199+
case GUEST_S_CET:
200+
case GUEST_SSP:
201+
case GUEST_INTR_SSP_TABLE:
202+
case HOST_S_CET:
203+
case HOST_SSP:
204+
case HOST_INTR_SSP_TABLE:
205+
return cpu_has_load_cet_ctrl();
206+
207+
/* KVM always emulates PML and the VMX preemption timer in software. */
208+
case GUEST_PML_INDEX:
209+
case VMX_PREEMPTION_TIMER_VALUE:
210+
default:
211+
return true;
212+
}
213+
}
214+
215+
void __init nested_vmx_setup_vmcs12_fields(void)
216+
{
217+
unsigned int i;
218+
219+
for (i = 0; i < ARRAY_SIZE(kvm_supported_vmcs12_field_offsets); i++) {
220+
if (!kvm_supported_vmcs12_field_offsets[i] ||
221+
!cpu_has_vmcs12_field(i))
222+
continue;
223+
224+
vmcs12_field_offsets[i] = kvm_supported_vmcs12_field_offsets[i];
225+
nr_vmcs12_fields = i + 1;
226+
}
227+
}

arch/x86/kvm/vmx/vmcs12.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,10 @@ static inline void vmx_check_vmcs12_offsets(void)
374374
CHECK_OFFSET(guest_pml_index, 996);
375375
}
376376

377-
extern const unsigned short vmcs12_field_offsets[];
378-
extern const unsigned int nr_vmcs12_fields;
377+
extern u16 vmcs12_field_offsets[] __ro_after_init;
378+
extern unsigned int nr_vmcs12_fields __ro_after_init;
379+
380+
void __init nested_vmx_setup_vmcs12_fields(void);
379381

380382
static inline short get_vmcs12_field_offset(unsigned long field)
381383
{
@@ -385,7 +387,7 @@ static inline short get_vmcs12_field_offset(unsigned long field)
385387
if (field >> 15)
386388
return -ENOENT;
387389

388-
index = ROL16(field, 6);
390+
index = ENC_TO_VMCS12_IDX(field);
389391
if (index >= nr_vmcs12_fields)
390392
return -ENOENT;
391393

arch/x86/kvm/vmx/vmx.c

Lines changed: 67 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2921,8 +2921,23 @@ int vmx_check_processor_compat(void)
29212921
}
29222922
if (nested)
29232923
nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
2924+
29242925
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) {
2925-
pr_err("Inconsistent VMCS config on CPU %d\n", cpu);
2926+
u32 *gold = (void *)&vmcs_config;
2927+
u32 *mine = (void *)&vmcs_conf;
2928+
int i;
2929+
2930+
BUILD_BUG_ON(sizeof(struct vmcs_config) % sizeof(u32));
2931+
2932+
pr_err("VMCS config on CPU %d doesn't match reference config:", cpu);
2933+
for (i = 0; i < sizeof(struct vmcs_config) / sizeof(u32); i++) {
2934+
if (gold[i] == mine[i])
2935+
continue;
2936+
2937+
pr_cont("\n Offset %u REF = 0x%08x, CPU%u = 0x%08x, mismatch = 0x%08x",
2938+
i * (int)sizeof(u32), gold[i], cpu, mine[i], gold[i] ^ mine[i]);
2939+
}
2940+
pr_cont("\n");
29262941
return -EIO;
29272942
}
29282943
return 0;
@@ -5303,12 +5318,53 @@ static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
53035318
!kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
53045319
}
53055320

5321+
static int vmx_handle_page_fault(struct kvm_vcpu *vcpu, u32 error_code)
5322+
{
5323+
unsigned long cr2 = vmx_get_exit_qual(vcpu);
5324+
5325+
if (vcpu->arch.apf.host_apf_flags)
5326+
goto handle_pf;
5327+
5328+
/* When using EPT, KVM intercepts #PF only to detect illegal GPAs. */
5329+
WARN_ON_ONCE(enable_ept && !allow_smaller_maxphyaddr);
5330+
5331+
/*
5332+
* On SGX2 hardware, EPCM violations are delivered as #PF with the SGX
5333+
* flag set in the error code (SGX1 hardware generates #GP(0)). EPCM
5334+
* violations have nothing to do with shadow paging and can never be
5335+
* resolved by KVM; always reflect them into the guest.
5336+
*/
5337+
if (error_code & PFERR_SGX_MASK) {
5338+
WARN_ON_ONCE(!IS_ENABLED(CONFIG_X86_SGX_KVM) ||
5339+
!cpu_feature_enabled(X86_FEATURE_SGX2));
5340+
5341+
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2))
5342+
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
5343+
else
5344+
kvm_inject_gp(vcpu, 0);
5345+
return 1;
5346+
}
5347+
5348+
/*
5349+
* If EPT is enabled, fixup and inject the #PF. KVM intercepts #PFs
5350+
* only to set PFERR_RSVD as appropriate (hardware won't set RSVD due
5351+
* to the GPA being legal with respect to host.MAXPHYADDR).
5352+
*/
5353+
if (enable_ept) {
5354+
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
5355+
return 1;
5356+
}
5357+
5358+
handle_pf:
5359+
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
5360+
}
5361+
53065362
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
53075363
{
53085364
struct vcpu_vmx *vmx = to_vmx(vcpu);
53095365
struct kvm_run *kvm_run = vcpu->run;
53105366
u32 intr_info, ex_no, error_code;
5311-
unsigned long cr2, dr6;
5367+
unsigned long dr6;
53125368
u32 vect_info;
53135369

53145370
vect_info = vmx->idt_vectoring_info;
@@ -5383,19 +5439,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
53835439
return 0;
53845440
}
53855441

5386-
if (is_page_fault(intr_info)) {
5387-
cr2 = vmx_get_exit_qual(vcpu);
5388-
if (enable_ept && !vcpu->arch.apf.host_apf_flags) {
5389-
/*
5390-
* EPT will cause page fault only if we need to
5391-
* detect illegal GPAs.
5392-
*/
5393-
WARN_ON_ONCE(!allow_smaller_maxphyaddr);
5394-
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
5395-
return 1;
5396-
} else
5397-
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
5398-
}
5442+
if (is_page_fault(intr_info))
5443+
return vmx_handle_page_fault(vcpu, error_code);
53995444

54005445
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
54015446

@@ -8672,16 +8717,14 @@ __init int vmx_hardware_setup(void)
86728717
* can hide/show features based on kvm_cpu_cap_has().
86738718
*/
86748719
if (nested) {
8675-
nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
8676-
86778720
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
86788721
if (r)
86798722
return r;
86808723
}
86818724

86828725
r = alloc_kvm_area();
8683-
if (r && nested)
8684-
nested_vmx_hardware_unsetup();
8726+
if (r)
8727+
goto err_kvm_area;
86858728

86868729
kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
86878730

@@ -8708,6 +8751,11 @@ __init int vmx_hardware_setup(void)
87088751

87098752
kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
87108753

8754+
return 0;
8755+
8756+
err_kvm_area:
8757+
if (nested)
8758+
nested_vmx_hardware_unsetup();
87118759
return r;
87128760
}
87138761

0 commit comments

Comments
 (0)