Skip to content

Commit 0de4a0e

Browse files
committed
Merge tag 'kvm-x86-fixes-6.19-rc8' of https://github.com/kvm-x86/linux into HEAD
Final KVM fixes for 6.19: - Fix a bug where AVIC is incorrectly inhibited when running with x2AVIC disabled via module param (or on a system without x2AVIC). - Fix a dangling device posted IRQs bug by explicitly checking if the irqfd is still active (on the list) when handling an eventfd signal, instead of zeroing the irqfd's routing information when the irqfd is deassigned. Zeroing the irqfd's routing info causes arm64 and x86's to not disable posting for the IRQ (kvm_arch_irq_bypass_del_producer() looks for an MSI), incorrectly leaving the IRQ in posted mode (and leading to use-after-free and memory leaks on AMD in particular). This is both the most pressing and scariest, but it's been in -next for a while. - Disable FORTIFY_SOURCE for KVM selftests to prevent the compiler from generating calls to the checked versions of memset() and friends, which leads to unexpected page faults in guest code due e.g. __memset_chk@plt not being resolved. - Explicitly configure the support XSS from within {svm,vmx}_set_cpu_caps() to fix a bug where VMX will compute the reference VMCS configuration with SHSTK and IBT enabled, but then compute each CPUs local config with SHSTK and IBT disabled if not all CET xfeatures are enabled, e.g. if the kernel is built with X86_KERNEL_IBT=n. The mismatch in features results in differing nVMX setting, and ultimately causes kvm-intel.ko to refuse to load with nested=1.
2 parents e89f0e9 + f8ade83 commit 0de4a0e

File tree

8 files changed

+52
-36
lines changed

8 files changed

+52
-36
lines changed

arch/x86/kvm/irq.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
514514
*/
515515
spin_lock_irq(&kvm->irqfds.lock);
516516

517-
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
517+
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI ||
518+
WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) {
518519
ret = kvm_pi_update_irte(irqfd, NULL);
519520
if (ret)
520521
pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",

arch/x86/kvm/svm/avic.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
376376

377377
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
378378
{
379+
u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
379380
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
380381
struct vcpu_svm *svm = to_svm(vcpu);
381382
u32 id = vcpu->vcpu_id;
@@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
388389
* avic_vcpu_load() expects to be called if and only if the vCPU has
389390
* fully initialized AVIC.
390391
*/
391-
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
392-
(id > x2avic_max_physical_id)) {
392+
if (id > max_id) {
393393
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
394394
vcpu->arch.apic->apicv_active = false;
395395
return 0;

arch/x86/kvm/svm/svm.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void)
52845284
*/
52855285
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
52865286
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
5287+
5288+
kvm_setup_xss_caps();
52875289
}
52885290

52895291
static __init int svm_hardware_setup(void)

arch/x86/kvm/vmx/vmx.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void)
80518051
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
80528052
kvm_cpu_cap_clear(X86_FEATURE_IBT);
80538053
}
8054+
8055+
kvm_setup_xss_caps();
80548056
}
80558057

80568058
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,

arch/x86/kvm/x86.c

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = {
99539953
};
99549954
#endif
99559955

9956+
void kvm_setup_xss_caps(void)
9957+
{
9958+
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
9959+
kvm_caps.supported_xss = 0;
9960+
9961+
if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
9962+
!kvm_cpu_cap_has(X86_FEATURE_IBT))
9963+
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
9964+
9965+
if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
9966+
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
9967+
kvm_cpu_cap_clear(X86_FEATURE_IBT);
9968+
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
9969+
}
9970+
}
9971+
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
9972+
99569973
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
99579974
{
99589975
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
1012510142
if (!tdp_enabled)
1012610143
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
1012710144

10128-
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
10129-
kvm_caps.supported_xss = 0;
10130-
10131-
if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
10132-
!kvm_cpu_cap_has(X86_FEATURE_IBT))
10133-
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
10134-
10135-
if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
10136-
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
10137-
kvm_cpu_cap_clear(X86_FEATURE_IBT);
10138-
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
10139-
}
10140-
1014110145
if (kvm_caps.has_tsc_control) {
1014210146
/*
1014310147
* Make sure the user can only configure tsc_khz values that

arch/x86/kvm/x86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host;
471471

472472
extern bool enable_pmu;
473473

474+
void kvm_setup_xss_caps(void);
475+
474476
/*
475477
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
476478
* features for which the current process doesn't (yet) have permission to use.

tools/testing/selftests/kvm/Makefile.kvm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
251251
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
252252
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
253253
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
254+
-U_FORTIFY_SOURCE \
254255
-fno-builtin-memcmp -fno-builtin-memcpy \
255256
-fno-builtin-memset -fno-builtin-strnlen \
256257
-fno-stack-protector -fno-PIE -fno-strict-aliasing \

virt/kvm/eventfd.c

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work)
157157
}
158158

159159

160-
/* assumes kvm->irqfds.lock is held */
161-
static bool
162-
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
160+
static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
163161
{
162+
/*
163+
* Assert that either irqfds.lock or SRCU is held, as irqfds.lock must
164+
* be held to prevent false positives (on the irqfd being active), and
165+
* while false negatives are impossible as irqfds are never added back
166+
* to the list once they're deactivated, the caller must at least hold
167+
* SRCU to guard against routing changes if the irqfd is deactivated.
168+
*/
169+
lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) ||
170+
srcu_read_lock_held(&irqfd->kvm->irq_srcu));
171+
164172
return list_empty(&irqfd->list) ? false : true;
165173
}
166174

167175
/*
168176
* Mark the irqfd as inactive and schedule it for removal
169-
*
170-
* assumes kvm->irqfds.lock is held
171177
*/
172-
static void
173-
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
178+
static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
174179
{
180+
lockdep_assert_held(&irqfd->kvm->irqfds.lock);
181+
175182
BUG_ON(!irqfd_is_active(irqfd));
176183

177184
list_del_init(&irqfd->list);
@@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
217224
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
218225
irq = irqfd->irq_entry;
219226
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
220-
/* An event has been signaled, inject an interrupt */
221-
if (kvm_arch_set_irq_inatomic(&irq, kvm,
227+
228+
/*
229+
* An event has been signaled, inject an interrupt unless the
230+
* irqfd is being deassigned (isn't active), in which case the
231+
* routing information may be stale (once the irqfd is removed
232+
* from the list, it will stop receiving routing updates).
233+
*/
234+
if (unlikely(!irqfd_is_active(irqfd)) ||
235+
kvm_arch_set_irq_inatomic(&irq, kvm,
222236
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
223237
false) == -EWOULDBLOCK)
224238
schedule_work(&irqfd->inject);
@@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
585599
spin_lock_irq(&kvm->irqfds.lock);
586600

587601
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
588-
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
589-
/*
590-
* This clearing of irq_entry.type is needed for when
591-
* another thread calls kvm_irq_routing_update before
592-
* we flush workqueue below (we synchronize with
593-
* kvm_irq_routing_update using irqfds.lock).
594-
*/
595-
write_seqcount_begin(&irqfd->irq_entry_sc);
596-
irqfd->irq_entry.type = 0;
597-
write_seqcount_end(&irqfd->irq_entry_sc);
602+
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi)
598603
irqfd_deactivate(irqfd);
599-
}
600604
}
601605

602606
spin_unlock_irq(&kvm->irqfds.lock);

0 commit comments

Comments
 (0)