Skip to content

Commit f58e70c

Browse files
committed
KVM/arm64 updates for 6.19 - Support for userspace handling of synchronous external aborts (SEAs), allowing the VMM to potentially handle the abort in a non-fatal manner. - Large rework of the VGIC's list register handling with the goal of supporting more active/pending IRQs than available list registers in hardware. In addition, the VGIC now supports EOImode==1 style deactivations for IRQs which may occur on a separate vCPU than the one that acked the IRQ. - Support for FEAT_XNX (user / privileged execute permissions) and FEAT_HAF (hardware update to the Access Flag) in the software page table walkers and shadow MMU. - Allow page table destruction to reschedule, fixing long need_resched latencies observed when destroying a large VM. - Minor fixes to KVM and selftests
2 parents 63a9b0b + 3eef0c8 commit f58e70c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+2575
-531
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7286,6 +7286,41 @@ exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
72867286
it will enter with output fields already valid; in the common case, the
72877287
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
72887288
Userspace need not do anything if it does not wish to support a TDVMCALL.
7289+
7290+
::
7291+
7292+
/* KVM_EXIT_ARM_SEA */
7293+
struct {
7294+
#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0)
7295+
__u64 flags;
7296+
__u64 esr;
7297+
__u64 gva;
7298+
__u64 gpa;
7299+
} arm_sea;
7300+
7301+
Used on arm64 systems. When the VM capability ``KVM_CAP_ARM_SEA_TO_USER`` is
7302+
enabled, KVM exits to userspace if a guest access causes a synchronous
7303+
external abort (SEA) and the host APEI fails to handle the SEA.
7304+
7305+
``esr`` is set to a sanitized value of ESR_EL2 from the exception taken to KVM,
7306+
consisting of the following fields:
7307+
7308+
- ``ESR_EL2.EC``
7309+
- ``ESR_EL2.IL``
7310+
- ``ESR_EL2.FnV``
7311+
- ``ESR_EL2.EA``
7312+
- ``ESR_EL2.CM``
7313+
- ``ESR_EL2.WNR``
7314+
- ``ESR_EL2.FSC``
7315+
- ``ESR_EL2.SET`` (when FEAT_RAS is implemented for the VM)
7316+
7317+
``gva`` is set to the value of FAR_EL2 from the exception taken to KVM when
7318+
``ESR_EL2.FnV == 0``. Otherwise, the value of ``gva`` is unknown.
7319+
7320+
``gpa`` is set to the faulting IPA from the exception taken to KVM when
7321+
the ``KVM_EXIT_ARM_SEA_FLAG_GPA_VALID`` flag is set. Otherwise, the value of
7322+
``gpa`` is unknown.
7323+
72897324
::
72907325

72917326
/* Fix the size of the union. */
@@ -8703,6 +8738,18 @@ This capability indicate to the userspace whether a PFNMAP memory region
87038738
can be safely mapped as cacheable. This relies on the presence of
87048739
force write back (FWB) feature support on the hardware.
87058740

8741+
7.45 KVM_CAP_ARM_SEA_TO_USER
8742+
----------------------------
8743+
8744+
:Architecture: arm64
8745+
:Target: VM
8746+
:Parameters: none
8747+
:Returns: 0 on success, -EINVAL if unsupported.
8748+
8749+
When this capability is enabled, KVM may exit to userspace for SEAs taken to
8750+
EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more
8751+
information.
8752+
87068753
8. Other capabilities.
87078754
======================
87088755

arch/arm64/include/asm/kvm_arm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
#define TCR_EL2_DS (1UL << 32)
112112
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
113113
#define TCR_EL2_HPD (1 << 24)
114+
#define TCR_EL2_HA (1 << 21)
114115
#define TCR_EL2_TBI (1 << 20)
115116
#define TCR_EL2_PS_SHIFT 16
116117
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)

arch/arm64/include/asm/kvm_asm.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ enum __kvm_host_smccc_func {
7979
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
8080
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
8181
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
82-
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
82+
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
8383
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
8484
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
8585
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
246246
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
247247

248248
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
249-
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
250-
extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
251-
extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
249+
extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
250+
extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
251+
extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
252252

253253
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
254254

arch/arm64/include/asm/kvm_host.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
5555
#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
5656
#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
57+
#define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11)
5758

5859
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
5960
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -350,6 +351,8 @@ struct kvm_arch {
350351
#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
351352
/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
352353
#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
354+
/* Unhandled SEAs are taken to userspace */
355+
#define KVM_ARCH_FLAG_EXIT_SEA 11
353356
unsigned long flags;
354357

355358
/* VM-wide vCPU feature set */

arch/arm64/include/asm/kvm_hyp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,13 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
7777
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
7878

7979
u64 __gic_v3_get_lr(unsigned int lr);
80+
void __gic_v3_set_lr(u64 val, int lr);
8081

8182
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
8283
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
8384
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
8485
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
85-
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
86+
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
8687
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
8788
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
8889

arch/arm64/include/asm/kvm_nested.h

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
120120
return trans->writable;
121121
}
122122

123-
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
123+
static inline bool kvm_has_xnx(struct kvm *kvm)
124124
{
125-
return !(trans->desc & BIT(54));
125+
return cpus_have_final_cap(ARM64_HAS_XNX) &&
126+
kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
127+
}
128+
129+
static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
130+
{
131+
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
132+
133+
if (!kvm_has_xnx(kvm))
134+
xn &= 0b10; /* xn is the already-extracted 2-bit field: keep XN[1], drop the RES0 XN[0] */
135+
136+
switch (xn) {
137+
case 0b00:
138+
case 0b01:
139+
return true;
140+
default:
141+
return false;
142+
}
143+
}
144+
145+
static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
146+
{
147+
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
148+
149+
if (!kvm_has_xnx(kvm))
150+
xn &= 0b10; /* xn is the already-extracted 2-bit field: keep XN[1], drop the RES0 XN[0] */
151+
152+
switch (xn) {
153+
case 0b00:
154+
case 0b11:
155+
return true;
156+
default:
157+
return false;
158+
}
126159
}
127160

128161
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -320,6 +353,7 @@ struct s1_walk_info {
320353
bool be;
321354
bool s2;
322355
bool pa52bit;
356+
bool ha;
323357
};
324358

325359
struct s1_walk_result {
@@ -370,4 +404,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
370404
(FIX_VNCR - __c); \
371405
})
372406

407+
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
408+
373409
#endif /* __ARM64_KVM_NESTED_H */

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
8989

9090
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
9191

92-
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
92+
#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
9393

9494
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
9595

@@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags {
240240

241241
/**
242242
* enum kvm_pgtable_prot - Page-table permissions and attributes.
243-
* @KVM_PGTABLE_PROT_X: Execute permission.
243+
* @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
244+
* @KVM_PGTABLE_PROT_PX: Privileged execute permission.
245+
* @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
244246
* @KVM_PGTABLE_PROT_W: Write permission.
245247
* @KVM_PGTABLE_PROT_R: Read permission.
246248
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
@@ -251,12 +253,15 @@ enum kvm_pgtable_stage2_flags {
251253
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
252254
*/
253255
enum kvm_pgtable_prot {
254-
KVM_PGTABLE_PROT_X = BIT(0),
255-
KVM_PGTABLE_PROT_W = BIT(1),
256-
KVM_PGTABLE_PROT_R = BIT(2),
256+
KVM_PGTABLE_PROT_PX = BIT(0),
257+
KVM_PGTABLE_PROT_UX = BIT(1),
258+
KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
259+
KVM_PGTABLE_PROT_UX,
260+
KVM_PGTABLE_PROT_W = BIT(2),
261+
KVM_PGTABLE_PROT_R = BIT(3),
257262

258-
KVM_PGTABLE_PROT_DEVICE = BIT(3),
259-
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
263+
KVM_PGTABLE_PROT_DEVICE = BIT(4),
264+
KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
260265

261266
KVM_PGTABLE_PROT_SW0 = BIT(55),
262267
KVM_PGTABLE_PROT_SW1 = BIT(56),
@@ -355,6 +360,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
355360
return pteref;
356361
}
357362

363+
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
364+
{
365+
return pteref;
366+
}
367+
358368
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
359369
{
360370
/*
@@ -384,6 +394,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
384394
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
385395
}
386396

397+
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
398+
{
399+
return rcu_dereference_raw(pteref);
400+
}
401+
387402
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
388403
{
389404
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -551,6 +566,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
551566
*/
552567
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
553568

569+
/**
570+
* kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
571+
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
572+
* @addr: Intermediate physical address at which the range to destroy begins.
573+
* @size: Size of the range to destroy.
574+
*
575+
* The page-table is assumed to be unreachable by any hardware walkers prior
576+
* to freeing and therefore no TLB invalidation is performed.
577+
*/
578+
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
579+
u64 addr, u64 size);
580+
581+
/**
582+
* kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
583+
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
584+
*
585+
* It is assumed that the rest of the page-table is freed before this operation.
586+
*/
587+
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
588+
554589
/**
555590
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
556591
* @mm_ops: Memory management callbacks.

arch/arm64/include/asm/kvm_pkvm.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,9 @@ struct pkvm_mapping {
180180

181181
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
182182
struct kvm_pgtable_mm_ops *mm_ops);
183-
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
183+
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
184+
u64 addr, u64 size);
185+
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
184186
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
185187
enum kvm_pgtable_prot prot, void *mc,
186188
enum kvm_pgtable_walk_flags flags);

arch/arm64/include/asm/virt.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,13 @@
4040
*/
4141
#define HVC_FINALISE_EL2 3
4242

43+
/*
44+
* HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value
45+
*/
46+
#define HVC_GET_ICH_VTR_EL2 4
47+
4348
/* Max number of HYP stub hypercalls */
44-
#define HVC_STUB_HCALL_NR 4
49+
#define HVC_STUB_HCALL_NR 5
4550

4651
/* Error returned when an invalid stub number is passed into x0 */
4752
#define HVC_STUB_ERR 0xbadca11

arch/arm64/kernel/cpufeature.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2304,6 +2304,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
23042304
}
23052305
#endif
23062306

2307+
static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
2308+
int scope)
2309+
{
2310+
static const struct midr_range has_vgic_v3[] = {
2311+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
2312+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
2313+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
2314+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
2315+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
2316+
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
2317+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
2318+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
2319+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
2320+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
2321+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
2322+
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
2323+
{},
2324+
};
2325+
struct arm_smccc_res res = {};
2326+
2327+
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
2328+
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
2329+
if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
2330+
!is_midr_in_range_list(has_vgic_v3))
2331+
return false;
2332+
2333+
if (!is_hyp_mode_available())
2334+
return false;
2335+
2336+
if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
2337+
return true;
2338+
2339+
if (is_kernel_in_hyp_mode())
2340+
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
2341+
else
2342+
arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
2343+
2344+
if (res.a0 == HVC_STUB_ERR)
2345+
return false;
2346+
2347+
return res.a1 & ICH_VTR_EL2_TDS;
2348+
}
2349+
23072350
#ifdef CONFIG_ARM64_BTI
23082351
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
23092352
{
@@ -2815,6 +2858,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
28152858
.matches = has_gic_prio_relaxed_sync,
28162859
},
28172860
#endif
2861+
{
2862+
/*
2863+
* Depends on having GICv3
2864+
*/
2865+
.desc = "ICV_DIR_EL1 trapping",
2866+
.capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
2867+
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
2868+
.matches = can_trap_icv_dir_el1,
2869+
},
28182870
#ifdef CONFIG_ARM64_E0PD
28192871
{
28202872
.desc = "E0PD",
@@ -3089,6 +3141,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
30893141
.capability = ARM64_HAS_GICV5_LEGACY,
30903142
.matches = test_has_gicv5_legacy,
30913143
},
3144+
{
3145+
.desc = "XNX",
3146+
.capability = ARM64_HAS_XNX,
3147+
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
3148+
.matches = has_cpuid_feature,
3149+
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
3150+
},
30923151
{},
30933152
};
30943153

0 commit comments

Comments
 (0)