Skip to content

Commit 54f15eb

Browse files
committed
Merge tag 'kvm-riscv-6.20-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.20 - Fixes for issues discovered by KVM API fuzzing in kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(), and kvm_riscv_vcpu_aia_imsic_update() - Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM - Add riscv vm satp modes in KVM selftests - Transparent huge page support for G-stage - Adjust the number of available guest irq files based on MMIO register sizes in DeviceTree or ACPI
2 parents 9e03b7c + 376e2f8 commit 54f15eb

File tree

15 files changed

+330
-22
lines changed

15 files changed

+330
-22
lines changed

arch/riscv/include/uapi/asm/kvm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ enum KVM_RISCV_ISA_EXT_ID {
192192
KVM_RISCV_ISA_EXT_ZFBFMIN,
193193
KVM_RISCV_ISA_EXT_ZVFBFMIN,
194194
KVM_RISCV_ISA_EXT_ZVFBFWMA,
195+
KVM_RISCV_ISA_EXT_ZCLSD,
196+
KVM_RISCV_ISA_EXT_ZILSD,
197+
KVM_RISCV_ISA_EXT_ZALASR,
195198
KVM_RISCV_ISA_EXT_MAX,
196199
};
197200

arch/riscv/kvm/aia.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ int kvm_riscv_aia_init(void)
630630
*/
631631
if (gc)
632632
kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei,
633-
BIT(gc->guest_index_bits) - 1);
633+
gc->nr_guest_files);
634634
else
635635
kvm_riscv_aia_nr_hgei = 0;
636636

arch/riscv/kvm/aia_imsic.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,10 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
797797
if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
798798
return 1;
799799

800+
/* IMSIC vCPU state may not be initialized yet */
801+
if (!imsic)
802+
return 1;
803+
800804
/* Read old IMSIC VS-file details */
801805
read_lock_irqsave(&imsic->vsfile_lock, flags);
802806
old_vsfile_hgei = imsic->vsfile_hgei;
@@ -952,8 +956,10 @@ int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
952956
if (!vcpu)
953957
return -ENODEV;
954958

955-
isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
956959
imsic = vcpu->arch.aia_context.imsic_state;
960+
if (!imsic)
961+
return -ENODEV;
962+
isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
957963

958964
read_lock_irqsave(&imsic->vsfile_lock, flags);
959965

@@ -993,8 +999,11 @@ int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
993999
if (!vcpu)
9941000
return -ENODEV;
9951001

996-
isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
9971002
imsic = vcpu->arch.aia_context.imsic_state;
1003+
if (!imsic)
1004+
return -ENODEV;
1005+
1006+
isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
9981007
return imsic_mrif_isel_check(imsic->nr_eix, isel);
9991008
}
10001009

arch/riscv/kvm/mmu.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,142 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
305305
return pte_young(ptep_get(ptep));
306306
}
307307

308+
static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
309+
unsigned long hva)
310+
{
311+
hva_t uaddr_start, uaddr_end;
312+
gpa_t gpa_start;
313+
size_t size;
314+
315+
size = memslot->npages * PAGE_SIZE;
316+
uaddr_start = memslot->userspace_addr;
317+
uaddr_end = uaddr_start + size;
318+
319+
gpa_start = memslot->base_gfn << PAGE_SHIFT;
320+
321+
/*
322+
* Pages belonging to memslots that don't have the same alignment
323+
* within a PMD for userspace and GPA cannot be mapped with g-stage
324+
* PMD entries, because we'll end up mapping the wrong pages.
325+
*
326+
* Consider a layout like the following:
327+
*
328+
* memslot->userspace_addr:
329+
* +-----+--------------------+--------------------+---+
330+
* |abcde|fgh vs-stage block | vs-stage block tv|xyz|
331+
* +-----+--------------------+--------------------+---+
332+
*
333+
* memslot->base_gfn << PAGE_SHIFT:
334+
* +---+--------------------+--------------------+-----+
335+
* |abc|def g-stage block | g-stage block |tvxyz|
336+
* +---+--------------------+--------------------+-----+
337+
*
338+
* If we create those g-stage blocks, we'll end up with this incorrect
339+
* mapping:
340+
* d -> f
341+
* e -> g
342+
* f -> h
343+
*/
344+
if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
345+
return false;
346+
347+
/*
348+
* Next, let's make sure we're not trying to map anything not covered
349+
* by the memslot. This means we have to prohibit block size mappings
350+
* for the beginning and end of a non-block aligned and non-block sized
351+
* memory slot (illustrated by the head and tail parts of the
352+
* userspace view above containing pages 'abcde' and 'xyz',
353+
* respectively).
354+
*
355+
* Note that it doesn't matter if we do the check using the
356+
* userspace_addr or the base_gfn, as both are equally aligned (per
357+
* the check above) and equally sized.
358+
*/
359+
return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
360+
}
361+
362+
static int get_hva_mapping_size(struct kvm *kvm,
363+
unsigned long hva)
364+
{
365+
int size = PAGE_SIZE;
366+
unsigned long flags;
367+
pgd_t pgd;
368+
p4d_t p4d;
369+
pud_t pud;
370+
pmd_t pmd;
371+
372+
/*
373+
* Disable IRQs to prevent concurrent tear down of host page tables,
374+
* e.g. if the primary MMU promotes a P*D to a huge page and then frees
375+
* the original page table.
376+
*/
377+
local_irq_save(flags);
378+
379+
/*
380+
* Read each entry once. As above, a non-leaf entry can be promoted to
381+
* a huge page _during_ this walk. Re-reading the entry could send the
382+
walk into the weeds, e.g. p*d_leaf() returns false (sees the old
383+
* value) and then p*d_offset() walks into the target huge page instead
384+
* of the old page table (sees the new value).
385+
*/
386+
pgd = pgdp_get(pgd_offset(kvm->mm, hva));
387+
if (pgd_none(pgd))
388+
goto out;
389+
390+
p4d = p4dp_get(p4d_offset(&pgd, hva));
391+
if (p4d_none(p4d) || !p4d_present(p4d))
392+
goto out;
393+
394+
pud = pudp_get(pud_offset(&p4d, hva));
395+
if (pud_none(pud) || !pud_present(pud))
396+
goto out;
397+
398+
if (pud_leaf(pud)) {
399+
size = PUD_SIZE;
400+
goto out;
401+
}
402+
403+
pmd = pmdp_get(pmd_offset(&pud, hva));
404+
if (pmd_none(pmd) || !pmd_present(pmd))
405+
goto out;
406+
407+
if (pmd_leaf(pmd))
408+
size = PMD_SIZE;
409+
410+
out:
411+
local_irq_restore(flags);
412+
return size;
413+
}
414+
415+
static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
416+
struct kvm_memory_slot *memslot,
417+
unsigned long hva,
418+
kvm_pfn_t *hfnp, gpa_t *gpa)
419+
{
420+
kvm_pfn_t hfn = *hfnp;
421+
422+
/*
423+
* Make sure the adjustment is done only for THP pages. Also make
424+
* sure that the HVA and GPA are sufficiently aligned and that the
425+
* block map is contained within the memslot.
426+
*/
427+
if (fault_supports_gstage_huge_mapping(memslot, hva)) {
428+
int sz;
429+
430+
sz = get_hva_mapping_size(kvm, hva);
431+
if (sz < PMD_SIZE)
432+
return sz;
433+
434+
*gpa &= PMD_MASK;
435+
hfn &= ~(PTRS_PER_PMD - 1);
436+
*hfnp = hfn;
437+
438+
return PMD_SIZE;
439+
}
440+
441+
return PAGE_SIZE;
442+
}
443+
308444
int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
309445
gpa_t gpa, unsigned long hva, bool is_write,
310446
struct kvm_gstage_mapping *out_map)
@@ -398,6 +534,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
398534
if (mmu_invalidate_retry(kvm, mmu_seq))
399535
goto out_unlock;
400536

537+
/* Check if we are backed by a THP and thus use block mapping if possible */
538+
if (vma_pagesize == PAGE_SIZE)
539+
vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);
540+
401541
if (writable) {
402542
mark_page_dirty_in_slot(kvm, memslot, gfn);
403543
ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,

arch/riscv/kvm/vcpu_onereg.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
5050
KVM_ISA_EXT_ARR(ZAAMO),
5151
KVM_ISA_EXT_ARR(ZABHA),
5252
KVM_ISA_EXT_ARR(ZACAS),
53+
KVM_ISA_EXT_ARR(ZALASR),
5354
KVM_ISA_EXT_ARR(ZALRSC),
5455
KVM_ISA_EXT_ARR(ZAWRS),
5556
KVM_ISA_EXT_ARR(ZBA),
@@ -63,6 +64,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
6364
KVM_ISA_EXT_ARR(ZCB),
6465
KVM_ISA_EXT_ARR(ZCD),
6566
KVM_ISA_EXT_ARR(ZCF),
67+
KVM_ISA_EXT_ARR(ZCLSD),
6668
KVM_ISA_EXT_ARR(ZCMOP),
6769
KVM_ISA_EXT_ARR(ZFA),
6870
KVM_ISA_EXT_ARR(ZFBFMIN),
@@ -79,6 +81,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
7981
KVM_ISA_EXT_ARR(ZIHINTNTL),
8082
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
8183
KVM_ISA_EXT_ARR(ZIHPM),
84+
KVM_ISA_EXT_ARR(ZILSD),
8285
KVM_ISA_EXT_ARR(ZIMOP),
8386
KVM_ISA_EXT_ARR(ZKND),
8487
KVM_ISA_EXT_ARR(ZKNE),
@@ -187,6 +190,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
187190
case KVM_RISCV_ISA_EXT_ZAAMO:
188191
case KVM_RISCV_ISA_EXT_ZABHA:
189192
case KVM_RISCV_ISA_EXT_ZACAS:
193+
case KVM_RISCV_ISA_EXT_ZALASR:
190194
case KVM_RISCV_ISA_EXT_ZALRSC:
191195
case KVM_RISCV_ISA_EXT_ZAWRS:
192196
case KVM_RISCV_ISA_EXT_ZBA:

arch/riscv/kvm/vcpu_pmu.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -494,12 +494,9 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low
494494
}
495495

496496
ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
497-
if (ret) {
497+
if (ret)
498498
ret = SBI_ERR_INVALID_ADDRESS;
499-
goto free_mem;
500-
}
501499

502-
ret = 0;
503500
free_mem:
504501
kfree(einfo);
505502
out:

arch/riscv/mm/pgtable.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pud_t *pud_offset(p4d_t *p4d, unsigned long address)
4747

4848
return (pud_t *)p4d;
4949
}
50+
EXPORT_SYMBOL_GPL(pud_offset);
5051

5152
p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
5253
{
@@ -55,6 +56,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
5556

5657
return (p4d_t *)pgd;
5758
}
59+
EXPORT_SYMBOL_GPL(p4d_offset);
5860
#endif
5961

6062
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

drivers/irqchip/irq-riscv-imsic-state.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ static int __init imsic_parse_fwnode(struct fwnode_handle *fwnode,
784784

785785
int __init imsic_setup_state(struct fwnode_handle *fwnode, void *opaque)
786786
{
787-
u32 i, j, index, nr_parent_irqs, nr_mmios, nr_handlers = 0;
787+
u32 i, j, index, nr_parent_irqs, nr_mmios, nr_guest_files, nr_handlers = 0;
788788
struct imsic_global_config *global;
789789
struct imsic_local_config *local;
790790
void __iomem **mmios_va = NULL;
@@ -878,6 +878,7 @@ int __init imsic_setup_state(struct fwnode_handle *fwnode, void *opaque)
878878
}
879879

880880
/* Configure handlers for target CPUs */
881+
global->nr_guest_files = BIT(global->guest_index_bits) - 1;
881882
for (i = 0; i < nr_parent_irqs; i++) {
882883
rc = imsic_get_parent_hartid(fwnode, i, &hartid);
883884
if (rc) {
@@ -918,6 +919,15 @@ int __init imsic_setup_state(struct fwnode_handle *fwnode, void *opaque)
918919
local->msi_pa = mmios[index].start + reloff;
919920
local->msi_va = mmios_va[index] + reloff;
920921

922+
/*
923+
* KVM uses global->nr_guest_files to determine the available guest
924+
* interrupt files on each CPU. Take the minimum number of guest
925+
* interrupt files across all CPUs to avoid KVM incorrectly allocating
926+
a nonexistent or unmapped guest interrupt file on some CPUs.
927+
*/
928+
nr_guest_files = (resource_size(&mmios[index]) - reloff) / IMSIC_MMIO_PAGE_SZ - 1;
929+
global->nr_guest_files = min(global->nr_guest_files, nr_guest_files);
930+
921931
nr_handlers++;
922932
}
923933

include/linux/irqchip/riscv-imsic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ struct imsic_global_config {
6868
/* Number of guest interrupt identities */
6969
u32 nr_guest_ids;
7070

71+
/* Number of guest interrupt files per core */
72+
u32 nr_guest_files;
73+
7174
/* Per-CPU IMSIC addresses */
7275
struct imsic_local_config __percpu *local;
7376
};

tools/testing/selftests/kvm/include/kvm_util.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,17 @@ enum vm_guest_mode {
198198
VM_MODE_P36V48_64K,
199199
VM_MODE_P47V47_16K,
200200
VM_MODE_P36V47_16K,
201+
202+
VM_MODE_P56V57_4K, /* For riscv64 */
203+
VM_MODE_P56V48_4K,
204+
VM_MODE_P56V39_4K,
205+
VM_MODE_P50V57_4K,
206+
VM_MODE_P50V48_4K,
207+
VM_MODE_P50V39_4K,
208+
VM_MODE_P41V57_4K,
209+
VM_MODE_P41V48_4K,
210+
VM_MODE_P41V39_4K,
211+
201212
NUM_VM_MODES,
202213
};
203214

@@ -222,10 +233,10 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
222233
shape; \
223234
})
224235

225-
#if defined(__aarch64__)
226-
227236
extern enum vm_guest_mode vm_mode_default;
228237

238+
#if defined(__aarch64__)
239+
229240
#define VM_MODE_DEFAULT vm_mode_default
230241
#define MIN_PAGE_SHIFT 12U
231242
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -248,7 +259,7 @@ extern enum vm_guest_mode vm_mode_default;
248259
#error "RISC-V 32-bit kvm selftests not supported"
249260
#endif
250261

251-
#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
262+
#define VM_MODE_DEFAULT vm_mode_default
252263
#define MIN_PAGE_SHIFT 12U
253264
#define ptes_per_page(page_size) ((page_size) / 8)
254265

0 commit comments

Comments
 (0)