Skip to content

ARM64-SVE: Use non predicated instruction when mask is all true #114431

@a74nh

Description

@a74nh

Consider:

static void truecndselect1(Vector<int> op1, Vector<int> op2) {
  var result1 = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), op1);
  Consume(result1);
}

G_M21589_IG01:  ;; offset=0x0000
            stp     fp, lr, [sp, #-0x10]!
            mov     fp, sp
						;; size=8 bbWeight=1 PerfScore 1.50
G_M21589_IG02:  ;; offset=0x0008
            add     z0.s, z0.s, z1.s
            movz    x0, #0x72B8      // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
            movk    x0, #0x2218 LSL #16
            movk    x0, #0xE088 LSL #32
            ldr     x0, [x0]
            blr     x0
						;; size=24 bbWeight=1 PerfScore 7.50
G_M21589_IG03:  ;; offset=0x0020
            ldp     fp, lr, [sp], #0x10
            ret     lr
						;; size=8 bbWeight=1 PerfScore 2.00



static void truecndselect2(Vector<int> op1, Vector<int> op2) {
  var result2 = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), op1);
  Consume(result2);
}

G_M25078_IG01:  ;; offset=0x0000
            stp     fp, lr, [sp, #-0x10]!
            mov     fp, sp
						;; size=8 bbWeight=1 PerfScore 1.50
G_M25078_IG02:  ;; offset=0x0008
            ptrue   p0.s
            mvni    v16.4s, #0
            cmpne   p0.s, p0/z, z16.s, #0
            add     z0.s, p0/m, z0.s, z1.s
            movz    x0, #0x72B8      // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
            movk    x0, #0x2218 LSL #16
            movk    x0, #0xE088 LSL #32
            ldr     x0, [x0]
            blr     x0
						;; size=36 bbWeight=1 PerfScore 12.00
G_M25078_IG03:  ;; offset=0x002C
            ldp     fp, lr, [sp], #0x10
            ret     lr
						;; size=8 bbWeight=1 PerfScore 2.00

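The first case already emits a non-predicated ADD; the second still materialises an all-true mask (ptrue/mvni/cmpne) and emits a predicated ADD. Since the mask is all true in both cases, a non-predicated ADD can be used for both, optimising away the mask. Both should become: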

            stp     fp, lr, [sp, #-0x10]!
            mov     fp, sp
            add     z0.s, z0.s, z1.s
            movz    x0, #0x72B8      // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
            movk    x0, #0x2218 LSL #16
            movk    x0, #0xE088 LSL #32
            ldr     x0, [x0]
            blr     x0
            ldp     fp, lr, [sp], #0x10
            ret     lr

This should be possible for all HW_Flag_OptionalEmbeddedMaskedOperation instructions: ADD, AND, BIC, ORR, SUB, EOR

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
arm-sve: Work related to arm64 SVE/SVE2 support
in-pr: There is an active PR which will close this issue when it is merged

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions