diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index e8fec7a4bde585..93d69b4d41ea72 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -827,7 +827,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized512Small(ref xRef, remainder); + result = VectorizedSmall(ref xRef, remainder); } return result; @@ -847,7 +847,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized256Small(ref xRef, remainder); + result = VectorizedSmall(ref xRef, remainder); } return result; @@ -867,7 +867,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized128Small(ref xRef, remainder); + result = VectorizedSmall(ref xRef, remainder); } return result; @@ -1060,40 +1060,6 @@ static T Vectorized128(ref T xRef, nuint remainder) return TAggregationOperator.Invoke(vresult); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized128Small(ref T xRef, nuint remainder) - { - T result = TAggregationOperator.IdentityValue; - - switch (remainder) - { - case 3: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); - goto case 2; - } - - case 2: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); - goto case 1; - } - - case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } - - case 0: - { - break; - } - } - - return result; - } - static T Vectorized256(ref T xRef, nuint remainder) { Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); @@ -1263,71 +1229,6 @@ static T Vectorized256(ref T xRef, nuint remainder) return TAggregationOperator.Invoke(vresult); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized256Small(ref T xRef, nuint remainder) - { - T result = TAggregationOperator.IdentityValue; - - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); - - result = TAggregationOperator.Invoke(vresult); - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); - - result = TAggregationOperator.Invoke(vresult); - break; - } - - case 3: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); - goto case 2; - } - - case 2: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); - goto case 1; - } - - case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } - - case 0: - { - break; - } - } - - return result; - } - static T Vectorized512(ref T xRef, nuint remainder) { Vector512 vresult = Vector512.Create(TAggregationOperator.IdentityValue); @@ -1498,8 +1399,23 @@ static T Vectorized512(ref T xRef, nuint remainder) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized512Small(ref T xRef, nuint remainder) + static T VectorizedSmall(ref T xRef, nuint remainder) + { + if (sizeof(T) == 4) + { + return VectorizedSmall4(ref xRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + return VectorizedSmall8(ref xRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static T VectorizedSmall4(ref T xRef, nuint remainder) { + Debug.Assert(sizeof(T) == 4); T result = TAggregationOperator.IdentityValue; switch (remainder) @@ -1596,6 +1512,89 @@ static T Vectorized512Small(ref T xRef, nuint remainder) return result; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static T VectorizedSmall8(ref T xRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 8); + T result = TAggregationOperator.IdentityValue; + + switch (remainder) + { + case 7: + case 6: + case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 1: + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); + goto case 0; + } + + case 0: + { + break; + } + } + + return result; + } } /// Performs an aggregation over all pair-wise elements in and to produce a single-precision floating-point value. @@ -1639,7 +1638,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized512Small(ref xRef, ref yRef, remainder); + result = VectorizedSmall(ref xRef, ref yRef, remainder); } return result; @@ -1659,7 +1658,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized256Small(ref xRef, ref yRef, remainder); + result = VectorizedSmall(ref xRef, ref yRef, remainder); } return result; @@ -1679,7 +1678,7 @@ private static T Aggregate( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - result = Vectorized128Small(ref xRef, ref yRef, remainder); + result = VectorizedSmall(ref xRef, ref yRef, remainder); } return result; @@ -1895,42 +1894,6 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder) return TAggregationOperator.Invoke(vresult); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized128Small(ref T xRef, ref T yRef, nuint remainder) - { - T result = TAggregationOperator.IdentityValue; - - switch (remainder) - { - case 3: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2))); - goto case 2; - } - - case 2: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1))); - goto case 1; - } - - case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } - - case 0: - { - break; - } - } - - return result; - } - static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) { Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); @@ -2122,93 +2085,23 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) return TAggregationOperator.Invoke(vresult); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized256Small(ref T xRef, ref T yRef, nuint remainder) + static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) { - T result = TAggregationOperator.IdentityValue; - - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + Vector512 vresult = Vector512.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + // Preload the beginning and end so that overlapping accesses don't negatively impact the data - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + Vector512 beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef), + Vector512.LoadUnsafe(ref yRef)); + Vector512 end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512.Count), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512.Count)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + nuint misalignment = 0; - result = TAggregationOperator.Invoke(vresult); - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); - - result = TAggregationOperator.Invoke(vresult); - break; - } - - case 3: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2))); - goto case 2; - } - - case 2: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1))); - goto case 1; - } - - case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } - - case 0: - { - break; - } - } - - return result; - } - - static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) - { - Vector512 vresult = Vector512.Create(TAggregationOperator.IdentityValue); - - // Preload the beginning and end so that overlapping accesses don't negatively impact the data - - Vector512 beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef), - Vector512.LoadUnsafe(ref yRef)); - Vector512 end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512.Count), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512.Count)); - - nuint misalignment = 0; - - if (remainder > (uint)(Vector512.Count * 8)) - { - // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful - // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted. + if (remainder > (uint)(Vector512.Count * 8)) + { + // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful + // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted. fixed (T* px = &xRef) fixed (T* py = &yRef) @@ -2384,8 +2277,23 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static T Vectorized512Small(ref T xRef, ref T yRef, nuint remainder) + static T VectorizedSmall(ref T xRef, ref T yRef, nuint remainder) + { + if (sizeof(T) == 4) + { + return VectorizedSmall4(ref xRef, ref yRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + return VectorizedSmall8(ref xRef, ref yRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static T VectorizedSmall4(ref T xRef, ref T yRef, nuint remainder) { + Debug.Assert(sizeof(T) == 4); T result = TAggregationOperator.IdentityValue; switch (remainder) @@ -2490,6 +2398,95 @@ static T Vectorized512Small(ref T xRef, ref T yRef, nuint remainder) return result; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static T VectorizedSmall8(ref T xRef, ref T yRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 8); + T result = TAggregationOperator.IdentityValue; + + switch (remainder) + { + case 7: + case 6: + case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); + + result = TAggregationOperator.Invoke(vresult); + break; + } + + case 1: + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); + goto case 0; + } + + case 0: + { + break; + } + } + + return result; + } } /// @@ -2988,7 +2985,7 @@ private static void InvokeSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized512Small(ref xRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref dRef, remainder); } return; @@ -3006,7 +3003,7 @@ private static void InvokeSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized256Small(ref xRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref dRef, remainder); } return; @@ -3024,7 +3021,7 @@ private static void InvokeSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized128Small(ref xRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref dRef, remainder); } return; @@ -3255,36 +3252,6 @@ static void Vectorized128(ref T xRef, ref T dRef, nuint remainder) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized128Small(ref T xRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 3: - { - Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized256(ref T xRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -3496,61 +3463,6 @@ static void Vectorized256(ref T xRef, ref T dRef, nuint remainder) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized256Small(ref T xRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -3763,8 +3675,24 @@ static void Vectorized512(ref T xRef, ref T dRef, nuint remainder) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, ref T dRef, nuint remainder) + static void VectorizedSmall(ref T xRef, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 4); + switch (remainder) { case 15: @@ -3845,15 +3773,83 @@ static void Vectorized512Small(ref T xRef, ref T dRef, nuint remainder) } } } - } - /// - /// Performs an element-wise operation on and , - /// and writes the results to . - /// - /// The element type. - /// - /// Specifies the operation to perform on the pair-wise elements loaded from and . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall8(ref T xRef, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 8); + + switch (remainder) + { + case 7: + case 6: + case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 1: + { + dRef = TUnaryOperator.Invoke(xRef); + goto case 0; + } + + case 0: + { + break; + } + } + } + } + + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// The element type. + /// + /// Specifies the operation to perform on the pair-wise elements loaded from and . /// private static void InvokeSpanSpanIntoSpan( ReadOnlySpan x, ReadOnlySpan y, Span destination) @@ -3895,7 +3891,7 @@ private static void InvokeSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized512Small(ref xRef, ref yRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder); } return; @@ -3913,7 +3909,7 @@ private static void InvokeSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized256Small(ref xRef, ref yRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder); } return; @@ -3931,7 +3927,7 @@ private static void InvokeSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized128Small(ref xRef, ref yRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder); } return; @@ -4194,38 +4190,6 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T dRef, nuint remainder) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized128Small(ref T xRef, ref T yRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -4468,66 +4432,6 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized256Small(ref T xRef, ref T yRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -4771,8 +4675,24 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, ref T yRef, ref T dRef, nuint remainder) + static void VectorizedSmall(ref T xRef, ref T yRef, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, ref yRef, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, ref yRef, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, ref T yRef, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 4); + switch (remainder) { case 15: @@ -4861,6 +4781,80 @@ static void Vectorized512Small(ref T xRef, ref T yRef, ref T dRef, nuint remaind } } } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 8); + + switch (remainder) + { + case 7: + case 6: + case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 1: + { + dRef = TBinaryOperator.Invoke(xRef, yRef); + goto case 0; + } + + case 0: + { + break; + } + } + } } /// @@ -4922,7 +4916,7 @@ private static void InvokeSpanScalarIntoSpan yVec = Vector128.Create(y); - - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - yVec); - Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), - yVec); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - Vector128.Create(y)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), - y); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), - y); - goto case 1; - } - - case 1: - { - dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -5747,58 +5647,168 @@ static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder) goto case 4; } - case 4: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; + case 4: + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } + + case 3: + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } + + case 2: + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } + + case 1: + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } + + case 0: + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall(ref T xRef, T y, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, y, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, y, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, T y, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 4); + + switch (remainder) + { + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 yVec = Vector256.Create(y); + + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + yVec); + Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), + yVec); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 8: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + Vector256.Create(y)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 7: + case 6: + case 5: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 yVec = Vector128.Create(y); + + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + yVec); + Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), + yVec); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + Vector128.Create(y)); + beg.StoreUnsafe(ref dRef); + + break; } case 3: { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), + y); goto case 2; } case 2: { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), + y); goto case 1; } case 1: { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); goto case 0; } case 0: { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); break; } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, T y, ref T dRef, nuint remainder) + static void VectorizedSmall8(ref T xRef, T y, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 8); + switch (remainder) { - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: + case 7: + case 6: + case 5: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -5815,7 +5825,7 @@ static void Vectorized512Small(ref T xRef, T y, ref T dRef, nuint remainder) break; } - case 8: + case 4: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -5826,9 +5836,7 @@ static void Vectorized512Small(ref T xRef, T y, ref T dRef, nuint remainder) break; } - case 7: - case 6: - case 5: + case 3: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -5845,7 +5853,7 @@ static void Vectorized512Small(ref T xRef, T y, ref T dRef, nuint remainder) break; } - case 4: + case 2: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -5856,20 +5864,6 @@ static void Vectorized512Small(ref T xRef, T y, ref T dRef, nuint remainder) break; } - case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), - y); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), - y); - goto case 1; - } - case 1: { dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); @@ -5935,7 +5929,7 @@ private static void InvokeSpanSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized512Small(ref xRef, ref yRef, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder); } return; @@ -5953,7 +5947,7 @@ private static void InvokeSpanSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized256Small(ref xRef, ref yRef, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder); } return; @@ -5971,7 +5965,7 @@ private static void InvokeSpanSpanSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized128Small(ref xRef, ref yRef, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder); } return; @@ -6266,40 +6260,6 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized128Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - Unsafe.Add(ref zRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - Unsafe.Add(ref zRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -6573,71 +6533,6 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized256Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - Unsafe.Add(ref zRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - Unsafe.Add(ref zRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -6897,32 +6792,146 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint case 1: { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } + + case 0: + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, ref yRef, ref zRef, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, ref yRef, ref zRef, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 4); + + switch (remainder) + { + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 8: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 7: + case 6: + case 5: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 3: + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + Unsafe.Add(ref zRef, 2)); + goto case 2; + } + + case 2: + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + Unsafe.Add(ref zRef, 1)); + goto case 1; + } + + case 1: + { + dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); goto case 0; } case 0: { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); break; } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) + static void VectorizedSmall8(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 8); + switch (remainder) { - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: + case 7: + case 6: + case 5: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -6939,7 +6948,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, n break; } - case 8: + case 4: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -6951,9 +6960,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, n break; } - case 7: - case 6: - case 5: + case 3: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -6970,7 +6977,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, n break; } - case 4: + case 2: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -6982,22 +6989,6 @@ static void Vectorized512Small(ref T xRef, ref T yRef, ref T zRef, ref T dRef, n break; } - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - Unsafe.Add(ref zRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - Unsafe.Add(ref zRef, 1)); - goto case 1; - } - case 1: { dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); @@ -7061,7 +7052,7 @@ private static void InvokeSpanSpanScalarIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized512Small(ref xRef, ref yRef, z, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder); } return; @@ -7079,7 +7070,7 @@ private static void InvokeSpanSpanScalarIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized256Small(ref xRef, ref yRef, z, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder); } return; @@ -7097,7 +7088,7 @@ private static void InvokeSpanSpanScalarIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized128Small(ref xRef, ref yRef, z, ref dRef, remainder); + VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder); } return; @@ -7388,40 +7379,6 @@ static void Vectorized128(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized128Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - z); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - z); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -7691,73 +7648,6 @@ static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized256Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 zVec = Vector128.Create(z); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - zVec); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - zVec); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.Create(z)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - z); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - z); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -7984,61 +7874,179 @@ static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind goto case 4; } - case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; + case 4: + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } + + case 3: + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } + + case 2: + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } + + case 1: + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } + + case 0: + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, ref yRef, z, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, ref yRef, z, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 4); + + switch (remainder) + { + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 zVec = Vector256.Create(z); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + zVec); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + zVec); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 8: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.Create(z)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 7: + case 6: + case 5: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 zVec = Vector128.Create(z); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + zVec); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + zVec); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.Create(z)); + beg.StoreUnsafe(ref dRef); + + break; } case 3: { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + z); goto case 2; } case 2: { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + z); goto case 1; } case 1: { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + dRef = TTernaryOperator.Invoke(xRef, yRef, z); goto case 0; } case 0: { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); break; } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) + static void VectorizedSmall8(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 8); + switch (remainder) { - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: + case 7: + case 6: + case 5: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -8057,7 +8065,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint re break; } - case 8: + case 4: { Debug.Assert(Vector256.IsHardwareAccelerated); @@ -8069,9 +8077,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint re break; } - case 7: - case 6: - case 5: + case 3: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -8090,7 +8096,7 @@ static void Vectorized512Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint re break; } - case 4: + case 2: { Debug.Assert(Vector128.IsHardwareAccelerated); @@ -8102,22 +8108,6 @@ static void Vectorized512Small(ref T xRef, ref T yRef, T z, ref T dRef, nuint re break; } - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - z); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - z); - goto case 1; - } - case 1: { dRef = TTernaryOperator.Invoke(xRef, yRef, z); @@ -8181,7 +8171,7 @@ private static void InvokeSpanScalarSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized512Small(ref xRef, y, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder); } return; @@ -8199,7 +8189,7 @@ private static void InvokeSpanScalarSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized256Small(ref xRef, y, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder); } return; @@ -8217,7 +8207,7 @@ private static void InvokeSpanScalarSpanIntoSpan( // efficiently, we simply have a small jump table and fallthrough. So we get a simple // length check, single jump, and then linear execution. - Vectorized128Small(ref xRef, y, ref zRef, ref dRef, remainder); + VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder); } return; @@ -8508,40 +8498,6 @@ static void Vectorized128(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized128Small(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - y, - Unsafe.Add(ref zRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - y, - Unsafe.Add(ref zRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -8811,73 +8767,6 @@ static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized256Small(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) - { - switch (remainder) - { - case 7: - case 6: - case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 yVec = Vector128.Create(y); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - yVec, - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - - break; - } - - case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.Create(y), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); - - break; - } - - case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - y, - Unsafe.Add(ref zRef, 2)); - goto case 2; - } - - case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - y, - Unsafe.Add(ref zRef, 1)); - goto case 1; - } - - case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } - - case 0: - { - break; - } - } - } - static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) { ref T dRefBeg = ref dRef; @@ -9148,8 +9037,24 @@ static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Vectorized512Small(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) + static void VectorizedSmall(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) + { + if (sizeof(T) == 4) + { + VectorizedSmall4(ref xRef, y, ref zRef, ref dRef, remainder); + } + else + { + Debug.Assert(sizeof(T) == 8); + VectorizedSmall8(ref xRef, y, ref zRef, ref dRef, remainder); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall4(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) { + Debug.Assert(sizeof(T) == 4); + switch (remainder) { case 15: @@ -9250,6 +9155,90 @@ static void Vectorized512Small(ref T xRef, T y, ref T zRef, ref T dRef, nuint re } } } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static void VectorizedSmall8(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder) + { + Debug.Assert(sizeof(T) == 8); + + switch (remainder) + { + case 7: + case 6: + case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 yVec = Vector256.Create(y); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + yVec, + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.Create(y), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 yVec = Vector128.Create(y); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + yVec, + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.Create(y), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 1: + { + dRef = TTernaryOperator.Invoke(xRef, y, zRef); + goto case 0; + } + + case 0: + { + break; + } + } + } } /// Performs (x * y) + z. It will be rounded as one ternary operation if such an operation is accelerated on the current hardware.