From 73cf30dd4a00a1ee7b12a82a2cff99b1870a3c4e Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 5 Aug 2022 14:58:40 +0200 Subject: [PATCH] port SpanHelpers.IndexOfAny(ref byte, byte, byte, int) to Vector128/256 (#73384) --- .../src/System/SpanHelpers.Byte.cs | 97 ++++--------------- 1 file changed, 21 insertions(+), 76 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index f46371ebac60..9af0e6ac14b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -781,7 +781,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Avx2 branch also operates on Sse2 sizes, so check is combined. nint vectorDiff = (nint)length - Vector128.Count; @@ -897,10 +897,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + if (Vector256.IsHardwareAccelerated) { Vector256 search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -916,13 +916,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // First time this checks again against 0, however we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector256(ref searchSpace, offset); + search = Vector256.LoadUnsafe(ref searchSpace, offset); // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits(); + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -935,13 +932,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int } // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); + search = Vector256.LoadUnsafe(ref searchSpace, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -953,6 +947,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int } // Initial size check was done on method entry. + Vector128 compareResult; Debug.Assert(length >= Vector128.Count); { Vector128 search; @@ -961,37 +956,33 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // First time this checks against 0 and we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector128(ref searchSpace, offset); + search = Vector128.LoadUnsafe(ref searchSpace, offset); - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + + if (compareResult == Vector128.Zero) { // None matched offset += (nuint)Vector128.Count; continue; } + matches = compareResult.ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); + search = Vector128.LoadUnsafe(ref searchSpace, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search))); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + + if (compareResult == Vector128.Zero) { // None matched goto NotFound; } + + matches = compareResult.ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -999,52 +990,6 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int offset += (nuint)BitOperations.TrailingZeroCount(matches); goto Found; } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } else if (Vector.IsHardwareAccelerated) { Vector values0 = new Vector(value0);