diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 39d0baf753f962..d568696836d768 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1188,7 +1188,21 @@ public string Replace(char oldChar, char newChar) // process the remaining elements vectorized too. // Thus we adjust the pointers so that at least one full vector from the end can be processed. nuint length = (uint)Length; - if (Vector128.IsHardwareAccelerated && length >= (uint)Vector128.Count) + if (Vector512.IsHardwareAccelerated && length >= (uint)Vector512.Count) + { + nuint adjust = (length - remainingLength) & ((uint)Vector512.Count - 1); + pSrc = ref Unsafe.Subtract(ref pSrc, adjust); + pDst = ref Unsafe.Subtract(ref pDst, adjust); + remainingLength += adjust; + } + else if (Vector256.IsHardwareAccelerated && length >= (uint)Vector256.Count) + { + nuint adjust = (length - remainingLength) & ((uint)Vector256.Count - 1); + pSrc = ref Unsafe.Subtract(ref pSrc, adjust); + pDst = ref Unsafe.Subtract(ref pDst, adjust); + remainingLength += adjust; + } + else if (Vector128.IsHardwareAccelerated && length >= (uint)Vector128.Count) { nuint adjust = (length - remainingLength) & ((uint)Vector128.Count - 1); pSrc = ref Unsafe.Subtract(ref pSrc, adjust); @@ -1905,40 +1919,98 @@ private static void MakeSeparatorListVectorized(ReadOnlySpan sourceSpan, r { throw new PlatformNotSupportedException(); } - Debug.Assert(sourceSpan.Length >= Vector128.Count); - - nuint offset = 0; nuint lengthToExamine = (uint)sourceSpan.Length; - + nuint offset = 0; ref char source = ref MemoryMarshal.GetReference(sourceSpan); - Vector128 v1 = Vector128.Create((ushort)c); - Vector128 v2 = Vector128.Create((ushort)c2); - Vector128 v3 = Vector128.Create((ushort)c3); + if (Vector512.IsHardwareAccelerated && lengthToExamine >= (uint)Vector512.Count*2) + { + Vector512 v1 = Vector512.Create((ushort)c); + Vector512 v2 = Vector512.Create((ushort)c2); + Vector512 v3 = Vector512.Create((ushort)c3); + + do + { + Vector512 vector = Vector512.LoadUnsafe(ref source, offset); + Vector512 v1Eq = Vector512.Equals(vector, v1); + Vector512 v2Eq = Vector512.Equals(vector, v2); + Vector512 v3Eq = Vector512.Equals(vector, v3); + Vector512 cmp = (v1Eq | v2Eq | v3Eq).AsByte(); - do + if (cmp != Vector512.Zero) + { + // Skip every other bit + ulong mask = cmp.ExtractMostSignificantBits() & 0x5555555555555555; + do + { + uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char); + sepListBuilder.Append((int)(offset + bitPos)); + mask = BitOperations.ResetLowestSetBit(mask); + } while (mask != 0); + } + + offset += (nuint)Vector512.Count; + } while (offset <= lengthToExamine - (nuint)Vector512.Count); + } + else if (Vector256.IsHardwareAccelerated && lengthToExamine >= (uint)Vector256.Count*2) { - Vector128 vector = Vector128.LoadUnsafe(ref source, offset); - Vector128 v1Eq = Vector128.Equals(vector, v1); - Vector128 v2Eq = Vector128.Equals(vector, v2); - Vector128 v3Eq = Vector128.Equals(vector, v3); - Vector128 cmp = (v1Eq | v2Eq | v3Eq).AsByte(); + Vector256 v1 = Vector256.Create((ushort)c); + Vector256 v2 = Vector256.Create((ushort)c2); + Vector256 v3 = Vector256.Create((ushort)c3); - if (cmp != Vector128.Zero) + do { - // Skip every other bit - uint mask = cmp.ExtractMostSignificantBits() & 0x5555; - do + Vector256 vector = Vector256.LoadUnsafe(ref source, offset); + Vector256 v1Eq = Vector256.Equals(vector, v1); + Vector256 v2Eq = Vector256.Equals(vector, v2); + Vector256 v3Eq = Vector256.Equals(vector, v3); + Vector256 cmp = (v1Eq | v2Eq | v3Eq).AsByte(); + + if (cmp != Vector256.Zero) { - uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char); - sepListBuilder.Append((int)(offset + bitPos)); - mask = BitOperations.ResetLowestSetBit(mask); - } while (mask != 0); - } + // Skip every other bit + uint mask = cmp.ExtractMostSignificantBits() & 0x55555555; + do + { + uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char); + sepListBuilder.Append((int)(offset + bitPos)); + mask = BitOperations.ResetLowestSetBit(mask); + } while (mask != 0); + } + + offset += (nuint)Vector256.Count; + } while (offset <= lengthToExamine - (nuint)Vector256.Count); + } + else if (Vector128.IsHardwareAccelerated) + { + Vector128 v1 = Vector128.Create((ushort)c); + Vector128 v2 = Vector128.Create((ushort)c2); + Vector128 v3 = Vector128.Create((ushort)c3); + + do + { + Vector128 vector = Vector128.LoadUnsafe(ref source, offset); + Vector128 v1Eq = Vector128.Equals(vector, v1); + Vector128 v2Eq = Vector128.Equals(vector, v2); + Vector128 v3Eq = Vector128.Equals(vector, v3); + Vector128 cmp = (v1Eq | v2Eq | v3Eq).AsByte(); - offset += (nuint)Vector128.Count; - } while (offset <= lengthToExamine - (nuint)Vector128.Count); + if (cmp != Vector128.Zero) + { + // Skip every other bit + uint mask = cmp.ExtractMostSignificantBits() & 0x5555; + do + { + uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char); + sepListBuilder.Append((int)(offset + bitPos)); + mask = BitOperations.ResetLowestSetBit(mask); + } while (mask != 0); + } + + offset += (nuint)Vector128.Count; + } while (offset <= lengthToExamine - (nuint)Vector128.Count); + } while (offset < lengthToExamine) {