From 456f416cacbea9b452ab6ce0503a33362a096b15 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Thu, 22 Jan 2026 00:20:33 +0000 Subject: [PATCH 1/3] Introduce shims for Encoding Shim GetByteCount and GetBytes methods, and make sure these are used more widely. Improves performance on netfx and netcore. --- .../netfx/src/Microsoft.Data.SqlClient.csproj | 3 + .../src/Microsoft/Data/SqlClient/TdsParser.cs | 35 +--------- .../Data/SqlClient/TdsValueSetter.cs | 5 +- .../System/Text/EncodingExtensions.netfx.cs | 67 +++++++++++++++++++ 4 files changed, 75 insertions(+), 35 deletions(-) create mode 100644 src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs diff --git a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj index 05a4284565..92cd11f098 100644 --- a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj +++ b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj @@ -1003,6 +1003,9 @@ System\IO\StreamExtensions.netfx.cs + + System\Text\EncodingExtensions.netfx.cs + System\Runtime\CompilerServices\IsExternalInit.netfx.cs diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs index 94281b4998..c971eccf7d 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs @@ -8651,36 +8651,18 @@ private Task WriteEncodingChar(string s, Encoding encoding, TdsParserStateObject private byte[] SerializeEncodingChar(string s, int numChars, int offset, Encoding encoding) { -#if NETFRAMEWORK - char[] charData; - byte[] byteData = null; - // if hitting 7.0 server, encoding will be null in metadata for columns or return values since // 7.0 has no support for multiple code pages in data - single code page support only - if (encoding == null) - { - encoding = _defaultEncoding; - } - - charData = s.ToCharArray(offset, numChars); - - byteData = new byte[encoding.GetByteCount(charData, 0, charData.Length)]; - encoding.GetBytes(charData, 0, charData.Length, byteData, 0); + encoding ??= _defaultEncoding; - return byteData; -#else return encoding.GetBytes(s, offset, numChars); -#endif } private Task WriteEncodingChar(string s, int numChars, int offset, Encoding encoding, TdsParserStateObject stateObj, bool canAccumulate = true) { // if hitting 7.0 server, encoding will be null in metadata for columns or return values since // 7.0 has no support for multiple code pages in data - single code page support only - if (encoding == null) - { - encoding = _defaultEncoding; - } + encoding ??= _defaultEncoding; // Optimization: if the entire string fits in the current buffer, then copy it directly int bytesLeft = stateObj._outBuff.Length - stateObj._outBytesUsed; @@ -8692,23 +8674,14 @@ private Task WriteEncodingChar(string s, int numChars, int offset, Encoding enco } else { -#if NETFRAMEWORK - char[] charData = s.ToCharArray(offset, numChars); - byte[] byteData = encoding.GetBytes(charData, 0, numChars); - Debug.Assert(byteData != null, "no data from encoding"); - return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate); -#else byte[] byteData = encoding.GetBytes(s, offset, numChars); Debug.Assert(byteData != null, "no data from encoding"); return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate); -#endif } } internal int GetEncodingCharLength(string value, int numChars, int charOffset, Encoding encoding) { - // UNDONE: (PERF) this is an expensive way to get the length. Also, aren't we - // UNDONE: (PERF) going through these steps twice when we write out a value? if (string.IsNullOrEmpty(value)) { return 0; @@ -8726,9 +8699,7 @@ internal int GetEncodingCharLength(string value, int numChars, int charOffset, E encoding = _defaultEncoding; } - char[] charData = value.ToCharArray(charOffset, numChars); - - return encoding.GetByteCount(charData, 0, numChars); + return encoding.GetByteCount(value, charOffset, numChars); } // diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs index 198460791b..fd9d64943e 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs @@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length) } else { - char[] chars = value.ToCharArray(offset, length); - bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars); + bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length); } SetBytes(0, bytes, 0, bytes.Length); SetBytesLength(bytes.Length); @@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length) } else { - bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length)); + bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length); } _stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj); _stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs new file mode 100644 index 0000000000..4a2cd19e9f --- /dev/null +++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if NETFRAMEWORK + +using System.Diagnostics; + +namespace System.Text; + +internal static class EncodingExtensions +{ + public static int GetByteCount(this Encoding encoding, string s, int offset, int count) + { + ReadOnlySpan slicedString = s.AsSpan(offset, count); + + // This also implicitly checks for a null string. If the input string is null, slicedString + // will be default(ReadOnlySpan), which also has a length of null. + if (slicedString.Length == 0) + { + return 0; + } + + unsafe + { + fixed (char* str = slicedString) + { + return encoding.GetByteCount(str, slicedString.Length); + } + } + } + + public static byte[] GetBytes(this Encoding encoding, string s, int index, int count) + { + ReadOnlySpan slicedString = s.AsSpan(index, count); + byte[] bytes; + int bytesWritten; + + // This also implicitly checks for a null string. If the input string is null, slicedString + // will be default(ReadOnlySpan), which also has a length of null. + if (slicedString.Length == 0) + { + return Array.Empty(); + } + + unsafe + { + fixed (char* str = slicedString) + { + int byteCount = encoding.GetByteCount(str, slicedString.Length); + + bytes = new byte[byteCount]; + + fixed (byte* destArray = &bytes[0]) + { + bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length); + + Debug.Assert(bytesWritten == byteCount); + } + } + } + + return bytes; + } +} + +#endif From b0f9eec81446b433c5c8095d528623f5c553892d Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:15:00 +0000 Subject: [PATCH 2/3] Address comments in EncodingExtensions --- .../src/System/Text/EncodingExtensions.netfx.cs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs index 4a2cd19e9f..136f8bac62 100644 --- a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs +++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs @@ -6,6 +6,8 @@ using System.Diagnostics; +#nullable enable + namespace System.Text; internal static class EncodingExtensions @@ -15,7 +17,7 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int ReadOnlySpan slicedString = s.AsSpan(offset, count); // This also implicitly checks for a null string. If the input string is null, slicedString - // will be default(ReadOnlySpan), which also has a length of null. + // will be default(ReadOnlySpan), which also has a length of zero. if (slicedString.Length == 0) { return 0; @@ -33,11 +35,9 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int public static byte[] GetBytes(this Encoding encoding, string s, int index, int count) { ReadOnlySpan slicedString = s.AsSpan(index, count); - byte[] bytes; - int bytesWritten; // This also implicitly checks for a null string. If the input string is null, slicedString - // will be default(ReadOnlySpan), which also has a length of null. + // will be default(ReadOnlySpan), which also has a length of zero. if (slicedString.Length == 0) { return Array.Empty(); @@ -48,19 +48,17 @@ public static byte[] GetBytes(this Encoding encoding, string s, int index, int c fixed (char* str = slicedString) { int byteCount = encoding.GetByteCount(str, slicedString.Length); - - bytes = new byte[byteCount]; + byte[] bytes = new byte[byteCount]; fixed (byte* destArray = &bytes[0]) { - bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length); + int bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length); Debug.Assert(bytesWritten == byteCount); + return bytes; } } } - - return bytes; } } From c66397e69d083ca71ccb63a3a6de54dcb091a278 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Wed, 25 Mar 2026 19:56:56 +0000 Subject: [PATCH 3/3] Add unit tests for EncodingExtensions This also highlighted that passing a null string should throw an exception rather than return 0/[]. --- .../System/Text/EncodingExtensions.netfx.cs | 18 +- .../UnitTests/System/Text/EncodingTest.cs | 161 ++++++++++++++++++ 2 files changed, 173 insertions(+), 6 deletions(-) create mode 100644 src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs index 136f8bac62..baf2a275e5 100644 --- a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs +++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs @@ -12,12 +12,15 @@ namespace System.Text; internal static class EncodingExtensions { - public static int GetByteCount(this Encoding encoding, string s, int offset, int count) + public static int GetByteCount(this Encoding encoding, string? s, int offset, int count) { + if (s is null) + { + throw new ArgumentNullException(nameof(s)); + } + ReadOnlySpan slicedString = s.AsSpan(offset, count); - // This also implicitly checks for a null string. If the input string is null, slicedString - // will be default(ReadOnlySpan), which also has a length of zero. if (slicedString.Length == 0) { return 0; @@ -32,12 +35,15 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int } } - public static byte[] GetBytes(this Encoding encoding, string s, int index, int count) + public static byte[] GetBytes(this Encoding encoding, string? s, int index, int count) { + if (s is null) + { + throw new ArgumentNullException(nameof(s)); + } + ReadOnlySpan slicedString = s.AsSpan(index, count); - // This also implicitly checks for a null string. If the input string is null, slicedString - // will be default(ReadOnlySpan), which also has a length of zero. if (slicedString.Length == 0) { return Array.Empty(); diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs new file mode 100644 index 0000000000..9b5b7ec785 --- /dev/null +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs @@ -0,0 +1,161 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Xunit; + +namespace System.Text.UnitTests; + +/// +/// Tests that the Encoding polyfills in netfx operate correctly and handle +/// invalid parameter values. +/// +/// +/// In the netcore cases, we're testing the built-in GetBytes and GetByteCount +/// methods. The contract for our extension polyfills must match these implementations. +/// +public class EncodingTest +{ + private const string ExampleStringValue = "ABCDéFG1234567abcdefg"; + + /// + /// Represents a series of invalid [offset, count] pairs into the + /// constant. + /// + public static TheoryData InvalidOffsetsAndCounts => + new() + { + // Group 1: offset starts before the string. + // * Count extends beyond it. + { -1, 999 }, + // * Count is valid. + { -1, 5 }, + // Group 2: offset is valid. + // * Count extends beyond the end of it. + { 0, 999 }, + // * Count extends backwards to the start it. + { 5, -5 }, + // Group 3: offset starts beyond the end of the string. + // * Count extends beyond the end of it. + { 999, 999 }, + // * Count extends backwards into the string. + { 999, -1005 } + }; + + #if NET + static EncodingTest() + { + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + } +#endif + + /// + /// Verifies that GetByteCount throws an ArgumentNullException when passed a null string. + /// + [Fact] + public void GetByteCount_ThrowsOnNullString() + { + string nullString = null!; + Action act = () => Encoding.Unicode.GetByteCount(nullString, 0, 0); + + Assert.Throws(act); + } + + /// + /// Verifies that GetBytes throws an ArgumentNullException when passed a null string. + /// + [Fact] + public void GetBytes_ThrowsOnNullString() + { + string nullString = null!; + Action act = () => Encoding.Unicode.GetBytes(nullString, 0, 0); + + Assert.Throws(act); + } + + /// + /// Verifies that GetByteCount throws an ArgumentOutOfRangeException when passes an offset + /// or count which is outside of the string. + /// + /// offset parameter of GetByteCount. + /// count parameter of GetByteCount. + /// + [Theory] + [MemberData(nameof(InvalidOffsetsAndCounts))] + public void GetByteCount_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count) + { + Action act = () => Encoding.Unicode.GetByteCount(ExampleStringValue, offset, count); + + Assert.Throws(act); + } + + /// + /// Verifies that GetBytes throws an ArgumentOutOfRangeException when passes an offset + /// or count which is outside of the string. + /// + /// offset parameter of GetBytes. + /// count parameter of GetBytes. + [Theory] + [MemberData(nameof(InvalidOffsetsAndCounts))] + public void GetBytes_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count) + { + Action act = () => Encoding.Unicode.GetBytes(ExampleStringValue, offset, count); + + Assert.Throws(act); + } + + /// + /// Verifies that when using the new GetByteCount and GetBytes polyfills to encode the entire string, the return + /// value is equal to passing the string as-is to GetByteCount(string) and GetBytes(string). + /// + [Fact] + public void GetBytesOfFullStringByLength_MatchesGetBytesOfFullString() + { + byte[] fullStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue); + int fullStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue); + + byte[] partialStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue, 0, ExampleStringValue.Length); + int partialStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue, 0, ExampleStringValue.Length); + + Assert.Equal(fullStringByteCount, partialStringByteCount); + Assert.Equal(fullStringByteCount, partialStringBytes.Length); + Assert.Equal(fullStringBytes, partialStringBytes); + } + + /// + /// Verifies that encoding a specific substring returns a byte array which can be decoded into the same string, in + /// various code pages. + /// + /// The code page identifier to use for transcoding. + [Theory] + // Unicode + [InlineData(1200)] + // UTF8 + [InlineData(65001)] + public void GetBytes_Roundtrips(int codePage) + { + Encoding encoding = Encoding.GetEncoding(codePage); + byte[] partialStringBytes = encoding.GetBytes(ExampleStringValue, 4, 5); + string expectedRoundtrippedValue = ExampleStringValue.Substring(4, 5); + string roundtrip = encoding.GetString(partialStringBytes); + + Assert.Equal(expectedRoundtrippedValue, roundtrip); + } + + /// + /// Verifies that when a string contains a multibyte character, the byte array returns the correct number of + /// elements for the encoding. + /// + [Fact] + public void GetByteCount_ReturnsCorrectValueOnMultiCharacterRune() + { + // The character é is two bytes in UTF8. + Assert.Equal(6, Encoding.UTF8.GetByteCount(ExampleStringValue, 4, 5)); + + // All Unicode characters in our sample string are two bytes long. + Assert.Equal(10, Encoding.Unicode.GetByteCount(ExampleStringValue, 4, 5)); + + // Code page 1251 does not have the é character, so treats it as the single-byte character "e". + Assert.Equal(5, Encoding.GetEncoding(1251).GetByteCount(ExampleStringValue, 4, 5)); + } +}