From 456f416cacbea9b452ab6ce0503a33362a096b15 Mon Sep 17 00:00:00 2001
From: Edward Neal <55035479+edwardneal@users.noreply.github.com>
Date: Thu, 22 Jan 2026 00:20:33 +0000
Subject: [PATCH 1/3] Introduce shims for Encoding
Shim GetByteCount and GetBytes methods, and make sure these are used more widely.
Improves performance on netfx and netcore.
---
.../netfx/src/Microsoft.Data.SqlClient.csproj | 3 +
.../src/Microsoft/Data/SqlClient/TdsParser.cs | 35 +---------
.../Data/SqlClient/TdsValueSetter.cs | 5 +-
.../System/Text/EncodingExtensions.netfx.cs | 67 +++++++++++++++++++
4 files changed, 75 insertions(+), 35 deletions(-)
create mode 100644 src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
diff --git a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj
index 05a4284565..92cd11f098 100644
--- a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj
+++ b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft.Data.SqlClient.csproj
@@ -1003,6 +1003,9 @@
System\IO\StreamExtensions.netfx.cs
+
+ System\Text\EncodingExtensions.netfx.cs
+
System\Runtime\CompilerServices\IsExternalInit.netfx.cs
diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs
index 94281b4998..c971eccf7d 100644
--- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs
+++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs
@@ -8651,36 +8651,18 @@ private Task WriteEncodingChar(string s, Encoding encoding, TdsParserStateObject
private byte[] SerializeEncodingChar(string s, int numChars, int offset, Encoding encoding)
{
-#if NETFRAMEWORK
- char[] charData;
- byte[] byteData = null;
-
// if hitting 7.0 server, encoding will be null in metadata for columns or return values since
// 7.0 has no support for multiple code pages in data - single code page support only
- if (encoding == null)
- {
- encoding = _defaultEncoding;
- }
-
- charData = s.ToCharArray(offset, numChars);
-
- byteData = new byte[encoding.GetByteCount(charData, 0, charData.Length)];
- encoding.GetBytes(charData, 0, charData.Length, byteData, 0);
+ encoding ??= _defaultEncoding;
- return byteData;
-#else
return encoding.GetBytes(s, offset, numChars);
-#endif
}
private Task WriteEncodingChar(string s, int numChars, int offset, Encoding encoding, TdsParserStateObject stateObj, bool canAccumulate = true)
{
// if hitting 7.0 server, encoding will be null in metadata for columns or return values since
// 7.0 has no support for multiple code pages in data - single code page support only
- if (encoding == null)
- {
- encoding = _defaultEncoding;
- }
+ encoding ??= _defaultEncoding;
// Optimization: if the entire string fits in the current buffer, then copy it directly
int bytesLeft = stateObj._outBuff.Length - stateObj._outBytesUsed;
@@ -8692,23 +8674,14 @@ private Task WriteEncodingChar(string s, int numChars, int offset, Encoding enco
}
else
{
-#if NETFRAMEWORK
- char[] charData = s.ToCharArray(offset, numChars);
- byte[] byteData = encoding.GetBytes(charData, 0, numChars);
- Debug.Assert(byteData != null, "no data from encoding");
- return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
-#else
byte[] byteData = encoding.GetBytes(s, offset, numChars);
Debug.Assert(byteData != null, "no data from encoding");
return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
-#endif
}
}
internal int GetEncodingCharLength(string value, int numChars, int charOffset, Encoding encoding)
{
- // UNDONE: (PERF) this is an expensive way to get the length. Also, aren't we
- // UNDONE: (PERF) going through these steps twice when we write out a value?
if (string.IsNullOrEmpty(value))
{
return 0;
@@ -8726,9 +8699,7 @@ internal int GetEncodingCharLength(string value, int numChars, int charOffset, E
encoding = _defaultEncoding;
}
- char[] charData = value.ToCharArray(charOffset, numChars);
-
- return encoding.GetByteCount(charData, 0, numChars);
+ return encoding.GetByteCount(value, charOffset, numChars);
}
//
diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs
index 198460791b..fd9d64943e 100644
--- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs
+++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs
@@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length)
}
else
{
- char[] chars = value.ToCharArray(offset, length);
- bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars);
+ bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
}
SetBytes(0, bytes, 0, bytes.Length);
SetBytesLength(bytes.Length);
@@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length)
}
else
{
- bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length));
+ bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
}
_stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj);
_stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info
diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
new file mode 100644
index 0000000000..4a2cd19e9f
--- /dev/null
+++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if NETFRAMEWORK
+
+using System.Diagnostics;
+
+namespace System.Text;
+
+internal static class EncodingExtensions
+{
+ public static int GetByteCount(this Encoding encoding, string s, int offset, int count)
+ {
+ ReadOnlySpan slicedString = s.AsSpan(offset, count);
+
+ // This also implicitly checks for a null string. If the input string is null, slicedString
+ // will be default(ReadOnlySpan), which also has a length of null.
+ if (slicedString.Length == 0)
+ {
+ return 0;
+ }
+
+ unsafe
+ {
+ fixed (char* str = slicedString)
+ {
+ return encoding.GetByteCount(str, slicedString.Length);
+ }
+ }
+ }
+
+ public static byte[] GetBytes(this Encoding encoding, string s, int index, int count)
+ {
+ ReadOnlySpan slicedString = s.AsSpan(index, count);
+ byte[] bytes;
+ int bytesWritten;
+
+ // This also implicitly checks for a null string. If the input string is null, slicedString
+ // will be default(ReadOnlySpan), which also has a length of null.
+ if (slicedString.Length == 0)
+ {
+ return Array.Empty();
+ }
+
+ unsafe
+ {
+ fixed (char* str = slicedString)
+ {
+ int byteCount = encoding.GetByteCount(str, slicedString.Length);
+
+ bytes = new byte[byteCount];
+
+ fixed (byte* destArray = &bytes[0])
+ {
+ bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);
+
+ Debug.Assert(bytesWritten == byteCount);
+ }
+ }
+ }
+
+ return bytes;
+ }
+}
+
+#endif
From b0f9eec81446b433c5c8095d528623f5c553892d Mon Sep 17 00:00:00 2001
From: Edward Neal <55035479+edwardneal@users.noreply.github.com>
Date: Wed, 25 Mar 2026 18:15:00 +0000
Subject: [PATCH 2/3] Address comments in EncodingExtensions
---
.../src/System/Text/EncodingExtensions.netfx.cs | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
index 4a2cd19e9f..136f8bac62 100644
--- a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
+++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
@@ -6,6 +6,8 @@
using System.Diagnostics;
+#nullable enable
+
namespace System.Text;
internal static class EncodingExtensions
@@ -15,7 +17,7 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int
ReadOnlySpan slicedString = s.AsSpan(offset, count);
// This also implicitly checks for a null string. If the input string is null, slicedString
- // will be default(ReadOnlySpan), which also has a length of null.
+ // will be default(ReadOnlySpan), which also has a length of zero.
if (slicedString.Length == 0)
{
return 0;
@@ -33,11 +35,9 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int
public static byte[] GetBytes(this Encoding encoding, string s, int index, int count)
{
ReadOnlySpan slicedString = s.AsSpan(index, count);
- byte[] bytes;
- int bytesWritten;
// This also implicitly checks for a null string. If the input string is null, slicedString
- // will be default(ReadOnlySpan), which also has a length of null.
+ // will be default(ReadOnlySpan), which also has a length of zero.
if (slicedString.Length == 0)
{
return Array.Empty();
@@ -48,19 +48,17 @@ public static byte[] GetBytes(this Encoding encoding, string s, int index, int c
fixed (char* str = slicedString)
{
int byteCount = encoding.GetByteCount(str, slicedString.Length);
-
- bytes = new byte[byteCount];
+ byte[] bytes = new byte[byteCount];
fixed (byte* destArray = &bytes[0])
{
- bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);
+ int bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);
Debug.Assert(bytesWritten == byteCount);
+ return bytes;
}
}
}
-
- return bytes;
}
}
From c66397e69d083ca71ccb63a3a6de54dcb091a278 Mon Sep 17 00:00:00 2001
From: Edward Neal <55035479+edwardneal@users.noreply.github.com>
Date: Wed, 25 Mar 2026 19:56:56 +0000
Subject: [PATCH 3/3] Add unit tests for EncodingExtensions
This also highlighted that passing a null string should throw an exception rather than return 0/[].
---
.../System/Text/EncodingExtensions.netfx.cs | 18 +-
.../UnitTests/System/Text/EncodingTest.cs | 161 ++++++++++++++++++
2 files changed, 173 insertions(+), 6 deletions(-)
create mode 100644 src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs
diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
index 136f8bac62..baf2a275e5 100644
--- a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
+++ b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
@@ -12,12 +12,15 @@ namespace System.Text;
internal static class EncodingExtensions
{
- public static int GetByteCount(this Encoding encoding, string s, int offset, int count)
+ public static int GetByteCount(this Encoding encoding, string? s, int offset, int count)
{
+ if (s is null)
+ {
+ throw new ArgumentNullException(nameof(s));
+ }
+
ReadOnlySpan slicedString = s.AsSpan(offset, count);
- // This also implicitly checks for a null string. If the input string is null, slicedString
- // will be default(ReadOnlySpan), which also has a length of zero.
if (slicedString.Length == 0)
{
return 0;
@@ -32,12 +35,15 @@ public static int GetByteCount(this Encoding encoding, string s, int offset, int
}
}
- public static byte[] GetBytes(this Encoding encoding, string s, int index, int count)
+ public static byte[] GetBytes(this Encoding encoding, string? s, int index, int count)
{
+ if (s is null)
+ {
+ throw new ArgumentNullException(nameof(s));
+ }
+
ReadOnlySpan slicedString = s.AsSpan(index, count);
- // This also implicitly checks for a null string. If the input string is null, slicedString
- // will be default(ReadOnlySpan), which also has a length of zero.
if (slicedString.Length == 0)
{
return Array.Empty();
diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs
new file mode 100644
index 0000000000..9b5b7ec785
--- /dev/null
+++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Xunit;
+
+namespace System.Text.UnitTests;
+
+///
+/// Tests that the Encoding polyfills in netfx operate correctly and handle
+/// invalid parameter values.
+///
+///
+/// In the netcore cases, we're testing the built-in GetBytes and GetByteCount
+/// methods. The contract for our extension polyfills must match these implementations.
+///
+public class EncodingTest
+{
+ private const string ExampleStringValue = "ABCDéFG1234567abcdefg";
+
+ ///
+ /// Represents a series of invalid [offset, count] pairs into the
+ /// constant.
+ ///
+ public static TheoryData InvalidOffsetsAndCounts =>
+ new()
+ {
+ // Group 1: offset starts before the string.
+ // * Count extends beyond it.
+ { -1, 999 },
+ // * Count is valid.
+ { -1, 5 },
+ // Group 2: offset is valid.
+ // * Count extends beyond the end of it.
+ { 0, 999 },
+ // * Count extends backwards to the start it.
+ { 5, -5 },
+ // Group 3: offset starts beyond the end of the string.
+ // * Count extends beyond the end of it.
+ { 999, 999 },
+ // * Count extends backwards into the string.
+ { 999, -1005 }
+ };
+
+ #if NET
+ static EncodingTest()
+ {
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+ }
+#endif
+
+ ///
+ /// Verifies that GetByteCount throws an ArgumentNullException when passed a null string.
+ ///
+ [Fact]
+ public void GetByteCount_ThrowsOnNullString()
+ {
+ string nullString = null!;
+ Action act = () => Encoding.Unicode.GetByteCount(nullString, 0, 0);
+
+ Assert.Throws(act);
+ }
+
+ ///
+ /// Verifies that GetBytes throws an ArgumentNullException when passed a null string.
+ ///
+ [Fact]
+ public void GetBytes_ThrowsOnNullString()
+ {
+ string nullString = null!;
+ Action act = () => Encoding.Unicode.GetBytes(nullString, 0, 0);
+
+ Assert.Throws(act);
+ }
+
+ ///
+ /// Verifies that GetByteCount throws an ArgumentOutOfRangeException when passes an offset
+ /// or count which is outside of the string.
+ ///
+ /// offset parameter of GetByteCount.
+ /// count parameter of GetByteCount.
+ ///
+ [Theory]
+ [MemberData(nameof(InvalidOffsetsAndCounts))]
+ public void GetByteCount_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
+ {
+ Action act = () => Encoding.Unicode.GetByteCount(ExampleStringValue, offset, count);
+
+ Assert.Throws(act);
+ }
+
+ ///
+ /// Verifies that GetBytes throws an ArgumentOutOfRangeException when passes an offset
+ /// or count which is outside of the string.
+ ///
+ /// offset parameter of GetBytes.
+ /// count parameter of GetBytes.
+ [Theory]
+ [MemberData(nameof(InvalidOffsetsAndCounts))]
+ public void GetBytes_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
+ {
+ Action act = () => Encoding.Unicode.GetBytes(ExampleStringValue, offset, count);
+
+ Assert.Throws(act);
+ }
+
+ ///
+ /// Verifies that when using the new GetByteCount and GetBytes polyfills to encode the entire string, the return
+ /// value is equal to passing the string as-is to GetByteCount(string) and GetBytes(string).
+ ///
+ [Fact]
+ public void GetBytesOfFullStringByLength_MatchesGetBytesOfFullString()
+ {
+ byte[] fullStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue);
+ int fullStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue);
+
+ byte[] partialStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue, 0, ExampleStringValue.Length);
+ int partialStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue, 0, ExampleStringValue.Length);
+
+ Assert.Equal(fullStringByteCount, partialStringByteCount);
+ Assert.Equal(fullStringByteCount, partialStringBytes.Length);
+ Assert.Equal(fullStringBytes, partialStringBytes);
+ }
+
+ ///
+ /// Verifies that encoding a specific substring returns a byte array which can be decoded into the same string, in
+ /// various code pages.
+ ///
+ /// The code page identifier to use for transcoding.
+ [Theory]
+ // Unicode
+ [InlineData(1200)]
+ // UTF8
+ [InlineData(65001)]
+ public void GetBytes_Roundtrips(int codePage)
+ {
+ Encoding encoding = Encoding.GetEncoding(codePage);
+ byte[] partialStringBytes = encoding.GetBytes(ExampleStringValue, 4, 5);
+ string expectedRoundtrippedValue = ExampleStringValue.Substring(4, 5);
+ string roundtrip = encoding.GetString(partialStringBytes);
+
+ Assert.Equal(expectedRoundtrippedValue, roundtrip);
+ }
+
+ ///
+ /// Verifies that when a string contains a multibyte character, the byte array returns the correct number of
+ /// elements for the encoding.
+ ///
+ [Fact]
+ public void GetByteCount_ReturnsCorrectValueOnMultiCharacterRune()
+ {
+ // The character é is two bytes in UTF8.
+ Assert.Equal(6, Encoding.UTF8.GetByteCount(ExampleStringValue, 4, 5));
+
+ // All Unicode characters in our sample string are two bytes long.
+ Assert.Equal(10, Encoding.Unicode.GetByteCount(ExampleStringValue, 4, 5));
+
+ // Code page 1251 does not have the é character, so treats it as the single-byte character "e".
+ Assert.Equal(5, Encoding.GetEncoding(1251).GetByteCount(ExampleStringValue, 4, 5));
+ }
+}