From 0bbc9d14b5740dea8073278edfd4ffbcc2e6ad38 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Mon, 27 Oct 2025 15:04:51 -0600
Subject: [PATCH 01/10] tentative fixes for issue #1440

---
 Changelog                |  2 ++
 include/netcdf-compat.h  |  2 +-
 src/netCDF4/_netCDF4.pyx | 13 +++++++------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/Changelog b/Changelog
index f9d0de0f5..455bd1e06 100644
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,5 @@
+ version 1.7.4 (not yet released)
+ ================================
  version 1.7.3 (tag v1.7.3rel)
  =============================
  * Python 3.14 wheels (issue #1432)
diff --git a/include/netcdf-compat.h b/include/netcdf-compat.h
index d1144d979..ccfb8322e 100644
--- a/include/netcdf-compat.h
+++ b/include/netcdf-compat.h
@@ -60,7 +60,7 @@ static inline int nc_get_alignment(int* thresholdp, int* alignmentp) {
 #else
 #define HAS_NCRCSET 0
 static inline int nc_rc_set(const char* key, const char* value) { return NC_EINVAL; }
-static inline const char *nc_rc_get(const char* key) { return NC_EINVAL; }
+static inline const char *nc_rc_get(const char* key) { return NULL; }
 #endif
 
 #if NC_VERSION_GE(4, 4, 0)
diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index acdfdf5a5..487dbcb02 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -5525,7 +5525,11 @@ cannot be safely cast to variable data type""" % attname
                 # if data is a string or a bytes object, convert to a numpy string array
                 # whose length is equal to the rightmost dimension of the
                 # variable.
-                if type(data) in [str,bytes]: data = numpy.asarray(data,dtype='S'+repr(self.shape[-1]))
+                if type(data) in [str,bytes]:
+                    if encoding == 'ascii':
+                        data = numpy.asarray(data,dtype='S'+repr(self.shape[-1]))
+                    else:
+                        data = numpy.asarray(data,dtype='U'+repr(self.shape[-1]))
                 if data.dtype.kind in ['S','U'] and data.dtype.itemsize > 1:
                     # if data is a numpy string array, convert it to an array
                     # of characters with one more dimension.
@@ -6816,15 +6820,12 @@ returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape
     dtype = b.dtype.kind
     if dtype not in ["S","U"]:
         raise ValueError("type must be string or unicode ('S' or 'U')")
-    if encoding in ['none','None','bytes']:
-        bs = b.tobytes()
-    else:
-        bs = b.tobytes().decode(encoding)
+    bs = b.tobytes()
     slen = int(b.shape[-1])
     if encoding in ['none','None','bytes']:
         a = numpy.array([bs[n1:n1+slen] for n1 in range(0,len(bs),slen)],'S'+repr(slen))
     else:
-        a = numpy.array([bs[n1:n1+slen] for n1 in range(0,len(bs),slen)],'U'+repr(slen))
+        a = numpy.array([bs[n1:n1+slen].decode(encoding) for n1 in range(0,len(bs),slen)],'U'+repr(slen))
     a.shape = b.shape[:-1]
     return a
 

From 826b634a7896db9e4e1e9aaa41b267010fcbba12 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Mon, 27 Oct 2025 19:19:24 -0600
Subject: [PATCH 02/10] add n_strlen kwarg to stringtochar; encode non-ascii strings per element

---
 src/netCDF4/_netCDF4.pyx | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index 487dbcb02..5f9108b1e 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -5533,7 +5533,7 @@ cannot be safely cast to variable data type""" % attname
                 if data.dtype.kind in ['S','U'] and data.dtype.itemsize > 1:
                     # if data is a numpy string array, convert it to an array
                     # of characters with one more dimension.
-                    data = stringtochar(data, encoding=encoding)
+                    data = stringtochar(data, encoding=encoding,n_strlen=self.shape[-1])
 
         # if structured data has strings (and _Encoding att set), create view as char arrays
         # (issue #773)
@@ -6775,9 +6775,9 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'`
     arr[0:len(string)] = tuple(string)
     return arr
 
-def stringtochar(a,encoding='utf-8'):
+def stringtochar(a,encoding='utf-8',n_strlen=None):
     """
-**`stringtochar(a,encoding='utf-8')`**
+**`stringtochar(a,encoding='utf-8',n_strlen=None)`**
 
 convert a string array to a character array with one extra dimension
 
@@ -6789,16 +6789,29 @@ optional kwarg `encoding` can be used to specify character encoding (default
 `utf-8`). If `encoding` is 'none' or 'bytes', a `numpy.string_` the input array
 is treated a raw byte strings (`numpy.string_`).
 
+optional kwarg `n_strlen` is the number of bytes stored for each encoded
+string (the length of the added rightmost dimension).  Default is None, which
+means `n_strlen` is set to a.itemsize (bytes per string in the input array).
+
 returns a numpy character array with datatype `'S1'` or `'U1'`
 and shape `a.shape + (N,)`, where N is the length of each string in a."""
     dtype = a.dtype.kind
+    if n_strlen is None:
+        n_strlen = a.dtype.itemsize
     if dtype not in ["S","U"]:
         raise ValueError("type must string or unicode ('S' or 'U')")
     if encoding in ['none','None','bytes']:
         b = numpy.array(tuple(a.tobytes()),'S1')
+    elif encoding == 'ascii':
+        b = numpy.array(tuple(a.tobytes().decode('ascii')))
+        b.shape = a.shape + (n_strlen,)
     else:
-        b = numpy.array(tuple(a.tobytes().decode(encoding)),dtype+'1')
-    b.shape = a.shape + (a.itemsize,)
+        if not a.ndim:
+            a = numpy.array([a])
+        bbytes = [text.encode(encoding) for text in a]
+        pad = b'\0' * n_strlen
+        bbytes = [(x + pad)[:n_strlen] for x in bbytes]
+        b = numpy.array([[bb[i:i+1] for i in range(n_strlen)] for bb in bbytes])
     return b
 
 def chartostring(b,encoding='utf-8'):

From 915c133b7680df723339942d4ce870579f441b70 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Mon, 27 Oct 2025 20:22:41 -0600
Subject: [PATCH 03/10] stringtochar: use encoding and input dtype kind in ascii branch

---
 src/netCDF4/_netCDF4.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index 5f9108b1e..87880cc45 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -6803,7 +6803,7 @@ and shape `a.shape + (N,)`, where N is the length of each string in a."""
     if encoding in ['none','None','bytes']:
         b = numpy.array(tuple(a.tobytes()),'S1')
     elif encoding == 'ascii':
-        b = numpy.array(tuple(a.tobytes().decode('ascii')))
+        b = numpy.array(tuple(a.tobytes().decode(encoding)),dtype+'1')
         b.shape = a.shape + (n_strlen,)
     else:
         if not a.ndim:

From c5c25875b647fa0d76fb90a53bb8f678b4f73e67 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Mon, 27 Oct 2025 20:48:23 -0600
Subject: [PATCH 04/10] update stringtochar stub

---
 src/netCDF4/__init__.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi
index 97062cf51..f07aa0b8f 100644
--- a/src/netCDF4/__init__.pyi
+++ b/src/netCDF4/__init__.pyi
@@ -699,6 +699,7 @@ def stringtoarr(
 def stringtochar(
     a: npt.NDArray[np.character],
     encoding: Literal["none", "None", "bytes"],
+    n_strlen: int
 ) -> npt.NDArray[np.bytes_]: ...
 @overload
 def stringtochar(

From 4b7201e50e21e41e7f19e4e8f8559e98b8e4284e Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Mon, 27 Oct 2025 20:57:24 -0600
Subject: [PATCH 05/10] make n_strlen optional (default None) in stringtochar stub

---
 src/netCDF4/__init__.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi
index f07aa0b8f..e27fb6370 100644
--- a/src/netCDF4/__init__.pyi
+++ b/src/netCDF4/__init__.pyi
@@ -699,7 +699,7 @@ def stringtoarr(
 def stringtochar(
     a: npt.NDArray[np.character],
     encoding: Literal["none", "None", "bytes"],
-    n_strlen: int
+    n_strlen: int | None = None,
 ) -> npt.NDArray[np.bytes_]: ...
 @overload
 def stringtochar(

From 4e0abacdc5821bdbc1ce16b3e1eda2d081b84ac0 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Tue, 28 Oct 2025 09:03:35 -0600
Subject: [PATCH 06/10] add test for issue1440

---
 test/test_stringarr.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/test/test_stringarr.py b/test/test_stringarr.py
index 9d4fcd909..aed2203df 100644
--- a/test/test_stringarr.py
+++ b/test/test_stringarr.py
@@ -3,6 +3,7 @@
 import unittest
 import os
 from numpy.testing import assert_array_equal, assert_array_almost_equal
+import numpy as np
 
 def generateString(length, alphabet=string.ascii_letters + string.digits + string.punctuation):
     return(''.join([random.choice(alphabet) for i in range(length)]))
@@ -20,6 +21,10 @@ def generateString(length, alphabet=string.ascii_letters + string.digits + strin
 datau = data.astype('U')
 datac = stringtochar(data, encoding='ascii')
 
+nx, n_strlen = 3, 10
+unicode_strings = np.array(['Münster', 'London', 'Amsterdam'],dtype='U'+str(n_strlen))
+unicode_strings2 = np.array(['Münster', 'Liége', 'Amsterdam'],dtype='U'+str(n_strlen))
+
 class StringArrayTestCase(unittest.TestCase):
 
     def setUp(self):
@@ -28,6 +33,8 @@ def setUp(self):
         nc.createDimension('n1',None)
         nc.createDimension('n2',n2)
         nc.createDimension('nchar',nchar)
+        nc.createDimension("x", nx)
+        nc.createDimension("nstr", n_strlen)
         v = nc.createVariable('strings','S1',('n1','n2','nchar'))
         v2 = nc.createVariable('strings2','S1',('n1','n2','nchar'))
         # if _Encoding set, string array should automatically be converted
@@ -44,6 +51,11 @@ def setUp(self):
         v2[-1,-1] = data[-1,-1].tobytes() # write single python string
         # _Encoding should be ignored if an array of characters is specified
         v3[:] = stringtochar(data, encoding='ascii')
+        # test unicode strings (issue #1440)
+        v4 = nc.createVariable("strings4", "S1", dimensions=("x", "nstr",))
+        v4._Encoding = "UTF-8"
+        v4[:] = unicode_strings
+        v4[1] = "Liége"
         nc.close()
 
     def tearDown(self):
@@ -57,6 +69,8 @@ def runTest(self):
         v = nc.variables['strings']
         v2 = nc.variables['strings2']
         v3 = nc.variables['strings3']
+        v4 = nc.variables['strings4']
+        assert np.all(v4[:]==unicode_strings2)
         assert v.dtype.str[1:] in ['S1','U1']
         assert v.shape == (nrecs,n2,nchar)
         for nrec in range(nrecs):

From 51edbab95ba10083f592615b789238d255ff403e Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Tue, 28 Oct 2025 09:38:06 -0600
Subject: [PATCH 07/10] add Changelog entry for issue #1440

---
 Changelog | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Changelog b/Changelog
index 455bd1e06..c74134c2a 100644
--- a/Changelog
+++ b/Changelog
@@ -1,5 +1,8 @@
  version 1.7.4 (not yet released)
  ================================
+ * Make sure automatic conversion of character arrays <--> string arrays works for Unicode strings (issue #1440).
+   (previously only worked correctly for encoding="ascii").
+
  version 1.7.3 (tag v1.7.3rel)
  =============================
  * Python 3.14 wheels (issue #1432)

From ea3d3a16b634aa60897114de0fc906a149220a6c Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Tue, 28 Oct 2025 09:41:25 -0600
Subject: [PATCH 08/10] docs: drop "(usually ascii)" from _Encoding description

---
 src/netCDF4/_netCDF4.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
index 87880cc45..f0025dcd2 100644
--- a/src/netCDF4/_netCDF4.pyx
+++ b/src/netCDF4/_netCDF4.pyx
@@ -1066,7 +1066,7 @@ If the `_Encoding` special attribute is set for a character array
 (dtype `S1`) variable, the `chartostring` utility function is used to convert the array of
 characters to an array of strings with one less dimension (the last dimension is
 interpreted as the length of each string) when reading the data. The character
-set (usually ascii) is specified by the `_Encoding` attribute. If `_Encoding`
+set is specified by the `_Encoding` attribute. If `_Encoding`
 is 'none' or 'bytes', then the character array is converted to a numpy
 fixed-width byte string array (dtype `S#`), otherwise a numpy unicode (dtype
 `U#`) array is created.  When writing the data,

From a0486d2ba8baef19dc243ca4431988c8e6bed0a3 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Tue, 28 Oct 2025 10:00:10 -0600
Subject: [PATCH 09/10] extend issue #1440 test with Cyrillic and CJK strings

---
 test/test_stringarr.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/test_stringarr.py b/test/test_stringarr.py
index aed2203df..36a3ea03f 100644
--- a/test/test_stringarr.py
+++ b/test/test_stringarr.py
@@ -21,9 +21,9 @@ def generateString(length, alphabet=string.ascii_letters + string.digits + strin
 datau = data.astype('U')
 datac = stringtochar(data, encoding='ascii')
 
-nx, n_strlen = 3, 10
-unicode_strings = np.array(['Münster', 'London', 'Amsterdam'],dtype='U'+str(n_strlen))
-unicode_strings2 = np.array(['Münster', 'Liége', 'Amsterdam'],dtype='U'+str(n_strlen))
+nx, n_strlen = 3, 12
+unicode_strings = np.array(['Münster', 'Liége', '東京'],dtype='U'+str(n_strlen))
+unicode_strings2 = np.array(['Münster', 'Москва', '東京'],dtype='U'+str(n_strlen))
 
 class StringArrayTestCase(unittest.TestCase):
 
@@ -55,7 +55,7 @@ def setUp(self):
         v4 = nc.createVariable("strings4", "S1", dimensions=("x", "nstr",))
         v4._Encoding = "UTF-8"
         v4[:] = unicode_strings
-        v4[1] = "Liége"
+        v4[1] = "Москва"
         nc.close()
 
     def tearDown(self):
@@ -70,6 +70,7 @@ def runTest(self):
         v2 = nc.variables['strings2']
         v3 = nc.variables['strings3']
         v4 = nc.variables['strings4']
+        print(v4[:])
         assert np.all(v4[:]==unicode_strings2)
         assert v.dtype.str[1:] in ['S1','U1']
         assert v.shape == (nrecs,n2,nchar)

From caed197d76a8cd86bc2362e9d672ae3103f7d109 Mon Sep 17 00:00:00 2001
From: jswhit2 <jwhitaker@Jeffreys-MacBook-Air.local>
Date: Tue, 28 Oct 2025 10:10:41 -0600
Subject: [PATCH 10/10] test raw bytes written for issue #1440; remove debug print

---
 test/test_stringarr.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_stringarr.py b/test/test_stringarr.py
index 36a3ea03f..780dd7ca7 100644
--- a/test/test_stringarr.py
+++ b/test/test_stringarr.py
@@ -24,6 +24,7 @@ def generateString(length, alphabet=string.ascii_letters + string.digits + strin
 nx, n_strlen = 3, 12
 unicode_strings = np.array(['Münster', 'Liége', '東京'],dtype='U'+str(n_strlen))
 unicode_strings2 = np.array(['Münster', 'Москва', '東京'],dtype='U'+str(n_strlen))
+unicode_strings2_bytes = [b'M', b'\xc3', b'\xbc', b'n', b's', b't', b'e', b'r', b'\xd0', b'\x9c', b'\xd0', b'\xbe', b'\xd1', b'\x81', b'\xd0', b'\xba', b'\xd0', b'\xb2', b'\xd0', b'\xb0', b'\xe6', b'\x9d', b'\xb1', b'\xe4', b'\xba', b'\xac']
 
 class StringArrayTestCase(unittest.TestCase):
 
@@ -70,8 +71,9 @@ def runTest(self):
         v2 = nc.variables['strings2']
         v3 = nc.variables['strings3']
         v4 = nc.variables['strings4']
-        print(v4[:])
         assert np.all(v4[:]==unicode_strings2)
+        v4.set_auto_chartostring(False)
+        assert (v4[:].compressed().tolist() == unicode_strings2_bytes)
         assert v.dtype.str[1:] in ['S1','U1']
         assert v.shape == (nrecs,n2,nchar)
         for nrec in range(nrecs):