From 56a464e8281623dc2f76b1935f204fe6c73460a5 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sat, 14 Mar 2026 23:37:53 +0000
Subject: [PATCH 01/12] Test with DECODE_TO_STRINGS off: WIP problem with cube
 contents.

---
 lib/iris/fileformats/cf.py                    |  2 +
 .../integration/netcdf/test_stringdata.py     | 72 +++++++++++++++----
 .../netcdf/test_bytecoding_datasets.py        |  5 +-
 3 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
index 247914bf61..9c093feea5 100644
--- a/lib/iris/fileformats/cf.py
+++ b/lib/iris/fileformats/cf.py
@@ -1344,6 +1344,8 @@ def __init__(self, file_source, warn=False, monotonic=False):
             self._with_ugrid = False
 
         # Read the variables in the dataset only once to reduce runtime.
+        # Turn off *any* automatic decoding in the underlying netCDF4 dataset
+        self._dataset._contained_instance.set_auto_chartostring(False)
         variables = self._dataset.variables
         self._translate(variables)
         self._build_cf_groups(variables)
diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 585388c5e4..800ca3c502 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -19,7 +19,11 @@
 import iris
 from iris.coords import AuxCoord, DimCoord
 from iris.cube import Cube
-from iris.fileformats.netcdf import SUPPORTED_ENCODINGS, _thread_safe_nc
+from iris.fileformats.netcdf import (
+    DECODE_TO_STRINGS_ON_READ,
+    SUPPORTED_ENCODINGS,
+    _thread_safe_nc,
+)
 
 
 @pytest.fixture(scope="module")
@@ -87,7 +91,9 @@ class SamplefileDetails:
 
     filepath: Path
     datavar_data: ArrayLike
+    datavar_bytes: ArrayLike
     stringcoord_data: ArrayLike
+    stringcoord_bytes: ArrayLike
     numericcoord_data: ArrayLike
 
 
@@ -171,7 +177,9 @@ def make_testfile(
     return SamplefileDetails(
         filepath=testfile_path,
         datavar_data=datavar_strings,
+        datavar_bytes=datavar_bytearray,
         stringcoord_data=coordvar_strings,
+        stringcoord_bytes=coordvar_bytearray,
         numericcoord_data=numeric_values,
     )
 
@@ -237,29 +245,63 @@ def readtest_data(
         # ncdump(str(tempfile_path))
         return testdata
 
-    def test_valid_encodings(self, encoding, readtest_data: SamplefileDetails):
-        testfile_path, datavar_strings, coordvar_strings, numeric_data = (
+    @pytest.fixture(params=["strings", "bytes"])
+    def readmode(self, request):
+        return request.param
+
+    def test_valid_encodings(
+        self, encoding, readtest_data: SamplefileDetails, readmode
+    ):
+        (
+            testfile_path,
+            datavar_strings,
+            datavar_bytes,
+            coordvar_strings,
+            coordvar_bytes,
+            numeric_data,
+        ) = (
             readtest_data.filepath,
             readtest_data.datavar_data,
+            readtest_data.datavar_bytes,
             readtest_data.stringcoord_data,
+            readtest_data.stringcoord_bytes,
             readtest_data.numericcoord_data,
         )
-        cube = iris.load_cube(testfile_path)
-        assert load_problems_list() == []
-        assert cube.shape == (N_XDIM,)
+        as_strings = readmode == "strings"
+        if as_strings:
+            # Regular load
+            cube = iris.load_cube(testfile_path)
+            expected_shape: tuple = (N_XDIM,)
+        else:
+            # Special NON-decoded read
+            with DECODE_TO_STRINGS_ON_READ.context(False):
+                cube = iris.load_cube(testfile_path)
+            expected_shape = (N_XDIM, N_CHARS_DIM)
 
-        if encoding == "utf-32":
-            expected_string_width = (N_CHARS_DIM // 4) - 1
-        elif encoding == "utf-16":
-            expected_string_width = (N_CHARS_DIM) // 2 - 1
+        assert load_problems_list() == []
+        assert cube.shape == expected_shape
+
+        if as_strings:
+            if encoding == "utf-32":
+                expected_string_width = (N_CHARS_DIM // 4) - 1
+            elif encoding == "utf-16":
+                expected_string_width = (N_CHARS_DIM) // 2 - 1
+            else:
+                expected_string_width = N_CHARS_DIM
+            expected_dtype = f"<U{expected_string_width}"
         else:
-            expected_string_width = N_CHARS_DIM
-        assert cube.dtype == f"<U{expected_string_width}"
+            expected_dtype = "S1"
+        assert cube.dtype == expected_dtype
+
         cube_data = cube.data
-        assert np.all(cube_data == datavar_strings)
+        expected_data = datavar_strings if as_strings else datavar_bytes
+        assert np.all(cube_data == expected_data)
+
         coord_var = cube.coord("v_co")
-        assert coord_var.dtype == f"<U{expected_string_width}"
-        assert np.all(coord_var.points == coordvar_strings)
+        assert coord_var.dtype == expected_dtype
+        expected_points = coordvar_strings if as_strings else coordvar_bytes
+        assert np.all(coord_var.points == expected_points)
+
         # Also check the numeric one.
         coord_var_2 = cube.coord("v_numeric")
         assert coord_var_2.dtype == np.float64
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
index e5e5dd21d6..29b4889e4d 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
@@ -14,7 +14,6 @@
 from iris import tests
 from iris.exceptions import TranslationError
 from iris.fileformats.netcdf._bytecoding_datasets import (
-    DECODE_TO_STRINGS_ON_READ,
     SUPPORTED_ENCODINGS,
     EncodedDataset,
     EncodedGroup,
@@ -314,8 +313,8 @@ class TestRead:
     datafiles with 'make_dataset' and assigning raw bytes, as-per 'TestWriteChars'.
 
     We are mostly checking here that reading back produces string arrays as expected.
-    However, it is simple + convenient to also check the 'DECODE_TO_STRINGS_ON_READ'
-    function here, i.e. "raw" bytes reads.  So that is also done in this class.
+    However, each testcase also reads and checks the "raw" byte content by re-opening
+    with a non-encoded _thread_safe_nc.DatasetWrapper, to check content is as expected.
     """
 
     @pytest.fixture(params=["strings", "bytes"])

From dd5330f6e1fa738042ca556ec4a14240555a3fe5 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 26 May 2026 17:32:46 +0100
Subject: [PATCH 02/12] Resolve load problems when reading as bytes, by
 skipping the unsupported testcases.

---
 lib/iris/tests/integration/netcdf/test_stringdata.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 800ca3c502..dd89e8c239 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -250,7 +250,7 @@ def readmode(self, request):
         return request.param
 
     def test_valid_encodings(
-        self, encoding, readtest_data: SamplefileDetails, readmode
+        self, encoding, readtest_data: SamplefileDetails, readmode, use_separate_dims
     ):
         (
             testfile_path,
@@ -267,6 +267,14 @@ def test_valid_encodings(
             readtest_data.stringcoord_bytes,
             readtest_data.numericcoord_data,
         )
+
+        if readmode == "bytes" and use_separate_dims == True:
+            msg = (
+                "Unsupported load combination : character coordinates with a non-cube "
+                "string dimension can't attach to the cube, when read as bytes."
+            )
+            pytest.skip(msg)
+
         as_strings = readmode == "strings"
         if as_strings:
             # Regular load

From a7e928a327846c5229cc9b4c001b2256b6c6834f Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 27 May 2026 10:42:22 +0100
Subject: [PATCH 03/12] Make cf chartostring disable work for non-owned regular
 datasets.

---
 lib/iris/fileformats/cf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
index 9c093feea5..01440450f9 100644
--- a/lib/iris/fileformats/cf.py
+++ b/lib/iris/fileformats/cf.py
@@ -1345,7 +1345,11 @@ def __init__(self, file_source, warn=False, monotonic=False):
 
         # Read the variables in the dataset only once to reduce runtime.
         # Turn off *any* automatic decoding in the underlying netCDF4 dataset
-        self._dataset._contained_instance.set_auto_chartostring(False)
+        ds = self._dataset
+        if isinstance(ds, _thread_safe_nc.DatasetWrapper):
+            ds._contained_instance.set_auto_chartostring(False)
+        else:
+            ds.set_auto_chartostring(False)
         variables = self._dataset.variables
         self._translate(variables)
         self._build_cf_groups(variables)

From e6bdb53442e591a1d0d59c7ebbbb277e2fd13cc0 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 27 May 2026 17:35:31 +0100
Subject: [PATCH 04/12] Checks for alternate encoding name.

---
 .../integration/netcdf/test_stringdata.py     | 17 ++++---
 .../netcdf/test_bytecoding_datasets.py        | 45 ++++++++++++-------
 2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index dd89e8c239..eae8006834 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -42,7 +42,8 @@ def all_lazy_auxcoords():
 PERSIST_TESTFILES: str | None = None
 
 NO_ENCODING_STR = "<noencoding>"
-TEST_ENCODINGS = [NO_ENCODING_STR] + SUPPORTED_ENCODINGS
+ALIAS_UTF8_STR = "UTF8"  # an alternative acceptable form (should be written as-is)
+TEST_ENCODINGS = [NO_ENCODING_STR, ALIAS_UTF8_STR] + SUPPORTED_ENCODINGS
 
 
 #
@@ -223,7 +224,7 @@ def readtest_path(
         else:
             filetag = encoding
         dimtag = "diffdims" if use_separate_dims else "samedims"
-        tempfile_path = tmp_path / f"sample_read_{filetag}_{dimtag}.nc"
+        tempfile_path = tmp_path / f"sample_stringdata_read_{filetag}_{dimtag}.nc"
         return tempfile_path
 
     @pytest.fixture
@@ -408,7 +409,7 @@ def write_bytes(self, request):
         return request.param == "dataAsBytes"
 
     @pytest.fixture
-    def writetest_path(self, encoding, write_bytes, tmp_path):
+    def writetest_path(self, encoding, write_bytes, lazy_data, tmp_path):
         """Create a suitable test cube, with either string or byte content."""
         if PERSIST_TESTFILES:
             tmp_path = Path(PERSIST_TESTFILES).expanduser()
@@ -417,7 +418,10 @@ def writetest_path(self, encoding, write_bytes, tmp_path):
         else:
             filetag = encoding
         datatag = "writebytes" if write_bytes else "writestrings"
-        tempfile_path = tmp_path / f"sample_write_{filetag}_{datatag}.nc"
+        lazytag = "alllazy" if lazy_data else "smallreal"
+        tempfile_path = (
+            tmp_path / f"sample_stringdata_write_{filetag}_{datatag}_{lazytag}.nc"
+        )
         return tempfile_path
 
     @pytest.fixture
@@ -440,11 +444,6 @@ def writetest_data(self, writetest_path, encoding, write_bytes, lazy_data):
     def test_valid_encodings(self, encoding, writetest_data, write_bytes):
         cube_info = writetest_data
         cube, path = cube_info.cube, cube_info.save_path
-        # TODO: not testing the "byte read/write" yet
-        # Make a quick check for cube equality : but the presentation depends on the read mode
-        # with DECODE_TO_STRINGS_ON_READ.context(not write_bytes):
-        # read_cube = iris.load_cube(path)
-        # assert read_cube == cube
 
         # N.B. file content should not depend on whether bytes or strings were written
         vararray, coordarray = cube_info.datavar_data, cube_info.stringcoord_data
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
index 29b4889e4d..f153033fa9 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
@@ -28,7 +28,10 @@
 from iris.tests.stock.netcdf import ncgen_from_cdl
 from iris.warnings import IrisCfLoadWarning, IrisCfSaveWarning
 
-encoding_options = [None] + SUPPORTED_ENCODINGS
+# Note: for test options, include "no encoding" and an alias name
+ENCODING_NONE = None
+ENCODING_UTF8_ALIAS = "UTF8"
+encoding_options = [ENCODING_NONE, ENCODING_UTF8_ALIAS] + SUPPORTED_ENCODINGS
 
 samples_3_ascii = np.array(
     ["one", "", "seven"],  # N.B. include empty!
@@ -45,9 +48,17 @@ def encoding(request):
     return request.param
 
 
+# TODO: remove (debug)
+# PERSIST_TESTFILES: str | None = "~/chararray_testfiles"
+PERSIST_TESTFILES: str | None = None
+
+
 @pytest.fixture(scope="module")
 def tempdir(tmp_path_factory):
-    path = tmp_path_factory.mktemp("netcdf")
+    if PERSIST_TESTFILES:
+        path = Path(PERSIST_TESTFILES).expanduser()
+    else:
+        path = tmp_path_factory.mktemp("netcdf")
     return path
 
 
@@ -134,7 +145,7 @@ class TestWriteStrings:
 
     def test_encodings(self, encoding, tempdir):
         # Create a dataset with the variable
-        path = tempdir / f"test_writestrings_encoding_{encoding!s}.nc"
+        path = tempdir / f"test_bytecoded_writestrings_encoding_{encoding!s}.nc"
 
         if encoding in [None, "ascii"]:
             writedata = samples_3_ascii
@@ -164,7 +175,7 @@ def test_encodings(self, encoding, tempdir):
 
     def test_scalar(self, tempdir):
         # Like 'test_write_strings', but the variable has *only* the string dimension.
-        path = tempdir / "test_writestrings_scalar.nc"
+        path = tempdir / "test_bytecoded_writestrings_scalar.nc"
 
         strlen = 5
         ds_encoded = make_encoded_dataset(path, strlen=strlen)
@@ -180,7 +191,7 @@ def test_scalar(self, tempdir):
 
     def test_multidim(self, tempdir):
         # Like 'test_write_strings', but the variable has additional dimensions.
-        path = tempdir / "test_writestrings_multidim.nc"
+        path = tempdir / "test_bytecoded_writestrings_multidim.nc"
 
         strlen = 5
         ds_encoded = make_encoded_dataset(path, strlen=strlen)
@@ -209,7 +220,7 @@ def test_multidim(self, tempdir):
 
     @pytest.mark.parametrize("encoding", [None, "ascii"])
     def test_write_encoding_failure(self, tempdir, encoding):
-        path = tempdir / f"test_writestrings_encoding_{encoding}_fail.nc"
+        path = tempdir / f"test_bytecoded_writestrings_encoding_{encoding}_fail.nc"
         ds = make_encoded_dataset(path, strlen=5, encoding=encoding)
         v = ds.variables["vxs"]
         encoding_name = encoding
@@ -223,7 +234,7 @@ def test_write_encoding_failure(self, tempdir, encoding):
             v[:] = samples_3_nonascii
 
     def test_write_badencoding_ignore(self, tempdir):
-        path = tempdir / "test_writestrings_badencoding_ignore.nc"
+        path = tempdir / "test_bytecoded_writestrings_badencoding_ignore.nc"
         ds = make_encoded_dataset(path, strlen=5, encoding="unknown")
         v = ds.variables["vxs"]
         msg = (
@@ -235,7 +246,7 @@ def test_write_badencoding_ignore(self, tempdir):
 
     def test_overlength(self, tempdir):
         # Check expected behaviour with over-length data
-        path = tempdir / "test_writestrings_overlength.nc"
+        path = tempdir / "test_bytecoded_writestrings_overlength.nc"
         strlen = 6
         ds = make_encoded_dataset(path, strlen=strlen, encoding="utf8")
         v = ds.variables["vxs"]
@@ -248,7 +259,7 @@ def test_overlength(self, tempdir):
 
     def test_overlength_splitcoding(self, tempdir):
         # Check expected behaviour when non-ascii multibyte coding gets truncated
-        path = tempdir / "test_writestrings_overlength_splitcoding.nc"
+        path = tempdir / "test_bytecoded_writestrings_overlength_splitcoding.nc"
         strlen = 5
         ds = make_encoded_dataset(path, strlen=strlen, encoding="utf-8")
         v = ds.variables["vxs"]
@@ -291,7 +302,7 @@ def test_write_chars(self, tempdir, write_form):
         strlen = strings_maxbytes(write_strings, encoding)
         write_bytes = make_bytearray(write_strings, strlen, encoding=encoding)
         # NOTE: 'flexi' form util decides the width needs to be 7 !!
-        path = tempdir / f"test_writechars_{write_form}.nc"
+        path = tempdir / f"test_bytecoded_writechars_{write_form}.nc"
         ds = make_encoded_dataset(path, encoding=encoding, strlen=strlen)
         v = ds.variables["vxs"]
 
@@ -331,7 +342,7 @@ def undecoded_testvar(self, ds_encoded, varname: str):
 
     def test_encodings(self, encoding, tempdir, readmode):
         # Create a dataset with the variable
-        path = tempdir / f"test_read_encodings_{encoding!s}_{readmode}.nc"
+        path = tempdir / f"test_bytecoded_read_encodings_{encoding!s}_{readmode}.nc"
 
         if encoding in [None, "ascii"]:
             write_strings = samples_3_ascii
@@ -352,10 +363,10 @@ def test_encodings(self, encoding, tempdir, readmode):
             # Test "normal" read --> string array
             result = v[:]
             expected = write_strings
-            if encoding in ("utf-8", "utf-16"):
+            if encoding in ("utf-8", ENCODING_UTF8_ALIAS, "utf-16"):
                 # In these cases, with the given non-ascii sample data, the
                 #  "default minimum string length" is overestimated.
-                if encoding == "utf-8":
+                if encoding in ["utf-8", ENCODING_UTF8_ALIAS]:
                     assert strlen == 7
                     assert result.dtype == "U7"
                     # correct the result dtype to pass the write_strings comparison below
@@ -378,7 +389,7 @@ def test_encodings(self, encoding, tempdir, readmode):
 
     def test_scalar(self, tempdir, readmode):
         # Like 'test_write_strings', but the variable has *only* the string dimension.
-        path = tempdir / f"test_read_scalar_{readmode}.nc"
+        path = tempdir / f"test_bytecoded_read_scalar_{readmode}.nc"
 
         strlen = 5
         ds_encoded = make_encoded_dataset(path, strlen=strlen)
@@ -404,7 +415,7 @@ def test_scalar(self, tempdir, readmode):
 
     def test_multidim(self, tempdir, readmode):
         # Like 'test_write_strings', but the variable has additional dimensions.
-        path = tempdir / f"test_read_multidim_{readmode}.nc"
+        path = tempdir / f"test_bytecoded_read_multidim_{readmode}.nc"
 
         strlen = 5
         ds_encoded = make_encoded_dataset(path, strlen=strlen)
@@ -440,7 +451,7 @@ def test_multidim(self, tempdir, readmode):
         check_array_matching(result, expected)
 
     def test_read_encoding_failure(self, tempdir, readmode):
-        path = tempdir / f"test_read_encoding_failure_{readmode}.nc"
+        path = tempdir / f"test_bytecoded_read_encoding_failure_{readmode}.nc"
         strlen = 10
         ds_encoded = make_encoded_dataset(path, strlen=strlen, encoding="ascii")
         v = ds_encoded.variables["vxs"]
@@ -463,7 +474,7 @@ def test_read_encoding_failure(self, tempdir, readmode):
             assert np.all(result == test_utf8_bytes)
 
     def test_read_badencoding_ignore(self, tempdir):
-        path = tempdir / f"test_read_badencoding_ignore.nc"
+        path = tempdir / f"test_bytecoded_read_badencoding_ignore.nc"
         strlen = 10
         ds = make_encoded_dataset(path, strlen=strlen, encoding="unknown")
         v = ds.variables["vxs"]

From fa9c66df7606e9fe4749d0e9e580762dfffbcfe6 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 28 May 2026 14:42:17 +0100
Subject: [PATCH 05/12] Fix bug where nc save was removing _Encoding attributes
 from saved cubes.

---
 lib/iris/fileformats/netcdf/saver.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 08f577c499..bde1cbd1fe 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1719,20 +1719,23 @@ def add_names_attrs():
         if element.units.calendar:
             _setncattr(cf_var, "calendar", str(element.units.calendar))
 
+        # Work on a copy, so that popping "_Encoding" doesn't alter the element itself.
+        element_attrs = element.attributes.copy()
+
         # Note: when writing UGRID, "element" can be a Mesh which has no "dtype",
         # and for dataless cubes it will have a 'None' dtype.
         if getattr(element, "dtype", None) is not None:
             # Most attributes are dealt with later.  But _Encoding needs to be defined
             #  *before* we can write to a character variable.
-            if element.dtype.kind in "SU" and "_Encoding" in element.attributes:
-                encoding = element.attributes.pop("_Encoding")
+            if element.dtype.kind in "SU" and "_Encoding" in element_attrs:
+                encoding = element_attrs.pop("_Encoding")
                 _setncattr(cf_var, "_Encoding", encoding)
 
         if not isinstance(element, Cube):
             # Add any other custom coordinate attributes.
             # N.B. not Cube, which has specific handling in  _create_cf_data_variable
-            for name in sorted(element.attributes):
-                value = element.attributes[name]
+            for name in sorted(element_attrs):
+                value = element_attrs[name]
 
                 if name == "STASH":
                     # Adopting provisional Metadata Conventions for representing MO

From 3def8f1056172a8b3a72020446b7b5bd4aa8fcdb Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 28 May 2026 14:48:58 +0100
Subject: [PATCH 06/12] Fix bug: stop cube equality failing with char/string
 data.

---
 lib/iris/cube.py                      | 21 +++++++++++++--------
 lib/iris/tests/unit/cube/test_Cube.py | 10 ++++++++++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index 44be3a63d7..e8f1107773 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -4468,15 +4468,20 @@ def __eq__(self, other):
 
             # Having checked everything else, check approximate data equality.
             if result and not dataless_equality:
-                # TODO: why do we use allclose() here, but strict equality in
-                #  _DimensionalMetadata (via util.array_equal())?
-                result = bool(
-                    np.allclose(
-                        self.core_data(),
-                        other.core_data(),
-                        equal_nan=True,
+                if self.dtype.kind in "if":
+                    # numbers
+                    # TODO: why do we use allclose() here, but strict equality in
+                    #  _DimensionalMetadata (via util.array_equal())?
+                    result = bool(
+                        np.allclose(
+                            self.core_data(),
+                            other.core_data(),
+                            equal_nan=True,
+                        )
                     )
-                )
+                else:
+                    # non-numeric: use exact equality
+                    result = bool(np.all(self.core_data() == other.core_data()))
         return result
 
     # Must supply __ne__, Python does not defer to __eq__ for negative equality
diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py
index d91c7e81c0..756809e128 100644
--- a/lib/iris/tests/unit/cube/test_Cube.py
+++ b/lib/iris/tests/unit/cube/test_Cube.py
@@ -3620,6 +3620,16 @@ def test_data_bool_not_eq(self):
         cube2 = Cube([True, True])
         assert cube1 != cube2
 
+    def test_data_string_eq(self):
+        cube1 = Cube(["a", "b", "c"])
+        cube2 = Cube(["a", "b", "c"])
+        assert cube1 == cube2
+
+    def test_data_string_not_eq(self):
+        cube1 = Cube(["a", "b", "c"])
+        cube2 = Cube(["a", "b", "d"])
+        assert cube1 != cube2
+
 
 class Test__eq__meta:
     def test_ancillary_fail(self):

From 713f6316b4d49e65940dffa4a0a8b984c161ab58 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 28 May 2026 15:32:36 +0100
Subject: [PATCH 07/12] Remove ban on string data for cubes: Extracting a
 scalar string cube then works.

---
 lib/iris/cube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index e8f1107773..038950bc7a 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -1280,9 +1280,9 @@ def __init__(
             ...                                  (longitude, 1)])
 
         """
-        # Temporary error while we transition the API.
-        if isinstance(data, str):
-            raise TypeError("Invalid data type: {!r}.".format(data))
+        # NOTE: the former "temporary" TypeError for 'str' data was removed here, so
+        # that string content is now allowed: in particular, this is what lets a
+        # scalar string cube be created or extracted.
 
         # Configure the metadata manager.
         self._metadata_manager = metadata_manager_factory(CubeMetadata)

From 4e956e02df63b958a60bd52eb2742d79a382a944 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 28 May 2026 16:22:35 +0100
Subject: [PATCH 08/12] Tests for saving scalar strings, and a mix of encodings
 and lengths.

---
 .../integration/netcdf/test_stringdata.py     | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index eae8006834..0d3ac5bda8 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -19,6 +19,7 @@
 import iris
 from iris.coords import AuxCoord, DimCoord
 from iris.cube import Cube
+import iris.exceptions
 from iris.fileformats.netcdf import (
     DECODE_TO_STRINGS_ON_READ,
     SUPPORTED_ENCODINGS,
@@ -46,6 +47,13 @@ def all_lazy_auxcoords():
 TEST_ENCODINGS = [NO_ENCODING_STR, ALIAS_UTF8_STR] + SUPPORTED_ENCODINGS
 
 
+# Common fixture to save with split-attrs ONLY in these tests
+@pytest.fixture(scope="module", autouse=True)
+def all_split_attrs():
+    with iris.FUTURE.context(save_split_attrs=True):
+        yield
+
+
 #
 # Routines to convert between byte and string arrays.
 # Independently defined here, to avoid relying on any code we are testing.
@@ -476,3 +484,119 @@ def test_valid_encodings(self, encoding, writetest_data, write_bytes):
             )
         assert np.all(data_main == vararray)
         assert np.all(data_co == coordarray)
+
+
+class TestStringCubeBehaviour:
+    def test_create(self):
+        cube = Cube(["this", "that", "cliché"])
+        assert isinstance(cube.core_data(), np.ndarray)
+        assert cube.shape == (3,)
+        assert cube.dtype == np.dtype("U6")
+
+    def test_scalar_extract(self):
+        cube = Cube(["one", "two", "thirteen"])
+        cube = cube[0]
+        assert isinstance(cube.core_data(), np.ndarray)
+        assert cube.shape == ()
+        assert cube.dtype == np.dtype("U3")
+
+    def test_scalar_create(self):
+        cube = Cube("éclair")
+        assert isinstance(cube.core_data(), np.ndarray)
+        assert cube.shape == ()
+        assert cube.dtype == np.dtype("U6")
+
+
+class TestWriteReadMixedEncodings:
+    """Check saving of different types of string data, in cubes.
+
+    Checks that encodings are preserved through save/load.
+    Checks that scalar cubes save.
+    Checks that multiple cubes with different encodings save correctly.
+    """
+
+    def test_mixed(self, tmp_path):
+        # Save a mixture of string + numeric cubes, 1-D and scalar
+        # Ensure that they save, and read back correctly.
+        c1 = Cube(["test-string"], var_name="c1")
+        c2 = Cube(["test=éclair"], var_name="c2", attributes={"_Encoding": "utf16"})
+        c3 = Cube(4.5, var_name="c3")
+        c4 = Cube(np.array("q"), var_name="c4")  # a SCALAR character-type cube
+        cubes = [c1, c2, c3, c4]
+        originals = [c.copy() for c in cubes]
+
+        # Check they save OK
+        filepath = tmp_path / "tst.nc"
+        iris.save(cubes, filepath)
+
+        # Check they also read back the same (except for Conventions attribute)
+        results = iris.load_cubes(filepath, ["c1", "c2", "c3", "c4"])
+        for cube in results:
+            cube.attributes.pop("Conventions", None)
+        assert all(orig == result for orig, result in zip(originals, results))
+
+
+class TestWriteReadScalarStringCubes:
+    """Check how scalar string-typed cubes are saved.
+    NB all these gain a string dimension, even when only a single byte character,
+    so they are not actually "scalar" in the file.
+    """
+
+    def test_save_scalar_ascii__ok(self, tmp_path):
+        # We can save a scalar cube containing a *single ascii character*
+        scalar_char_cube = Cube(
+            np.array("x"),
+            var_name="c1",
+            attributes={"_Encoding": "utf8"},  # NB no encoding is *needed* here.
+        )
+        assert scalar_char_cube.shape == ()
+        filepath = tmp_path / "tst.nc"
+        iris.save(scalar_char_cube, filepath)
+
+        # Check dims in file
+        ds = _thread_safe_nc.DatasetWrapper(filepath)
+        assert ds.variables["c1"].dimensions == ("string1",)
+        assert ds.dimensions["string1"].size == 1
+        ds.close()
+
+        # check read-back result
+        result = iris.load_cube(filepath)
+        result.attributes.pop("Conventions", None)
+        assert result == scalar_char_cube
+
+    def test_save_scalar_unicode__fail(self, tmp_path):
+        # You *can't* save a scalar cube containing a non-ascii character
+        # *without an explicitly lengthened dtype*,
+        # because it doesn't convert to a single "char".
+        scalar_char_bad = Cube(
+            np.array("ü"), var_name="c1", attributes={"_Encoding": "utf8"}
+        )
+        assert scalar_char_bad.shape == ()
+        filepath = tmp_path / "tst.nc"
+        msg = (
+            "String 'ü' written .* is 2 bytes long, "
+            "which exceeds the string dimension length"
+        )
+        with pytest.raises(iris.exceptions.TranslationError, match=msg):
+            iris.save(scalar_char_bad, filepath)
+
+    def test_save_single_unicode__okay(self, tmp_path):
+        # You *can* save a scalar cube containing a non-ascii character,
+        # *if* the dtype is extended to allow for multiple encoded bytes.
+        scalar_char_cube = Cube(
+            np.array("ü", dtype="U2"), var_name="c1", attributes={"_Encoding": "utf8"}
+        )
+        assert scalar_char_cube.shape == ()
+        filepath = tmp_path / "tst.nc"
+        iris.save(scalar_char_cube, filepath)
+
+        # Check dims in file
+        ds = _thread_safe_nc.DatasetWrapper(filepath)
+        assert ds.variables["c1"].dimensions == ("string2",)
+        assert ds.dimensions["string2"].size == 2
+        ds.close()
+
+        # check read-back result
+        result = iris.load_cube(filepath)
+        result.attributes.pop("Conventions", None)
+        assert result == scalar_char_cube

From 2c276a8b78bf1c0b1042c728ad40b334c982e431 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 29 May 2026 12:05:40 +0100
Subject: [PATCH 09/12] Tests for some specific save/load usecases.

---
 lib/iris/fileformats/netcdf/saver.py          |  77 +++++++------
 .../integration/netcdf/test_stringdata.py     | 102 +++++++++++++++++-
 2 files changed, 141 insertions(+), 38 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index bde1cbd1fe..49fbbb1221 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1833,8 +1833,8 @@ def _create_generic_cf_array_var(
         if cube is not None and data is not None and cube.shape != data.shape:
             compression_kwargs = {}
 
-        if not is_dataless and np.issubdtype(data.dtype, np.str_):
-            # Deal with string-type variables.
+        if not is_dataless and data.dtype.kind == "U":
+            # Deal with unicode-string-type variables.
             # Typically CF label variables, but also possibly ancil-vars ?
 
             # NOTE: all we are doing here is to calculate the byte dimension length,
@@ -1843,37 +1843,26 @@ def _create_generic_cf_array_var(
             # being a _bytecoding_datasets.EncodedVariable.
             string_dimension_depth = data.dtype.itemsize
 
-            if data.dtype.kind == "U":
-                # String content (U) instead of bytes (S).
-                # For numpy strings, itemsize is **always** a multiple of 4
-                if string_dimension_depth % 4 != 0:
-                    msg = (
-                        "Unexpected numpy string 'itemsize' for element "
-                        f"{cube_or_mesh.name()}: "
-                        f"'dtype.itemsize = {string_dimension_depth}, expected "
-                        "a multiple of four (always)."
-                    )
-                    raise ValueError(msg)
-                nchars = string_dimension_depth // 4
-
-                encoding_attr = element.attributes.get("_Encoding", "ascii")
-                # Look this up + return a supported encoding name
-                # NB implements defaults and raises a warning if given not recognised.
-                encoding = bytecoding_datasets._identify_encoding(
-                    encoding=encoding_attr, var_name=cf_name, writing=True
+            # Unicode strings only (kind 'U') : 'S1' bytes take the non-string branch.
+            # For numpy strings, itemsize is **always** a multiple of 4
+            if string_dimension_depth % 4 != 0:
+                msg = (
+                    "Unexpected numpy string 'itemsize' for element "
+                    f"{cube_or_mesh.name()}: "
+                    f"'dtype.itemsize = {string_dimension_depth}, expected "
+                    "a multiple of four (always)."
                 )
-                width_fns = bytecoding_datasets._ENCODING_WIDTH_TRANSLATIONS[encoding]
-                string_dimension_depth = width_fns.nchars_2_nbytes(nchars)
-            else:
-                if data.dtype.kind != "S" or data.dtype.itemsize != 1:
-                    # Some type of data we don't "understand".
-                    # NB this includes "Sxx" types other than "S1" :  It seems that
-                    # netCDF4 saves Sxx as variable-length strings.  But we don't support that type in Iris.
-                    msg = (
-                        f"Variable {cf_name!r} has unexpected string/character dtype, "
-                        f"{data.dtype} -- should be either 'S' or 'U' type."
-                    )
-                    raise ValueError(msg)
+                raise ValueError(msg)
+            nchars = string_dimension_depth // 4
+
+            encoding_attr = element.attributes.get("_Encoding", "ascii")
+            # Look this up + return a supported encoding name
+            # NB implements defaults and raises a warning if given not recognised.
+            encoding = bytecoding_datasets._identify_encoding(
+                encoding=encoding_attr, var_name=cf_name, writing=True
+            )
+            width_fns = bytecoding_datasets._ENCODING_WIDTH_TRANSLATIONS[encoding]
+            string_dimension_depth = width_fns.nchars_2_nbytes(nchars)
 
             string_dimension_name = "string%d" % string_dimension_depth
 
@@ -1893,12 +1882,34 @@ def _create_generic_cf_array_var(
             # Create the label coordinate variable.
             cf_var = self._dataset.createVariable(cf_name, "|S1", element_dims)
         else:
-            # A normal (numeric) variable.
+            # A non-string variable.
             # ensure a valid datatype for the file format.
             if is_dataless:
                 dtype = self._DATALESS_DTYPE
                 fill_value = self._DATALESS_FILLVALUE
             else:
+                # Normal non-string data.
+                # NOTE: this includes byte-arrays (S1 only) : however these must
+                # use an actual cube dimension for the 'string dimension', which
+                # seriously limits the utility of DECODE_TO_STRINGS_ON_READ.
+                # TODO: also support netCDF variable-length strings ("string" type).
+                #  Currently hit a **write error here**, being numpy object dtype ("O").
+                if (
+                    data.dtype.kind not in "iufSU"
+                    or data.dtype.kind == "S"
+                    and data.dtype.itemsize != 1
+                ):
+                    # This is a type of data we don't "understand".
+                    # NB this includes "Sxx" types other than "S1" :  It seems that
+                    # netCDF4 saves Sxx as variable-length strings.
+                    # But we don't support that type in Iris.
+                    msg = (
+                        f"Variable {cf_name!r} has unexpected dtype, {data.dtype!r}."
+                        f"Data content arrays must be numeric, or contain "
+                        "single-bytes (dtype 'S1'), or unicode strings (dtype 'U<n>')."
+                    )
+                    raise ValueError(msg)
+
                 element_type = type(element).__name__
                 data = self._ensure_valid_dtype(data, element_type, element)
                 if not packing_controls:
diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 0d3ac5bda8..71a02fc093 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -109,7 +109,10 @@ class SamplefileDetails:
 def make_testfile(
     testfile_path: Path,
     encoding_str: str,
-    coords_on_separate_dim: bool,
+    coords_on_separate_dim: bool = False,
+    # If set, determines the "_Encoding" attrs content, including None --> no attr.
+    # Otherwise, they follow 'encoding_str', including NO_ENCODING_STR --> no attr.
+    encoding_attr: str | None = "<as_encoding_str>",
 ) -> SamplefileDetails:
     """Create a test netcdf file.
 
@@ -120,6 +123,9 @@ def make_testfile(
     else:
         encoding = encoding_str
 
+    if encoding_attr == "<as_encoding_str>":
+        encoding_attr = encoding
+
     data_is_ascii = encoding in (None, "ascii")
 
     numeric_values = np.arange(3.0)
@@ -156,8 +162,8 @@ def make_testfile(
         )
         v_co[:] = coordvar_bytearray
 
-        if encoding is not None:
-            v_co._Encoding = encoding
+        if encoding_attr is not None:
+            v_co._Encoding = encoding_attr
 
         v_numeric = ds.createVariable(
             "v_numeric",
@@ -176,8 +182,8 @@ def make_testfile(
         )
         v_datavar[:] = datavar_bytearray
 
-        if encoding is not None:
-            v_datavar._Encoding = encoding
+        if encoding_attr is not None:
+            v_datavar._Encoding = encoding_attr
 
         v_datavar.coordinates = "v_co v_numeric"
     finally:
@@ -600,3 +606,89 @@ def test_save_single_unicode__okay(self, tmp_path):
         result = iris.load_cube(filepath)
         result.attributes.pop("Conventions", None)
         assert result == scalar_char_cube
+
+
+class TestReadParticularCases:
+    @pytest.mark.parametrize("data_encoding", ["utf8", "utf16", "utf32"])
+    def test_read_no_encoding(self, tmp_path, data_encoding):
+        # Check that we can read UTF-8 encoded data, even with no _Encoding attribute.
+        # This is a common case in the wild, and now accepted by CF as a default.
+        # However, other encodings will FAIL to decode.
+        filepath = tmp_path / "utf8_no_encoding.nc"
+        testdata = make_testfile(
+            testfile_path=filepath,
+            encoding_str=data_encoding,
+            encoding_attr=None,
+        )
+        cube = iris.load_cube(filepath)
+        assert "_Encoding" not in cube.attributes
+
+        if data_encoding == "utf8":
+            assert np.all(cube.data == testdata.datavar_data)
+        else:
+            msg = "Character data .* could not be decoded with the 'utf-8' encoding"
+            with pytest.raises(ValueError, match=msg):
+                cube.data
+
+    def test_read_wrong_encoding__fail(self, tmp_path):
+        filepath = tmp_path / "missing_encoding.nc"
+        testdata = make_testfile(
+            testfile_path=filepath,
+            encoding_str="utf-16",
+            encoding_attr="utf-8",
+        )
+        cube = iris.load_cube(filepath)
+        # NOTE: error only occurs when you attempt to fetch + translate the content.
+        msg = "Character data .* could not be decoded with the 'utf-8' encoding."
+        with pytest.raises(ValueError, match=msg):
+            data = cube.data
+
+
+class TestWriteParticularCases:
+    def test_write_unicode_no_encoding__fail(self, tmp_path):
+        cube = Cube(np.array("éclair"))
+        filepath = tmp_path / "write_unicode_no_encoding.nc"
+        msg = (
+            "String data written to netcdf character variable 'unknown' "
+            "could not be represented in encoding 'ascii'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            iris.save(cube, filepath)
+
+    def test_write_encoded_overlength__fail(self, tmp_path):
+        cube = Cube(np.array("éclair"), attributes={"_Encoding": "utf8"})
+        filepath = tmp_path / "write_encoded_overlength.nc"
+        msg = (
+            "String 'éclair' written into netcdf variable 'unknown' "
+            "with encoding 'utf-8' is 7 bytes long, which exceeds the "
+            "string dimension length, 6. "
+            r"This can be fixed by converting the data to a \"wider\" string dtype, "
+            r"e.g. cube.data = cube.data.astype\(\"U7\"\)"
+        )
+        with pytest.raises(iris.exceptions.TranslationError, match=msg):
+            iris.save(cube, filepath)
+
+    def test_write_multibytes__fail(self, tmp_path):
+        encoded_bytes = "éclair".encode("utf8")
+        byte_array = np.array(encoded_bytes)
+        cube = Cube(byte_array, attributes={"_Encoding": "utf8"})
+        filepath = tmp_path / "write_multibyte_Sxx.nc"
+        msg = (
+            r"Variable 'unknown' has unexpected dtype, dtype\('S7'\)."
+            "Data content arrays must be numeric, or contain single-bytes "
+            r"\(dtype 'S1'\), or unicode strings \(dtype 'U<n>'\)."
+        )
+        with pytest.raises(ValueError, match=msg):
+            iris.save(cube, filepath)
+
+    def test_write_stringobjects__fail(self, tmp_path):
+        string_array = np.array(["one", "four"], dtype="O")
+        cube = Cube(string_array)
+        filepath = tmp_path / "write_stringobjects.nc"
+        msg = (
+            r"Variable 'unknown' has unexpected dtype, dtype\('O'\)."
+            "Data content arrays must be numeric, or contain single-bytes "
+            r"\(dtype 'S1'\), or unicode strings \(dtype 'U<n>'\)."
+        )
+        with pytest.raises(ValueError, match=msg):
+            iris.save(cube, filepath)

From 6c4be387e87ae20c96740cd4109ffcdc8f664450 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 29 May 2026 16:05:48 +0100
Subject: [PATCH 10/12] Test save/load of a 'bad' unicode sequence as a byte
 array.

---
 .../integration/netcdf/test_stringdata.py     | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 71a02fc093..cf5ad42187 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -692,3 +692,23 @@ def test_write_stringobjects__fail(self, tmp_path):
         )
         with pytest.raises(ValueError, match=msg):
             iris.save(cube, filepath)
+
+
+class TestSaveloadBadUnicodeAsBytes:
+    def test_save_load_bad_unicode(self, tmp_path):
+        filepath = tmp_path / "bad_unicode_utf8.nc"
+        test_string = "marré"
+        bytes_array = test_string.encode("utf8")
+        s1_array = np.array([bytes_array[i : i + 1] for i in range(len(bytes_array))])
+        s1_array_bad_utf8 = s1_array[:-1]  # invalid without the last byte
+        cube = Cube(s1_array_bad_utf8, attributes={"_Encoding": "utf8"})
+        iris.save(cube, filepath)
+        # First check for error when reading back *normally*
+        msg = "could not be decoded with the 'utf-8' encoding"
+        with pytest.raises(ValueError, match=msg):
+            iris.load(filepath)
+        # .. but OK in byte-reading mode
+        with iris.fileformats.netcdf.DECODE_TO_STRINGS_ON_READ.context(False):
+            readback_cube = iris.load_cube(filepath)
+        assert readback_cube.dtype == "S1"
+        assert np.all(readback_cube.data == s1_array_bad_utf8)

From 5d5bedc79a6120d52b07efc3a9bf6ed8bcfd89be Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 29 May 2026 16:29:56 +0100
Subject: [PATCH 11/12] Remove various obsolete comments, and debug code.

---
 lib/iris/cube.py                                   |  4 ----
 lib/iris/fileformats/_nc_load_rules/helpers.py     |  4 ----
 .../fileformats/netcdf/_bytecoding_datasets.py     |  1 -
 lib/iris/fileformats/netcdf/_thread_safe_nc.py     |  2 +-
 .../tests/integration/netcdf/test_stringdata.py    | 14 --------------
 .../fileformats/netcdf/test_bytecoding_datasets.py | 10 +---------
 6 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index 038950bc7a..0bed1c2d10 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -1280,10 +1280,6 @@ def __init__(
             ...                                  (longitude, 1)])
 
         """
-        # # Temporary error while we transition the API.
-        # if isinstance(data, str):
-        #     raise TypeError("Invalid data type: {!r}.".format(data))
-
         # Configure the metadata manager.
         self._metadata_manager = metadata_manager_factory(CubeMetadata)
 
diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py
index 0815be335d..7c4810ffe7 100644
--- a/lib/iris/fileformats/_nc_load_rules/helpers.py
+++ b/lib/iris/fileformats/_nc_load_rules/helpers.py
@@ -1637,11 +1637,7 @@ def _add_auxiliary_coordinate(
     # Determine the name of the dimension/s shared between the CF-netCDF data variable
     # and the coordinate being built.
     coord_dims = cf_coord_var.dimensions
-    # if cf._is_str_dtype(cf_coord_var):
-    #     coord_dims = coord_dims[:-1]
     datavar_dims = engine.cf_var.dimensions
-    # if cf._is_str_dtype(engine.cf_var):
-    #     datavar_dims = datavar_dims[:-1]
     common_dims = [dim for dim in coord_dims if dim in datavar_dims]
     data_dims = None
     if common_dims:
diff --git a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
index da36c10c98..cab4eb9421 100644
--- a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
+++ b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
@@ -116,7 +116,6 @@ def encode_stringarray_as_bytearray(
             raise ValueError(msg) from err
 
         n_bytes = len(bytes)
-        # TODO: may want to issue warning or error if we overflow the length?
         if n_bytes > string_dimension_length:
             from iris.exceptions import TranslationError
 
diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 8bc1af7a7c..486ad518fc 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -160,7 +160,7 @@ class GroupWrapper(_ThreadSafeWrapper):
     # Note: will also accept a whole Dataset object, but that is OK.
     _DUCKTYPE_CHECK_PROPERTIES = ["createVariable"]
     # Class to use when creating variable wrappers (default=VariableWrapper).
-    # - needed to support _byte_encoded_data.EncodedDataset.
+    # - needed to support _bytecoding_datasets.EncodedDataset.
     VAR_WRAPPER_CLS = VariableWrapper
     GRP_WRAPPER_CLS: typing.Any | None = None  # self-reference : fill in later
 
diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index cf5ad42187..8cc7d9a7eb 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -38,9 +38,6 @@ def all_lazy_auxcoords():
 
 N_XDIM = 3
 N_CHARS_DIM = 64
-# TODO: remove (debug)
-# PERSIST_TESTFILES: str | None = "~/chararray_testfiles"
-PERSIST_TESTFILES: str | None = None
 
 NO_ENCODING_STR = "<noencoding>"
 ALIAS_UTF8_STR = "UTF8"  # an alternative acceptable form (should be written as-is)
@@ -228,11 +225,6 @@ def readtest_path(
         use_separate_dims,
     ) -> Iterable[SamplefileDetails]:
         """Create a suitable valid testfile, and return expected string content."""
-        match PERSIST_TESTFILES:
-            case str():
-                tmp_path = Path(PERSIST_TESTFILES).expanduser()
-            case _:
-                pass
         if encoding == "<noencoding>":
             filetag = "noencoding"
         else:
@@ -254,10 +246,6 @@ def readtest_data(
             encoding_str=encoding,
             coords_on_separate_dim=use_separate_dims,
         )
-
-        # # TODO: temporary for debug -- TO REMOVE
-        # from iris.tests.integration.netcdf.test_chararrays import ncdump
-        # ncdump(str(tempfile_path))
         return testdata
 
     @pytest.fixture(params=["strings", "bytes"])
@@ -425,8 +413,6 @@ def write_bytes(self, request):
     @pytest.fixture
     def writetest_path(self, encoding, write_bytes, lazy_data, tmp_path):
         """Create a suitable test cube, with either string or byte content."""
-        if PERSIST_TESTFILES:
-            tmp_path = Path(PERSIST_TESTFILES).expanduser()
         if encoding == "<noencoding>":
             filetag = "noencoding"
         else:
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
index f153033fa9..8432a0831f 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
@@ -48,17 +48,9 @@ def encoding(request):
     return request.param
 
 
-# TODO: remove (debug)
-# PERSIST_TESTFILES: str | None = "~/chararray_testfiles"
-PERSIST_TESTFILES: str | None = None
-
-
 @pytest.fixture(scope="module")
 def tempdir(tmp_path_factory):
-    if PERSIST_TESTFILES:
-        path = Path(PERSIST_TESTFILES).expanduser()
-    else:
-        path = tmp_path_factory.mktemp("netcdf")
+    path = tmp_path_factory.mktemp("netcdf")
     return path
 
 

From cbbaf5ab96c5b6c71278a62045971d77fb3b1faa Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Mon, 8 Jun 2026 17:36:22 +0100
Subject: [PATCH 12/12] Review changes.

---
 lib/iris/fileformats/netcdf/saver.py                 | 6 ++----
 lib/iris/tests/integration/netcdf/test_stringdata.py | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 49fbbb1221..4938f481c4 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1894,10 +1894,8 @@ def _create_generic_cf_array_var(
                 # seriously limits the utility of DECODE_TO_STRINGS_ON_READ.
                 # TODO: also support netCDF variable-length strings ("string" type).
                 #  Currently hit a **write error here**, being numpy object dtype ("O").
-                if (
-                    data.dtype.kind not in "iufSU"
-                    or data.dtype.kind == "S"
-                    and data.dtype.itemsize != 1
+                if data.dtype.kind not in "iufSU" or (
+                    data.dtype.kind == "S" and data.dtype.itemsize != 1
                 ):
                     # This is a type of data we don't "understand".
                     # NB this includes "Sxx" types other than "S1" :  It seems that
diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 8cc7d9a7eb..925da599a6 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -612,6 +612,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding):
         if data_encoding == "utf8":
             assert np.all(cube.data == testdata.datavar_data)
         else:
+            # NOTE: load succeeds; the error arises when content is fetched and decoded.
             msg = "Character data .* could not be decoded with the 'utf-8' encoding"
             with pytest.raises(ValueError, match=msg):
                 cube.data
@@ -624,7 +625,7 @@ def test_read_wrong_encoding__fail(self, tmp_path):
             encoding_attr="utf-8",
         )
         cube = iris.load_cube(filepath)
-        # NOTE: error only occurs when you attempt to fetch + translate the content.
+        # NOTE: load succeeds; the error arises when content is fetched and decoded.
         msg = "Character data .* could not be decoded with the 'utf-8' encoding."
         with pytest.raises(ValueError, match=msg):
             data = cube.data