From bee655358f1c5aedbc30bd9469cab86e607af45f Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 10 Oct 2024 02:15:39 -0700 Subject: [PATCH 1/3] Update optimized `Delta` codec for `bool` Previously, `bool` just worked with `Delta`; however, this was never actually tested. The optimized version switched to `np.subtract` for in-place computation, which works for other types, but `bool` needs special handling because NumPy does not support subtraction of boolean arrays. Fortunately, this can be done with `np.not_equal`, which gives the same result for `bool` data. Also include a test for `bool` data to make sure this is handled correctly going forward. --- numcodecs/delta.py | 5 ++++- numcodecs/tests/test_delta.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/numcodecs/delta.py b/numcodecs/delta.py index 09a5601e..28a5233e 100644 --- a/numcodecs/delta.py +++ b/numcodecs/delta.py @@ -64,7 +64,10 @@ def encode(self, buf): # compute differences # using np.subtract for in-place operations - np.subtract(arr[1:], arr[0:-1], out=enc[1:]) + if self.dtype == np.dtype("bool"): + np.not_equal(arr[1:], arr[0:-1], out=enc[1:]) + else: + np.subtract(arr[1:], arr[0:-1], out=enc[1:]) return enc diff --git a/numcodecs/tests/test_delta.py b/numcodecs/tests/test_delta.py index fb5714bb..9664efee 100644 --- a/numcodecs/tests/test_delta.py +++ b/numcodecs/tests/test_delta.py @@ -14,6 +14,7 @@ # mix of shapes: 1D, 2D, 3D # mix of orders: C, F arrays = [ + np.random.randint(0, 1, size=110, dtype='?').reshape(10, 11), np.arange(1000, dtype=' Date: Thu, 10 Oct 2024 10:54:40 -0700 Subject: [PATCH 2/3] Simplify type check This is a bit more succinct and gets to the core point: `arr.dtype` determines this code path. Comparing directly to `bool` also works here, and it is a bit faster since we need not construct an `np.dtype` object.
--- numcodecs/delta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/delta.py b/numcodecs/delta.py index 28a5233e..e86a0768 100644 --- a/numcodecs/delta.py +++ b/numcodecs/delta.py @@ -64,7 +64,7 @@ def encode(self, buf): # compute differences # using np.subtract for in-place operations - if self.dtype == np.dtype("bool"): + if arr.dtype == bool: np.not_equal(arr[1:], arr[0:-1], out=enc[1:]) else: np.subtract(arr[1:], arr[0:-1], out=enc[1:]) From cf6fde0607854452dc8da5704a2dcce4da830f80 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 10 Oct 2024 11:05:25 -0700 Subject: [PATCH 3/3] Drop extraneous `0`s --- numcodecs/delta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numcodecs/delta.py b/numcodecs/delta.py index e86a0768..f6307312 100644 --- a/numcodecs/delta.py +++ b/numcodecs/delta.py @@ -65,9 +65,9 @@ def encode(self, buf): # compute differences # using np.subtract for in-place operations if arr.dtype == bool: - np.not_equal(arr[1:], arr[0:-1], out=enc[1:]) + np.not_equal(arr[1:], arr[:-1], out=enc[1:]) else: - np.subtract(arr[1:], arr[0:-1], out=enc[1:]) + np.subtract(arr[1:], arr[:-1], out=enc[1:]) return enc