From 41150f2cf5ac8eb4eecea09fc30a3519e6636bd6 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 18 Aug 2025 11:43:10 -0400 Subject: [PATCH 1/4] Update zstd.pyx with new content --- numcodecs/zstd.pyx | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index f93da633..bc86e03f 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -129,7 +129,7 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): level = MAX_CLEVEL # obtain source memoryview - source_mv = ensure_continguous_memoryview(source) + source_mv = ensure_contiguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) # setup source buffer @@ -202,11 +202,11 @@ def decompress(source, dest=None): Py_buffer* dest_pb char* dest_ptr size_t source_size, dest_size, decompressed_size - size_t nbytes, cbytes, blocksize size_t dest_nbytes + unsigned long long content_size # obtain source memoryview - source_mv = ensure_continguous_memoryview(source) + source_mv = ensure_contiguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) # get source pointer @@ -214,18 +214,19 @@ def decompress(source, dest=None): source_size = source_pb.len try: - - # determine uncompressed size - dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) - if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_ERROR: + # determine uncompressed size using unsigned long long for full range + content_size = ZSTD_getFrameContentSize(source_ptr, source_size) + if content_size == ZSTD_CONTENTSIZE_UNKNOWN and dest is None: + return stream_decompress(source_pb) + elif content_size == ZSTD_CONTENTSIZE_ERROR or content_size == 0: raise RuntimeError('Zstd decompression error: invalid input data') + elif content_size > (SIZE_MAX): + raise RuntimeError('Zstd decompression error: content size too large for platform') - if dest_size == ZSTD_CONTENTSIZE_UNKNOWN and dest is None: - return stream_decompress(source_pb) + dest_size = content_size # setup destination buffer if dest is None: - # allocate memory dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) else: dest_1d = ensure_contiguous_ndarray(dest) @@ -236,9 +237,6 @@ def decompress(source, dest=None): dest_ptr = dest_pb.buf dest_nbytes = dest_pb.len - if dest_size == ZSTD_CONTENTSIZE_UNKNOWN: - dest_size = dest_nbytes - # validate output buffer if dest_nbytes < dest_size: raise ValueError('destination buffer too small; expected at least %s, ' @@ -388,7 +386,7 @@ class Zstd(Codec): return decompress(buf, out) def __repr__(self): - r = '%s(level=%r)' % \ + r = '%s(level=%r)' % (type(self).__name__, self.level) return r @@ -406,4 +404,4 @@ class Zstd(Codec): @classmethod def max_level(cls): """Returns the maximum compression level of the underlying zstd library.""" - return ZSTD_maxCLevel() + return ZSTD_maxCLevel() \ No newline at end of file From b0fe5560b3980631457d6652f8d3d4116d8b4452 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 18 Aug 2025 12:01:19 -0400 Subject: [PATCH 2/4] Apply suggestion from @mkitti --- numcodecs/zstd.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index bc86e03f..cdf951db 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -386,7 +386,7 @@ class Zstd(Codec): return decompress(buf, out) def __repr__(self): - r = '%s(level=%r)' % + r = '%s(level=%r)' % \ (type(self).__name__, self.level) return r From 1349304b83c7b4e150d0f0d9341a317ddc9d3e1e Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 18 Aug 2025 17:31:17 +0000 Subject: [PATCH 3/4] Fix LLM issues .. --- numcodecs/zstd.pyx | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index cdf951db..644e142d 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -15,6 +15,9 @@ from .abc import Codec from libc.stdlib cimport malloc, realloc, free +cdef extern from "stdint.h": + cdef size_t SIZE_MAX + cdef extern from "zstd.h": unsigned ZSTD_versionNumber() nogil @@ -129,7 +132,7 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): level = MAX_CLEVEL # obtain source memoryview - source_mv = ensure_contiguous_memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) # setup source buffer @@ -206,7 +209,7 @@ def decompress(source, dest=None): unsigned long long content_size # obtain source memoryview - source_mv = ensure_contiguous_memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) # get source pointer @@ -218,6 +221,8 @@ def decompress(source, dest=None): content_size = ZSTD_getFrameContentSize(source_ptr, source_size) if content_size == ZSTD_CONTENTSIZE_UNKNOWN and dest is None: return stream_decompress(source_pb) + elif content_size == ZSTD_CONTENTSIZE_UNKNOWN: + # dest is not None elif content_size == ZSTD_CONTENTSIZE_ERROR or content_size == 0: raise RuntimeError('Zstd decompression error: invalid input data') elif content_size > (SIZE_MAX): @@ -227,6 +232,7 @@ def decompress(source, dest=None): # setup destination buffer if dest is None: + # allocate memory dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) else: dest_1d = ensure_contiguous_ndarray(dest) @@ -237,6 +243,9 @@ def decompress(source, dest=None): dest_ptr = dest_pb.buf dest_nbytes = dest_pb.len + if content_size == ZSTD_CONTENTSIZE_UNKNOWN: + dest_size = dest_nbytes + # validate output buffer if dest_nbytes < dest_size: raise ValueError('destination buffer too small; expected at least %s, ' @@ -404,4 +413,4 @@ class Zstd(Codec): @classmethod def max_level(cls): """Returns the maximum compression level of the underlying zstd library.""" - return ZSTD_maxCLevel() \ No newline at end of file + return ZSTD_maxCLevel() From 786d31fcde2366bfb93df430f0b0222964b650ab Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 18 Aug 2025 21:03:19 +0000 Subject: [PATCH 4/4] Add pass, comment for dest_size --- numcodecs/zstd.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 644e142d..7b88a8da 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -223,6 +223,8 @@ def decompress(source, dest=None): return stream_decompress(source_pb) elif content_size == ZSTD_CONTENTSIZE_UNKNOWN: # dest is not None + # set dest_size based on dest + pass elif content_size == ZSTD_CONTENTSIZE_ERROR or content_size == 0: raise RuntimeError('Zstd decompression error: invalid input data') elif content_size > (SIZE_MAX):