From 0c823e2d5a7d0e54dd663d110406c5d326fb59f9 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 21 Nov 2024 20:15:43 -0800 Subject: [PATCH 01/30] Switch `Buffer`s to `memoryview`s When this was written in the code, Python's Buffer Protocol support was inconsistent across Python versions (specifically on Python 2.7). Since Python 2.7 reached EOL and it was dropped from Numcodecs, the Python Buffer Protocol support has become more consistent. At this stage the `memoryview` object, which Cython also supports, does all the same things that `Buffer` would do for us. Plus it is builtin to the Python standard library. It behaves similarly in a lot of ways. Given this, switch the code over to `memoryview`s internally and drop `Buffer`. --- numcodecs/blosc.pyx | 257 ++++++++++++++++++++------------------- numcodecs/compat_ext.pxd | 12 -- numcodecs/compat_ext.pyx | 28 ----- numcodecs/lz4.pyx | 146 +++++++++++----------- numcodecs/vlen.pyx | 96 +++++++++------ numcodecs/zstd.pyx | 130 ++++++++++---------- setup.py | 20 --- 7 files changed, 324 insertions(+), 365 deletions(-) delete mode 100644 numcodecs/compat_ext.pxd delete mode 100644 numcodecs/compat_ext.pyx diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 2c43dd04..6e6851db 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -8,12 +8,11 @@ import multiprocessing import os -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from .compat_ext cimport Buffer -from .compat_ext import Buffer from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -146,17 +145,18 @@ def cbuffer_sizes(source): """ cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb size_t nbytes, cbytes, blocksize - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + 
source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - blosc_cbuffer_sizes(buffer.ptr, &nbytes, &cbytes, &blocksize) - - # release buffers - buffer.release() + blosc_cbuffer_sizes(source_pb.buf, &nbytes, &cbytes, &blocksize) return nbytes, cbytes, blocksize @@ -164,16 +164,17 @@ def cbuffer_sizes(source): def cbuffer_complib(source): """Return the name of the compression library used to compress `source`.""" cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - complib = blosc_cbuffer_complib(buffer.ptr) - - # release buffers - buffer.release() + complib = blosc_cbuffer_complib(source_pb.buf) complib = complib.decode('ascii') @@ -193,18 +194,19 @@ def cbuffer_metainfo(source): """ cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb size_t typesize int flags - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - blosc_cbuffer_metainfo(buffer.ptr, &typesize, &flags) - - # release buffers - buffer.release() + blosc_cbuffer_metainfo(source_pb.buf, &typesize, &flags) # decompose flags if flags & BLOSC_DOSHUFFLE: @@ -252,23 +254,29 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, """ cdef: - char *source_ptr - char *dest_ptr - Buffer source_buffer + memoryview source_mv + const 
Py_buffer* source_pb + const char* source_ptr size_t nbytes, itemsize int cbytes bytes dest + char* dest_ptr # check valid cname early cname_str = cname.decode('ascii') if cname_str not in list_compressors(): err_bad_cname(cname_str) - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - nbytes = source_buffer.nbytes - itemsize = source_buffer.itemsize + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # extract metadata + source_ptr = source_pb.buf + nbytes = source_pb.len + itemsize = source_pb.itemsize # determine shuffle if shuffle == AUTOSHUFFLE: @@ -280,46 +288,40 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, raise ValueError('invalid shuffle argument; expected -1, 0, 1 or 2, found %r' % shuffle) - try: - - # setup destination - dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) - dest_ptr = PyBytes_AS_STRING(dest) - - # perform compression - if _get_use_threads(): - # allow blosc to use threads internally + # setup destination + dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) + dest_ptr = PyBytes_AS_STRING(dest) - # N.B., we are using blosc's global context, and so we need to use a lock - # to ensure no-one else can modify the global context while we're setting it - # up and using it. 
- with get_mutex(): + # perform compression + if _get_use_threads(): + # allow blosc to use threads internally - # set compressor - compressor_set = blosc_set_compressor(cname) - if compressor_set < 0: - # shouldn't happen if we checked against list of compressors - # already, but just in case - err_bad_cname(cname_str) + # N.B., we are using blosc's global context, and so we need to use a lock + # to ensure no-one else can modify the global context while we're setting it + # up and using it. + with get_mutex(): - # set blocksize - blosc_set_blocksize(blocksize) + # set compressor + compressor_set = blosc_set_compressor(cname) + if compressor_set < 0: + # shouldn't happen if we checked against list of compressors + # already, but just in case + err_bad_cname(cname_str) - # perform compression - with nogil: - cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) + # set blocksize + blosc_set_blocksize(blocksize) - else: + # perform compression with nogil: - cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, - cname, blocksize, 1) + cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) - finally: + else: + with nogil: + cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, + cname, blocksize, 1) - # release buffers - source_buffer.release() # check compression was successful if cbytes <= 0: @@ -350,15 +352,22 @@ def decompress(source, dest=None): """ cdef: int ret - char *source_ptr - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr size_t nbytes, cbytes, blocksize - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr + # 
obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # get source pointer + source_ptr = source_pb.buf # determine buffer size blosc_cbuffer_sizes(source_ptr, &nbytes, &cbytes, &blocksize) @@ -366,37 +375,29 @@ def decompress(source, dest=None): # setup destination buffer if dest is None: # allocate memory - dest = PyBytes_FromStringAndSize(NULL, nbytes) - dest_ptr = PyBytes_AS_STRING(dest) - dest_nbytes = nbytes + dest_1d = dest = PyBytes_FromStringAndSize(NULL, nbytes) else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - dest_nbytes = dest_buffer.nbytes - - try: - - # guard condition - if dest_nbytes < nbytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nbytes, dest_nbytes)) - - # perform decompression - if _get_use_threads(): - # allow blosc to use threads internally - with nogil: - ret = blosc_decompress(source_ptr, dest_ptr, nbytes) - else: - with nogil: - ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) - - finally: - - # release buffers - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + # guard condition + if dest_nbytes < nbytes: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (nbytes, dest_nbytes)) + + # perform decompression + if _get_use_threads(): + # allow blosc to use threads internally + with nogil: + ret = blosc_decompress(source_ptr, dest_ptr, nbytes) + else: + with nogil: + ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) # handle errors if ret <= 0: @@ 
-433,14 +434,22 @@ def decompress_partial(source, start, nitems, dest=None): int encoding_size int nitems_bytes int start_bytes - char *source_ptr - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None - - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr + const char* source_ptr + memoryview source_mv + const Py_buffer* source_pb + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr + size_t dest_nbytes + + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b"A"): + raise BufferError("`source` must contain contiguous memory") + + # setup source pointer + source_ptr = source_pb.buf # get encoding size from source buffer header encoding_size = source[3] @@ -451,26 +460,22 @@ def decompress_partial(source, start, nitems, dest=None): # setup destination buffer if dest is None: - dest = PyBytes_FromStringAndSize(NULL, nitems_bytes) - dest_ptr = PyBytes_AS_STRING(dest) - dest_nbytes = nitems_bytes + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, nitems_bytes) else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - dest_nbytes = dest_buffer.nbytes + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len # try decompression - try: - if dest_nbytes < nitems_bytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nitems_bytes, dest_nbytes)) - ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) - - finally: - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + if dest_nbytes < nitems_bytes: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % 
(nitems_bytes, dest_nbytes)) + ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) # ret refers to the number of bytes returned from blosc_getitem. if ret <= 0: diff --git a/numcodecs/compat_ext.pxd b/numcodecs/compat_ext.pxd deleted file mode 100644 index dfcaee0f..00000000 --- a/numcodecs/compat_ext.pxd +++ /dev/null @@ -1,12 +0,0 @@ -# cython: language_level=3 - - -cdef class Buffer: - cdef: - char *ptr - Py_buffer buffer - size_t nbytes - size_t itemsize - bint acquired - - cpdef release(self) diff --git a/numcodecs/compat_ext.pyx b/numcodecs/compat_ext.pyx deleted file mode 100644 index f57e3cfd..00000000 --- a/numcodecs/compat_ext.pyx +++ /dev/null @@ -1,28 +0,0 @@ -# cython: embedsignature=True -# cython: profile=False -# cython: linetrace=False -# cython: binding=False -# cython: language_level=3 -from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release - - -from .compat import ensure_contiguous_ndarray - - -cdef class Buffer: - """Convenience class for buffer interface.""" - - def __cinit__(self, obj, int flags): - PyObject_GetBuffer(obj, &(self.buffer), flags) - self.acquired = True - self.ptr = self.buffer.buf - self.itemsize = self.buffer.itemsize - self.nbytes = self.buffer.len - - cpdef release(self): - if self.acquired: - PyBuffer_Release(&(self.buffer)) - self.acquired = False - - def __dealloc__(self): - self.release() diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 87a7f6ca..a6b544fa 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -5,13 +5,11 @@ # cython: language_level=3 -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.memoryview cimport PyMemoryView_GET_BUFFER from libc.stdint cimport uint8_t, uint32_t - -from .compat_ext cimport Buffer -from .compat_ext import Buffer from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray from 
.abc import Codec @@ -66,40 +64,38 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): """ cdef: - char *source_ptr - char *dest_ptr - char *dest_start - Buffer source_buffer - int source_size, dest_size, compressed_size + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr bytes dest + char* dest_ptr + char* dest_start + int source_size, dest_size, compressed_size # check level if acceleration <= 0: acceleration = DEFAULT_ACCELERATION # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes - - try: - - # setup destination - dest_size = LZ4_compressBound(source_size) - dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) - dest_ptr = PyBytes_AS_STRING(dest) - store_le32(dest_ptr, source_size) - dest_start = dest_ptr + sizeof(uint32_t) - - # perform compression - with nogil: - compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, - acceleration) - - finally: - - # release buffers - source_buffer.release() + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + source_ptr = source_pb.buf + source_size = source_pb.len + + # setup destination + dest_size = LZ4_compressBound(source_size) + dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) + dest_ptr = PyBytes_AS_STRING(dest) + store_le32(dest_ptr, source_size) + dest_start = dest_ptr + sizeof(uint32_t) + + # perform compression + with nogil: + compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, + acceleration) # check compression was successful if compressed_size <= 0: @@ -129,52 +125,54 @@ def decompress(source, dest=None): """ cdef: - char *source_ptr - char *source_start - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None + memoryview 
source_mv + const Py_buffer* source_pb + const char* source_ptr + const char* source_start + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr int source_size, dest_size, decompressed_size # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes - - try: - - # determine uncompressed size - if source_size < sizeof(uint32_t): - raise ValueError('bad input data') - dest_size = load_le32(source_ptr) - if dest_size <= 0: - raise RuntimeError('LZ4 decompression error: invalid input data') - source_start = source_ptr + sizeof(uint32_t) - source_size -= sizeof(uint32_t) - - # setup destination buffer - if dest is None: - # allocate memory - dest = PyBytes_FromStringAndSize(NULL, dest_size) - dest_ptr = PyBytes_AS_STRING(dest) - else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - if dest_buffer.nbytes < dest_size: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (dest_size, dest_buffer.nbytes)) - - # perform decompression - with nogil: - decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) - - finally: - - # release buffers - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # extract source metadata + source_ptr = source_pb.buf + source_size = source_pb.len + + # determine uncompressed size + if source_size < sizeof(uint32_t): + raise ValueError('bad input data') + dest_size = load_le32(source_ptr) + if dest_size <= 0: + raise RuntimeError('LZ4 decompression error: invalid input data') + source_start = source_ptr + sizeof(uint32_t) + source_size -= sizeof(uint32_t) + + # setup destination 
buffer + if dest is None: + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) + else: + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + if dest_nbytes < dest_size: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (dest_size, dest_nbytes)) + + # perform decompression + with nogil: + decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) # check decompression was successful if decompressed_size <= 0: diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index e1e149ee..61efe347 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -10,12 +10,16 @@ cimport cython from numpy cimport ndarray import numpy as np from .abc import Codec -from .compat_ext cimport Buffer -from .compat_ext import Buffer from .compat import ensure_contiguous_ndarray -from cpython cimport (PyBytes_GET_SIZE, PyBytes_AS_STRING, PyBytes_Check, - PyBytes_FromStringAndSize, PyUnicode_AsUTF8String) -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS +from cpython.buffer cimport PyBuffer_IsContiguous +from cpython.bytes cimport ( + PyBytes_AS_STRING, + PyBytes_GET_SIZE, + PyBytes_Check, + PyBytes_FromStringAndSize, +) +from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from cpython.unicode cimport PyUnicode_AsUTF8String from libc.stdint cimport uint8_t from libc.string cimport memcpy from ._utils cimport store_le32, load_le32 @@ -132,23 +136,26 @@ class VLenUTF8(Codec): @cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = 
ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = load_le32(data) @@ -260,23 +267,26 @@ class VLenBytes(Codec): @cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = load_le32(data) @@ -352,11 +362,12 @@ class VLenArray(Codec): object[:] values object[:] normed_values int[:] lengths - char* encv + const char* encv bytes b bytearray out char* data - Buffer value_buffer + memoryview value_mv + const Py_buffer* value_pb object v # normalise input @@ -398,11 +409,13 @@ class VLenArray(Codec): l = lengths[i] store_le32(data, 
l) data += 4 - value_buffer = Buffer(normed_values[i], PyBUF_ANY_CONTIGUOUS) - encv = value_buffer.ptr + + value_mv = memoryview(normed_values[i]) + value_pb = PyMemoryView_GET_BUFFER(value_mv) + + encv = value_pb.buf memcpy(data, encv, l) data += l - value_buffer.release() return out @@ -410,23 +423,26 @@ class VLenArray(Codec): @cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = load_le32(data) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index efd12fa2..ced430dc 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -5,12 +5,10 @@ # cython: language_level=3 -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.memoryview cimport PyMemoryView_GET_BUFFER - -from .compat_ext cimport Buffer -from .compat_ext import Buffer from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -92,20 +90,26 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): """ cdef: - char *source_ptr - char 
*dest_ptr - Buffer source_buffer + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr size_t source_size, dest_size, compressed_size bytes dest + char* dest_ptr # check level if level > MAX_CLEVEL: level = MAX_CLEVEL + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes + source_ptr = source_pb.buf + source_size = source_pb.len cctx = ZSTD_createCCtx() param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level) @@ -120,22 +124,14 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): error = ZSTD_getErrorName(param_set_result) raise RuntimeError('Could not set zstd checksum flag: %s' % error) - try: - - # setup destination - dest_size = ZSTD_compressBound(source_size) - dest = PyBytes_FromStringAndSize(NULL, dest_size) - dest_ptr = PyBytes_AS_STRING(dest) + # setup destination + dest_size = ZSTD_compressBound(source_size) + dest = PyBytes_FromStringAndSize(NULL, dest_size) + dest_ptr = PyBytes_AS_STRING(dest) - # perform compression - with nogil: - compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size) - - finally: - if cctx: - ZSTD_freeCCtx(cctx) - # release buffers - source_buffer.release() + # perform compression + with nogil: + compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size) # check compression was successful if ZSTD_isError(compressed_size): @@ -165,47 +161,51 @@ def decompress(source, dest=None): """ cdef: - char *source_ptr - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None + memoryview source_mv + const Py_buffer* source_pb + char* source_ptr + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr 
size_t source_size, dest_size, decompressed_size - - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes - - try: - - # determine uncompressed size - dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) - if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR: - raise RuntimeError('Zstd decompression error: invalid input data') - - # setup destination buffer - if dest is None: - # allocate memory - dest = PyBytes_FromStringAndSize(NULL, dest_size) - dest_ptr = PyBytes_AS_STRING(dest) - else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - if dest_buffer.nbytes < dest_size: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (dest_size, dest_buffer.nbytes)) - - # perform decompression - with nogil: - decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size) - - finally: - - # release buffers - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + size_t nbytes, cbytes, blocksize + + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # get source pointer + source_ptr = source_pb.buf + source_size = source_pb.len + + # determine uncompressed size + dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) + if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR: + raise RuntimeError('Zstd decompression error: invalid input data') + + # setup destination buffer + if dest is None: + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) + else: + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest 
memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + # validate output buffer + if dest_nbytes < dest_size: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (dest_size, dest_nbytes)) + + # perform decompression + with nogil: + decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size) # check decompression was successful if ZSTD_isError(decompressed_size): diff --git a/setup.py b/setup.py index b6db0797..0f06795e 100644 --- a/setup.py +++ b/setup.py @@ -274,25 +274,6 @@ def jenkins_extension(): return extensions -def compat_extension(): - info('setting up compat extension') - - extra_compile_args = base_compile_args.copy() - - sources = ['numcodecs/compat_ext.pyx'] - - # define extension module - extensions = [ - Extension( - 'numcodecs.compat_ext', - sources=sources, - extra_compile_args=extra_compile_args, - ), - ] - - return extensions - - def shuffle_extension(): info('setting up shuffle extension') @@ -361,7 +342,6 @@ def run_setup(with_extensions): blosc_extension() + zstd_extension() + lz4_extension() - + compat_extension() + shuffle_extension() + vlen_extension() + fletcher_extension() From 9ac8d8e57254393636571423a60b4747e620610d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 25 Mar 2025 23:41:47 -0700 Subject: [PATCH 02/30] Add back `ZSTD_freeCCtx` --- numcodecs/zstd.pyx | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index ced430dc..030781a1 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -124,14 +124,20 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): error = ZSTD_getErrorName(param_set_result) raise RuntimeError('Could not set zstd checksum flag: %s' % error) - # setup destination - dest_size = ZSTD_compressBound(source_size) - dest = PyBytes_FromStringAndSize(NULL, dest_size) - 
dest_ptr = PyBytes_AS_STRING(dest) + try: - # perform compression - with nogil: - compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size) + # setup destination + dest_size = ZSTD_compressBound(source_size) + dest = PyBytes_FromStringAndSize(NULL, dest_size) + dest_ptr = PyBytes_AS_STRING(dest) + + # perform compression + with nogil: + compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size) + + finally: + if cctx: + ZSTD_freeCCtx(cctx) # check compression was successful if ZSTD_isError(compressed_size): From 0ade20cf388933e530381a6f43cad26875235198 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 25 Mar 2025 23:45:14 -0700 Subject: [PATCH 03/30] Drop leftover `Buffer` from merge conflict --- numcodecs/blosc.pyx | 5 ----- 1 file changed, 5 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 26dd4ab5..e6d328f5 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -288,11 +288,6 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, source_ptr = source_pb.buf nbytes = source_pb.len - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - nbytes = source_buffer.nbytes - # validate typesize if isinstance(typesize, int): if typesize < 1: From 13f8e50de9b6f31cb1a3a038d475c3b6df78e787 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 25 Mar 2025 23:51:40 -0700 Subject: [PATCH 04/30] Add minor comment --- numcodecs/lz4.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index a6b544fa..bf1fce13 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -82,6 +82,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): if not PyBuffer_IsContiguous(source_pb, b'A'): raise BufferError("`source` must contain contiguous memory") + # extract metadata source_ptr = source_pb.buf source_size = source_pb.len From bd1c4013d06c087a393211d4daf2f20d2a2bacd1 Mon Sep 17 00:00:00 
2001 From: jakirkham Date: Tue, 25 Mar 2025 23:56:55 -0700 Subject: [PATCH 05/30] Add trivial `try...finally...`s to cleanup diff --- numcodecs/blosc.pyx | 107 ++++++++++++++++++++++++-------------------- numcodecs/lz4.pyx | 88 +++++++++++++++++++----------------- numcodecs/zstd.pyx | 57 ++++++++++++----------- 3 files changed, 138 insertions(+), 114 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index e6d328f5..c9c1568d 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -306,40 +306,44 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, raise ValueError('invalid shuffle argument; expected -1, 0, 1 or 2, found %r' % shuffle) - # setup destination - dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) - dest_ptr = PyBytes_AS_STRING(dest) - - # perform compression - if _get_use_threads(): - # allow blosc to use threads internally - - # N.B., we are using blosc's global context, and so we need to use a lock - # to ensure no-one else can modify the global context while we're setting it - # up and using it. 
- with get_mutex(): - - # set compressor - compressor_set = blosc_set_compressor(cname) - if compressor_set < 0: - # shouldn't happen if we checked against list of compressors - # already, but just in case - _err_bad_cname(cname_str) - - # set blocksize - blosc_set_blocksize(blocksize) - - # perform compression - with nogil: - cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) + try: - else: - with nogil: - cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, - cname, blocksize, 1) + # setup destination + dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) + dest_ptr = PyBytes_AS_STRING(dest) + + # perform compression + if _get_use_threads(): + # allow blosc to use threads internally + + # N.B., we are using blosc's global context, and so we need to use a lock + # to ensure no-one else can modify the global context while we're setting it + # up and using it. 
+ with get_mutex(): + # set compressor + compressor_set = blosc_set_compressor(cname) + if compressor_set < 0: + # shouldn't happen if we checked against list of compressors + # already, but just in case + _err_bad_cname(cname_str) + + # set blocksize + blosc_set_blocksize(blocksize) + + # perform compression + with nogil: + cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) + + else: + with nogil: + cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, + cname, blocksize, 1) + + finally: + pass # check compression was successful if cbytes <= 0: @@ -403,19 +407,23 @@ def decompress(source, dest=None): dest_ptr = dest_pb.buf dest_nbytes = dest_pb.len - # guard condition - if dest_nbytes < nbytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nbytes, dest_nbytes)) + try: - # perform decompression - if _get_use_threads(): - # allow blosc to use threads internally - with nogil: - ret = blosc_decompress(source_ptr, dest_ptr, nbytes) - else: - with nogil: - ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) + # guard condition + if dest_nbytes < nbytes: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (nbytes, dest_nbytes)) + + # perform decompression + if _get_use_threads(): + # allow blosc to use threads internally + with nogil: + ret = blosc_decompress(source_ptr, dest_ptr, nbytes) + else: + with nogil: + ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) + finally: + pass # handle errors if ret <= 0: @@ -490,10 +498,13 @@ def _decompress_partial(source, start, nitems, dest=None): dest_nbytes = dest_pb.len # try decompression - if dest_nbytes < nitems_bytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nitems_bytes, dest_nbytes)) - ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) + try: + if 
dest_nbytes < nitems_bytes: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (nitems_bytes, dest_nbytes)) + ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) + finally: + pass # ret refers to the number of bytes returned from blosc_getitem. if ret <= 0: diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index bf1fce13..79ef4ca6 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -86,17 +86,20 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): source_ptr = source_pb.buf source_size = source_pb.len - # setup destination - dest_size = LZ4_compressBound(source_size) - dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) - dest_ptr = PyBytes_AS_STRING(dest) - store_le32(dest_ptr, source_size) - dest_start = dest_ptr + sizeof(uint32_t) - - # perform compression - with nogil: - compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, - acceleration) + try: + # setup destination + dest_size = LZ4_compressBound(source_size) + dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) + dest_ptr = PyBytes_AS_STRING(dest) + store_le32(dest_ptr, source_size) + dest_start = dest_ptr + sizeof(uint32_t) + + # perform compression + with nogil: + compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, + acceleration) + finally: + pass # check compression was successful if compressed_size <= 0: @@ -145,35 +148,40 @@ def decompress(source, dest=None): source_ptr = source_pb.buf source_size = source_pb.len - # determine uncompressed size - if source_size < sizeof(uint32_t): - raise ValueError('bad input data') - dest_size = load_le32(source_ptr) - if dest_size <= 0: - raise RuntimeError('LZ4 decompression error: invalid input data') - source_start = source_ptr + sizeof(uint32_t) - source_size -= sizeof(uint32_t) - - # setup destination buffer - if dest is None: - # allocate memory - dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) 
- else: - dest_1d = ensure_contiguous_ndarray(dest) - - # obtain dest memoryview - dest_mv = memoryview(dest_1d) - dest_pb = PyMemoryView_GET_BUFFER(dest_mv) - dest_ptr = dest_pb.buf - dest_nbytes = dest_pb.len - - if dest_nbytes < dest_size: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (dest_size, dest_nbytes)) - - # perform decompression - with nogil: - decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) + try: + + # determine uncompressed size + if source_size < sizeof(uint32_t): + raise ValueError('bad input data') + dest_size = load_le32(source_ptr) + if dest_size <= 0: + raise RuntimeError('LZ4 decompression error: invalid input data') + source_start = source_ptr + sizeof(uint32_t) + source_size -= sizeof(uint32_t) + + # setup destination buffer + if dest is None: + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) + else: + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + if dest_nbytes < dest_size: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (dest_size, dest_nbytes)) + + # perform decompression + with nogil: + decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) + + finally: + pass # check decompression was successful if decompressed_size <= 0: diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 030781a1..f1096ddb 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -186,32 +186,37 @@ def decompress(source, dest=None): source_ptr = source_pb.buf source_size = source_pb.len - # determine uncompressed size - dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) - if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR: - raise RuntimeError('Zstd decompression 
error: invalid input data') - - # setup destination buffer - if dest is None: - # allocate memory - dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) - else: - dest_1d = ensure_contiguous_ndarray(dest) - - # obtain dest memoryview - dest_mv = memoryview(dest_1d) - dest_pb = PyMemoryView_GET_BUFFER(dest_mv) - dest_ptr = dest_pb.buf - dest_nbytes = dest_pb.len - - # validate output buffer - if dest_nbytes < dest_size: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (dest_size, dest_nbytes)) - - # perform decompression - with nogil: - decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size) + try: + + # determine uncompressed size + dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) + if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR: + raise RuntimeError('Zstd decompression error: invalid input data') + + # setup destination buffer + if dest is None: + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) + else: + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + # validate output buffer + if dest_nbytes < dest_size: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (dest_size, dest_nbytes)) + + # perform decompression + with nogil: + decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size) + + finally: + pass # check decompression was successful if ZSTD_isError(decompressed_size): From 64eed124e9b5aba32255b57904b2b5850b53e92d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 00:17:18 -0700 Subject: [PATCH 06/30] Use Cython `cimport`s for Python C API --- numcodecs/vlen.pyx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/numcodecs/vlen.pyx 
b/numcodecs/vlen.pyx index 61efe347..4a0fbe84 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -12,6 +12,10 @@ import numpy as np from .abc import Codec from .compat import ensure_contiguous_ndarray from cpython.buffer cimport PyBuffer_IsContiguous +from cpython.bytearray cimport ( + PyByteArray_AS_STRING, + PyByteArray_FromStringAndSize, +) from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_GET_SIZE, @@ -19,19 +23,16 @@ from cpython.bytes cimport ( PyBytes_FromStringAndSize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from cpython.unicode cimport PyUnicode_AsUTF8String +from cpython.unicode cimport ( + PyUnicode_AsUTF8String, + PyUnicode_Check, + PyUnicode_FromStringAndSize, +) from libc.stdint cimport uint8_t from libc.string cimport memcpy from ._utils cimport store_le32, load_le32 -cdef extern from "Python.h": - bytearray PyByteArray_FromStringAndSize(char *v, Py_ssize_t l) - char* PyByteArray_AS_STRING(object string) - object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) - int PyUnicode_Check(object text) - - # 4 bytes to store number of items cdef Py_ssize_t HEADER_LENGTH = 4 From 31a7446b40546c9274694bb9fae3c33aa3fa2e8d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 00:34:41 -0700 Subject: [PATCH 07/30] Add news entry --- docs/release.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 59ed6d28..831fc53c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -38,6 +38,11 @@ Fixes * Remove redundant ``id`` from codec metadata serialization in Zarr3 codecs. By :user:`Norman Rzepka `, :issue:`685` +Maintenance +~~~~~~~~~~~ +* Replace internal ``Buffer`` usage with ``memoryview``\ s. + By :user:`John Kirkham `, :issue:`656` + .. 
_release_0.15.0: 0.15.0 From 9069553f530d70884144e5d41044fcbc9e28056e Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 01:00:48 -0700 Subject: [PATCH 08/30] Move `cimport`s from `libc` up top --- numcodecs/lz4.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 79ef4ca6..3fb8152f 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -5,10 +5,11 @@ # cython: language_level=3 +from libc.stdint cimport uint8_t, uint32_t + from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from libc.stdint cimport uint8_t, uint32_t from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray From 1ab9983f5ecf560fc82e72de8e8303ccbbf0959b Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 01:09:10 -0700 Subject: [PATCH 09/30] Resize buffers without copying --- numcodecs/blosc.pyx | 11 +++++++++-- numcodecs/lz4.pyx | 11 +++++++++-- numcodecs/zstd.pyx | 11 +++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index c9c1568d..69b5bb44 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -10,8 +10,13 @@ from deprecated import deprecated from cpython.buffer cimport PyBuffer_IsContiguous -from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.bytes cimport ( + PyBytes_AS_STRING, + PyBytes_FromStringAndSize, + _PyBytes_Resize, +) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from cpython.object cimport PyObject from .compat import ensure_contiguous_ndarray @@ -271,6 +276,7 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, size_t nbytes, itemsize int cbytes bytes dest + PyObject* dest_objptr char* dest_ptr # check valid cname early @@ -350,7 +356,8 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, 
raise RuntimeError('error during blosc compression: %d' % cbytes) # resize after compression - dest = dest[:cbytes] + dest_objptr = dest + _PyBytes_Resize(&dest_objptr, cbytes) return dest diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 3fb8152f..7b258e4a 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -8,8 +8,13 @@ from libc.stdint cimport uint8_t, uint32_t from cpython.buffer cimport PyBuffer_IsContiguous -from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.bytes cimport ( + PyBytes_AS_STRING, + PyBytes_FromStringAndSize, + _PyBytes_Resize, +) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from cpython.object cimport PyObject from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray @@ -69,6 +74,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): const Py_buffer* source_pb const char* source_ptr bytes dest + PyObject* dest_objptr char* dest_ptr char* dest_start int source_size, dest_size, compressed_size @@ -108,7 +114,8 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): # resize after compression compressed_size += sizeof(uint32_t) - dest = dest[:compressed_size] + dest_objptr = dest + _PyBytes_Resize(&dest_objptr, compressed_size) return dest diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index f1096ddb..1dc5b3aa 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -6,8 +6,13 @@ from cpython.buffer cimport PyBuffer_IsContiguous -from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.bytes cimport ( + PyBytes_AS_STRING, + PyBytes_FromStringAndSize, + _PyBytes_Resize, +) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from cpython.object cimport PyObject from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -95,6 +100,7 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): const char* source_ptr size_t source_size, dest_size, compressed_size bytes dest + 
PyObject* dest_objptr char* dest_ptr # check level @@ -145,7 +151,8 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): raise RuntimeError('Zstd compression error: %s' % error) # resize after compression - dest = dest[:compressed_size] + dest_objptr = dest + _PyBytes_Resize(&dest_objptr, compressed_size) return dest From f91c283abd3a1540448d108c090dea5dcdabbdda Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 01:08:37 -0700 Subject: [PATCH 10/30] Write directly to output array in VLen* --- numcodecs/vlen.pyx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index 4a0fbe84..1dc987d6 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -428,6 +428,9 @@ class VLenArray(Codec): const Py_buffer* buf_pb const char* data const char* data_end + object v + memoryview v_mv + Py_buffer* v_pb Py_ssize_t i, l, n_items, data_length # obtain memoryview @@ -465,7 +468,14 @@ class VLenArray(Codec): data += 4 if data + l > data_end: raise ValueError('corrupt buffer, data seem truncated') - out[i] = np.frombuffer(data[:l], dtype=self.dtype) + + # Create & fill array value + v = np.empty((l,), dtype="uint8").view(self.dtype) + v_mv = memoryview(v) + v_pb = PyMemoryView_GET_BUFFER(v_mv) + memcpy(v_pb.buf, data, l) + + out[i] = v data += l return out From d0a7721343cc936071b1a2d1206c65cbeeab65ef Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 01:36:18 -0700 Subject: [PATCH 11/30] Use `_mv` suffix for typed-memoryviews --- numcodecs/fletcher32.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 7c7b159f..b0d74621 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -63,15 +63,15 @@ class Fletcher32(Codec): def encode(self, buf): """Return buffer plus 4-byte fletcher checksum""" buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') - cdef const 
uint8_t[::1] b_ptr = buf - val = _fletcher32(b_ptr) + cdef const uint8_t[::1] b_mv = buf + val = _fletcher32(b_mv) return buf.tobytes() + struct.pack(" Date: Wed, 26 Mar 2025 02:13:06 -0700 Subject: [PATCH 12/30] Avoid excess copies in fletcher32 During encoding preallocate a `bytes` object for the final result and write everything directly into it. This avoids unnecessary staging and copying of intermediate results. Make use of Cython typed-`memoryview`s throughout encode and decode for efficient access of the underlying data. Further leverage the `store_le32` and `load_le32` functions to quickly pack and unpack little-endian 32-bit unsigned integers from buffers when encoding and decoding. --- numcodecs/fletcher32.pyx | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index b0d74621..1ee1379f 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -1,13 +1,14 @@ # cython: language_level=3 # cython: overflowcheck=False # cython: cdivision=True -import struct from numcodecs.abc import Codec from numcodecs.compat import ensure_contiguous_ndarray from libc.stdint cimport uint8_t, uint16_t, uint32_t +from ._utils cimport store_le32, load_le32 + cdef uint32_t _fletcher32(const uint8_t[::1] _data): # converted from @@ -64,22 +65,34 @@ class Fletcher32(Codec): """Return buffer plus 4-byte fletcher checksum""" buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') cdef const uint8_t[::1] b_mv = buf - val = _fletcher32(b_mv) - return buf.tobytes() + struct.pack("out + cdef uint8_t[::1] out_mv = (out_ptr)[:out_len] + + out_mv[:-4] = b_mv + store_le32(&out_mv[-4], _fletcher32(b_mv)) + + return out def decode(self, buf, out=None): """Check fletcher checksum, and return buffer without it""" b = ensure_contiguous_ndarray(buf).view('uint8') - cdef const uint8_t[::1] b_mv = b[:-4] - val = _fletcher32(b_mv) - found = b[-4:].view(" Date: Wed, 26 Mar 2025 02:35:36 
-0700 Subject: [PATCH 13/30] Add news entry for better memory usage --- docs/release.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 831fc53c..e865a9e0 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -37,6 +37,8 @@ Fixes ~~~~~ * Remove redundant ``id`` from codec metadata serialization in Zarr3 codecs. By :user:`Norman Rzepka `, :issue:`685` +* Preallocate output buffers and resize directly as needed. + By :user:`John Kirkham `, :issue:`656` Maintenance ~~~~~~~~~~~ From 4baf1e1f1c9cabdb4eaceda852c6ad403dfbe777 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 02:56:30 -0700 Subject: [PATCH 14/30] Fix fletcher32's `cimport`s --- numcodecs/fletcher32.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 1ee1379f..ed561d42 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -2,13 +2,16 @@ # cython: overflowcheck=False # cython: cdivision=True -from numcodecs.abc import Codec -from numcodecs.compat import ensure_contiguous_ndarray from libc.stdint cimport uint8_t, uint16_t, uint32_t +from cpython.bytes cimport PyBytes_FromStringAndSize + from ._utils cimport store_le32, load_le32 +from numcodecs.abc import Codec +from numcodecs.compat import ensure_contiguous_ndarray + cdef uint32_t _fletcher32(const uint8_t[::1] _data): # converted from From 0ea019c48e09f5f25d6c901a6008e1cd16267d84 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 03:22:04 -0700 Subject: [PATCH 15/30] Fix blank lines to match --- numcodecs/blosc.pyx | 1 + numcodecs/lz4.pyx | 1 + 2 files changed, 2 insertions(+) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 69b5bb44..e821d30c 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -429,6 +429,7 @@ def decompress(source, dest=None): else: with nogil: ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) + finally: pass diff --git 
a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 7b258e4a..09b81409 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -105,6 +105,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): with nogil: compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, acceleration) + finally: pass From 2b871c770291c022549808da9a28fedb7bf9a62a Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 03:35:15 -0700 Subject: [PATCH 16/30] Reassign `dest` with `dest_objptr` --- numcodecs/blosc.pyx | 1 + numcodecs/lz4.pyx | 1 + numcodecs/zstd.pyx | 1 + 3 files changed, 3 insertions(+) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index e821d30c..76b68761 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -358,6 +358,7 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, # resize after compression dest_objptr = dest _PyBytes_Resize(&dest_objptr, cbytes) + dest = dest_objptr return dest diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 09b81409..f2e9d4cf 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -117,6 +117,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): compressed_size += sizeof(uint32_t) dest_objptr = dest _PyBytes_Resize(&dest_objptr, compressed_size) + dest = dest_objptr return dest diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 1dc5b3aa..969705d1 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -153,6 +153,7 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): # resize after compression dest_objptr = dest _PyBytes_Resize(&dest_objptr, compressed_size) + dest = dest_objptr return dest From bdc7bc9285e2fcd429adb1711ee56d413f2c201e Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 04:07:45 -0700 Subject: [PATCH 17/30] Fix `source_ptr` type --- numcodecs/zstd.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 969705d1..30828201 100644 --- 
a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -177,7 +177,7 @@ def decompress(source, dest=None): cdef: memoryview source_mv const Py_buffer* source_pb - char* source_ptr + const char* source_ptr memoryview dest_mv Py_buffer* dest_pb char* dest_ptr From 0bfafeda63e999a400db176bc95e2fd4653f513d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 26 Mar 2025 04:10:35 -0700 Subject: [PATCH 18/30] Fix declaration order --- numcodecs/blosc.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 76b68761..01318e44 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -469,9 +469,9 @@ def _decompress_partial(source, start, nitems, dest=None): int encoding_size int nitems_bytes int start_bytes - const char* source_ptr memoryview source_mv const Py_buffer* source_pb + const char* source_ptr memoryview dest_mv Py_buffer* dest_pb char* dest_ptr From c17e3146bcce636025544e78b6faa823f790a8e6 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:04:24 -0700 Subject: [PATCH 19/30] Wrap `_PyBytes_Resize` for improved usability The `_PyBytes_Resize` function is helpful for resizing a `bytes` object after it is allocated. When the underlying `bytes` object only has one reference to it, the function can potentially use realloc to shrink or grow the allocation in-place. While the function signature of `_PyBytes_Resize` makes sense, it is a little unwieldy when used directly in Cython. To smooth this out a bit, use a macro to wrap calls to `_PyBytes_Resize`. This allows us to work with `PyObject*`s, which Cython handles well, instead of `PyObject**`s, which Cython handles awkwardly. The end result is a function from Cython's perspective, which is easy to use, and one under-the-hood that simply massages our input arguments into something `_PyBytes_Resize` expects. 
--- numcodecs/blosc.pyx | 14 ++++++++------ numcodecs/lz4.pyx | 14 ++++++++------ numcodecs/zstd.pyx | 14 ++++++++------ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 01318e44..23d778cb 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -13,16 +13,21 @@ from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, - _PyBytes_Resize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from cpython.object cimport PyObject from .compat import ensure_contiguous_ndarray from .abc import Codec +cdef extern from *: + """ + #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) + """ + int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 + + cdef extern from "blosc.h": cdef enum: BLOSC_MAX_OVERHEAD, @@ -276,7 +281,6 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, size_t nbytes, itemsize int cbytes bytes dest - PyObject* dest_objptr char* dest_ptr # check valid cname early @@ -356,9 +360,7 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, raise RuntimeError('error during blosc compression: %d' % cbytes) # resize after compression - dest_objptr = dest - _PyBytes_Resize(&dest_objptr, cbytes) - dest = dest_objptr + PyBytes_RESIZE(dest, cbytes) return dest diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index f2e9d4cf..bd8dfd09 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -11,16 +11,21 @@ from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, - _PyBytes_Resize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from cpython.object cimport PyObject from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray from .abc import Codec +cdef extern from *: + """ + #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) + """ + int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 + + 
cdef extern from "lz4.h": const char* LZ4_versionString() nogil @@ -74,7 +79,6 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): const Py_buffer* source_pb const char* source_ptr bytes dest - PyObject* dest_objptr char* dest_ptr char* dest_start int source_size, dest_size, compressed_size @@ -115,9 +119,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): # resize after compression compressed_size += sizeof(uint32_t) - dest_objptr = dest - _PyBytes_Resize(&dest_objptr, compressed_size) - dest = dest_objptr + PyBytes_RESIZE(dest, compressed_size) return dest diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 30828201..1585584c 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -9,15 +9,20 @@ from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, - _PyBytes_Resize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from cpython.object cimport PyObject from .compat import ensure_contiguous_ndarray from .abc import Codec +cdef extern from *: + """ + #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) + """ + int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 + + cdef extern from "zstd.h": unsigned ZSTD_versionNumber() nogil @@ -100,7 +105,6 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): const char* source_ptr size_t source_size, dest_size, compressed_size bytes dest - PyObject* dest_objptr char* dest_ptr # check level @@ -151,9 +155,7 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): raise RuntimeError('Zstd compression error: %s' % error) # resize after compression - dest_objptr = dest - _PyBytes_Resize(&dest_objptr, compressed_size) - dest = dest_objptr + PyBytes_RESIZE(dest, compressed_size) return dest From 54fd4b23cd6f7579acb34d5289b19a353af74bfa Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:17:08 -0700 Subject: [PATCH 20/30] Add `ensure_continguous_memoryview` function 
Include a function to ensure an object is converted into a contiguous `memoryview` object. --- numcodecs/compat_ext.pxd | 4 ++++ numcodecs/compat_ext.pyx | 19 +++++++++++++++++++ setup.py | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 numcodecs/compat_ext.pxd create mode 100644 numcodecs/compat_ext.pyx diff --git a/numcodecs/compat_ext.pxd b/numcodecs/compat_ext.pxd new file mode 100644 index 00000000..f025eb95 --- /dev/null +++ b/numcodecs/compat_ext.pxd @@ -0,0 +1,4 @@ +# cython: language_level=3 + + +cpdef memoryview ensure_continguous_memoryview(obj) diff --git a/numcodecs/compat_ext.pyx b/numcodecs/compat_ext.pyx new file mode 100644 index 00000000..ae6a8c44 --- /dev/null +++ b/numcodecs/compat_ext.pyx @@ -0,0 +1,19 @@ +# cython: embedsignature=True +# cython: profile=False +# cython: linetrace=False +# cython: binding=False +# cython: language_level=3 + +from cpython.buffer cimport PyBuffer_IsContiguous +from cpython.memoryview cimport PyMemoryView_GET_BUFFER + + +cpdef memoryview ensure_continguous_memoryview(obj): + cdef memoryview mv + if type(obj) is memoryview: + mv = obj + else: + mv = memoryview(obj) + if not PyBuffer_IsContiguous(PyMemoryView_GET_BUFFER(mv), b'A'): + raise BufferError("Expected contiguous memory") + return mv diff --git a/setup.py b/setup.py index 591782e8..c27fad35 100644 --- a/setup.py +++ b/setup.py @@ -276,6 +276,23 @@ def jenkins_extension(): ] +def compat_extension(): + info('setting up compat extension') + + extra_compile_args = base_compile_args.copy() + + sources = ['numcodecs/compat_ext.pyx'] + + # define extension module + return [ + Extension( + 'numcodecs.compat_ext', + sources=sources, + extra_compile_args=extra_compile_args, + ), + ] + + def shuffle_extension(): info('setting up shuffle extension') @@ -344,6 +361,7 @@ def run_setup(with_extensions): blosc_extension() + zstd_extension() + lz4_extension() + + compat_extension() + shuffle_extension() + vlen_extension() + fletcher_extension() 
From eac4ef18b5a67956b7f5f5c4bd313e03ccfea3cc Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:25:13 -0700 Subject: [PATCH 21/30] Use `ensure_contiguous_memoryview` --- numcodecs/blosc.pyx | 26 +++++++------------------- numcodecs/lz4.pyx | 11 ++++------- numcodecs/zstd.pyx | 11 ++++------- 3 files changed, 15 insertions(+), 33 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 23d778cb..8b20e9c0 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -9,13 +9,13 @@ import os from deprecated import deprecated -from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from .compat_ext cimport ensure_continguous_memoryview from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -168,10 +168,8 @@ def _cbuffer_sizes(source): size_t nbytes, cbytes, blocksize # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # determine buffer size blosc_cbuffer_sizes(source_pb.buf, &nbytes, &cbytes, &blocksize) @@ -187,10 +185,8 @@ def cbuffer_complib(source): const Py_buffer* source_pb # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # determine buffer size complib = blosc_cbuffer_complib(source_pb.buf) @@ -219,10 +215,8 @@ def _cbuffer_metainfo(source): int flags # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, 
b'A'): - raise BufferError("`source` must contain contiguous memory") # determine buffer size blosc_cbuffer_metainfo(source_pb.buf, &typesize, &flags) @@ -289,10 +283,8 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, _err_bad_cname(cname_str) # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # extract metadata source_ptr = source_pb.buf @@ -393,10 +385,8 @@ def decompress(source, dest=None): size_t nbytes, cbytes, blocksize # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # get source pointer source_ptr = source_pb.buf @@ -480,10 +470,8 @@ def _decompress_partial(source, start, nitems, dest=None): size_t dest_nbytes # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b"A"): - raise BufferError("`source` must contain contiguous memory") # setup source pointer source_ptr = source_pb.buf diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index bd8dfd09..648bb648 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -7,14 +7,15 @@ from libc.stdint cimport uint8_t, uint32_t -from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from .compat_ext cimport ensure_continguous_memoryview from ._utils cimport store_le32, load_le32 + from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -88,10 +89,8 @@ def compress(source, int 
acceleration=DEFAULT_ACCELERATION): acceleration = DEFAULT_ACCELERATION # setup source buffer - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # extract metadata source_ptr = source_pb.buf @@ -151,10 +150,8 @@ def decompress(source, dest=None): int source_size, dest_size, decompressed_size # setup source buffer - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # extract source metadata source_ptr = source_pb.buf diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 1585584c..e4a6e75a 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -5,13 +5,14 @@ # cython: language_level=3 -from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport ( PyBytes_AS_STRING, PyBytes_FromStringAndSize, ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from .compat_ext cimport ensure_continguous_memoryview + from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -112,10 +113,8 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): level = MAX_CLEVEL # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # setup source buffer source_ptr = source_pb.buf @@ -187,10 +186,8 @@ def decompress(source, dest=None): size_t nbytes, cbytes, blocksize # obtain source memoryview - source_mv = memoryview(source) + source_mv = ensure_continguous_memoryview(source) source_pb = PyMemoryView_GET_BUFFER(source_mv) - if not 
PyBuffer_IsContiguous(source_pb, b'A'): - raise BufferError("`source` must contain contiguous memory") # get source pointer source_ptr = source_pb.buf From a77e8cfdc556a549d7005a8f70e6488634d5fac1 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:28:46 -0700 Subject: [PATCH 22/30] Move `PyBytes_RESIZE` macro to `compat_ext` Provide this macro in one place and `cimport` it everywhere else. --- numcodecs/blosc.pyx | 9 +-------- numcodecs/compat_ext.pxd | 7 +++++++ numcodecs/lz4.pyx | 9 +-------- numcodecs/zstd.pyx | 9 +-------- 4 files changed, 10 insertions(+), 24 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 8b20e9c0..9fb4af19 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -15,19 +15,12 @@ from cpython.bytes cimport ( ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from .compat_ext cimport ensure_continguous_memoryview +from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview from .compat import ensure_contiguous_ndarray from .abc import Codec -cdef extern from *: - """ - #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) - """ - int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 - - cdef extern from "blosc.h": cdef enum: BLOSC_MAX_OVERHEAD, diff --git a/numcodecs/compat_ext.pxd b/numcodecs/compat_ext.pxd index f025eb95..436c23fb 100644 --- a/numcodecs/compat_ext.pxd +++ b/numcodecs/compat_ext.pxd @@ -1,4 +1,11 @@ # cython: language_level=3 +cdef extern from *: + """ + #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) + """ + int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 + + cpdef memoryview ensure_continguous_memoryview(obj) diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 648bb648..6b20de8a 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -13,20 +13,13 @@ from cpython.bytes cimport ( ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from .compat_ext cimport ensure_continguous_memoryview +from .compat_ext cimport PyBytes_RESIZE, 
ensure_continguous_memoryview from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray from .abc import Codec -cdef extern from *: - """ - #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) - """ - int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 - - cdef extern from "lz4.h": const char* LZ4_versionString() nogil diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index e4a6e75a..06f60837 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -11,19 +11,12 @@ from cpython.bytes cimport ( ) from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from .compat_ext cimport ensure_continguous_memoryview +from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview from .compat import ensure_contiguous_ndarray from .abc import Codec -cdef extern from *: - """ - #define PyBytes_RESIZE(b, n) _PyBytes_Resize(&b, n) - """ - int PyBytes_RESIZE(object b, Py_ssize_t n) except -1 - - cdef extern from "zstd.h": unsigned ZSTD_versionNumber() nogil From fb5ffac37e1a1abefda0bd30b63388a8ac1d313d Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:38:49 -0700 Subject: [PATCH 23/30] Minimize diff by readding blank line after `try` --- numcodecs/lz4.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 6b20de8a..6562ef6f 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -90,6 +90,7 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): source_size = source_pb.len try: + # setup destination dest_size = LZ4_compressBound(source_size) dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) From 9b289c6db1d4d6b15e79c9cbea053389ab91a05e Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:42:57 -0700 Subject: [PATCH 24/30] Group `encv` with `value` args --- numcodecs/vlen.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index 1dc987d6..8ba84be1 100644 --- a/numcodecs/vlen.pyx 
+++ b/numcodecs/vlen.pyx @@ -413,8 +413,8 @@ class VLenArray(Codec): value_mv = memoryview(normed_values[i]) value_pb = PyMemoryView_GET_BUFFER(value_mv) - encv = value_pb.buf + memcpy(data, encv, l) data += l From 7d593b8eb1a22ed188c37a6049c7293843fa21ea Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:46:09 -0700 Subject: [PATCH 25/30] Organize `vlen`'s `imports` * Global `cimport`'s first * Start with core modules `cython`, `libc`, etc. * Add extensions `cpython` & `numpy` * Internal extensions * Then `import`s similarly grouped --- numcodecs/vlen.pyx | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index 8ba84be1..e6f687c4 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -5,12 +5,11 @@ # cython: language_level=3 -import cython cimport cython -from numpy cimport ndarray -import numpy as np -from .abc import Codec -from .compat import ensure_contiguous_ndarray + +from libc.stdint cimport uint8_t +from libc.string cimport memcpy + from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytearray cimport ( PyByteArray_AS_STRING, @@ -28,10 +27,16 @@ from cpython.unicode cimport ( PyUnicode_Check, PyUnicode_FromStringAndSize, ) -from libc.stdint cimport uint8_t -from libc.string cimport memcpy + +from numpy cimport ndarray + from ._utils cimport store_le32, load_le32 +import numpy as np + +from .abc import Codec +from .compat import ensure_contiguous_ndarray + # 4 bytes to store number of items cdef Py_ssize_t HEADER_LENGTH = 4 From 7d68d5375f4fc3a6ca5baf3c9b6920616f417d3f Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 03:48:46 -0700 Subject: [PATCH 26/30] Use `ensure_contiguous_memoryview` with VLen --- numcodecs/vlen.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index e6f687c4..f564a66e 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -30,6 +30,7 @@ from 
cpython.unicode cimport ( from numpy cimport ndarray +from .compat_ext cimport ensure_continguous_memoryview from ._utils cimport store_le32, load_le32 import numpy as np @@ -416,7 +417,7 @@ class VLenArray(Codec): store_le32(data, l) data += 4 - value_mv = memoryview(normed_values[i]) + value_mv = ensure_continguous_memoryview(normed_values[i]) value_pb = PyMemoryView_GET_BUFFER(value_mv) encv = value_pb.buf From b6d91ce460e644087008dbb223881a522dd6a6e9 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 04:27:14 -0700 Subject: [PATCH 27/30] Space out input arg handling & checksum check --- numcodecs/fletcher32.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index ed561d42..49f96731 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -84,6 +84,7 @@ class Fletcher32(Codec): """Check fletcher checksum, and return buffer without it""" b = ensure_contiguous_ndarray(buf).view('uint8') cdef const uint8_t[::1] b_mv = b + val = _fletcher32(b_mv[:-4]) found = load_le32(&b_mv[-4]) if val != found: From 2dd1cdfbcc48bb3a79d969b34300ccdae02966e3 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 04:38:03 -0700 Subject: [PATCH 28/30] Use `memcpy` to speedup copies in `fletcher32` --- numcodecs/fletcher32.pyx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 49f96731..2af26264 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -4,6 +4,7 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t +from libc.string cimport memcpy from cpython.bytes cimport PyBytes_FromStringAndSize @@ -68,15 +69,15 @@ class Fletcher32(Codec): """Return buffer plus 4-byte fletcher checksum""" buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') cdef const uint8_t[::1] b_mv = buf + cdef uint8_t* b_ptr = &b_mv[0] cdef Py_ssize_t b_len = len(b_mv) cdef Py_ssize_t out_len = b_len + 4 cdef 
bytes out = PyBytes_FromStringAndSize(NULL, out_len) cdef uint8_t* out_ptr = out - cdef uint8_t[::1] out_mv = (out_ptr)[:out_len] - out_mv[:-4] = b_mv - store_le32(&out_mv[-4], _fletcher32(b_mv)) + memcpy(out_ptr, b_ptr, b_len) + store_le32(out_ptr + b_len, _fletcher32(b_mv)) return out @@ -84,6 +85,8 @@ class Fletcher32(Codec): """Check fletcher checksum, and return buffer without it""" b = ensure_contiguous_ndarray(buf).view('uint8') cdef const uint8_t[::1] b_mv = b + cdef uint8_t* b_ptr = &b_mv[0] + cdef Py_ssize_t b_len = len(b_mv) val = _fletcher32(b_mv[:-4]) found = load_le32(&b_mv[-4]) @@ -95,8 +98,10 @@ class Fletcher32(Codec): ) cdef uint8_t[::1] out_mv + cdef uint8_t* out_ptr if out is not None: out_mv = ensure_contiguous_ndarray(out).view("uint8") - out_mv[:] = b_mv[:-4] + out_ptr = &out_mv[0] + memcpy(out_ptr, b_ptr, b_len - 4) return out return memoryview(b[:-4]) From b7bb7ef4a84d5dbc3b380602ad569e8729da3f73 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 04:19:38 -0700 Subject: [PATCH 29/30] In fletcher32's if output buffer, slice from input --- numcodecs/fletcher32.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 2af26264..ed549331 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -103,5 +103,6 @@ class Fletcher32(Codec): out_mv = ensure_contiguous_ndarray(out).view("uint8") out_ptr = &out_mv[0] memcpy(out_ptr, b_ptr, b_len - 4) - return out - return memoryview(b[:-4]) + else: + out = b_mv[:-4] + return out From c83136ab7a46138253b3aadc017b6d66aac07666 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 27 Mar 2025 04:44:23 -0700 Subject: [PATCH 30/30] Unwrap lines no longer needing wrapping --- numcodecs/blosc.pyx | 5 +---- numcodecs/lz4.pyx | 5 +---- numcodecs/zstd.pyx | 5 +---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 9fb4af19..17e6b4d8 100644 --- 
a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -9,10 +9,7 @@ import os from deprecated import deprecated -from cpython.bytes cimport ( - PyBytes_AS_STRING, - PyBytes_FromStringAndSize, -) +from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize from cpython.memoryview cimport PyMemoryView_GET_BUFFER from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 6562ef6f..40c37bdd 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -7,10 +7,7 @@ from libc.stdint cimport uint8_t, uint32_t -from cpython.bytes cimport ( - PyBytes_AS_STRING, - PyBytes_FromStringAndSize, -) +from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize from cpython.memoryview cimport PyMemoryView_GET_BUFFER from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index 06f60837..82f2844a 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -5,10 +5,7 @@ # cython: language_level=3 -from cpython.bytes cimport ( - PyBytes_AS_STRING, - PyBytes_FromStringAndSize, -) +from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize from cpython.memoryview cimport PyMemoryView_GET_BUFFER from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview