diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd index 0bbb1d5f57..7711546f34 100644 --- a/cassandra/buffer.pxd +++ b/cassandra/buffer.pxd @@ -41,18 +41,13 @@ cdef inline char *buf_read(Buffer *buf, Py_ssize_t size) except NULL: raise IndexError("Requested more than length of buffer") return buf.ptr -cdef inline int slice_buffer(Buffer *buf, Buffer *out, - Py_ssize_t start, Py_ssize_t size) except -1: - if size < 0: - raise ValueError("Length must be positive") +cdef inline void from_ptr_and_size(char *ptr, Py_ssize_t size, Buffer *buf): + """Initialize buf from ptr and size. - if start + size > buf.size: - raise IndexError("Buffer slice out of bounds") + Negative sizes are valid sentinel values: -1 means NULL, -2 means not-set. + Callers should check buf.size < 0 to detect these cases. + """ + buf.ptr = ptr + buf.size = size - out.ptr = buf.ptr + start - out.size = size - return 0 -cdef inline void from_ptr_and_size(char *ptr, Py_ssize_t size, Buffer *out): - out.ptr = ptr - out.size = size diff --git a/cassandra/cython_marshal.pyx b/cassandra/cython_marshal.pyx index 0a926b6eef..ac07b6378f 100644 --- a/cassandra/cython_marshal.pyx +++ b/cassandra/cython_marshal.pyx @@ -19,6 +19,19 @@ from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, from libc.string cimport memcpy from cassandra.buffer cimport Buffer, buf_read, to_bytes +# Use ntohs/ntohl for efficient big-endian to native conversion (single bswap instruction on x86) +# Platform-specific header: arpa/inet.h on POSIX, winsock2.h on Windows +cdef extern from *: + """ + #ifdef _WIN32 + #include + #else + #include + #endif + """ + uint16_t ntohs(uint16_t netshort) nogil + uint32_t ntohl(uint32_t netlong) nogil + cdef bint is_little_endian from cassandra.util import is_little_endian @@ -36,35 +49,40 @@ ctypedef fused num_t: cdef inline num_t unpack_num(Buffer *buf, num_t *dummy=NULL): # dummy pointer because cython wants the fused type as an arg """ - Copy to aligned destination, conditionally swapping to native byte order + Copy to aligned destination, conditionally swapping to native byte order. + Uses ntohs/ntohl for 16/32-bit types (compiles to single bswap instruction). """ - cdef Py_ssize_t start, end, i + cdef Py_ssize_t i cdef char *src = buf_read(buf, sizeof(num_t)) - cdef num_t ret = 0 + cdef num_t ret cdef char *out = &ret + cdef uint32_t temp32 # For float byte-swapping + + # Copy to aligned location first + memcpy(&ret, src, sizeof(num_t)) + + if not is_little_endian: + return ret - if is_little_endian: + # Use optimized byte-swap intrinsics for 16-bit and 32-bit types + if num_t is int16_t or num_t is uint16_t: + return ntohs(ret) + elif num_t is int32_t or num_t is uint32_t: + return ntohl(ret) + elif num_t is float: + # For float, reinterpret bits as uint32, swap, then reinterpret back + temp32 = (&ret)[0] + temp32 = ntohl(temp32) + return (&temp32)[0] + else: + # 64-bit, double, or 8-bit: use byte-swap loop (8-bit loop is no-op) for i in range(sizeof(num_t)): out[sizeof(num_t) - i - 1] = src[i] - else: - memcpy(out, src, sizeof(num_t)) - - return ret + return ret cdef varint_unpack(Buffer *term): """Unpack a variable-sized integer""" return varint_unpack_py3(to_bytes(term)) -# TODO: Optimize these two functions cdef varint_unpack_py3(bytes term): - val = int(''.join(["%02x" % i for i in term]), 16) - if (term[0] & 128) != 0: - shift = len(term) * 8 # * Note below - val -= 1 << shift - return val - -# * Note * -# '1 << (len(term) * 8)' Cython tries to do native -# integer shifts, which overflows. We need this to -# emulate Python shifting, which will expand the long -# to accommodate + return int.from_bytes(term, byteorder='big', signed=True) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 98e8676bbc..8db1612339 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -13,10 +13,11 @@ # limitations under the License. -from libc.stdint cimport int32_t, uint16_t +from libc.stdint cimport int32_t, int64_t, int16_t, uint16_t, uint32_t +from libc.string cimport memcpy include 'cython_marshal.pyx' -from cassandra.buffer cimport Buffer, to_bytes, slice_buffer +from cassandra.buffer cimport Buffer, to_bytes, from_ptr_and_size from cassandra.cython_utils cimport datetime_from_timestamp, datetime_from_ms_timestamp from cython.view cimport array as cython_array @@ -29,6 +30,11 @@ from uuid import UUID from cassandra import cqltypes from cassandra import util +# Import numpy availability flag and conditionally import numpy +from cassandra.cython_deps import HAVE_NUMPY +if HAVE_NUMPY: + import numpy as np + cdef class Deserializer: """Cython-based deserializer class for a cqltype""" @@ -58,10 +64,11 @@ cdef class DesBytesTypeByteArray(Deserializer): # TODO: Use libmpdec: http://www.bytereef.org/mpdecimal/index.html cdef class DesDecimalType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - cdef Buffer varint_buf - slice_buffer(buf, &varint_buf, 4, buf.size - 4) - cdef int32_t scale = unpack_num[int32_t](buf) + + # Create a view of the remaining bytes (after the 4-byte scale) + cdef Buffer varint_buf + from_ptr_and_size(buf.ptr + 4, buf.size - 4, &varint_buf) unscaled = varint_unpack(&varint_buf) return Decimal('%de%d' % (unscaled, -scale)) @@ -181,8 +188,240 @@ cdef class DesVarcharType(DesUTF8Type): pass +#-------------------------------------------------------------------------- +# Vector deserialization + +cdef inline bint _is_float_type(object subtype): + return subtype is cqltypes.FloatType or issubclass(subtype, cqltypes.FloatType) + +cdef inline bint _is_double_type(object subtype): + return subtype is cqltypes.DoubleType or issubclass(subtype, cqltypes.DoubleType) + +cdef inline bint _is_int32_type(object subtype): + return subtype is cqltypes.Int32Type or issubclass(subtype, cqltypes.Int32Type) + +cdef inline bint _is_int64_type(object subtype): + return subtype is cqltypes.LongType or issubclass(subtype, cqltypes.LongType) + +cdef inline bint _is_int16_type(object subtype): + return subtype is cqltypes.ShortType or issubclass(subtype, cqltypes.ShortType) + +cdef inline list _deserialize_numpy_vector(Buffer *buf, int vector_size, str dtype): + """Unified numpy deserialization for large vectors""" + return np.frombuffer(buf.ptr[:buf.size], dtype=dtype, count=vector_size).tolist() + +cdef class DesVectorType(Deserializer): + """ + Optimized Cython deserializer for VectorType. + + For float and double vectors, uses direct memory access with C-level casting + for significantly better performance than Python-level deserialization. + """ + + cdef int vector_size + cdef object subtype + + def __init__(self, cqltype): + super().__init__(cqltype) + self.vector_size = cqltype.vector_size + self.subtype = cqltype.subtype + + def deserialize_bytes(self, bytes data, int protocol_version): + """Python-callable wrapper for deserialize that takes bytes.""" + cdef Buffer buf + buf.ptr = data + buf.size = len(data) + return self.deserialize(&buf, protocol_version) + + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef int expected_size + cdef int elem_size + cdef bint use_numpy = HAVE_NUMPY and self.vector_size >= 32 + + # Determine element type, size, and dispatch appropriately + if _is_float_type(self.subtype): + elem_size = 4 + expected_size = self.vector_size * elem_size + if buf.size == expected_size: + if use_numpy: + return _deserialize_numpy_vector(buf, self.vector_size, '>f4') + return self._deserialize_float(buf) + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + elif _is_double_type(self.subtype): + elem_size = 8 + expected_size = self.vector_size * elem_size + if buf.size == expected_size: + if use_numpy: + return _deserialize_numpy_vector(buf, self.vector_size, '>f8') + return self._deserialize_double(buf) + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + elif _is_int32_type(self.subtype): + elem_size = 4 + expected_size = self.vector_size * elem_size + if buf.size == expected_size: + if use_numpy: + return _deserialize_numpy_vector(buf, self.vector_size, '>i4') + return self._deserialize_int32(buf) + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + elif _is_int64_type(self.subtype): + elem_size = 8 + expected_size = self.vector_size * elem_size + if buf.size == expected_size: + if use_numpy: + return _deserialize_numpy_vector(buf, self.vector_size, '>i8') + return self._deserialize_int64(buf) + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + elif _is_int16_type(self.subtype): + elem_size = 2 + expected_size = self.vector_size * elem_size + if buf.size == expected_size: + if use_numpy: + return _deserialize_numpy_vector(buf, self.vector_size, '>i2') + return self._deserialize_int16(buf) + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + else: + # Unsupported type, use generic deserialization + return self._deserialize_generic(buf, protocol_version) + + cdef inline list _deserialize_float(self, Buffer *buf): + """Deserialize float vector using direct C-level access with byte swapping""" + cdef Py_ssize_t i + cdef list result + cdef float temp + cdef uint32_t temp32 + + result = [None] * self.vector_size + for i in range(self.vector_size): + # Copy to aligned local, then convert from big-endian + memcpy(&temp32, buf.ptr + i * 4, 4) + temp32 = ntohl(temp32) + temp = (&temp32)[0] + result[i] = temp + + return result + + cdef inline list _deserialize_double(self, Buffer *buf): + """Deserialize double vector using direct C-level access with byte swapping""" + cdef Py_ssize_t i + cdef list result + cdef double temp + cdef char *src_bytes + cdef char *out_bytes + cdef int j + + result = [None] * self.vector_size + for i in range(self.vector_size): + src_bytes = buf.ptr + i * 8 + out_bytes = &temp + + # Swap bytes for big-endian to native conversion + if is_little_endian: + for j in range(8): + out_bytes[7 - j] = src_bytes[j] + else: + memcpy(&temp, src_bytes, 8) + + result[i] = temp + + return result + + cdef inline list _deserialize_int32(self, Buffer *buf): + """Deserialize int32 vector using direct C-level access with ntohl""" + cdef Py_ssize_t i + cdef list result + cdef int32_t temp + cdef uint32_t temp32 + + result = [None] * self.vector_size + for i in range(self.vector_size): + # Copy to aligned local, then convert from big-endian + memcpy(&temp32, buf.ptr + i * 4, 4) + temp = ntohl(temp32) + result[i] = temp + + return result + + cdef inline list _deserialize_int64(self, Buffer *buf): + """Deserialize int64/long vector using direct C-level access with byte swapping""" + cdef Py_ssize_t i + cdef list result + cdef int64_t temp + cdef char *src_bytes + cdef char *out_bytes + cdef int j + + result = [None] * self.vector_size + for i in range(self.vector_size): + src_bytes = buf.ptr + i * 8 + out_bytes = &temp + + # Swap bytes for big-endian to native conversion + if is_little_endian: + for j in range(8): + out_bytes[7 - j] = src_bytes[j] + else: + memcpy(&temp, src_bytes, 8) + + result[i] = temp + + return result + + cdef inline list _deserialize_int16(self, Buffer *buf): + """Deserialize int16/short vector using direct C-level access with ntohs""" + cdef Py_ssize_t i + cdef list result + cdef int16_t temp + + result = [None] * self.vector_size + for i in range(self.vector_size): + temp = ntohs(((buf.ptr + i * 2))[0]) + result[i] = temp + + return result + + cdef inline list _deserialize_generic(self, Buffer *buf, int protocol_version): + """Fallback: element-by-element deserialization for non-optimized types""" + cdef Py_ssize_t i + cdef Buffer elem_buf + cdef int offset = 0 + cdef list result = [None] * self.vector_size + + _serialized_size = self.subtype.serial_size() + if _serialized_size is None: + raise ValueError( + f"VectorType with variable-size subtype {self.subtype.typename} " + "is not supported in Cython deserializer") + cdef int serialized_size = _serialized_size + + # Validate total size before processing + cdef int expected_size = self.vector_size * serialized_size + if buf.size != expected_size: + raise ValueError( + f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} " + f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead") + + for i in range(self.vector_size): + from_ptr_and_size(buf.ptr + offset, serialized_size, &elem_buf) + result[i] = self.subtype.deserialize(to_bytes(&elem_buf), protocol_version) + offset += serialized_size + + return result + + + cdef class _DesParameterizedType(Deserializer): + cdef object subtypes cdef Deserializer[::1] deserializers cdef Py_ssize_t subtypes_len @@ -247,22 +486,40 @@ cdef inline int subelem( Read the next element from the buffer: first read the size (in bytes) of the element, then fill elem_buf with a newly sliced buffer of this size (and the right offset). + + Protocol: n >= 0: n bytes follow + n == -1: NULL value + n == -2: not set value + n < -2: invalid """ cdef int32_t elemlen _unpack_len(buf, offset[0], &elemlen) offset[0] += sizeof(int32_t) - slice_buffer(buf, elem_buf, offset[0], elemlen) - offset[0] += elemlen - return 0 - -cdef int _unpack_len(Buffer *buf, int offset, int32_t *output) except -1: - cdef Buffer itemlen_buf - slice_buffer(buf, &itemlen_buf, offset, sizeof(int32_t)) + # Happy path: non-negative length element that fits in buffer + if elemlen >= 0: + if offset[0] + elemlen <= buf.size: + from_ptr_and_size(buf.ptr + offset[0], elemlen, elem_buf) + offset[0] += elemlen + return 0 + raise IndexError("Element length %d at offset %d exceeds buffer size %d" % (elemlen, offset[0], buf.size)) + # NULL value (-1) or not set value (-2) + elif elemlen == -1 or elemlen == -2: + from_ptr_and_size(NULL, elemlen, elem_buf) + return 0 + # Invalid value (n < -2) + else: + raise ValueError("Invalid element length %d at offset %d" % (elemlen, offset[0])) - output[0] = unpack_num[int32_t](&itemlen_buf) +cdef inline int _unpack_len(Buffer *buf, int offset, int32_t *output) except -1: + """Read a big-endian int32 at the given offset using memcpy for alignment safety.""" + if offset + sizeof(int32_t) > buf.size: + raise IndexError("Cannot read length field: offset %d + 4 exceeds buffer size %d" % (offset, buf.size)) + cdef uint32_t temp + memcpy(&temp, buf.ptr + offset, sizeof(uint32_t)) + output[0] = ntohl(temp) return 0 #-------------------------------------------------------------------------- @@ -320,9 +577,9 @@ cdef class DesTupleType(_DesParameterizedType): cdef deserialize(self, Buffer *buf, int protocol_version): cdef Py_ssize_t i, p cdef int32_t itemlen + cdef uint32_t _tuple_tmp cdef tuple res = tuple_new(self.subtypes_len) cdef Buffer item_buf - cdef Buffer itemlen_buf cdef Deserializer deserializer # collections inside UDTs are always encoded with at least the @@ -330,19 +587,27 @@ cdef class DesTupleType(_DesParameterizedType): protocol_version = max(3, protocol_version) p = 0 - values = [] for i in range(self.subtypes_len): item = None - if p < buf.size: - slice_buffer(buf, &itemlen_buf, p, 4) - itemlen = unpack_num[int32_t](&itemlen_buf) + if p + 4 <= buf.size: + # Read itemlen using memcpy for alignment safety + memcpy(&_tuple_tmp, buf.ptr + p, 4) + itemlen = ntohl(_tuple_tmp) p += 4 - if itemlen >= 0: - slice_buffer(buf, &item_buf, p, itemlen) + + if itemlen >= 0 and p + itemlen <= buf.size: + from_ptr_and_size(buf.ptr + p, itemlen, &item_buf) p += itemlen deserializer = self.deserializers[i] item = from_binary(deserializer, &item_buf, protocol_version) + elif itemlen < 0: + # NULL value, item stays None + pass + else: + raise IndexError("Tuple item length %d at offset %d exceeds buffer size %d" % (itemlen, p, buf.size)) + elif p < buf.size: + raise IndexError("Cannot read tuple item length at offset %d: only %d bytes remain" % (p, buf.size - p)) tuple_set(res, i, item) @@ -384,15 +649,23 @@ cdef class DesCompositeType(_DesParameterizedType): break element_length = unpack_num[uint16_t](buf) - slice_buffer(buf, &elem_buf, 2, element_length) - deserializer = self.deserializers[i] - item = from_binary(deserializer, &elem_buf, protocol_version) - tuple_set(res, i, item) + # Validate that we have enough data for the element and EOC byte (happy path check) + if 2 + element_length + 1 <= buf.size: + from_ptr_and_size(buf.ptr + 2, element_length, &elem_buf) + + deserializer = self.deserializers[i] + item = from_binary(deserializer, &elem_buf, protocol_version) + tuple_set(res, i, item) - # skip element length, element, and the EOC (one byte) - start = 2 + element_length + 1 - slice_buffer(buf, buf, start, buf.size - start) + # skip element length, element, and the EOC (one byte) + # Advance buffer in-place with direct assignment + start = 2 + element_length + 1 + buf.ptr = buf.ptr + start + buf.size = buf.size - start + else: + raise IndexError("Composite element length %d requires %d bytes but only %d remain" % + (element_length, 2 + element_length + 1, buf.size)) return res @@ -474,6 +747,8 @@ cpdef Deserializer find_deserializer(cqltype): cls = DesReversedType elif issubclass(cqltype, cqltypes.FrozenType): cls = DesFrozenType + elif issubclass(cqltype, cqltypes.VectorType): + cls = DesVectorType else: cls = GenericDeserializer diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index b0ab4f16cb..f1e489c7cf 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -15,7 +15,8 @@ include 'cython_marshal.pyx' from cassandra.buffer cimport Buffer, from_ptr_and_size -from libc.stdint cimport int32_t +from libc.stdint cimport int32_t, uint32_t +from libc.string cimport memcpy from cassandra.bytesio cimport BytesIOReader @@ -41,7 +42,8 @@ cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1: return 0 cdef inline int32_t read_int(BytesIOReader reader) except ?0xDEAD: - cdef Buffer buf - buf.ptr = reader.read(4) - buf.size = 4 - return unpack_num[int32_t](&buf) + """Read a big-endian int32 directly from the reader using memcpy for alignment safety.""" + cdef char *src = reader.read(4) + cdef uint32_t temp + memcpy(&temp, src, 4) + return ntohl(temp) diff --git a/cassandra/marshal.py b/cassandra/marshal.py index 413e1831d4..a7238ea4b7 100644 --- a/cassandra/marshal.py +++ b/cassandra/marshal.py @@ -40,11 +40,7 @@ def _make_packer(format_string): def varint_unpack(term): - val = int(''.join("%02x" % i for i in term), 16) - if (term[0] & 128) != 0: - len_term = len(term) # pulling this out of the expression to avoid overflow in cython optimized code - val -= 1 << (len_term * 8) - return val + return int.from_bytes(term, byteorder='big', signed=True) def bit_length(n): diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index 11aab2748d..d6af90e227 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -525,6 +525,75 @@ def test_deserialization_variable_size_too_big(self): with pytest.raises(ValueError, match="Additional bytes remaining after vector deserialization completed"): ctype_four.deserialize(ctype_five_bytes, 0) + def test_vector_cython_deserializer(self): + """ + Test that VectorType uses the Cython DesVectorType deserializer + and correctly deserializes vectors of supported numeric types. + + @since 3.x + @expected_result Cython deserializer exists and correctly deserializes vector data + + @test_category data_types:vector + """ + import struct + try: + from cassandra.deserializers import find_deserializer + except ImportError: + self.skipTest("Cython deserializers not available") + + # Test float vector + vt_float = VectorType.apply_parameters(['FloatType', 4], {}) + des_float = find_deserializer(vt_float) + self.assertEqual(des_float.__class__.__name__, 'DesVectorType') + + data_float = struct.pack('>4f', 1.0, 2.0, 3.0, 4.0) + result_float = vt_float.deserialize(data_float, 5) + self.assertEqual(result_float, [1.0, 2.0, 3.0, 4.0]) + + # Test double vector + from cassandra.cqltypes import DoubleType + vt_double = VectorType.apply_parameters(['DoubleType', 3], {}) + des_double = find_deserializer(vt_double) + self.assertEqual(des_double.__class__.__name__, 'DesVectorType') + + data_double = struct.pack('>3d', 1.5, 2.5, 3.5) + result_double = vt_double.deserialize(data_double, 5) + self.assertEqual(result_double, [1.5, 2.5, 3.5]) + + # Test int32 vector + vt_int32 = VectorType.apply_parameters(['Int32Type', 4], {}) + des_int32 = find_deserializer(vt_int32) + self.assertEqual(des_int32.__class__.__name__, 'DesVectorType') + + data_int32 = struct.pack('>4i', 1, 2, 3, 4) + result_int32 = vt_int32.deserialize(data_int32, 5) + self.assertEqual(result_int32, [1, 2, 3, 4]) + + # Test int64/long vector + vt_int64 = VectorType.apply_parameters(['LongType', 2], {}) + des_int64 = find_deserializer(vt_int64) + self.assertEqual(des_int64.__class__.__name__, 'DesVectorType') + + data_int64 = struct.pack('>2q', 100, 200) + result_int64 = vt_int64.deserialize(data_int64, 5) + self.assertEqual(result_int64, [100, 200]) + + # Test int16/short vector + from cassandra.cqltypes import ShortType + vt_int16 = VectorType.apply_parameters(['ShortType', 3], {}) + des_int16 = find_deserializer(vt_int16) + self.assertEqual(des_int16.__class__.__name__, 'DesVectorType') + + data_int16 = struct.pack('>3h', 10, 20, 30) + result_int16 = des_int16.deserialize_bytes(data_int16, 5) + self.assertEqual(result_int16, [10, 20, 30]) + + # Test error handling: wrong buffer size + with self.assertRaises(ValueError) as cm: + vt_float.deserialize(struct.pack('>3f', 1.0, 2.0, 3.0), 5) # 3 floats instead of 4 + self.assertIn('Expected vector', str(cm.exception)) + self.assertIn('serialized size', str(cm.exception)) + ZERO = datetime.timedelta(0)