diff --git a/benchmarks/vector_deserialize.py b/benchmarks/vector_deserialize.py
new file mode 100644
index 0000000000..89fb5d7e66
--- /dev/null
+++ b/benchmarks/vector_deserialize.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python
+# Copyright ScyllaDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Benchmark for VectorType deserialization performance.
+
+Tests different optimization strategies:
+1. Current implementation (Python with struct.unpack/numpy)
+2. Python struct.unpack only
+3. Numpy frombuffer + tolist()
+4. Cython DesVectorType deserializer
+
+Every benchmark_* function returns (elapsed_seconds, per_op_microseconds,
+last_result), or (None, None, None) when the strategy cannot run.
+
+Run with: python benchmarks/vector_deserialize.py
+"""
+
+import os
+import sys
+import time
+import struct
+
+# Add project root to path so the benchmark can be run from any directory
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
+
+from cassandra.cqltypes import FloatType, DoubleType, Int32Type, LongType, ShortType
+from cassandra.marshal import (
+    float_pack,
+    double_pack,
+    int32_pack,
+    int64_pack,
+    int16_pack,
+)
+
+
+def create_test_data(vector_size, element_type):
+    """Create serialized test data for a vector."""
+    if element_type == FloatType:
+        values = [float(i * 0.1) for i in range(vector_size)]
+        pack_fn = float_pack
+    elif element_type == DoubleType:
+        values = [float(i * 0.1) for i in range(vector_size)]
+        pack_fn = double_pack
+    elif element_type == Int32Type:
+        values = list(range(vector_size))
+        pack_fn = int32_pack
+    elif element_type == LongType:
+        values = list(range(vector_size))
+        pack_fn = int64_pack
+    elif element_type == ShortType:
+        values = [i % 32767 for i in range(vector_size)]
+        pack_fn = int16_pack
+    else:
+        raise ValueError(f"Unsupported element type: {element_type}")
+
+    # Serialize the vector
+    serialized = b"".join(pack_fn(v) for v in values)
+
+    return serialized, values
+
+
+def benchmark_current_implementation(vector_type, serialized_data, iterations=10000):
+    """Benchmark the current VectorType.deserialize implementation."""
+    protocol_version = 4
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = vector_type.deserialize(serialized_data, protocol_version)
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_struct_optimization(vector_type, serialized_data, iterations=10000):
+    """Benchmark struct.unpack optimization."""
+    vector_size = vector_type.vector_size
+    subtype = vector_type.subtype
+
+    # Determine format string - subtype is a class, use identity or issubclass
+    if subtype is FloatType or (
+        isinstance(subtype, type) and issubclass(subtype, FloatType)
+    ):
+        format_str = f">{vector_size}f"
+    elif subtype is DoubleType or (
+        isinstance(subtype, type) and issubclass(subtype, DoubleType)
+    ):
+        format_str = f">{vector_size}d"
+    elif subtype is Int32Type or (
+        isinstance(subtype, type) and issubclass(subtype, Int32Type)
+    ):
+        format_str = f">{vector_size}i"
+    elif subtype is LongType or (
+        isinstance(subtype, type) and issubclass(subtype, LongType)
+    ):
+        format_str = f">{vector_size}q"
+    elif subtype is ShortType or (
+        isinstance(subtype, type) and issubclass(subtype, ShortType)
+    ):
+        format_str = f">{vector_size}h"
+    else:
+        return None, None, None
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = list(struct.unpack(format_str, serialized_data))
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_numpy_optimization(vector_type, serialized_data, iterations=10000):
+    """Benchmark numpy.frombuffer optimization."""
+    try:
+        import numpy as np
+    except ImportError:
+        return None, None, None
+
+    vector_size = vector_type.vector_size
+    subtype = vector_type.subtype
+
+    # Determine dtype
+    if subtype is FloatType or (
+        isinstance(subtype, type) and issubclass(subtype, FloatType)
+    ):
+        dtype = ">f4"
+    elif subtype is DoubleType or (
+        isinstance(subtype, type) and issubclass(subtype, DoubleType)
+    ):
+        dtype = ">f8"
+    elif subtype is Int32Type or (
+        isinstance(subtype, type) and issubclass(subtype, Int32Type)
+    ):
+        dtype = ">i4"
+    elif subtype is LongType or (
+        isinstance(subtype, type) and issubclass(subtype, LongType)
+    ):
+        dtype = ">i8"
+    elif subtype is ShortType or (
+        isinstance(subtype, type) and issubclass(subtype, ShortType)
+    ):
+        dtype = ">i2"
+    else:
+        return None, None, None
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        arr = np.frombuffer(serialized_data, dtype=dtype, count=vector_size)
+        result = arr.tolist()
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_cython_deserializer(vector_type, serialized_data, iterations=10000):
+    """Benchmark Cython DesVectorType deserializer.
+
+    This benchmark requires the Cython deserializers extension to be compiled.
+    When the extension is not available, or the type does not have a dedicated
+    DesVectorType deserializer, the benchmark is silently skipped (returns None).
+    """
+    try:
+        from cassandra.deserializers import find_deserializer
+    except ImportError:
+        return None, None, None
+
+    protocol_version = 4
+
+    # Get the Cython deserializer
+    deserializer = find_deserializer(vector_type)
+
+    # Check if we got the Cython deserializer
+    if deserializer.__class__.__name__ != "DesVectorType":
+        return None, None, None
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = deserializer.deserialize_bytes(serialized_data, protocol_version)
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def verify_results(expected, *results):
+    """Verify that all results match expected values."""
+    for i, result in enumerate(results):
+        if result is None:
+            continue
+        if len(result) != len(expected):
+            print(f" ❌ Result {i} length mismatch: {len(result)} vs {len(expected)}")
+            return False
+        for j, (a, b) in enumerate(zip(result, expected)):
+            # Use relative tolerance for floating point comparison
+            if isinstance(a, float) and isinstance(b, float):
+                # Allow 0.01% relative error for floats
+                if abs(a - b) > max(abs(a), abs(b)) * 1e-4 + 1e-7:
+                    print(f" ❌ Result {i} value mismatch at index {j}: {a} vs {b}")
+                    return False
+            elif abs(a - b) > 1e-9:
+                print(f" ❌ Result {i} value mismatch at index {j}: {a} vs {b}")
+                return False
+    return True
+
+
+def run_benchmark_suite(vector_size, element_type, type_name, iterations=10000):
+    """Run complete benchmark suite for a given vector configuration."""
+    print(f"\n{'=' * 80}")
+    print(f"Benchmark: Vector<{type_name}, {vector_size}>")
+    print(f"{'=' * 80}")
+    print(f"Iterations: {iterations:,}")
+
+    # Create test data
+    from cassandra.cqltypes import lookup_casstype
+
+    cass_typename = f"org.apache.cassandra.db.marshal.{element_type.__name__}"
+    vector_typename = (
+        f"org.apache.cassandra.db.marshal.VectorType({cass_typename}, {vector_size})"
+    )
+    vector_type = lookup_casstype(vector_typename)
+
+    serialized_data, expected_values = create_test_data(vector_size, element_type)
+    data_size = len(serialized_data)
+
+    print(f"Serialized size: {data_size:,} bytes")
+    print()
+
+    # Run benchmarks
+    results = []
+
+    # 1. Current implementation (baseline)
+    print("1. Current implementation (baseline)...")
+    elapsed, per_op, result_current = benchmark_current_implementation(
+        vector_type, serialized_data, iterations
+    )
+    results.append(result_current)
+    print(f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} μs")
+    baseline_time = per_op
+
+    # 2. Struct optimization
+    print("2. Python struct.unpack optimization...")
+    elapsed, per_op, result_struct = benchmark_struct_optimization(
+        vector_type, serialized_data, iterations
+    )
+    results.append(result_struct)
+    if per_op is not None:
+        speedup = baseline_time / per_op
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} μs, Speedup: {speedup:.2f}x"
+        )
+    else:
+        print(" Not applicable for this type")
+
+    # 3. Numpy with tolist()
+    print("3. Numpy frombuffer + tolist()...")
+    elapsed, per_op, result_numpy = benchmark_numpy_optimization(
+        vector_type, serialized_data, iterations
+    )
+    results.append(result_numpy)
+    if per_op is not None:
+        speedup = baseline_time / per_op
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} μs, Speedup: {speedup:.2f}x"
+        )
+    else:
+        print(" Numpy not available or type not supported")
+
+    # 4. Cython deserializer
+    print("4. Cython DesVectorType deserializer...")
+    elapsed, per_op, result_cython = benchmark_cython_deserializer(
+        vector_type, serialized_data, iterations
+    )
+    if per_op is not None:
+        results.append(result_cython)
+        speedup = baseline_time / per_op
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} μs, Speedup: {speedup:.2f}x"
+        )
+    else:
+        print(" Cython deserializers not available")
+
+    # Verify results
+    print("\nVerifying results...")
+    if verify_results(expected_values, *results):
+        print(" ✓ All results match!")
+    else:
+        print(" ✗ Result mismatch detected!")
+
+    return baseline_time
+
+
+def main():
+    """Run all benchmarks."""
+    # Pin to single CPU core for consistent measurements
+    try:
+        os.sched_setaffinity(0, {0})  # Pin to CPU core 0
+        print("Pinned to CPU core 0 for consistent measurements")
+    except (AttributeError, OSError) as e:
+        print(f"Could not pin to single core: {e}")
+        print("Running without CPU affinity...")
+
+    print("=" * 80)
+    print("VectorType Deserialization Performance Benchmark")
+    print("=" * 80)
+
+    # Test configurations: (vector_size, element_type, type_name, iterations)
+    test_configs = [
+        # Small vectors
+        (3, FloatType, "float", 50000),
+        (4, FloatType, "float", 50000),
+        # Medium vectors (common in ML)
+        (128, FloatType, "float", 10000),
+        (384, FloatType, "float", 10000),
+        # Large vectors (embeddings)
+        (768, FloatType, "float", 5000),
+        (1536, FloatType, "float", 2000),
+        # Other types (smaller iteration counts)
+        (128, DoubleType, "double", 10000),
+        (768, DoubleType, "double", 5000),
+        (1536, DoubleType, "double", 2000),
+        (64, Int32Type, "int", 15000),
+        (128, Int32Type, "int", 10000),
+    ]
+
+    summary = []
+
+    for vector_size, element_type, type_name, iterations in test_configs:
+        baseline = run_benchmark_suite(vector_size, element_type, type_name, iterations)
+        summary.append((f"Vector<{type_name}, {vector_size}>", baseline))
+
+    # Print summary
+    print("\n" + "=" * 80)
+    print("SUMMARY - Current Implementation Performance")
+    print("=" * 80)
+    for config, baseline_time in summary:
+        print(f"{config:30s}: {baseline_time:8.2f} μs")
+
+    print("\n" + "=" * 80)
+    print("Benchmark complete!")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/vector_serialize.py b/benchmarks/vector_serialize.py
new file mode 100644
index 0000000000..e63b9f74b4
--- /dev/null
+++ b/benchmarks/vector_serialize.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+# Copyright ScyllaDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Benchmark for VectorType serialization performance.
+
+Tests different optimization strategies:
+1. Current implementation (Python io.BytesIO loop)
+2. Python struct.pack batch format string
+3. Cython SerVectorType serializer (when available)
+4. BoundStatement.bind() end-to-end with 1 vector column (when available)
+
+Every benchmark_* function returns (elapsed_seconds, per_op_microseconds,
+last_result), or (None, None, None) when the strategy cannot run.
+
+Run with: python benchmarks/vector_serialize.py
+"""
+
+import os
+import sys
+import time
+import struct
+
+# Add parent directory to path
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
+
+from cassandra.cqltypes import FloatType, DoubleType, Int32Type, lookup_casstype
+from cassandra.marshal import float_pack, double_pack, int32_pack
+
+
+def create_test_values(vector_size, element_type):
+    """Create test values for serialization benchmarks."""
+    if element_type == FloatType:
+        return [float(i * 0.1) for i in range(vector_size)]
+    elif element_type == DoubleType:
+        return [float(i * 0.1) for i in range(vector_size)]
+    elif element_type == Int32Type:
+        return list(range(vector_size))
+    else:
+        raise ValueError(f"Unsupported element type: {element_type}")
+
+
+def benchmark_current_implementation(vector_type, values, iterations=10000):
+    """Benchmark the current VectorType.serialize implementation (io.BytesIO loop)."""
+    protocol_version = 4
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = vector_type.serialize(values, protocol_version)
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_struct_pack(vector_type, values, iterations=10000):
+    """Benchmark struct.pack batch format string optimization."""
+    vector_size = vector_type.vector_size
+    subtype = vector_type.subtype
+
+    # Determine format string
+    if subtype is FloatType or (
+        isinstance(subtype, type) and issubclass(subtype, FloatType)
+    ):
+        format_str = f">{vector_size}f"
+    elif subtype is DoubleType or (
+        isinstance(subtype, type) and issubclass(subtype, DoubleType)
+    ):
+        format_str = f">{vector_size}d"
+    elif subtype is Int32Type or (
+        isinstance(subtype, type) and issubclass(subtype, Int32Type)
+    ):
+        format_str = f">{vector_size}i"
+    else:
+        return None, None, None
+
+    # Pre-compile the struct for fair comparison
+    packer = struct.Struct(format_str)
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = packer.pack(*values)
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_cython_serializer(vector_type, values, iterations=10000):
+    """Benchmark Cython SerVectorType serializer (when available)."""
+    try:
+        from cassandra.serializers import find_serializer
+    except ImportError:
+        return None, None, None
+
+    protocol_version = 4
+
+    # Get the Cython serializer
+    serializer = find_serializer(vector_type)
+
+    # Check if we got the Cython serializer (not generic fallback)
+    if serializer.__class__.__name__ != "SerVectorType":
+        return None, None, None
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        result = serializer.serialize(values, protocol_version)
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, result
+
+
+def benchmark_bind_statement(vector_type, values, iterations=10000):
+    """Approximate the BoundStatement.bind() cost for 1 vector column.
+
+    bind() itself is not called: each iteration builds a BoundStatement via
+    __new__ around a mocked PreparedStatement and serializes the vector directly.
+    """
+    from unittest.mock import MagicMock
+
+    try:
+        from cassandra.query import BoundStatement, PreparedStatement
+    except ImportError:
+        return None, None, None
+
+    # Create a mock PreparedStatement with one vector column
+    col_meta_mock = MagicMock()
+    col_meta_mock.keyspace_name = "test_ks"
+    col_meta_mock.table_name = "test_table"
+    col_meta_mock.name = "vec_col"
+    col_meta_mock.type = vector_type
+
+    prepared = MagicMock(spec=PreparedStatement)
+    prepared.protocol_version = 4
+    prepared.column_metadata = [col_meta_mock]
+    prepared.column_encryption_policy = None
+    prepared.routing_key_indexes = None
+    prepared.is_idempotent = False
+    prepared.result_metadata = None
+    prepared.keyspace = "test_ks"
+
+    start = time.perf_counter()
+    for _ in range(iterations):
+        bs = BoundStatement.__new__(BoundStatement)
+        bs.prepared_statement = prepared
+        bs.values = []
+        bs.raw_values = [values]
+        # Inline the core serialization path (no CE policy)
+        bs.values.append(vector_type.serialize(values, 4))
+    end = time.perf_counter()
+
+    elapsed = end - start
+    per_op = (elapsed / iterations) * 1_000_000  # microseconds
+
+    return elapsed, per_op, bs.values[0]
+
+
+def verify_results(reference, *results):
+    """Verify that all serialization results produce identical bytes."""
+    for i, result in enumerate(results):
+        if result is None:
+            continue
+        if result != reference:
+            print(
+                f" Result {i} mismatch: {len(result)} bytes vs {len(reference)} bytes (reference)"
+            )
+            # Show first divergence
+            for j in range(min(len(result), len(reference))):
+                if result[j] != reference[j]:
+                    print(
+                        f" First difference at byte {j}: {result[j]:#04x} vs {reference[j]:#04x}"
+                    )
+                    break
+            return False
+    return True
+
+
+def run_benchmark_suite(vector_size, element_type, type_name, iterations=10000):
+    """Run complete benchmark suite for a given vector configuration."""
+    sep = "=" * 80
+    print(f"\n{sep}")
+    print(f"Benchmark: Vector<{type_name}, {vector_size}>")
+    print(f"{sep}")
+    print(f"Iterations: {iterations:,}")
+
+    # Create vector type
+    cass_typename = f"org.apache.cassandra.db.marshal.{element_type.__name__}"
+    vector_typename = (
+        f"org.apache.cassandra.db.marshal.VectorType({cass_typename}, {vector_size})"
+    )
+    vector_type = lookup_casstype(vector_typename)
+
+    values = create_test_values(vector_size, element_type)
+
+    # Get reference serialization for verification
+    reference_bytes = vector_type.serialize(values, 4)
+    data_size = len(reference_bytes)
+
+    print(f"Serialized size: {data_size:,} bytes")
+    print()
+
+    # Collect results for verification
+    all_results = []
+
+    # 1. Current implementation (baseline)
+    print("1. Current implementation (io.BytesIO loop, baseline)...")
+    elapsed, per_op, result = benchmark_current_implementation(
+        vector_type, values, iterations
+    )
+    all_results.append(result)
+    print(f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} us")
+    baseline_time = per_op
+
+    # 2. struct.pack batch format string
+    print("2. Python struct.pack batch format string...")
+    elapsed, per_op, result = benchmark_struct_pack(vector_type, values, iterations)
+    all_results.append(result)
+    if per_op is not None:
+        speedup = baseline_time / per_op
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} us, Speedup: {speedup:.2f}x"
+        )
+    else:
+        print(" Not applicable for this type")
+
+    # 3. Cython serializer
+    print("3. Cython SerVectorType serializer...")
+    elapsed, per_op, result = benchmark_cython_serializer(
+        vector_type, values, iterations
+    )
+    all_results.append(result)
+    if per_op is not None:
+        speedup = baseline_time / per_op
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} us, Speedup: {speedup:.2f}x"
+        )
+    else:
+        print(" Cython serializers not available")
+
+    # 4. BoundStatement.bind() end-to-end
+    print("4. BoundStatement.bind() end-to-end (1 vector column)...")
+    elapsed, per_op, result = benchmark_bind_statement(vector_type, values, iterations)
+    all_results.append(result)
+    if per_op is not None:
+        overhead = per_op / baseline_time
+        print(
+            f" Total: {elapsed:.4f}s, Per-op: {per_op:.2f} us, Overhead vs baseline: {overhead:.2f}x"
+        )
+    else:
+        print(" BoundStatement benchmark not available")
+
+    # Verify results
+    print("\nVerifying results...")
+    if verify_results(reference_bytes, *all_results):
+        print(" All results match!")
+    else:
+        print(" Result mismatch detected!")
+
+    return baseline_time
+
+
+def main():
+    """Run all benchmarks."""
+    # Pin to single CPU core for consistent measurements
+    try:
+        os.sched_setaffinity(0, {0})  # Pin to CPU core 0
+        print("Pinned to CPU core 0 for consistent measurements")
+    except (AttributeError, OSError) as e:
+        print(f"Could not pin to single core: {e}")
+        print("Running without CPU affinity...")
+
+    sep = "=" * 80
+    print(sep)
+    print("VectorType Serialization Performance Benchmark")
+    print(sep)
+
+    # Test configurations: (vector_size, element_type, type_name, iterations)
+    test_configs = [
+        # Small vectors
+        (3, FloatType, "float", 50000),
+        # Medium vectors (common in ML)
+        (128, FloatType, "float", 10000),
+        # Large vectors (embeddings)
+        (768, FloatType, "float", 5000),
+        (1536, FloatType, "float", 2000),
+        # Other types
+        (128, DoubleType, "double", 10000),
+        (768, DoubleType, "double", 5000),
+        (1536, DoubleType, "double", 2000),
+        (128, Int32Type, "int", 10000),
+    ]
+
+    summary = []
+
+    for vector_size, element_type, type_name, iterations in test_configs:
+        baseline = run_benchmark_suite(vector_size, element_type, type_name, iterations)
+        summary.append((f"Vector<{type_name}, {vector_size}>", baseline))
+
+    # Print summary
+    print(f"\n{sep}")
+    print("SUMMARY - Serialization Baseline Performance (io.BytesIO loop)")
+    print(sep)
+    for config, baseline_time in summary:
+        print(f"{config:30s}: {baseline_time:8.2f} us")
+
+    print(f"\n{sep}")
+    print("Benchmark complete!")
+    print(sep)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration/standard/test_types.py b/tests/integration/standard/test_types.py
index 1d66ce1ed9..b2f786a91b 100644
--- a/tests/integration/standard/test_types.py
+++ b/tests/integration/standard/test_types.py
@@ -583,7 +583,7 @@ def test_can_insert_tuples_all_collection_datatypes(self):
 
         # make sure we're testing all non primitive data types in the future
         if set(COLLECTION_TYPES) != set(['tuple', 'list', 'map', 'set']):
-            raise NotImplemented('Missing datatype not implemented: {}'.format(
+            raise NotImplementedError('Missing datatype not implemented: {}'.format(
                 set(COLLECTION_TYPES) - set(['tuple', 'list', 'map', 'set'])
             ))
 
diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py
index 11aab2748d..012d1b98b5 100644
--- a/tests/unit/test_types.py
+++ b/tests/unit/test_types.py
@@ -525,6 +525,97 @@ def test_deserialization_variable_size_too_big(self):
         with pytest.raises(ValueError, match="Additional bytes remaining after vector deserialization completed"):
             ctype_four.deserialize(ctype_five_bytes, 0)
 
+    def test_vector_cython_deserializer_variable_size_subtype(self):
+        """
+        Test that DesVectorType rejects variable-size subtypes with a ValueError.
+        Variable-size types (e.g. UTF8Type) are not supported by the Cython fast path
+        and should raise ValueError from DesVectorType._deserialize_generic.
+        The pure Python VectorType.deserialize handles these correctly.
+
+        Note: This test is forward-looking — it validates the Cython deserializer
+        that is introduced in a companion PR. The skipTest guards below ensure
+        the test is silently skipped when the extension is not yet compiled.
+
+        @since 3.x
+        @expected_result Cython deserializer raises ValueError for variable-size subtypes;
+            pure Python path correctly deserializes them
+
+        @test_category data_types:vector
+        """
+        try:
+            from cassandra.deserializers import find_deserializer, DesVectorType
+        except ImportError:
+            self.skipTest("Cython deserializers not available (no compiled extension)")
+
+        vt_text = VectorType.apply_parameters(["UTF8Type", 3], {})
+        des_text = find_deserializer(vt_text)
+        if not isinstance(des_text, DesVectorType):
+            self.skipTest(
+                "DesVectorType not available (Cython VectorType deserializer not compiled)"
+            )
+        self.assertEqual(des_text.__class__.__name__, "DesVectorType")
+
+        # Cython path should raise for variable-size subtypes
+        data = vt_text.serialize(["abc", "def", "ghi"], 5)
+        with self.assertRaises(ValueError) as cm:
+            des_text.deserialize_bytes(data, 5)
+        self.assertIn("variable-size subtype", str(cm.exception))
+
+        # Pure Python path should work correctly
+        result = vt_text.deserialize(data, 5)
+        self.assertEqual(result, ["abc", "def", "ghi"])
+
+    def test_vector_numpy_large_deserialization(self):
+        """
+        Test that large vectors (>= 32 elements) use the numpy deserialization path
+        and return correct results for all supported numeric types.
+
+        @since 3.x
+        @expected_result Large vectors are correctly deserialized (via numpy when available)
+
+        @test_category data_types:vector
+        """
+        import struct
+
+        vector_size = 64  # >= 32 threshold for numpy path
+
+        # Float vector
+        float_data = list(range(vector_size))
+        float_values = [float(x) for x in float_data]
+        vt_float = VectorType.apply_parameters(["FloatType", vector_size], {})
+        packed = struct.pack(">%df" % vector_size, *float_values)
+        result = vt_float.deserialize(packed, 5)
+        self.assertEqual(len(result), vector_size)
+        for i in range(vector_size):
+            self.assertAlmostEqual(result[i], float_values[i], places=5)
+
+        # Double vector
+        double_values = [float(x) * 1.1 for x in range(vector_size)]
+        vt_double = VectorType.apply_parameters(["DoubleType", vector_size], {})
+        packed = struct.pack(">%dd" % vector_size, *double_values)
+        result = vt_double.deserialize(packed, 5)
+        self.assertEqual(len(result), vector_size)
+        for i in range(vector_size):
+            self.assertAlmostEqual(result[i], double_values[i], places=10)
+
+        # Int32 vector
+        int32_values = list(range(vector_size))
+        vt_int32 = VectorType.apply_parameters(["Int32Type", vector_size], {})
+        packed = struct.pack(">%di" % vector_size, *int32_values)
+        result = vt_int32.deserialize(packed, 5)
+        self.assertEqual(result, int32_values)
+
+        # Int64/Long vector
+        int64_values = list(range(vector_size))
+        vt_int64 = VectorType.apply_parameters(["LongType", vector_size], {})
+        packed = struct.pack(">%dq" % vector_size, *int64_values)
+        result = vt_int64.deserialize(packed, 5)
+        self.assertEqual(result, int64_values)
+
+        # ShortType skipped: serial_size() returns None (pre-existing bug),
+        # so VectorType.deserialize takes the variable-size path which fails.
+        # ShortType struct.unpack works for small vectors via _vector_struct.
+
 
 ZERO = datetime.timedelta(0)
 