diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index 45bd7d15a42..79b6cfb8a33 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -368,7 +368,7 @@ pub fn vortex_fastlanes::DeltaArray::len(&self) -> usize pub fn vortex_fastlanes::DeltaArray::offset(&self) -> usize -pub fn vortex_fastlanes::DeltaArray::try_from_delta_compress_parts(bases: vortex_array::array::ArrayRef, deltas: vortex_array::array::ArrayRef) -> vortex_error::VortexResult +pub fn vortex_fastlanes::DeltaArray::try_from_delta_compress_parts(bases: vortex_array::array::ArrayRef, deltas: vortex_array::array::ArrayRef, logical_len: usize) -> vortex_error::VortexResult pub fn vortex_fastlanes::DeltaArray::try_from_primitive_array(array: &vortex_array::arrays::primitive::array::PrimitiveArray, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult diff --git a/encodings/fastlanes/src/bit_transpose/validity.rs b/encodings/fastlanes/src/bit_transpose/validity.rs index 13f1c6b0385..92b039cbe28 100644 --- a/encodings/fastlanes/src/bit_transpose/validity.rs +++ b/encodings/fastlanes/src/bit_transpose/validity.rs @@ -81,22 +81,23 @@ pub fn untranspose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> Vort #[inline] pub fn untranspose_bitbuffer(bits: BitBuffer) -> BitBuffer { - assert!( - bits.inner().len().is_multiple_of(128), - "Transpose BitBuffer must be 128-byte aligned" - ); let (offset, len, bytes) = bits.into_inner(); - match bytes.try_into_mut() { - Ok(mut bytes_mut) => { - let (chunks, _) = bytes_mut.as_chunks_mut::<128>(); - let mut tmp = [0u8; 128]; - for chunk in chunks { - untranspose_bits(chunk, &mut tmp); - chunk.copy_from_slice(&tmp); + + if bytes.len().is_multiple_of(128) { + match bytes.try_into_mut() { + Ok(mut bytes_mut) => { + let (chunks, _) = bytes_mut.as_chunks_mut::<128>(); + let mut tmp = [0u8; 128]; + for chunk in chunks { + untranspose_bits(chunk, &mut tmp); + chunk.copy_from_slice(&tmp); + } + BitBuffer::new_with_offset(bytes_mut.freeze().into_byte_buffer(), len, offset) } - BitBuffer::new_with_offset(bytes_mut.freeze().into_byte_buffer(), len, offset) + Err(bytes) => bits_op_with_copy(bytes, len, offset, untranspose_bits), } - Err(bytes) => bits_op_with_copy(bytes, len, offset, untranspose_bits), + } else { + bits_op_with_copy(bytes, len, offset, untranspose_bits) } } @@ -131,9 +132,101 @@ fn bits_op_with_copy( } unsafe { output.set_len(output_len) }; - BitBuffer::new_with_offset( - output.freeze().into_byte_buffer(), - len.next_multiple_of(1024), - offset, - ) + BitBuffer::new_with_offset(output.freeze().into_byte_buffer(), len, offset) +} + +#[cfg(test)] +mod tests { + use vortex_array::LEGACY_SESSION; + use vortex_array::VortexSessionExecute; + use vortex_array::validity::Validity; + use vortex_buffer::BitBuffer; + use vortex_buffer::BitBufferMut; + use vortex_buffer::ByteBuffer; + + use super::*; + + fn make_validity_bits(num_bits: usize) -> BitBuffer { + let mut builder = BitBufferMut::with_capacity(num_bits); + for i in 0..num_bits { + builder.append(i % 3 != 0); + } + builder.freeze() + } + + fn force_copy_path(bits: BitBuffer) -> (BitBuffer, ByteBuffer) { + let (offset, len, bytes) = bits.into_inner(); + let extra_ref = bytes.clone(); + (BitBuffer::new_with_offset(bytes, len, offset), extra_ref) + } + + #[test] + fn transpose_roundtrip_preserves_len_inplace_path() { + let bits = make_validity_bits(1024); + assert_eq!(bits.len(), 1024); + + let transposed = transpose_bitbuffer(bits.clone()); + assert_eq!(transposed.len(), 1024); + + let roundtripped = untranspose_bitbuffer(transposed); + assert_eq!(roundtripped.len(), 1024); + assert_eq!(roundtripped, bits); + } + + #[test] + fn transpose_roundtrip_preserves_len_copy_path() { + let bits = make_validity_bits(1024); + let (bits_shared, _hold) = force_copy_path(bits.clone()); + + let transposed = transpose_bitbuffer(bits_shared); + assert_eq!(transposed.len(), 1024); + + let roundtripped = untranspose_bitbuffer(transposed); + assert_eq!(roundtripped.len(), 1024); + assert_eq!(roundtripped, bits); + } + + #[test] + fn transpose_preserves_len_non_aligned_copy_path() { + let bits = make_validity_bits(500); + assert_eq!(bits.len(), 500); + + let transposed = transpose_bitbuffer(bits); + assert_eq!(transposed.len(), 500); + } + + #[test] + fn transpose_inplace_and_copy_produce_same_bits() { + let bits = make_validity_bits(2048); + + let inplace_result = transpose_bitbuffer(bits.clone()); + + let (bits_shared, _hold) = force_copy_path(bits); + let copy_result = transpose_bitbuffer(bits_shared); + + assert_eq!(inplace_result.len(), copy_result.len()); + assert_eq!(inplace_result, copy_result); + } + + #[test] + fn transpose_validity_roundtrip_non_aligned() { + let bits = make_validity_bits(1500); + let validity = Validity::Array(BoolArray::new(bits, Validity::NonNullable).into_array()); + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let transposed = transpose_validity(&validity, &mut ctx).unwrap(); + + if let Validity::Array(arr) = &transposed { + assert_eq!(arr.len(), 1500); + } else { + panic!("expected Validity::Array"); + } + + let roundtripped = untranspose_validity(&transposed, &mut ctx).unwrap(); + if let Validity::Array(arr) = &roundtripped { + assert_eq!(arr.len(), 1500); + } else { + panic!("expected Validity::Array"); + } + } } diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs index a3542d1d6a0..128f8dbfb78 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use fastlanes::BitPacking; -use itertools::Itertools; use num_traits::PrimInt; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; @@ -205,13 +204,8 @@ pub fn gather_patches( bit_width: u8, num_exceptions_hint: usize, ) -> VortexResult> { - let patch_validity = match parray.validity() { - Validity::NonNullable => Validity::NonNullable, - _ => Validity::AllValid, - }; - let array_len = parray.len(); - let validity_mask = parray.validity_mask()?; + let validity = parray.validity_mask()?; let patches = if array_len < u8::MAX as usize { match_each_integer_ptype!(parray.ptype(), |T| { @@ -219,8 +213,7 @@ pub fn gather_patches( parray.as_slice::(), bit_width, num_exceptions_hint, - patch_validity, - validity_mask, + &validity, )? }) } else if array_len < u16::MAX as usize { @@ -229,8 +222,7 @@ pub fn gather_patches( parray.as_slice::(), bit_width, num_exceptions_hint, - patch_validity, - validity_mask, + &validity, )? }) } else if array_len < u32::MAX as usize { @@ -239,8 +231,7 @@ pub fn gather_patches( parray.as_slice::(), bit_width, num_exceptions_hint, - patch_validity, - validity_mask, + &validity, )? }) } else { @@ -249,8 +240,7 @@ pub fn gather_patches( parray.as_slice::(), bit_width, num_exceptions_hint, - patch_validity, - validity_mask, + &validity, )? }) }; @@ -262,8 +252,7 @@ fn gather_patches_impl( data: &[T], bit_width: u8, num_exceptions_hint: usize, - patch_validity: Validity, - validity_mask: Mask, + validity: &Mask, ) -> VortexResult> where T: PrimInt + NativePType, @@ -271,6 +260,7 @@ where { let mut indices: BufferMut

= BufferMut::with_capacity(num_exceptions_hint); let mut values: BufferMut = BufferMut::with_capacity(num_exceptions_hint); + let mut patch_validity = Vec::with_capacity(num_exceptions_hint); let total_chunks = data.len().div_ceil(1024); let mut chunk_offsets: BufferMut = BufferMut::with_capacity(total_chunks); @@ -281,11 +271,10 @@ where chunk_offsets.push(values.len() as u64); } - if (value.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize - && validity_mask.value(idx) - { + if (value.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize { indices.push(P::from(idx).vortex_expect("cast index from usize")); values.push(*value); + patch_validity.push(validity.value(idx)); } } @@ -296,7 +285,7 @@ where data.len(), 0, indices.into_array(), - PrimitiveArray::new(values, patch_validity).into_array(), + PrimitiveArray::new(values, Validity::from_iter(patch_validity)).into_array(), Some(chunk_offsets.into_array()), )?)) } @@ -324,14 +313,12 @@ fn bit_width_histogram_typed( // All values are invalid bit_widths[0] = array.len(); } - AllOr::Some(buffer) => { - // Some values are valid - for (is_valid, v) in buffer.iter().zip_eq(array.as_slice::()) { - if is_valid { - bit_widths[bit_width(*v)] += 1; - } else { - bit_widths[0] += 1; - } + AllOr::Some(_) => { + // Count actual bit widths for all values regardless of validity. + // This ensures patches are created for values at null positions that + // exceed the chosen bit width, preserving byte-level data integrity. + for v in array.as_slice::() { + bit_widths[bit_width(*v)] += 1; } } } @@ -462,7 +449,8 @@ mod test { ); assert!(values.ptype().is_unsigned_int()); let compressed = BitPackedArray::encode(&values.into_array(), 4).unwrap(); - assert!(compressed.patches().is_none()); + // Values 16-23 at null positions still get patches to preserve byte-level integrity. + assert!(compressed.patches().is_some()); assert_eq!( (0..(1 << 4)).collect::>(), compressed diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs index 392c61bcf4c..f6a010ddc26 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs @@ -21,7 +21,7 @@ use vortex_array::validity::Validity; use vortex_buffer::BufferMut; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_panic; +use vortex_mask::AllOr; use vortex_mask::Mask; use crate::BitPackedArray; @@ -185,12 +185,26 @@ fn insert_values_and_validity_at_indices_to_uninit_range< indices_offset: usize, f: F, ) { - let Mask::AllTrue(_) = values_validity else { - vortex_panic!("BitPackedArray somehow had nullable patch values"); - }; - - for (index, &value) in indices.iter().zip_eq(values) { - dst.set_value(index.as_() - indices_offset, f(value)); + match values_validity.bit_buffer() { + AllOr::All => { + for (index, &value) in indices.iter().zip_eq(values) { + dst.set_value(index.as_() - indices_offset, f(value)); + } + } + AllOr::None => { + for (index, &value) in indices.iter().zip_eq(values) { + let dst_index = index.as_() - indices_offset; + dst.set_value(dst_index, f(value)); + dst.set_validity_bit(dst_index, false); + } + } + AllOr::Some(validity) => { + for (patch_idx, (index, &value)) in indices.iter().zip_eq(values).enumerate() { + let dst_index = index.as_() - indices_offset; + dst.set_value(dst_index, f(value)); + dst.set_validity_bit(dst_index, validity.value(patch_idx)); + } + } } } @@ -601,6 +615,28 @@ mod tests { assert_eq!(all_nulls_result.len(), 4); } + #[test] + fn test_unpack_to_primitive_preserves_nulls_for_patched_null_slots() -> VortexResult<()> { + let values = Buffer::from_iter([3u16, 2000, 7, 12]); + let validity = Validity::from_iter([true, false, true, true]); + let array = PrimitiveArray::new(values, validity); + + let bitpacked = bitpack_encode(&array, 4, None)?; + assert!( + bitpacked.patches().is_some(), + "null payload wider than bit width should create a patch" + ); + + let primitive_result = unpack_to_primitive(&bitpacked); + assert!(primitive_result.scalar_at(1)?.is_null()); + assert_arrays_eq!(primitive_result, array); + + let builder_result = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + assert_arrays_eq!(builder_result, array); + + Ok(()) + } + /// Test that the execute method produces consistent results with other unpacking methods. #[test] fn test_execute_method_consistency() -> VortexResult<()> { @@ -770,4 +806,52 @@ mod tests { assert_eq!(single_result.len(), 1); Ok(()) } + + #[test] + fn test_u8_bitpack_roundtrip_with_patches() -> VortexResult<()> { + let data: Vec = (0..2048) + .map(|i| if i == 176 { 242u8 } else { (i % 5) as u8 }) + .collect(); + let array = PrimitiveArray::from_iter(data); + + for bit_width in [3u8, 5, 7] { + let encoded = bitpack_encode(&array, bit_width, None)?; + assert!( + encoded.patches().is_some(), + "bit_width={bit_width} should have patches" + ); + let decoded = unpack_array(&encoded, &mut SESSION.create_execution_ctx())?; + assert_arrays_eq!(decoded, array); + } + Ok(()) + } + + /// Regression: u8 values mostly 0/1 with outlier must roundtrip through bitpacking. + #[test] + fn test_u8_bitpack_mostly_zeros_with_outlier() -> VortexResult<()> { + let mut data = vec![0u8; 1024]; + data[176] = 242; + data[180] = 1; + data[500] = 1; + + let array = PrimitiveArray::from_iter(data); + + let encoded = bitpack_encode(&array, 1, None)?; + assert!( + encoded.patches().is_some(), + "should have patches for value 242" + ); + let decoded = unpack_array(&encoded, &mut SESSION.create_execution_ctx())?; + assert_arrays_eq!(decoded, array); + + let encoded_best = crate::bitpack_compress::bitpack_to_best_bit_width(&array)?; + let decoded_best = unpack_array(&encoded_best, &mut SESSION.create_execution_ctx())?; + assert_arrays_eq!(decoded_best, array); + + let encoded_zero = bitpack_encode(&array, 0, None)?; + let decoded_zero = unpack_array(&encoded_zero, &mut SESSION.create_execution_ctx())?; + assert_arrays_eq!(decoded_zero, array); + + Ok(()) + } } diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index 07174e294fa..1ba4d2acadc 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -7,16 +7,21 @@ use std::mem::MaybeUninit; use fastlanes::Delta; use fastlanes::FastLanes; use fastlanes::Transpose; +use vortex_array::Canonical; use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::BoolArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; use vortex_array::match_each_unsigned_integer_ptype; +use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; +use vortex_buffer::BitBuffer; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexResult; -use crate::bit_transpose::transpose_validity; +use crate::bit_transpose::transpose_bitbuffer; pub fn delta_compress( array: &PrimitiveArray, @@ -24,8 +29,10 @@ pub fn delta_compress( ) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { let (bases, deltas) = compress_primitive::(array.as_slice::()); + let padded_len = deltas.len(); // TODO(robert): This can be avoided if we add TransposedBoolArray that performs index translation when necessary. - let validity = transpose_validity(array.validity(), ctx)?; + // Transpose the validity and pad to match the padded deltas length. + let validity = transpose_and_pad_validity(array.validity(), padded_len, ctx)?; ( PrimitiveArray::new(bases, array.dtype().nullability().into()), PrimitiveArray::new(deltas, validity), @@ -35,6 +42,51 @@ pub fn delta_compress( Ok((bases, deltas)) } +/// Transpose a validity bitmap and extend it to `padded_len` bits. +/// +/// The deltas buffer is always padded to the next multiple of 1024 elements, +/// so the validity must be extended to match. The underlying byte buffer from +/// `transpose_bitbuffer` is already large enough (padded to 128-byte chunks). +fn transpose_and_pad_validity( + validity: &Validity, + padded_len: usize, + ctx: &mut ExecutionCtx, +) -> VortexResult { + match validity { + Validity::Array(mask) => { + let bools = mask + .clone() + .execute::(ctx)? + .into_bool() + .into_bit_buffer(); + + let transposed = transpose_bitbuffer(bools); + let padded = extend_bitbuffer(transposed, padded_len); + + Ok(Validity::Array( + BoolArray::new(padded, Validity::NonNullable).into_array(), + )) + } + v @ Validity::AllValid | v @ Validity::AllInvalid | v @ Validity::NonNullable => { + Ok(v.clone()) + } + } +} + +/// Extend a `BitBuffer` to `new_len` bits. The underlying byte buffer must +/// already be large enough (i.e. `bytes.len() >= ceil(new_len / 8)`). +fn extend_bitbuffer(bits: BitBuffer, new_len: usize) -> BitBuffer { + if bits.len() == new_len { + return bits; + } + let (offset, _len, bytes) = bits.into_inner(); + debug_assert!( + bytes.len() * 8 >= new_len + offset, + "byte buffer too small to extend to {new_len} bits" + ); + BitBuffer::new_with_offset(bytes, new_len, offset) +} + fn compress_primitive( array: &[T], ) -> (Buffer, Buffer) { diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 0cefe4c9fa9..47d17c3d75b 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -79,8 +79,11 @@ impl DeltaArray { /// Create a [`DeltaArray`] from the given `bases` and `deltas` arrays. /// Note the `deltas` might be nullable - pub fn try_from_delta_compress_parts(bases: ArrayRef, deltas: ArrayRef) -> VortexResult { - let logical_len = deltas.len(); + pub fn try_from_delta_compress_parts( + bases: ArrayRef, + deltas: ArrayRef, + logical_len: usize, + ) -> VortexResult { Self::try_new(bases, deltas, 0, logical_len) } diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index e57ec9c64ec..f302a84f1bc 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -984,13 +984,14 @@ unsafe fn apply_patches_to_buffer_inner( } } Validity::AllInvalid => { - // All patch values are null, just mark positions as invalid. - for &i in patch_indices { + // Preserve patch payloads while keeping the destination null. + for (&i, &value) in patch_indices.iter().zip_eq(patch_values) { let index = i.as_() - patch_offset; // SAFETY: `index` is valid because caller guarantees all patch indices are within // bounds after offset adjustment. unsafe { + buffer[index] = value; validity.unset_unchecked(index); } } @@ -1008,8 +1009,8 @@ unsafe fn apply_patches_to_buffer_inner( // SAFETY: `index` and `patch_idx` are valid because caller guarantees all patch // indices are within bounds after offset adjustment. unsafe { + buffer[index] = value; if mask.value_unchecked(patch_idx) { - buffer[index] = value; validity.set_unchecked(index); } else { validity.unset_unchecked(index); diff --git a/vortex-btrblocks/src/compressor/integer/mod.rs b/vortex-btrblocks/src/compressor/integer/mod.rs index 58ee4f62e76..69af69de3a2 100644 --- a/vortex-btrblocks/src/compressor/integer/mod.rs +++ b/vortex-btrblocks/src/compressor/integer/mod.rs @@ -909,6 +909,7 @@ mod tests { use super::SequenceScheme; use super::SparseScheme; use crate::BtrBlocksCompressor; + use crate::CanonicalCompressor; use crate::CompressorContext; use crate::CompressorExt; use crate::CompressorStats; @@ -1067,6 +1068,242 @@ mod tests { Ok(()) } + + /// Regression: u8 delta values bitpacked with patches must roundtrip correctly. + #[test] + fn test_u8_bitpack_patch_roundtrip() -> VortexResult<()> { + use vortex_array::Canonical; + + let non_zeros: &[(usize, u8)] = &[ + (143, 1), + (144, 1), + (148, 1), + (162, 1), + (164, 1), + (167, 1), + (169, 1), + (172, 1), + (176, 242), + (179, 1), + (184, 1), + (190, 1), + (191, 1), + (193, 240), + (199, 1), + (201, 1), + (204, 1), + (207, 1), + (209, 1), + (213, 1), + (215, 1), + (224, 1), + (225, 1), + (227, 1), + (236, 1), + (244, 1), + (247, 1), + (249, 1), + (252, 1), + (256, 1), + (259, 1), + (271, 1), + (276, 1), + (280, 1), + (290, 1), + (295, 1), + (297, 1), + (300, 1), + (307, 1), + (312, 1), + (320, 1), + (326, 1), + (327, 1), + (332, 1), + (334, 1), + (335, 1), + (337, 1), + (341, 1), + (343, 1), + (348, 1), + (352, 1), + (353, 1), + (367, 1), + (372, 1), + (377, 1), + (380, 1), + (388, 1), + (392, 1), + (395, 1), + (399, 1), + (400, 1), + (404, 1), + (408, 1), + (428, 1), + (432, 16), + (440, 1), + (443, 1), + (448, 1), + (449, 16), + (454, 1), + (455, 1), + (460, 1), + (471, 1), + (473, 1), + (475, 1), + (476, 1), + (487, 1), + (489, 1), + (498, 1), + (500, 1), + (502, 1), + (516, 1), + (518, 1), + (520, 1), + (523, 1), + (525, 1), + (526, 1), + (529, 232), + (550, 1), + (551, 1), + (552, 1), + (566, 1), + (568, 1), + (571, 1), + (577, 240), + (579, 1), + (580, 1), + (583, 1), + (587, 1), + (588, 1), + (603, 1), + (604, 1), + (608, 1), + (620, 1), + (628, 1), + (630, 1), + (631, 1), + (638, 1), + (639, 1), + (646, 1), + (649, 1), + (653, 1), + (654, 1), + (657, 24), + (659, 1), + (673, 1), + (675, 1), + (678, 1), + (679, 1), + (681, 1), + (687, 1), + (696, 1), + (697, 1), + (703, 1), + (707, 1), + (708, 1), + (713, 1), + (721, 1), + (731, 1), + (736, 1), + (737, 1), + (756, 1), + (758, 1), + (759, 1), + (764, 1), + (766, 1), + (767, 1), + (768, 1), + (773, 1), + (777, 1), + (783, 1), + (787, 1), + (793, 1), + (797, 1), + (801, 1), + (803, 1), + (804, 1), + (818, 1), + (819, 1), + (825, 1), + (829, 1), + (831, 1), + (833, 18), + (841, 1), + (845, 1), + (848, 1), + (851, 1), + (855, 1), + (859, 1), + (862, 1), + (871, 1), + (872, 1), + (879, 1), + (889, 1), + (892, 1), + (894, 1), + (895, 1), + (896, 1), + (901, 1), + (905, 1), + (907, 1), + (911, 1), + (921, 1), + (922, 1), + (925, 1), + (929, 1), + (931, 1), + (932, 1), + (945, 1), + (947, 1), + (950, 1), + (954, 1), + (966, 1), + (969, 1), + (973, 1), + (974, 1), + (975, 1), + (979, 1), + (987, 1), + (988, 1), + (991, 1), + (1004, 1), + (1007, 1), + (1009, 1), + (1022, 1), + (1023, 1), + ]; + let mut data = vec![0u8; 1024]; + for &(idx, val) in non_zeros { + data[idx] = val; + } + let array = PrimitiveArray::from_iter(data.clone()); + + let btr = BtrBlocksCompressor::default(); + + for cascading in [0usize, 1, 2, 3] { + let ctx = CompressorContext { + is_sample: false, + allowed_cascading: cascading, + }; + let compressed = btr.compress_canonical( + Canonical::Primitive(array.clone()), + ctx, + crate::Excludes::from(&[super::IntCode::Dict]), + )?; + let decoded = compressed.to_primitive(); + let decoded_slice = decoded.as_slice::(); + for (i, (orig, dec)) in data.iter().zip(decoded_slice.iter()).enumerate() { + assert_eq!( + orig, + dec, + "mismatch at index {i} (cascading={cascading}, encoding={}): orig={orig} vs decoded={dec}", + compressed.encoding_id() + ); + } + } + + Ok(()) + } } /// Tests to verify that each integer compression scheme produces the expected encoding. diff --git a/vortex-btrblocks/src/compressor/rle.rs b/vortex-btrblocks/src/compressor/rle.rs index ef4b3fcb048..ea54cd5ea72 100644 --- a/vortex-btrblocks/src/compressor/rle.rs +++ b/vortex-btrblocks/src/compressor/rle.rs @@ -187,6 +187,10 @@ fn try_compress_delta( let compressed_deltas = compressor.compress_canonical(Canonical::Primitive(deltas), ctx, excludes)?; - vortex_fastlanes::DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas) - .map(vortex_fastlanes::DeltaArray::into_array) + vortex_fastlanes::DeltaArray::try_from_delta_compress_parts( + compressed_bases, + compressed_deltas, + primitive_array.len(), + ) + .map(vortex_fastlanes::DeltaArray::into_array) } diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 1c534eb1325..4b65fc750c9 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -151,8 +151,9 @@ fn bench_delta_compress_u32(bencher: Bencher) { with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &uint_array) .bench_refs(|a| { + let len = a.len(); let (bases, deltas) = delta_compress(a, &mut SESSION.create_execution_ctx()).unwrap(); - DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array()) + DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array(), len) .unwrap() }); } @@ -160,9 +161,11 @@ fn bench_delta_compress_u32(bencher: Bencher) { #[divan::bench(name = "delta_decompress_u32")] fn bench_delta_decompress_u32(bencher: Bencher) { let (uint_array, ..) = setup_primitive_arrays(); + let len = uint_array.len(); let (bases, deltas) = delta_compress(&uint_array, &mut SESSION.create_execution_ctx()).unwrap(); let compressed = - DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array()).unwrap(); + DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array(), len) + .unwrap(); with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &compressed)