From 0af878a1e720215dc79b5e9ba952360c593aae88 Mon Sep 17 00:00:00 2001 From: Kurouto Agent Date: Fri, 20 Mar 2026 08:27:04 +0000 Subject: [PATCH 1/2] test: add golden parity tests for List nullable item field (PLT-1048) Adds TestListStructNullableInnerField with 3 tests covering the edge case where a LargeList's item field has nullable=True vs nullable=False. Golden values (0000012c... and 000001713f...) are verified byte-exact against the Rust starfix crate via cargo test list_struct_nullable_inner_field_parity. This completes the cross-implementation parity verification for PLT-1048: all 129 Python tests and 67 Rust tests pass, and all golden values are confirmed consistent between the two implementations. Co-Authored-By: Claude Sonnet 4.6 --- tests/test_golden_parity_r2.py | 76 +++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/test_golden_parity_r2.py b/tests/test_golden_parity_r2.py index 971f0cc..254dc13 100644 --- a/tests/test_golden_parity_r2.py +++ b/tests/test_golden_parity_r2.py @@ -5,7 +5,7 @@ of truth; these tests are regression guards that will fail immediately if the Python implementation diverges. -Covers 37+ distinct Arrow tables / arrays across categories: +Covers 40+ distinct Arrow tables / arrays across categories: - Empty arrays (0 elements) - All-null arrays - Large boolean arrays (multi-byte bitvec) @@ -14,6 +14,7 @@ - Three-level nested structs - Struct with list child - List, nullable List + - List with nullable vs non-nullable item field (PLT-1048 edge case) - Non-nullable lists - Mixed-type structs - Mixed struct+list record batches @@ -899,6 +900,79 @@ def test_multi_batch_list(self): # --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +# LargeList with nullable vs non-nullable inner (item) field +# +# This exercises the structural-only BTreeMap entry created for List. +# When inner_field.nullable=True the entry's BitVec is allocated but never +# populated (bit_count=0), so 8 zero bytes are prepended to the structural +# digest at finalisation. Both Rust and Python handle this identically. +# Golden values verified via `cargo test list_struct_nullable_inner_field_parity` +# against the Rust starfix crate (PLT-1048). +# --------------------------------------------------------------------------- + + +class TestListStructNullableInnerField: + """LargeList — nullable vs non-nullable item field parity.""" + + _ids = [1, 2, 3, 4] + _labels = ["a", "b", "c", "d"] + + def _make_list_array(self, inner_nullable: bool) -> "pa.LargeListArray": + ids = pa.array(self._ids, type=pa.int32()) + labels = pa.array(self._labels, type=pa.large_utf8()) + struct_fields = [ + pa.field("id", pa.int32(), nullable=False), + pa.field("label", pa.large_utf8(), nullable=False), + ] + struct_arr = pa.StructArray.from_arrays( + [ids, labels], + fields=struct_fields, + ) + inner_field = pa.field("item", pa.struct(struct_fields), nullable=inner_nullable) + offsets = pa.array([0, 2, 4], type=pa.int64()) + return pa.LargeListArray.from_arrays( + offsets, + struct_arr, + type=pa.large_list(inner_field), + ) + + def test_nullable_inner_field(self): + """LargeList with nullable=True item field. + + ids=[1,2,3,4], labels=['a','b','c','d'], offsets=[0,2,4] + inner_field.nullable=True → structural-only entry has empty BitVec. + Golden value confirmed against Rust (PLT-1048). + """ + result = ArrowDigester.hash_array(self._make_list_array(inner_nullable=True)).hex() + assert result == "0000012c2b1c1d5b4c3dc46ed5335834dbf0d7386c38e607a398a2897fdaf1df387e1c" + + def test_non_nullable_inner_field(self): + """LargeList with nullable=False item field. + + ids=[1,2,3,4], labels=['a','b','c','d'], offsets=[0,2,4] + inner_field.nullable=False → no BitVec in structural-only entry. + Golden value confirmed against Rust (PLT-1048). + """ + result = ArrowDigester.hash_array(self._make_list_array(inner_nullable=False)).hex() + assert result == "000001713fa0e500c9aebea61039b30371fd84c0dff8cd3b96b4266978658bf73e4d8c" + + def test_nullable_differs_from_non_nullable(self): + """Nullable vs non-nullable inner field must produce different hashes.""" + hash_nullable = ArrowDigester.hash_array(self._make_list_array(inner_nullable=True)).hex() + hash_not_nullable = ArrowDigester.hash_array( + self._make_list_array(inner_nullable=False) + ).hex() + assert hash_nullable != hash_not_nullable, ( + "nullable and non-nullable item fields must produce distinct hashes" + ) + + +# --------------------------------------------------------------------------- +# Float special values +# --------------------------------------------------------------------------- + + class TestFloatSpecialValues: def test_float64_special_values(self): """Float64 array with NaN, Inf, -Inf, 0.0, -0.0.""" From ff48ac652161a59b1a8cf506ebec777b2723f013 Mon Sep 17 00:00:00 2001 From: Kurouto Agent Date: Fri, 20 Mar 2026 08:32:18 +0000 Subject: [PATCH 2/2] fix: remove duplicate Float special values section header The new LargeList test block was inserted between the float section banner and the float test class, leaving a stray orphaned header. Remove the duplicate so each section header correctly precedes its own test class. Co-Authored-By: Claude Sonnet 4.6 --- tests/test_golden_parity_r2.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_golden_parity_r2.py b/tests/test_golden_parity_r2.py index 254dc13..b2a1886 100644 --- a/tests/test_golden_parity_r2.py +++ b/tests/test_golden_parity_r2.py @@ -895,11 +895,6 @@ def test_multi_batch_list(self): assert result == "000001fcf529d339593cb8537af4c1ed7d02aae69cfbbd2608b02e811035edb6ba4ec3" -# --------------------------------------------------------------------------- -# Float special values -# --------------------------------------------------------------------------- - - # --------------------------------------------------------------------------- # LargeList with nullable vs non-nullable inner (item) field #