From 7dbdcc43912058edf51434907ecc6dddc981a1b3 Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Fri, 16 Jan 2026 16:04:59 -0800 Subject: [PATCH 1/6] fix: offset buffer with empty array --- .../arrow/vector/complex/LargeListVector.java | 11 +++++- .../arrow/vector/complex/ListVector.java | 11 +++++- .../arrow/vector/TestLargeListVector.java | 30 ++++++++++++++ .../apache/arrow/vector/TestListVector.java | 39 +++++++++++++++++++ 4 files changed, 89 insertions(+), 2 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 997b5a8b78..29e930d889 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -275,6 +275,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers @Override public List getFieldBuffers() { List result = new ArrayList<>(2); + + // Ensure offset buffer has at least one entry for offset[0]. + // According to Arrow specification, offset buffer must have N+1 entries, + // even when N=0, it should contain [0]. 
+ if (offsetBuffer.capacity() == 0) { + offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); + } + setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); @@ -309,7 +317,8 @@ private void setReaderAndWriterIndex() { offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec + offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity())); } else { validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 93a313ef4f..0890289e55 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -233,6 +233,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers @Override public List getFieldBuffers() { List result = new ArrayList<>(2); + + // Ensure offset buffer has at least one entry for offset[0]. + // According to Arrow specification, offset buffer must have N+1 entries, + // even when N=0, it should contain [0]. 
+ if (offsetBuffer.capacity() == 0) { + offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); + } + setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); @@ -267,7 +275,8 @@ private void setReaderAndWriterIndex() { offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec + offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity())); } else { validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index ccc0d3e176..d8fa53a30f 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1100,6 +1100,36 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } + @Test + public void testNestedEmptyLargeListOffsetBuffer() { + // Test that nested LargeListVector properly allocates offset buffer + // even when nested writers are never invoked. According to Arrow spec, + // offset buffer must have N+1 entries. Even when N=0, it should contain [0]. 
+ try (LargeListVector outerList = LargeListVector.empty("outer", allocator)) { + // Setup LargeList> + outerList.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType())); + LargeListVector innerList = (LargeListVector) outerList.getDataVector(); + innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + // Allocate outer only - simulates case where inner is never written to + outerList.allocateNew(); + outerList.setValueCount(0); + + // Get field buffers - this is what IPC serialization uses + List innerBuffers = innerList.getFieldBuffers(); + + // Verify inner list offset buffer has at least OFFSET_WIDTH (8) bytes + assertTrue( + innerBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, + "Inner LargeList offset buffer should have at least " + + LargeListVector.OFFSET_WIDTH + + " bytes for offset[0]"); + + // Verify offset[0] = 0 + assertEquals(0L, innerList.getOffsetBuffer().getLong(0)); + } + } + private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1fe4c59f63..035bd14532 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1379,6 +1379,45 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } + @Test + public void testNestedEmptyListOffsetBuffer() { + // Test that 3-level nested ListVector properly allocates offset buffers + // even when nested writers are never invoked. According to Arrow spec, + // offset buffer must have N+1 entries. Even when N=0, it should contain [0]. 
+ try (ListVector level0 = ListVector.empty("level0", allocator)) { + // Setup List>> - 3 levels + level0.addOrGetVector(FieldType.nullable(MinorType.LIST.getType())); + ListVector level1 = (ListVector) level0.getDataVector(); + level1.addOrGetVector(FieldType.nullable(MinorType.LIST.getType())); + ListVector level2 = (ListVector) level1.getDataVector(); + level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + // Only allocate level0 - simulates case where all nested levels are empty + level0.allocateNew(); + level0.setValueCount(0); + + // Verify all levels have properly allocated offset buffers + List level1Buffers = level1.getFieldBuffers(); + List level2Buffers = level2.getFieldBuffers(); + + assertTrue( + level1Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Level1 offset buffer should have at least " + + BaseRepeatedValueVector.OFFSET_WIDTH + + " bytes for offset[0]"); + + assertTrue( + level2Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Level2 offset buffer should have at least " + + BaseRepeatedValueVector.OFFSET_WIDTH + + " bytes for offset[0]"); + + // Verify offset[0] = 0 for all levels + assertEquals(0, level1.getOffsetBuffer().getInt(0)); + assertEquals(0, level2.getOffsetBuffer().getInt(0)); + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) { From 878c8d5d258cfb0de9cf8ad7aabde0c7d7d2c942 Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Tue, 20 Jan 2026 17:28:58 -0800 Subject: [PATCH 2/6] fix: restore allocation size --- .../java/org/apache/arrow/vector/complex/LargeListVector.java | 3 +++ .../main/java/org/apache/arrow/vector/complex/ListVector.java | 3 +++ 2 files changed, 6 insertions(+) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 29e930d889..08b2743f5a 100644 --- 
a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -280,7 +280,10 @@ public List getFieldBuffers() { // According to Arrow specification, offset buffer must have N+1 entries, // even when N=0, it should contain [0]. if (offsetBuffer.capacity() == 0) { + // Save and restore offsetAllocationSizeInBytes to avoid affecting subsequent allocateNew() + long savedOffsetAllocationSize = offsetAllocationSizeInBytes; offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); + offsetAllocationSizeInBytes = savedOffsetAllocationSize; } setReaderAndWriterIndex(); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 0890289e55..8c73b63a1a 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -238,7 +238,10 @@ public List getFieldBuffers() { // According to Arrow specification, offset buffer must have N+1 entries, // even when N=0, it should contain [0]. 
if (offsetBuffer.capacity() == 0) { + // Save and restore offsetAllocationSizeInBytes to avoid affecting subsequent allocateNew() + long savedOffsetAllocationSize = offsetAllocationSizeInBytes; offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); + offsetAllocationSizeInBytes = savedOffsetAllocationSize; } setReaderAndWriterIndex(); From b0d39cdcf1bf996aa6a41c76553abb6a6196aeef Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Wed, 21 Jan 2026 11:18:18 -0800 Subject: [PATCH 3/6] fix: change write index explicitly --- .../arrow/vector/complex/LargeListVector.java | 26 +++++-------------- .../arrow/vector/complex/ListVector.java | 26 +++++-------------- 2 files changed, 12 insertions(+), 40 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 08b2743f5a..bc069f49e3 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -275,21 +275,9 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers @Override public List getFieldBuffers() { List result = new ArrayList<>(2); - - // Ensure offset buffer has at least one entry for offset[0]. - // According to Arrow specification, offset buffer must have N+1 entries, - // even when N=0, it should contain [0]. 
- if (offsetBuffer.capacity() == 0) { - // Save and restore offsetAllocationSizeInBytes to avoid affecting subsequent allocateNew() - long savedOffsetAllocationSize = offsetAllocationSizeInBytes; - offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); - offsetAllocationSizeInBytes = savedOffsetAllocationSize; - } - setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); - return result; } @@ -318,14 +306,12 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long private void setReaderAndWriterIndex() { validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec - offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity())); - } else { - validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); - } + validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. + // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers + // in other libraries. According to Arrow spec, we should still output the offset buffer which + // is [0]. 
+ offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8c73b63a1a..697c6831ae 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -233,21 +233,9 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers @Override public List getFieldBuffers() { List result = new ArrayList<>(2); - - // Ensure offset buffer has at least one entry for offset[0]. - // According to Arrow specification, offset buffer must have N+1 entries, - // even when N=0, it should contain [0]. - if (offsetBuffer.capacity() == 0) { - // Save and restore offsetAllocationSizeInBytes to avoid affecting subsequent allocateNew() - long savedOffsetAllocationSize = offsetAllocationSizeInBytes; - offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH); - offsetAllocationSizeInBytes = savedOffsetAllocationSize; - } - setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); - return result; } @@ -276,14 +264,12 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long private void setReaderAndWriterIndex() { validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec - offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity())); - } else { - validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); - } + validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. 
+ // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers + // in other libraries. According to Arrow spec, we should still output the offset buffer which + // is [0]. + offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** From 9841039f418bfa40d9e95125b87ea565c5527b01 Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Wed, 21 Jan 2026 13:25:39 -0800 Subject: [PATCH 4/6] fix: comment --- .../org/apache/arrow/vector/complex/LargeListVector.java | 7 ++++++- .../java/org/apache/arrow/vector/complex/ListVector.java | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index bc069f49e3..92dd3eaef7 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -278,6 +278,7 @@ public List getFieldBuffers() { setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); + return result; } @@ -306,7 +307,11 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long private void setReaderAndWriterIndex() { validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); - validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + } // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. 
According to Arrow spec, we should still output the offset buffer which diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 697c6831ae..6c3993df63 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -236,6 +236,7 @@ public List getFieldBuffers() { setReaderAndWriterIndex(); result.add(validityBuffer); result.add(offsetBuffer); + return result; } @@ -264,7 +265,11 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long private void setReaderAndWriterIndex() { validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); - validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + } // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. 
According to Arrow spec, we should still output the offset buffer which From af5d01912f80229f0cff9054b76200046e0aebfc Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Wed, 21 Jan 2026 14:28:43 -0800 Subject: [PATCH 5/6] fix: allocateNew on inner arrays --- .../java/org/apache/arrow/vector/TestLargeListVector.java | 4 +++- .../test/java/org/apache/arrow/vector/TestListVector.java | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index d8fa53a30f..421547a0e3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1111,9 +1111,11 @@ public void testNestedEmptyLargeListOffsetBuffer() { LargeListVector innerList = (LargeListVector) outerList.getDataVector(); innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - // Allocate outer only - simulates case where inner is never written to + // Allocate both outer and inner - simulates case where inner is never written to outerList.allocateNew(); + innerList.allocateNew(); outerList.setValueCount(0); + innerList.setValueCount(0); // Get field buffers - this is what IPC serialization uses List innerBuffers = innerList.getFieldBuffers(); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 035bd14532..14e5aa78a8 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1392,9 +1392,13 @@ public void testNestedEmptyListOffsetBuffer() { ListVector level2 = (ListVector) level1.getDataVector(); level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - // Only allocate level0 - simulates case where all nested levels are empty + // Allocate all 
levels - simulates case where nested levels are never written to level0.allocateNew(); + level1.allocateNew(); + level2.allocateNew(); level0.setValueCount(0); + level1.setValueCount(0); + level2.setValueCount(0); // Verify all levels have properly allocated offset buffers List level1Buffers = level1.getFieldBuffers(); From 60cf20f8b4847b6faf647e70077fd3381340158f Mon Sep 17 00:00:00 2001 From: Yicong Huang Date: Wed, 21 Jan 2026 16:17:28 -0800 Subject: [PATCH 6/6] test: simplify tests --- .../arrow/vector/TestLargeListVector.java | 38 +++++--------- .../apache/arrow/vector/TestListVector.java | 49 +++++-------------- 2 files changed, 26 insertions(+), 61 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 421547a0e3..bf9bba9c78 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1101,34 +1101,22 @@ public void testCopyValueSafeForExtensionType() throws Exception { } @Test - public void testNestedEmptyLargeListOffsetBuffer() { - // Test that nested LargeListVector properly allocates offset buffer - // even when nested writers are never invoked. According to Arrow spec, - // offset buffer must have N+1 entries. Even when N=0, it should contain [0]. 
- try (LargeListVector outerList = LargeListVector.empty("outer", allocator)) { - // Setup LargeList> - outerList.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType())); - LargeListVector innerList = (LargeListVector) outerList.getDataVector(); - innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - // Allocate both outer and inner - simulates case where inner is never written to - outerList.allocateNew(); - innerList.allocateNew(); - outerList.setValueCount(0); - innerList.setValueCount(0); - - // Get field buffers - this is what IPC serialization uses - List innerBuffers = innerList.getFieldBuffers(); - - // Verify inner list offset buffer has at least OFFSET_WIDTH (8) bytes + public void testEmptyLargeListOffsetBuffer() { + // Test that LargeListVector has correct readableBytes after allocation. + // According to Arrow spec, offset buffer must have N+1 entries. + // Even when N=0, it should contain [0]. + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + List buffers = list.getFieldBuffers(); assertTrue( - innerBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, - "Inner LargeList offset buffer should have at least " + buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, + "Offset buffer should have at least " + LargeListVector.OFFSET_WIDTH + " bytes for offset[0]"); - - // Verify offset[0] = 0 - assertEquals(0L, innerList.getOffsetBuffer().getLong(0)); + assertEquals(0L, list.getOffsetBuffer().getLong(0)); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 14e5aa78a8..0c90b32abc 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1380,45 +1380,22 @@ public void 
testCopyValueSafeForExtensionType() throws Exception { } @Test - public void testNestedEmptyListOffsetBuffer() { - // Test that 3-level nested ListVector properly allocates offset buffers - // even when nested writers are never invoked. According to Arrow spec, - // offset buffer must have N+1 entries. Even when N=0, it should contain [0]. - try (ListVector level0 = ListVector.empty("level0", allocator)) { - // Setup List>> - 3 levels - level0.addOrGetVector(FieldType.nullable(MinorType.LIST.getType())); - ListVector level1 = (ListVector) level0.getDataVector(); - level1.addOrGetVector(FieldType.nullable(MinorType.LIST.getType())); - ListVector level2 = (ListVector) level1.getDataVector(); - level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - // Allocate all levels - simulates case where nested levels are never written to - level0.allocateNew(); - level1.allocateNew(); - level2.allocateNew(); - level0.setValueCount(0); - level1.setValueCount(0); - level2.setValueCount(0); - - // Verify all levels have properly allocated offset buffers - List level1Buffers = level1.getFieldBuffers(); - List level2Buffers = level2.getFieldBuffers(); - - assertTrue( - level1Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, - "Level1 offset buffer should have at least " - + BaseRepeatedValueVector.OFFSET_WIDTH - + " bytes for offset[0]"); - + public void testEmptyListOffsetBuffer() { + // Test that ListVector has correct readableBytes after allocation. + // According to Arrow spec, offset buffer must have N+1 entries. + // Even when N=0, it should contain [0]. 
+ try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + List buffers = list.getFieldBuffers(); assertTrue( - level2Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, - "Level2 offset buffer should have at least " + buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Offset buffer should have at least " + BaseRepeatedValueVector.OFFSET_WIDTH + " bytes for offset[0]"); - - // Verify offset[0] = 0 for all levels - assertEquals(0, level1.getOffsetBuffer().getInt(0)); - assertEquals(0, level2.getOffsetBuffer().getInt(0)); + assertEquals(0, list.getOffsetBuffer().getInt(0)); } }