diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
index 997b5a8b7..92dd3eaef 100644
--- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
+++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
@@ -309,11 +309,14 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
-      offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
     }
+    // The IPC serializer derives the readable bytes from `readerIndex` and `writerIndex`. If both
+    // were 0 for an empty vector, 0 bytes would be written to the IPC stream, which crashes IPC
+    // readers in other libraries. Per the Arrow spec the offset buffer always holds
+    // valueCount + 1 entries, so an empty vector must still emit the single offset [0].
+    offsetBuffer.writerIndex((valueCount + 1L) * OFFSET_WIDTH);
   }
 
   /**
diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index 93a313ef4..6c3993df6 100644
--- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -267,11 +267,14 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
-      offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
     }
+    // The IPC serializer derives the readable bytes from `readerIndex` and `writerIndex`. If both
+    // were 0 for an empty vector, 0 bytes would be written to the IPC stream, which crashes IPC
+    // readers in other libraries. Per the Arrow spec the offset buffer always holds
+    // valueCount + 1 entries, so an empty vector must still emit the single offset [0].
+    offsetBuffer.writerIndex((valueCount + 1L) * OFFSET_WIDTH);
   }
 
   /**
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
index ccc0d3e17..bf9bba9c7 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -1100,6 +1100,26 @@ public void testCopyValueSafeForExtensionType() throws Exception {
     }
   }
 
+  @Test
+  public void testEmptyLargeListOffsetBuffer() {
+    // Test that an empty LargeListVector still exposes a readable offset buffer.
+    // According to the Arrow spec, the offset buffer must have N+1 entries.
+    // Even when N=0, it should contain [0].
+    try (LargeListVector list = LargeListVector.empty("list", allocator)) {
+      list.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+      list.allocateNew();
+      list.setValueCount(0);
+
+      List<ArrowBuf> buffers = list.getFieldBuffers();
+      assertTrue(
+          buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH,
+          "Offset buffer should have at least "
+              + LargeListVector.OFFSET_WIDTH
+              + " bytes for offset[0]");
+      assertEquals(0L, list.getOffsetBuffer().getLong(0));
+    }
+  }
+
   private void writeIntValues(UnionLargeListWriter writer, int[] values) {
     writer.startList();
     for (int v : values) {
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index 1fe4c59f6..0c90b32ab 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -1379,6 +1379,26 @@ public void testCopyValueSafeForExtensionType() throws Exception {
     }
   }
 
+  @Test
+  public void testEmptyListOffsetBuffer() {
+    // Test that an empty ListVector still exposes a readable offset buffer.
+    // According to the Arrow spec, the offset buffer must have N+1 entries.
+    // Even when N=0, it should contain [0].
+    try (ListVector list = ListVector.empty("list", allocator)) {
+      list.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+      list.allocateNew();
+      list.setValueCount(0);
+
+      List<ArrowBuf> buffers = list.getFieldBuffers();
+      assertTrue(
+          buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
+          "Offset buffer should have at least "
+              + BaseRepeatedValueVector.OFFSET_WIDTH
+              + " bytes for offset[0]");
+      assertEquals(0, list.getOffsetBuffer().getInt(0));
+    }
+  }
+
   private void writeIntValues(UnionListWriter writer, int[] values) {
     writer.startList();
     for (int v : values) {