From d4d92e4896d8108aef25c6ef199e87890d027b22 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 1 Aug 2024 14:42:37 +0530 Subject: [PATCH] GH-41569: [Java] ListViewVector Implementation for UnionListViewReader (#43077) ### Rationale for this change This PR contains multiple components that are required to add C Data interface support for `ListViewVector`. It resolves the following major issues associated with that work. #### What changes are included in this PR? - [x] https://github.com/apache/arrow/issues/41269 - [x] https://github.com/apache/arrow/issues/41270 Apart from that, the following features have also been added: - [x] JSON Writer/Reader - [x] Complex Writer functionality ### Are these changes tested? Yes ### Are there any user-facing changes? Yes, we are introducing the usage of `listView` instead of `list`, `startListView` instead of `startList` and `endListView` instead of `endList` for `ListView`-related APIs when building a `ListViewVector`. 
* GitHub Issue: #41569 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../templates/AbstractFieldWriter.java | 22 + .../AbstractPromotableFieldWriter.java | 22 + .../main/codegen/templates/BaseWriter.java | 5 + .../main/codegen/templates/ComplexCopier.java | 7 + .../codegen/templates/DenseUnionWriter.java | 21 + .../templates/PromotableViewWriter.java | 167 +++ .../templates}/PromotableWriter.java | 142 +- .../main/codegen/templates/StructWriters.java | 28 + .../codegen/templates/UnionListWriter.java | 32 + .../main/codegen/templates/UnionReader.java | 13 + .../main/codegen/templates/UnionVector.java | 17 + .../codegen/templates/UnionViewWriter.java | 210 +++ .../main/codegen/templates/UnionWriter.java | 72 +- .../complex/AbstractContainerVector.java | 5 + .../complex/BaseRepeatedValueViewVector.java | 2 +- .../arrow/vector/complex/ListViewVector.java | 211 ++- .../complex/impl/ComplexWriterImpl.java | 40 + .../complex/impl/UnionListViewReader.java | 111 ++ .../arrow/vector/ipc/JsonFileReader.java | 8 +- .../arrow/vector/ipc/JsonFileWriter.java | 12 +- .../arrow/vector/TestListViewVector.java | 681 ++++++++-- .../apache/arrow/vector/TestValueVector.java | 33 + .../complex/writer/TestComplexWriter.java | 1170 ++++++++++++----- .../apache/arrow/vector/ipc/BaseFileTest.java | 16 +- .../testing/ValueVectorDataPopulator.java | 32 + 25 files changed, 2513 insertions(+), 566 deletions(-) create mode 100644 java/vector/src/main/codegen/templates/PromotableViewWriter.java rename java/vector/src/main/{java/org/apache/arrow/vector/complex/impl => codegen/templates}/PromotableWriter.java (81%) create mode 100644 java/vector/src/main/codegen/templates/UnionViewWriter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index 6c2368117f7c2..5ebfb6877fc5b 100644 --- 
a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -67,6 +67,16 @@ public void endList() { throw new IllegalStateException(String.format("You tried to end a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } + @Override + public void startListView() { + throw new IllegalStateException(String.format("You tried to start a list view when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); + } + + @Override + public void endListView() { + throw new IllegalStateException(String.format("You tried to end a list view when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); + } + @Override public void startMap() { throw new IllegalStateException(String.format("You tried to start a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); @@ -184,6 +194,12 @@ public ListWriter list() { return null; } + @Override + public ListWriter listView() { + fail("ListView"); + return null; + } + @Override public MapWriter map() { fail("Map"); @@ -202,6 +218,12 @@ public ListWriter list(String name) { return null; } + @Override + public ListWriter listView(String name) { + fail("ListView"); + return null; + } + @Override public MapWriter map(String name) { fail("Map"); diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 59f9fb5b8098d..06cb235f7dd99 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -76,6 +76,17 @@ public void endList() { setPosition(idx() + 1); } + @Override + public void startListView() { + getWriter(MinorType.LISTVIEW).startListView(); + } + + @Override + public void endListView() { + getWriter(MinorType.LISTVIEW).endListView(); + 
setPosition(idx() + 1); + } + @Override public void startMap() { getWriter(MinorType.MAP).startMap(); @@ -267,6 +278,11 @@ public ListWriter list() { return getWriter(MinorType.LIST).list(); } + @Override + public ListWriter listView() { + return getWriter(MinorType.LISTVIEW).listView(); + } + @Override public MapWriter map() { return getWriter(MinorType.LIST).map(); @@ -287,6 +303,11 @@ public ListWriter list(String name) { return getWriter(MinorType.STRUCT).list(name); } + @Override + public ListWriter listView(String name) { + return getWriter(MinorType.STRUCT).listView(name); + } + @Override public MapWriter map(String name) { return getWriter(MinorType.STRUCT).map(name); @@ -296,6 +317,7 @@ public MapWriter map(String name) { public MapWriter map(String name, boolean keysSorted) { return getWriter(MinorType.STRUCT).map(name, keysSorted); } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index 35df256b324b5..458a4df1eec82 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -62,6 +62,7 @@ public interface StructWriter extends BaseWriter { void copyReaderToField(String name, FieldReader reader); StructWriter struct(String name); ListWriter list(String name); + ListWriter listView(String name); MapWriter map(String name); MapWriter map(String name, boolean keysSorted); void start(); @@ -71,8 +72,11 @@ public interface StructWriter extends BaseWriter { public interface ListWriter extends BaseWriter { void startList(); void endList(); + void startListView(); + void endListView(); StructWriter struct(); ListWriter list(); + ListWriter listView(); MapWriter map(); MapWriter map(boolean keysSorted); void copyReader(FieldReader reader); @@ -106,6 +110,7 @@ 
public interface ComplexWriter { void copyReader(FieldReader reader); StructWriter rootAsStruct(); ListWriter rootAsList(); + ListWriter rootAsListView(); MapWriter rootAsMap(boolean keysSorted); void setPosition(int index); diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java index 1a3ba940e7977..1eebba018b321 100644 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ b/java/vector/src/main/codegen/templates/ComplexCopier.java @@ -51,6 +51,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { switch (mt) { case LIST: + case LISTVIEW: case LARGELIST: case FIXED_SIZE_LIST: if (reader.isSet()) { @@ -158,6 +159,8 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr return (FieldWriter) writer.list(name); case MAP: return (FieldWriter) writer.map(name); + case LISTVIEW: + return (FieldWriter) writer.listView(name); default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -180,6 +183,8 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter case MAP: case NULL: return (FieldWriter) writer.list(); + case LISTVIEW: + return (FieldWriter) writer.listView(); default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -201,6 +206,8 @@ private static FieldWriter getMapWriterForReader(FieldReader reader, MapWriter w case LIST: case NULL: return (FieldWriter) writer.list(); + case LISTVIEW: + return (FieldWriter) writer.listView(); case MAP: return (FieldWriter) writer.map(false); default: diff --git a/java/vector/src/main/codegen/templates/DenseUnionWriter.java b/java/vector/src/main/codegen/templates/DenseUnionWriter.java index e69a62a9e0f6f..8515b759e669e 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionWriter.java +++ b/java/vector/src/main/codegen/templates/DenseUnionWriter.java @@ -83,6 +83,18 @@ public void endList() { 
getListWriter(typeId).endList(); } + @Override + public void startListView() { + byte typeId = data.getTypeId(idx()); + getListViewWriter(typeId).startList(); + } + + @Override + public void endListView() { + byte typeId = data.getTypeId(idx()); + getListViewWriter(typeId).endList(); + } + private StructWriter getStructWriter(byte typeId) { StructWriter structWriter = (StructWriter) writers[typeId]; if (structWriter == null) { @@ -106,6 +118,15 @@ private ListWriter getListWriter(byte typeId) { return listWriter; } + private ListWriter getListViewWriter(byte typeId) { + ListWriter listWriter = (ListWriter) writers[typeId]; + if (listWriter == null) { + listWriter = new UnionListViewWriter((ListViewVector) data.getVectorByType(typeId), nullableStructWriterFactory); + writers[typeId] = listWriter; + } + return listWriter; + } + public ListWriter asList(byte typeId) { data.setTypeId(idx(), typeId); return getListWriter(typeId); diff --git a/java/vector/src/main/codegen/templates/PromotableViewWriter.java b/java/vector/src/main/codegen/templates/PromotableViewWriter.java new file mode 100644 index 0000000000000..373abbe4b98f8 --- /dev/null +++ b/java/vector/src/main/codegen/templates/PromotableViewWriter.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +<@pp.dropOutputFile /> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/PromotableViewWriter.java" /> + +<#include "/@includes/license.ftl" /> + + package org.apache.arrow.vector.complex.impl; + +import java.util.Locale; +<#include "/@includes/vv_imports.ftl" /> + +/** + * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This + * inner field writer can start as a specific type, and this class will promote the writer to a + * UnionWriter if a call is made that the specifically typed writer cannot handle. A new UnionVector + * is created, wrapping the original vector, and replaces the original vector in the parent vector, + * which can be either an AbstractStructVector or a ListViewVector. + * + *

The writer used can either be for single elements (struct) or lists. + */ +public class PromotableViewWriter extends PromotableWriter { + + public PromotableViewWriter(ValueVector v, FixedSizeListVector fixedListVector) { + super(v, fixedListVector); + } + + public PromotableViewWriter(ValueVector v, FixedSizeListVector fixedListVector, + NullableStructWriterFactory nullableStructWriterFactory) { + super(v, fixedListVector, nullableStructWriterFactory); + } + + public PromotableViewWriter(ValueVector v, LargeListVector largeListVector) { + super(v, largeListVector); + } + + public PromotableViewWriter(ValueVector v, LargeListVector largeListVector, + NullableStructWriterFactory nullableStructWriterFactory) { + super(v, largeListVector, nullableStructWriterFactory); + } + + public PromotableViewWriter(ValueVector v, ListVector listVector) { + super(v, listVector); + } + + public PromotableViewWriter(ValueVector v, ListVector listVector, + NullableStructWriterFactory nullableStructWriterFactory) { + super(v, listVector, nullableStructWriterFactory); + } + + public PromotableViewWriter(ValueVector v, ListViewVector listViewVector, + NullableStructWriterFactory nullableStructWriterFactory) { + super(v, listViewVector, nullableStructWriterFactory); + } + + public PromotableViewWriter(ValueVector v, AbstractStructVector parentContainer) { + super(v, parentContainer); + } + + public PromotableViewWriter(ValueVector v, AbstractStructVector parentContainer, + NullableStructWriterFactory nullableStructWriterFactory) { + super(v, parentContainer, nullableStructWriterFactory); + } + + @Override + protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { + if (state == State.UNION) { + if (requiresArrowType(type)) { + writer = ((UnionWriter) writer).toViewWriter(); + ((UnionViewWriter) writer).getWriter(type, arrowType); + } else { + writer = ((UnionWriter) writer).toViewWriter(); + ((UnionViewWriter) writer).getWriter(type); + } + } else if (state == 
State.UNTYPED) { + if (type == null) { + // ??? + return null; + } + if (arrowType == null) { + arrowType = type.getType(); + } + FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); + ValueVector v; + if (listVector != null) { + v = listVector.addOrGetVector(fieldType).getVector(); + } else if (fixedListVector != null) { + v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else if (listViewVector != null) { + v = listViewVector.addOrGetVector(fieldType).getVector(); + } else { + v = largeListVector.addOrGetVector(fieldType).getVector(); + } + v.allocateNew(); + setWriter(v); + writer.setPosition(position); + } else if (type != this.type) { + promoteToUnion(); + if (requiresArrowType(type)) { + writer = ((UnionWriter) writer).toViewWriter(); + ((UnionViewWriter) writer).getWriter(type, arrowType); + } else { + writer = ((UnionWriter) writer).toViewWriter(); + ((UnionViewWriter) writer).getWriter(type); + } + } + return writer; + } + + @Override + public StructWriter struct() { + return getWriter(MinorType.LISTVIEW).struct(); + } + + <#list vv.types as type><#list type.minor as minor> + <#assign lowerName = minor.class?uncap_first /> + <#if lowerName == "int" ><#assign lowerName = "integer" /> + <#assign upperName = minor.class?upper_case /> + <#assign capName = minor.class?cap_first /> + + @Override + public ${capName}Writer ${lowerName}() { + return getWriter(MinorType.LISTVIEW).${lowerName}(); + } + + + + @Override + public void allocate() { + getWriter().allocate(); + } + + @Override + public void clear() { + getWriter().clear(); + } + + @Override + public Field getField() { + return getWriter().getField(); + } + + @Override + public int getValueCapacity() { + return getWriter().getValueCapacity(); + } + + @Override + public void close() throws Exception { + getWriter().close(); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java 
b/java/vector/src/main/codegen/templates/PromotableWriter.java similarity index 81% rename from java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java rename to java/vector/src/main/codegen/templates/PromotableWriter.java index 7fd0def967388..82bd3c5345cdd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/codegen/templates/PromotableWriter.java @@ -14,32 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +<@pp.dropOutputFile /> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/PromotableWriter.java" /> + +<#include "/@includes/license.ftl" /> + package org.apache.arrow.vector.complex.impl; -import java.math.BigDecimal; -import java.nio.ByteBuffer; import java.util.Locale; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.AbstractStructVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.Decimal256Holder; -import org.apache.arrow.vector.holders.DecimalHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; +<#include 
"/@includes/vv_imports.ftl" /> /** * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This @@ -52,27 +36,27 @@ */ public class PromotableWriter extends AbstractPromotableFieldWriter { - private final AbstractStructVector parentContainer; - private final ListVector listVector; - private final ListViewVector listViewVector; - private final FixedSizeListVector fixedListVector; - private final LargeListVector largeListVector; - private final NullableStructWriterFactory nullableStructWriterFactory; - private int position; - private static final int MAX_DECIMAL_PRECISION = 38; - private static final int MAX_DECIMAL256_PRECISION = 76; - - private enum State { + protected final AbstractStructVector parentContainer; + protected final ListVector listVector; + protected final ListViewVector listViewVector; + protected final FixedSizeListVector fixedListVector; + protected final LargeListVector largeListVector; + protected final NullableStructWriterFactory nullableStructWriterFactory; + protected int position; + protected static final int MAX_DECIMAL_PRECISION = 38; + protected static final int MAX_DECIMAL256_PRECISION = 76; + + protected enum State { UNTYPED, SINGLE, UNION } - private MinorType type; - private ValueVector vector; - private UnionVector unionVector; - private State state; - private FieldWriter writer; + protected MinorType type; + protected ValueVector vector; + protected UnionVector unionVector; + protected State state; + protected FieldWriter writer; /** * Constructs a new instance. 
@@ -234,7 +218,7 @@ public void setAddVectorAsNullable(boolean nullable) { } } - private void setWriter(ValueVector v) { + protected void setWriter(ValueVector v) { state = State.SINGLE; vector = v; type = v.getMinorType(); @@ -245,6 +229,9 @@ private void setWriter(ValueVector v) { case LIST: writer = new UnionListWriter((ListVector) vector, nullableStructWriterFactory); break; + case LISTVIEW: + writer = new UnionListViewWriter((ListViewVector) vector, nullableStructWriterFactory); + break; case MAP: writer = new UnionMapWriter((MapVector) vector); break; @@ -277,7 +264,7 @@ public void setPosition(int index) { } } - private boolean requiresArrowType(MinorType type) { + protected boolean requiresArrowType(MinorType type) { return type == MinorType.DECIMAL || type == MinorType.MAP || type == MinorType.DURATION @@ -336,7 +323,7 @@ protected FieldWriter getWriter() { return writer; } - private FieldWriter promoteToUnion() { + protected FieldWriter promoteToUnion() { String name = vector.getField().getName(); TransferPair tp = vector.getTransferPair( @@ -369,76 +356,76 @@ private FieldWriter promoteToUnion() { @Override public void write(DecimalHolder holder) { getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal(MAX_DECIMAL_PRECISION, holder.scale, /*bitWidth=*/ 128)) + MinorType.DECIMAL, + new ArrowType.Decimal(MAX_DECIMAL_PRECISION, holder.scale, /*bitWidth=*/ 128)) .write(holder); } @Override public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal( - MAX_DECIMAL_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 128)) + MinorType.DECIMAL, + new ArrowType.Decimal( + MAX_DECIMAL_PRECISION, + ((ArrowType.Decimal) arrowType).getScale(), + /*bitWidth=*/ 128)) .writeDecimal(start, buffer, arrowType); } @Override public void writeDecimal(BigDecimal value) { getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal(MAX_DECIMAL_PRECISION, value.scale(), /*bitWidth=*/ 128)) 
+ MinorType.DECIMAL, + new ArrowType.Decimal(MAX_DECIMAL_PRECISION, value.scale(), /*bitWidth=*/ 128)) .writeDecimal(value); } @Override public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) { getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal( - MAX_DECIMAL_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 128)) + MinorType.DECIMAL, + new ArrowType.Decimal( + MAX_DECIMAL_PRECISION, + ((ArrowType.Decimal) arrowType).getScale(), + /*bitWidth=*/ 128)) .writeBigEndianBytesToDecimal(value, arrowType); } @Override public void write(Decimal256Holder holder) { getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, holder.scale, /*bitWidth=*/ 256)) + MinorType.DECIMAL256, + new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, holder.scale, /*bitWidth=*/ 256)) .write(holder); } @Override public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) { getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal( - MAX_DECIMAL256_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 256)) + MinorType.DECIMAL256, + new ArrowType.Decimal( + MAX_DECIMAL256_PRECISION, + ((ArrowType.Decimal) arrowType).getScale(), + /*bitWidth=*/ 256)) .writeDecimal256(start, buffer, arrowType); } @Override public void writeDecimal256(BigDecimal value) { getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, value.scale(), /*bitWidth=*/ 256)) + MinorType.DECIMAL256, + new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, value.scale(), /*bitWidth=*/ 256)) .writeDecimal256(value); } @Override public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal( - MAX_DECIMAL256_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 256)) + MinorType.DECIMAL256, + new ArrowType.Decimal( + MAX_DECIMAL256_PRECISION, + ((ArrowType.Decimal) arrowType).getScale(), + /*bitWidth=*/ 256)) 
.writeBigEndianBytesToDecimal256(value, arrowType); } @@ -526,4 +513,19 @@ public int getValueCapacity() { public void close() throws Exception { getWriter().close(); } + + /** + * Convert the writer to a PromotableViewWriter. + * + * @return The writer as a PromotableViewWriter. + */ + public PromotableViewWriter toViewWriter() { + PromotableViewWriter promotableViewWriter = new PromotableViewWriter(unionVector, parentContainer, nullableStructWriterFactory); + promotableViewWriter.position = position; + promotableViewWriter.writer = writer; + promotableViewWriter.state = state; + promotableViewWriter.unionVector = unionVector; + promotableViewWriter.type = MinorType.LISTVIEW; + return promotableViewWriter; + } } diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index b676173ac39d9..3e6258a0c6c0e 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -69,6 +69,9 @@ public class ${mode}StructWriter extends AbstractFieldWriter { case LIST: list(child.getName()); break; + case LISTVIEW: + listView(child.getName()); + break; case MAP: { ArrowType.Map arrowType = (ArrowType.Map) child.getType(); map(child.getName(), arrowType.getKeysSorted()); @@ -200,6 +203,31 @@ public ListWriter list(String name) { return writer; } + @Override + public ListWriter listView(String name) { + String finalName = handleCase(name); + FieldWriter writer = fields.get(finalName); + int vectorCount = container.size(); + if(writer == null) { + FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LISTVIEW.getType(), null, null); + writer = new PromotableViewWriter(container.addOrGet(name, fieldType, ListViewVector.class), container, getNullableStructWriterFactory()); + if (container.size() > vectorCount) { + writer.allocate(); + } + writer.setPosition(idx()); + fields.put(finalName, writer); + } else { + if (writer 
instanceof PromotableViewWriter) { + // ensure writers are initialized + ((PromotableViewWriter) writer).getWriter(MinorType.LISTVIEW); + } else { + writer = ((PromotableWriter) writer).toViewWriter(); + ((PromotableViewWriter) writer).getWriter(MinorType.LISTVIEW); + } + } + return writer; + } + @Override public MapWriter map(String name) { return map(name, false); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index eeb964c055f71..e40c70eaffdc1 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -69,7 +69,11 @@ public class Union${listName}Writer extends AbstractFieldWriter { public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) { this.vector = vector; + <#if listName = "ListView"> + this.writer = new PromotableViewWriter(vector.getDataVector(), vector, nullableStructWriterFactory); + <#else> this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory); + } public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) { @@ -154,6 +158,17 @@ public ListWriter list(String name) { return listWriter; } + @Override + public ListWriter listView() { + return writer; + } + + @Override + public ListWriter listView(String name) { + ListWriter listWriter = writer.listView(name); + return listWriter; + } + @Override public StructWriter struct(String name) { StructWriter structWriter = writer.struct(name); @@ -215,6 +230,23 @@ public void endList() { setPosition(idx() + 1); listStarted = false; } + + public void startListView() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endListView() { + int sizeUptoIdx = 0; + for (int i = 0; i < idx(); i++) { + sizeUptoIdx += 
vector.getSizeBuffer().getInt(i * SIZE_WIDTH); + } + vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); + setPosition(idx() + 1); + listStarted = false; + } <#else> @Override public void startList() { diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 243bd832255c2..615ea3a536a15 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -91,6 +91,8 @@ private FieldReader getReaderForIndex(int index) { return (FieldReader) getStruct(); case LIST: return (FieldReader) getList(); + case LISTVIEW: + return (FieldReader) getListView(); case MAP: return (FieldReader) getMap(); <#list vv.types as type> @@ -130,6 +132,17 @@ private FieldReader getList() { return listReader; } + private UnionListViewReader listViewReader; + + private FieldReader getListView() { + if (listViewReader == null) { + listViewReader = new UnionListViewReader(data.getListView()); + listViewReader.setPosition(idx()); + readers[MinorType.LISTVIEW.ordinal()] = listViewReader; + } + return listViewReader; + } + private UnionMapReader mapReader; private FieldReader getMap() { diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index ea79c5c2fba76..e0fd0e4644313 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -94,6 +94,7 @@ public class UnionVector extends AbstractContainerVector implements FieldVector private StructVector structVector; private ListVector listVector; + private ListViewVector listViewVector; private MapVector mapVector; private FieldReader reader; @@ -335,6 +336,20 @@ public ListVector getList() { return listVector; } + public ListViewVector getListView() { + if (listViewVector == null) { + int vectorCount = internalStruct.size(); + listViewVector 
= addOrGet(MinorType.LISTVIEW, ListViewVector.class); + if (internalStruct.size() > vectorCount) { + listViewVector.allocateNew(); + if (callBack != null) { + callBack.doWork(); + } + } + } + return listViewVector; + } + public MapVector getMap() { if (mapVector == null) { throw new IllegalArgumentException("No map present. Provide ArrowType argument to create a new vector"); @@ -702,6 +717,8 @@ public ValueVector getVectorByType(int typeId, ArrowType arrowType) { return getStruct(); case LIST: return getList(); + case LISTVIEW: + return getListView(); case MAP: return getMap(name, arrowType); default: diff --git a/java/vector/src/main/codegen/templates/UnionViewWriter.java b/java/vector/src/main/codegen/templates/UnionViewWriter.java new file mode 100644 index 0000000000000..7b834d8b6cd86 --- /dev/null +++ b/java/vector/src/main/codegen/templates/UnionViewWriter.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +<@pp.dropOutputFile /> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionViewWriter.java" /> + +package org.apache.arrow.vector.complex.impl; + +<#include "/@includes/vv_imports.ftl" /> +import org.apache.arrow.vector.complex.writer.BaseWriter; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; +import org.apache.arrow.vector.types.Types; + +<#function is_timestamp_tz type> + <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> + + +/* + * This class is generated using freemarker and the ${.template_name} template. + */ +@SuppressWarnings("unused") +public class UnionViewWriter extends UnionWriter { + + public UnionViewWriter(UnionVector vector) { + this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); + } + + public UnionViewWriter(UnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) { + super(vector, nullableStructWriterFactory); + } + + @Override + public StructWriter struct() { + data.setType(idx(), MinorType.LISTVIEW); + getListWriter().setPosition(idx()); + return getListWriter().struct(); + } + + <#list vv.types as type> + <#list type.minor as minor> + <#assign name = minor.class?cap_first /> + <#assign fields = minor.fields!type.fields /> + <#assign uncappedName = name?uncap_first/> + <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> + <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> + + private ${name}Writer ${name?uncap_first}Writer; + + <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> + private ${name}Writer get${name}Writer(ArrowType arrowType) { + if (${uncappedName}Writer == null) { + ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector(arrowType)); + ${uncappedName}Writer.setPosition(idx()); + writers.add(${uncappedName}Writer); + } + return ${uncappedName}Writer; + } + + public ${name}Writer as${name}(ArrowType arrowType) { + data.setType(idx(), MinorType.${name?upper_case}); + return get${name}Writer(arrowType); + } + <#else> + private ${name}Writer get${name}Writer() { + if (${uncappedName}Writer == null) { + ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector()); + ${uncappedName}Writer.setPosition(idx()); + writers.add(${uncappedName}Writer); + } + return ${uncappedName}Writer; + } + + public ${name}Writer as${name}() { + data.setType(idx(), MinorType.${name?upper_case}); + return get${name}Writer(); + } + + + @Override + public void write(${name}Holder holder) { + data.setType(idx(), MinorType.${name?upper_case}); + <#if minor.class?starts_with("Decimal")> + ArrowType arrowType = new ArrowType.Decimal(holder.precision, holder.scale, ${name}Holder.WIDTH * 8); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, , arrowType); + <#elseif is_timestamp_tz(minor.class)> + ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); + ArrowType arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), holder.timezone); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write(holder); + 
<#elseif minor.class == "Duration"> + ArrowType arrowType = new ArrowType.Duration(holder.unit); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write(holder); + <#elseif minor.class == "FixedSizeBinary"> + ArrowType arrowType = new ArrowType.FixedSizeBinary(holder.byteWidth); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write(holder); + <#else> + get${name}Writer().setPosition(idx()); + get${name}Writer().write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); + + } + + public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, <#if minor.class?starts_with("Decimal")>, ArrowType arrowType) { + data.setType(idx(), MinorType.${name?upper_case}); + <#if minor.class?starts_with("Decimal")> + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, , arrowType); + <#elseif is_timestamp_tz(minor.class)> + ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); + ArrowType arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), "UTC"); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); + <#elseif minor.class == "Duration" || minor.class == "FixedSizeBinary"> + // This is expected to throw. There's nothing more that we can do here since we can't infer any + // sort of default unit for the Duration or a default width for the FixedSizeBinary types. 
+ ArrowType arrowType = MinorType.${name?upper_case}.getType(); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); + <#else> + get${name}Writer().setPosition(idx()); + get${name}Writer().write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); + + } + <#if minor.class?starts_with("Decimal")> + public void write${name}(${friendlyType} value) { + data.setType(idx(), MinorType.${name?upper_case}); + ArrowType arrowType = new ArrowType.Decimal(value.precision(), value.scale(), ${name}Vector.TYPE_WIDTH * 8); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).write${name}(value); + } + + public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType) { + data.setType(idx(), MinorType.${name?upper_case}); + get${name}Writer(arrowType).setPosition(idx()); + get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType); + } + <#elseif minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(${friendlyType} value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public 
void write${minor.class}(String value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + + + + + <#list vv.types as type><#list type.minor as minor> + <#assign lowerName = minor.class?uncap_first /> + <#if lowerName == "int" ><#assign lowerName = "integer" /> + <#assign upperName = minor.class?upper_case /> + <#assign capName = minor.class?cap_first /> + <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> + + @Override + public ${capName}Writer ${lowerName}() { + data.setType(idx(), MinorType.LISTVIEW); + getListViewWriter().setPosition(idx()); + return getListViewWriter().${lowerName}(); + } + + +} diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java index 08dbf24324b17..bfe97e2770553 100644 --- a/java/vector/src/main/codegen/templates/UnionWriter.java +++ b/java/vector/src/main/codegen/templates/UnionWriter.java @@ -42,12 +42,13 @@ @SuppressWarnings("unused") public class UnionWriter extends AbstractFieldWriter implements FieldWriter { - UnionVector data; - private StructWriter structWriter; - private UnionListWriter listWriter; - private UnionMapWriter mapWriter; - private List writers = new java.util.ArrayList<>(); - private final NullableStructWriterFactory nullableStructWriterFactory; + protected UnionVector data; + protected StructWriter structWriter; + protected UnionListWriter listWriter; + protected UnionListViewWriter listViewWriter; + protected UnionMapWriter mapWriter; + protected List writers = new java.util.ArrayList<>(); + protected final NullableStructWriterFactory nullableStructWriterFactory; public UnionWriter(UnionVector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); @@ -58,6 +59,22 @@ public UnionWriter(UnionVector vector, NullableStructWriterFactory nullableStruc 
this.nullableStructWriterFactory = nullableStructWriterFactory; } + /** + * Convert the UnionWriter to a UnionViewWriter. + * + * @return the converted UnionViewWriter + */ + public UnionViewWriter toViewWriter() { + UnionViewWriter unionViewWriter = new UnionViewWriter(data, nullableStructWriterFactory); + unionViewWriter.structWriter = structWriter; + unionViewWriter.listWriter = listWriter; + unionViewWriter.listViewWriter = listViewWriter; + unionViewWriter.mapWriter = mapWriter; + unionViewWriter.writers = writers; + unionViewWriter.setPosition(this.getPosition()); + return unionViewWriter; + } + @Override public void setPosition(int index) { super.setPosition(index); @@ -89,6 +106,17 @@ public void endList() { getListWriter().endList(); } + @Override + public void startListView() { + getListViewWriter().startListView(); + data.setType(idx(), MinorType.LISTVIEW); + } + + @Override + public void endListView() { + getListViewWriter().endListView(); + } + @Override public void startMap() { getMapWriter().startMap(); @@ -134,7 +162,7 @@ public StructWriter asStruct() { return getStructWriter(); } - private ListWriter getListWriter() { + protected ListWriter getListWriter() { if (listWriter == null) { listWriter = new UnionListWriter(data.getList(), nullableStructWriterFactory); listWriter.setPosition(idx()); @@ -143,11 +171,25 @@ private ListWriter getListWriter() { return listWriter; } + protected ListWriter getListViewWriter() { + if (listViewWriter == null) { + listViewWriter = new UnionListViewWriter(data.getListView(), nullableStructWriterFactory); + listViewWriter.setPosition(idx()); + writers.add(listViewWriter); + } + return listViewWriter; + } + public ListWriter asList() { data.setType(idx(), MinorType.LIST); return getListWriter(); } + public ListWriter asListView() { + data.setType(idx(), MinorType.LISTVIEW); + return getListViewWriter(); + } + private MapWriter getMapWriter() { if (mapWriter == null) { mapWriter = new UnionMapWriter(data.getMap(new 
ArrowType.Map(false))); @@ -181,6 +223,8 @@ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) { return getStructWriter(); case LIST: return getListWriter(); + case LISTVIEW: + return getListViewWriter(); case MAP: return getMapWriter(arrowType); <#list vv.types as type> @@ -367,6 +411,20 @@ public ListWriter list(String name) { return getStructWriter().list(name); } + @Override + public ListWriter listView() { + data.setType(idx(), MinorType.LISTVIEW); + getListViewWriter().setPosition(idx()); + return getListViewWriter().listView(); + } + + @Override + public ListWriter listView(String name) { + data.setType(idx(), MinorType.STRUCT); + getStructWriter().setPosition(idx()); + return getStructWriter().listView(name); + } + @Override public StructWriter struct(String name) { data.setType(idx(), MinorType.STRUCT); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index 0cefbe4004b82..a6a71cf1a4190 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -123,6 +124,10 @@ public ListVector addOrGetList(String name) { return addOrGet(name, FieldType.nullable(new List()), ListVector.class); } + public ListViewVector addOrGetListView(String name) { + return addOrGet(name, FieldType.nullable(new ListView()), ListViewVector.class); + } + public UnionVector addOrGetUnion(String 
name) { return addOrGet(name, FieldType.nullable(MinorType.UNION.getType()), UnionVector.class); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index 0040d12811258..031cc8037bb8b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -101,7 +101,7 @@ private void allocateBuffers() { sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); } - private ArrowBuf allocateBuffers(final long size) { + protected ArrowBuf allocateBuffers(final long size) { final int curSize = (int) size; ArrowBuf buffer = allocator.buffer(curSize); buffer.readerIndex(0); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 864d08a661cd2..d719c9b1a9a4e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -32,16 +32,20 @@ import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.ComplexCopier; +import 
org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -73,7 +77,7 @@ public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector, ValueIterableVector> { protected ArrowBuf validityBuffer; - protected UnionListReader reader; + protected UnionListViewReader reader; private CallBack callBack; protected Field field; protected int validityAllocationSizeInBytes; @@ -245,7 +249,9 @@ public List getFieldBuffers() { */ @Override public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet"); + exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); + exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); + exportBuffer(sizeBuffer, buffers, buffersPtr, nullValue, true); } @Override @@ -330,16 +336,22 @@ private long getNewAllocationSize(int currentBufferCapacity) { @Override public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - // TODO: https://github.com/apache/arrow/issues/41270 - throw new UnsupportedOperationException( - "ListViewVector does not support copyFromSafe operation yet."); + copyFrom(inIndex, outIndex, from); + } + + @Override + public OUT accept(VectorVisitor visitor, IN value) { + throw new UnsupportedOperationException("ListViewVector does not support visitor pattern."); } @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { - // TODO: https://github.com/apache/arrow/issues/41270 - throw new UnsupportedOperationException( - "ListViewVector does not support copyFrom operation yet."); + 
Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + FieldReader in = from.getReader(); + in.setPosition(inIndex); + FieldWriter out = getWriter(); + out.setPosition(outIndex); + ComplexCopier.copy(in, out); } @Override @@ -359,23 +371,17 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - // TODO: https://github.com/apache/arrow/issues/41269 - throw new UnsupportedOperationException( - "ListVector does not support getTransferPair(String, BufferAllocator, CallBack) yet"); + return new TransferImpl(ref, allocator, callBack); } @Override public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - // TODO: https://github.com/apache/arrow/issues/41269 - throw new UnsupportedOperationException( - "ListVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet"); + return new TransferImpl(field, allocator, callBack); } @Override public TransferPair makeTransferPair(ValueVector target) { - // TODO: https://github.com/apache/arrow/issues/41269 - throw new UnsupportedOperationException( - "ListVector does not support makeTransferPair(ValueVector) yet"); + return new TransferImpl((ListViewVector) target); } @Override @@ -448,23 +454,172 @@ public int hashCode(int index, ArrowBufHasher hasher) { return hash; } - @Override - public OUT accept(VectorVisitor visitor, IN value) { - throw new UnsupportedOperationException(); + private class TransferImpl implements TransferPair { + + ListViewVector to; + TransferPair dataTransferPair; + + public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { + this(new ListViewVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new ListViewVector(field, allocator, callBack)); + } + + public 
TransferImpl(ListViewVector to) { + this.to = to; + to.addOrGetVector(vector.getField().getFieldType()); + if (to.getDataVector() instanceof ZeroVector) { + to.addOrGetVector(vector.getField().getFieldType()); + } + dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); + } + + @Override + public void transfer() { + to.clear(); + dataTransferPair.transfer(); + to.validityBuffer = transferBuffer(validityBuffer, to.allocator); + to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); + to.sizeBuffer = transferBuffer(sizeBuffer, to.allocator); + if (valueCount > 0) { + to.setValueCount(valueCount); + } + clear(); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + Preconditions.checkArgument( + startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", + startIndex, + length, + valueCount); + to.clear(); + if (length > 0) { + final int startPoint = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH); + // we have to scan by index since there are out-of-order offsets + to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH); + to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); + + /* splitAndTransfer the size buffer */ + int maxOffsetAndSizeSum = -1; + int minOffsetValue = -1; + for (int i = 0; i < length; i++) { + final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); + final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); + to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); + if (maxOffsetAndSizeSum < offsetValue + sizeValue) { + maxOffsetAndSizeSum = offsetValue + sizeValue; + } + if (minOffsetValue == -1 || minOffsetValue > offsetValue) { + minOffsetValue = offsetValue; + } + } + + /* splitAndTransfer the offset buffer */ + for (int i = 0; i < length; i++) { + final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); + 
final int relativeOffset = offsetValue - minOffsetValue; + to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset); + } + + /* splitAndTransfer the validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + + /* splitAndTransfer the data buffer */ + final int childSliceLength = maxOffsetAndSizeSum - minOffsetValue; + dataTransferPair.splitAndTransfer(minOffsetValue, childSliceLength); + to.setValueCount(length); + } + } + + /* + * transfer the validity. + */ + private void splitAndTransferValidityBuffer(int startIndex, int length, ListViewVector target) { + int firstByteSource = BitVectorHelper.byteIndex(startIndex); + int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + int byteSizeTarget = getValidityBufferSizeFromCount(length); + int offset = startIndex % 8; + + if (length > 0) { + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(1); + } else { + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. + */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte( + validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. 
+ * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte( + validityBuffer, firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + } + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + this.to.copyFrom(from, to, ListViewVector.this); + } } @Override protected FieldReader getReaderImpl() { - // TODO: https://github.com/apache/arrow/issues/41569 - throw new UnsupportedOperationException( - "ListViewVector does not support getReaderImpl operation yet."); + return new UnionListViewReader(this); } @Override - public UnionListReader getReader() { - // TODO: https://github.com/apache/arrow/issues/41569 - throw new UnsupportedOperationException( - "ListViewVector does not support getReader operation yet."); + public UnionListViewReader getReader() { + reader = (UnionListViewReader) super.getReader(); + return reader; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java index 453f3ebb0c6e9..f3e48aa050e30 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java @@ -18,6 +18,7 @@ import org.apache.arrow.util.Preconditions; import 
org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StateTool; @@ -30,6 +31,7 @@ public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWri private NullableStructWriter structRoot; private UnionListWriter listRoot; + private UnionListViewWriter listViewRoot; private UnionMapWriter mapRoot; private final NonNullableStructVector container; @@ -42,6 +44,7 @@ private enum Mode { INIT, STRUCT, LIST, + LISTVIEW, MAP } @@ -99,6 +102,9 @@ public void close() throws Exception { if (listRoot != null) { listRoot.close(); } + if (listViewRoot != null) { + listViewRoot.close(); + } } @Override @@ -110,6 +116,9 @@ public void clear() { case LIST: listRoot.clear(); break; + case LISTVIEW: + listViewRoot.clear(); + break; case MAP: mapRoot.clear(); break; @@ -127,6 +136,9 @@ public void setValueCount(int count) { case LIST: listRoot.setValueCount(count); break; + case LISTVIEW: + listViewRoot.setValueCount(count); + break; case MAP: mapRoot.setValueCount(count); break; @@ -145,6 +157,9 @@ public void setPosition(int index) { case LIST: listRoot.setPosition(index); break; + case LISTVIEW: + listViewRoot.setPosition(index); + break; case MAP: mapRoot.setPosition(index); break; @@ -232,6 +247,31 @@ public ListWriter rootAsList() { return listRoot; } + @Override + public ListWriter rootAsListView() { + switch (mode) { + case INIT: + int vectorCount = container.size(); + // TODO allow dictionaries in complex types + ListViewVector listVector = container.addOrGetListView(name); + if (container.size() > vectorCount) { + listVector.allocateNew(); + } + listViewRoot = new UnionListViewWriter(listVector, nullableStructWriterFactory); + listViewRoot.setPosition(idx()); + mode = Mode.LISTVIEW; + break; + + case LISTVIEW: + break; + + default: + check(Mode.INIT, 
Mode.STRUCT); + } + + return listViewRoot; + } + @Override public MapWriter rootAsMap(boolean keysSorted) { switch (mode) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java new file mode 100644 index 0000000000000..17ac1150fd412 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.UnionHolder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; + +/** {@link FieldReader} for listview of union types. 
*/
+public class UnionListViewReader extends AbstractFieldReader {
+
+  private final ListViewVector vector;
+  private final ValueVector data;
+  // Index in the child vector of the next element that next() will expose.
+  private int currentOffset;
+  // Element count of the list at the current position, as stored in the size buffer.
+  private int size;
+  // Exclusive end of the current list in the child vector (start offset + size).
+  private int endOffset;
+
+  /**
+   * Constructor for UnionListViewReader.
+   *
+   * @param vector the vector to read from
+   */
+  public UnionListViewReader(ListViewVector vector) {
+    this.vector = vector;
+    this.data = vector.getDataVector();
+  }
+
+  /** Returns the field of the underlying list-view vector. */
+  @Override
+  public Field getField() {
+    return vector.getField();
+  }
+
+  /** Returns true when the list at the current position is not null. */
+  @Override
+  public boolean isSet() {
+    return !vector.isNull(idx());
+  }
+
+  /**
+   * Positions the reader on the list at {@code index} and caches that list's start offset, element
+   * count and end offset from the vector's offset and size buffers. When the offset buffer has no
+   * capacity the list is treated as empty.
+   *
+   * @param index the list position to read
+   */
+  @Override
+  public void setPosition(int index) {
+    super.setPosition(index);
+    if (vector.getOffsetBuffer().capacity() == 0) {
+      currentOffset = 0;
+      size = 0;
+    } else {
+      currentOffset =
+          vector.getOffsetBuffer().getInt(index * (long) BaseRepeatedValueViewVector.OFFSET_WIDTH);
+      size = vector.getSizeBuffer().getInt(index * (long) BaseRepeatedValueViewVector.SIZE_WIDTH);
+    }
+    // Fixed upper bound for next(); currentOffset moves while iterating, this must not.
+    endOffset = currentOffset + size;
+  }
+
+  /** Returns the reader of the child (data) vector. */
+  @Override
+  public FieldReader reader() {
+    return data.getReader();
+  }
+
+  /** Returns the object form of the list at the current position. */
+  @Override
+  public Object readObject() {
+    return vector.getObject(idx());
+  }
+
+  @Override
+  public MinorType getMinorType() {
+    return MinorType.LISTVIEW;
+  }
+
+  /**
+   * Reads the element at {@code index} of the current list into {@code holder}.
+   *
+   * <p>The reader is re-positioned on the current list and then advanced {@code index + 1} times.
+   * Advancing stops at the end of the list, so an {@code index} past the last element leaves the
+   * child reader wherever it was last positioned.
+   *
+   * @param index zero-based element index inside the current list
+   * @param holder receives the child reader and its set flag
+   */
+  @Override
+  public void read(int index, UnionHolder holder) {
+    setPosition(idx());
+    for (int i = -1; i < index; i++) {
+      next();
+    }
+    holder.reader = data.getReader();
+    holder.isSet = data.getReader().isSet() ? 1 : 0;
+  }
+
+  /** Returns the element count of the list at the current position, never negative. */
+  @Override
+  public int size() {
+    return Math.max(size, 0);
+  }
+
+  /**
+   * Advances the child reader to the next element of the current list.
+   *
+   * @return true if an element was available, false once the end of the list has been reached
+   */
+  @Override
+  public boolean next() {
+    // currentOffset tracks the next child index to expose; endOffset is the fixed exclusive end
+    // of the list computed in setPosition. Comparing against "currentOffset + size" instead would
+    // always be true for a non-empty list, because both sides advance together.
+    if (currentOffset < endOffset) {
+      data.getReader().setPosition(currentOffset++);
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index 604f18b56b5c7..626619a9483de 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -22,6 +22,7 @@ import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; import static org.apache.arrow.vector.BufferLayout.BufferType.DATA; import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET; +import static org.apache.arrow.vector.BufferLayout.BufferType.SIZE; import static org.apache.arrow.vector.BufferLayout.BufferType.TYPE; import static org.apache.arrow.vector.BufferLayout.BufferType.VALIDITY; import static org.apache.arrow.vector.BufferLayout.BufferType.VARIADIC_DATA_BUFFERS; @@ -72,6 +73,7 @@ import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -724,7 +726,7 @@ private List readIntoBuffer( if (bufferType.equals(VALIDITY)) { reader = helper.BIT; - } else if (bufferType.equals(OFFSET)) { + } else if (bufferType.equals(OFFSET) || bufferType.equals(SIZE)) { if (type == MinorType.LARGELIST || type == MinorType.LARGEVARCHAR || type == MinorType.LARGEVARBINARY) { @@ -888,8 +890,8 @@ private void
readFromJsonIntoVector(Field field, FieldVector vector) throws IOEx BufferType bufferType = vectorTypes.get(v); nextFieldIs(bufferType.getName()); int innerBufferValueCount = valueCount; - if (bufferType.equals(OFFSET) && !(type instanceof Union)) { - /* offset buffer has 1 additional value capacity except for dense unions */ + if (bufferType.equals(OFFSET) && !(type instanceof Union) && !(type instanceof ListView)) { + /* offset buffer has 1 additional value capacity except for dense unions and ListView */ innerBufferValueCount = valueCount + 1; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index d1ee890f5c596..929c8c97c0551 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -73,6 +73,7 @@ import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.types.Types.MinorType; @@ -229,7 +230,9 @@ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOE // thus the values are only written to a single entity. generator.writeArrayFieldStart(bufferType.getName()); final int bufferValueCount = - (bufferType.equals(OFFSET) && vector.getMinorType() != MinorType.DENSEUNION) + (bufferType.equals(OFFSET) + && vector.getMinorType() != MinorType.DENSEUNION + && vector.getMinorType() != MinorType.LISTVIEW) ? 
valueCount + 1 : valueCount; for (int i = 0; i < bufferValueCount; i++) { @@ -259,6 +262,7 @@ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOE } else if (bufferType.equals(OFFSET) && vector.getValueCount() == 0 && (vector.getMinorType() == MinorType.LIST + || vector.getMinorType() == MinorType.LISTVIEW || vector.getMinorType() == MinorType.MAP || vector.getMinorType() == MinorType.VARBINARY || vector.getMinorType() == MinorType.VARCHAR)) { @@ -419,6 +423,10 @@ private void writeValueToGenerator( case MAP: generator.writeNumber(buffer.getInt((long) index * BaseVariableWidthVector.OFFSET_WIDTH)); break; + case LISTVIEW: + generator.writeNumber( + buffer.getInt((long) index * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + break; case LARGELIST: case LARGEVARBINARY: case LARGEVARCHAR: @@ -573,6 +581,8 @@ private void writeValueToGenerator( default: throw new UnsupportedOperationException("minor type: " + vector.getMinorType()); } + } else if (bufferType.equals(SIZE)) { + generator.writeNumber(buffer.getInt((long) index * BaseRepeatedValueViewVector.SIZE_WIDTH)); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 1a58b65e3be4a..4fa808c18aece 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -32,7 +32,6 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.types.TimeUnit; @@ -40,6 +39,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -68,40 +68,40 @@ public void testBasicListViewVector() { /* write the first list at index 0 */ listViewWriter.setPosition(0); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(12); listViewWriter.bigInt().writeBigInt(-7); listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endList(); + listViewWriter.endListView(); /* the second list at index 1 is null (we are not setting any)*/ /* write the third list at index 2 */ listViewWriter.setPosition(2); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(0); listViewWriter.bigInt().writeBigInt(-127); listViewWriter.bigInt().writeBigInt(127); listViewWriter.bigInt().writeBigInt(50); - listViewWriter.endList(); + listViewWriter.endListView(); /* write the fourth list at index 3 (empty list) */ listViewWriter.setPosition(3); - listViewWriter.startList(); - listViewWriter.endList(); + listViewWriter.startListView(); + listViewWriter.endListView(); /* write the fifth list at index 4 */ listViewWriter.setPosition(4); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(1); listViewWriter.bigInt().writeBigInt(2); listViewWriter.bigInt().writeBigInt(3); listViewWriter.bigInt().writeBigInt(4); - listViewWriter.endList(); + listViewWriter.endListView(); - listViewVector.setValueCount(5); + listViewWriter.setValueCount(5); // check value count assertEquals(5, listViewVector.getValueCount()); @@ -158,7 +158,7 @@ public void testImplicitNullVectors() { listViewWriter.bigInt().writeBigInt(12); listViewWriter.bigInt().writeBigInt(-7); listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endList(); + 
listViewWriter.endListView(); int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); @@ -172,7 +172,7 @@ public void testImplicitNullVectors() { assertEquals(3, size0); listViewWriter.setPosition(5); - listViewWriter.startList(); + listViewWriter.startListView(); // writing the 6th list at index 5, // and the list items from index 1 through 4 are not populated. @@ -192,7 +192,7 @@ public void testImplicitNullVectors() { listViewWriter.bigInt().writeBigInt(12); listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endList(); + listViewWriter.endListView(); int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); @@ -201,7 +201,7 @@ public void testImplicitNullVectors() { assertEquals(2, size5); listViewWriter.setPosition(10); - listViewWriter.startList(); + listViewWriter.startListView(); // writing the 11th list at index 10, // and the list items from index 6 through 10 are not populated. 
@@ -219,7 +219,7 @@ public void testImplicitNullVectors() { } listViewWriter.bigInt().writeBigInt(12); - listViewWriter.endList(); + listViewWriter.endListView(); int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); @@ -247,43 +247,43 @@ public void testNestedListViewVector() { /* write one or more inner lists at index 0 */ listViewWriter.setPosition(0); - listViewWriter.startList(); + listViewWriter.startListView(); - listViewWriter.list().startList(); - listViewWriter.list().bigInt().writeBigInt(50); - listViewWriter.list().bigInt().writeBigInt(100); - listViewWriter.list().bigInt().writeBigInt(200); - listViewWriter.list().endList(); + listViewWriter.listView().startListView(); + listViewWriter.listView().bigInt().writeBigInt(50); + listViewWriter.listView().bigInt().writeBigInt(100); + listViewWriter.listView().bigInt().writeBigInt(200); + listViewWriter.listView().endListView(); - listViewWriter.list().startList(); - listViewWriter.list().bigInt().writeBigInt(75); - listViewWriter.list().bigInt().writeBigInt(125); - listViewWriter.list().bigInt().writeBigInt(150); - listViewWriter.list().bigInt().writeBigInt(175); - listViewWriter.list().endList(); + listViewWriter.listView().startListView(); + listViewWriter.listView().bigInt().writeBigInt(75); + listViewWriter.listView().bigInt().writeBigInt(125); + listViewWriter.listView().bigInt().writeBigInt(150); + listViewWriter.listView().bigInt().writeBigInt(175); + listViewWriter.listView().endListView(); - listViewWriter.endList(); + listViewWriter.endListView(); /* write one or more inner lists at index 1 */ listViewWriter.setPosition(1); - listViewWriter.startList(); + listViewWriter.startListView(); - listViewWriter.list().startList(); - listViewWriter.list().bigInt().writeBigInt(10); - listViewWriter.list().endList(); + listViewWriter.listView().startListView(); + 
listViewWriter.listView().bigInt().writeBigInt(10); + listViewWriter.listView().endListView(); - listViewWriter.list().startList(); - listViewWriter.list().bigInt().writeBigInt(15); - listViewWriter.list().bigInt().writeBigInt(20); - listViewWriter.list().endList(); + listViewWriter.listView().startListView(); + listViewWriter.listView().bigInt().writeBigInt(15); + listViewWriter.listView().bigInt().writeBigInt(20); + listViewWriter.listView().endListView(); - listViewWriter.list().startList(); - listViewWriter.list().bigInt().writeBigInt(25); - listViewWriter.list().bigInt().writeBigInt(30); - listViewWriter.list().bigInt().writeBigInt(35); - listViewWriter.list().endList(); + listViewWriter.listView().startListView(); + listViewWriter.listView().bigInt().writeBigInt(25); + listViewWriter.listView().bigInt().writeBigInt(30); + listViewWriter.listView().bigInt().writeBigInt(35); + listViewWriter.listView().endListView(); - listViewWriter.endList(); + listViewWriter.endListView(); listViewVector.setValueCount(2); @@ -392,8 +392,8 @@ private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) /* * Setting up the buffers directly needs to be validated with the base method used in - * the ListVector class where we use the approach of startList(), - * write to the child vector and endList(). + * the ListVector class where we use the approach of startListView(), + * write to the child vector and endListView(). *

* To support this, we have to consider the following scenarios; *

@@ -499,7 +499,7 @@ public void testBasicListViewSetNested() { listViewVector.allocateNew(); // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.List(), null, null); + FieldType fieldType = new FieldType(true, new ArrowType.ListView(), null, null); FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); Field childField = new Field("child-vector", childFieldType, null); List children = new ArrayList<>(); @@ -511,52 +511,52 @@ public void testBasicListViewSetNested() { FieldVector fieldVector = listViewVector.getDataVector(); fieldVector.clear(); - ListVector childVector = (ListVector) fieldVector; - UnionListWriter listWriter = childVector.getWriter(); - listWriter.allocate(); + ListViewVector childVector = (ListViewVector) fieldVector; + UnionListViewWriter listViewWriter = childVector.getWriter(); + listViewWriter.allocate(); - listWriter.setPosition(0); - listWriter.startList(); + listViewWriter.setPosition(0); + listViewWriter.startListView(); - listWriter.bigInt().writeBigInt(50); - listWriter.bigInt().writeBigInt(100); - listWriter.bigInt().writeBigInt(200); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.bigInt().writeBigInt(100); + listViewWriter.bigInt().writeBigInt(200); - listWriter.endList(); + listViewWriter.endListView(); - listWriter.setPosition(1); - listWriter.startList(); + listViewWriter.setPosition(1); + listViewWriter.startListView(); - listWriter.bigInt().writeBigInt(75); - listWriter.bigInt().writeBigInt(125); - listWriter.bigInt().writeBigInt(150); - listWriter.bigInt().writeBigInt(175); + listViewWriter.bigInt().writeBigInt(75); + listViewWriter.bigInt().writeBigInt(125); + listViewWriter.bigInt().writeBigInt(150); + listViewWriter.bigInt().writeBigInt(175); - listWriter.endList(); + listViewWriter.endListView(); - listWriter.setPosition(2); - listWriter.startList(); + listViewWriter.setPosition(2); + 
listViewWriter.startListView(); - listWriter.bigInt().writeBigInt(10); + listViewWriter.bigInt().writeBigInt(10); - listWriter.endList(); + listViewWriter.endListView(); - listWriter.startList(); - listWriter.setPosition(3); + listViewWriter.startListView(); + listViewWriter.setPosition(3); - listWriter.bigInt().writeBigInt(15); - listWriter.bigInt().writeBigInt(20); + listViewWriter.bigInt().writeBigInt(15); + listViewWriter.bigInt().writeBigInt(20); - listWriter.endList(); + listViewWriter.endListView(); - listWriter.startList(); - listWriter.setPosition(4); + listViewWriter.startListView(); + listViewWriter.setPosition(4); - listWriter.bigInt().writeBigInt(25); - listWriter.bigInt().writeBigInt(30); - listWriter.bigInt().writeBigInt(35); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.bigInt().writeBigInt(30); + listViewWriter.bigInt().writeBigInt(35); - listWriter.endList(); + listViewWriter.endListView(); childVector.setValueCount(5); @@ -713,12 +713,12 @@ public void testBasicListViewSetWithListViewWriter() { UnionListViewWriter listViewWriter = listViewVector.getWriter(); listViewWriter.setPosition(4); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(121); listViewWriter.bigInt().writeBigInt(-71); listViewWriter.bigInt().writeBigInt(251); - listViewWriter.endList(); + listViewWriter.endListView(); listViewVector.setValueCount(5); @@ -762,17 +762,17 @@ public void testGetBufferAddress() throws Exception { listViewWriter.allocate(); listViewWriter.setPosition(0); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(50); listViewWriter.bigInt().writeBigInt(100); listViewWriter.bigInt().writeBigInt(200); - listViewWriter.endList(); + listViewWriter.endListView(); listViewWriter.setPosition(1); - listViewWriter.startList(); + listViewWriter.startListView(); listViewWriter.bigInt().writeBigInt(250); listViewWriter.bigInt().writeBigInt(300); - 
listViewWriter.endList(); + listViewWriter.endListView(); listViewVector.setValueCount(2); @@ -919,10 +919,10 @@ public void testWriterGetField() { writer.allocate(); // set some values - writer.startList(); + writer.startListView(); writer.integer().writeInt(1); writer.integer().writeInt(2); - writer.endList(); + writer.endListView(); vector.setValueCount(2); Field expectedDataField = @@ -951,7 +951,7 @@ public void testWriterUsingHolderGetTimestampMilliTZField() { TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); holder.timezone = "SomeFakeTimeZone"; - writer.startList(); + writer.startListView(); holder.value = 12341234L; writer.timeStampMilliTZ().write(holder); holder.value = 55555L; @@ -967,7 +967,7 @@ public void testWriterUsingHolderGetTimestampMilliTZField() { "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", ex.getMessage()); - writer.endList(); + writer.endListView(); vector.setValueCount(1); Field expectedDataField = @@ -997,7 +997,7 @@ public void testWriterGetDurationField() { DurationHolder durationHolder = new DurationHolder(); durationHolder.unit = TimeUnit.MILLISECOND; - writer.startList(); + writer.startListView(); durationHolder.value = 812374L; writer.duration().write(durationHolder); durationHolder.value = 143451L; @@ -1011,7 +1011,7 @@ public void testWriterGetDurationField() { IllegalArgumentException.class, () -> writer.duration().write(durationHolder)); assertEquals("holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); - writer.endList(); + writer.endListView(); vector.setValueCount(1); Field expectedDataField = @@ -1039,10 +1039,10 @@ public void testClose() throws Exception { writer.allocate(); // set some values - writer.startList(); + writer.startListView(); writer.integer().writeInt(1); writer.integer().writeInt(2); - writer.endList(); + writer.endListView(); vector.setValueCount(2); assertTrue(vector.getBufferSize() > 0); @@ -1144,27 +1144,27 @@ public void 
testSetNull1() { writer.allocate(); writer.setPosition(0); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(10); writer.bigInt().writeBigInt(20); - writer.endList(); + writer.endListView(); vector.setNull(1); writer.setPosition(2); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(30); writer.bigInt().writeBigInt(40); - writer.endList(); + writer.endListView(); vector.setNull(3); vector.setNull(4); writer.setPosition(5); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(50); writer.bigInt().writeBigInt(60); - writer.endList(); + writer.endListView(); vector.setValueCount(6); @@ -1238,24 +1238,24 @@ public void testSetNull2() { vector.setNull(4); writer.setPosition(1); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(10); writer.bigInt().writeBigInt(20); writer.bigInt().writeBigInt(30); - writer.endList(); + writer.endListView(); writer.setPosition(3); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(40); writer.bigInt().writeBigInt(50); - writer.endList(); + writer.endListView(); writer.setPosition(5); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(60); writer.bigInt().writeBigInt(70); writer.bigInt().writeBigInt(80); - writer.endList(); + writer.endListView(); vector.setValueCount(6); @@ -1327,24 +1327,24 @@ public void testSetNull3() { writer.allocate(); writer.setPosition(1); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(10); writer.bigInt().writeBigInt(20); writer.bigInt().writeBigInt(30); - writer.endList(); + writer.endListView(); writer.setPosition(3); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(40); writer.bigInt().writeBigInt(50); - writer.endList(); + writer.endListView(); writer.setPosition(5); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(60); writer.bigInt().writeBigInt(70); 
writer.bigInt().writeBigInt(80); - writer.endList(); + writer.endListView(); vector.setNull(0); vector.setNull(2); @@ -1419,31 +1419,31 @@ public void testOverWrite1() { writer.allocate(); writer.setPosition(0); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(10); writer.bigInt().writeBigInt(20); writer.bigInt().writeBigInt(30); - writer.endList(); + writer.endListView(); writer.setPosition(1); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(40); writer.bigInt().writeBigInt(50); - writer.endList(); + writer.endListView(); vector.setValueCount(2); writer.setPosition(0); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(60); writer.bigInt().writeBigInt(70); - writer.endList(); + writer.endListView(); writer.setPosition(1); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(80); writer.bigInt().writeBigInt(90); - writer.endList(); + writer.endListView(); vector.setValueCount(2); @@ -1473,17 +1473,17 @@ public void testOverwriteWithNull() { ArrowBuf sizeBuffer = vector.getSizeBuffer(); writer.setPosition(0); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(10); writer.bigInt().writeBigInt(20); writer.bigInt().writeBigInt(30); - writer.endList(); + writer.endListView(); writer.setPosition(1); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(40); writer.bigInt().writeBigInt(50); - writer.endList(); + writer.endListView(); vector.setValueCount(2); @@ -1507,19 +1507,19 @@ public void testOverwriteWithNull() { assertTrue(vector.isNull(1)); writer.setPosition(0); - writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(60); writer.bigInt().writeBigInt(70); - writer.endList(); + writer.endListView(); assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); writer.setPosition(1); - 
writer.startList(); + writer.startListView(); writer.bigInt().writeBigInt(80); writer.bigInt().writeBigInt(90); - writer.endList(); + writer.endListView(); assertEquals(2, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); @@ -1655,11 +1655,440 @@ public void testOutOfOrderOffset1() { } } + private int validateSizeBufferAndCalculateMinOffset( + int start, + int splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf fromSizeBuffer, + ArrowBuf toSizeBuffer) { + int minOffset = fromOffsetBuffer.getInt((long) start * ListViewVector.OFFSET_WIDTH); + int fromDataLength; + int toDataLength; + + for (int i = 0; i < splitLength; i++) { + fromDataLength = fromSizeBuffer.getInt((long) (start + i) * ListViewVector.SIZE_WIDTH); + toDataLength = toSizeBuffer.getInt((long) (i) * ListViewVector.SIZE_WIDTH); + + /* validate size */ + assertEquals( + fromDataLength, + toDataLength, + "Different data lengths at index: " + i + " and start: " + start); + + /* calculate minimum offset */ + int currentOffset = fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH); + if (currentOffset < minOffset) { + minOffset = currentOffset; + } + } + + return minOffset; + } + + private void validateOffsetBuffer( + int start, + int splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf toOffsetBuffer, + int minOffset) { + int offset1; + int offset2; + + for (int i = 0; i < splitLength; i++) { + offset1 = fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH); + offset2 = toOffsetBuffer.getInt((long) (i) * ListViewVector.OFFSET_WIDTH); + assertEquals( + offset1 - minOffset, + offset2, + "Different offset values at index: " + i + " and start: " + start); + } + } + + private void validateDataBuffer( + int start, + int splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf fromSizeBuffer, + BigIntVector fromDataVector, + ArrowBuf toOffsetBuffer, + BigIntVector toDataVector) { + 
int dataLength; + Long fromValue; + for (int i = 0; i < splitLength; i++) { + dataLength = fromSizeBuffer.getInt((long) (start + i) * ListViewVector.SIZE_WIDTH); + for (int j = 0; j < dataLength; j++) { + fromValue = + fromDataVector.getObject( + (fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH) + j)); + Long toValue = + toDataVector.getObject( + (toOffsetBuffer.getInt((long) i * ListViewVector.OFFSET_WIDTH) + j)); + assertEquals( + fromValue, toValue, "Different data values at index: " + i + " and start: " + start); + } + } + } + + /** + * Validate split and transfer of data from fromVector to toVector. Note that this method assumes + * that the child vector is BigIntVector. + * + * @param start start index + * @param splitLength length of data to split and transfer + * @param fromVector fromVector + * @param toVector toVector + */ + private void validateSplitAndTransfer( + TransferPair transferPair, + int start, + int splitLength, + ListViewVector fromVector, + ListViewVector toVector) { + + transferPair.splitAndTransfer(start, splitLength); + + /* get offsetBuffer of toVector */ + final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); + + /* get sizeBuffer of toVector */ + final ArrowBuf toSizeBuffer = toVector.getSizeBuffer(); + + /* get dataVector of toVector */ + BigIntVector toDataVector = (BigIntVector) toVector.getDataVector(); + + /* get offsetBuffer of fromVector */ + final ArrowBuf fromOffsetBuffer = fromVector.getOffsetBuffer(); + + /* get sizeBuffer of fromVector */ + final ArrowBuf fromSizeBuffer = fromVector.getSizeBuffer(); + + /* get dataVector of fromVector */ + BigIntVector fromDataVector = (BigIntVector) fromVector.getDataVector(); + + /* validate size buffers */ + int minOffset = + validateSizeBufferAndCalculateMinOffset( + start, splitLength, fromOffsetBuffer, fromSizeBuffer, toSizeBuffer); + /* validate offset buffers */ + validateOffsetBuffer(start, splitLength, fromOffsetBuffer, toOffsetBuffer, minOffset); + /* 
validate data */ + validateDataBuffer( + start, + splitLength, + fromOffsetBuffer, + fromSizeBuffer, + fromDataVector, + toOffsetBuffer, + toDataVector); + } + + @Test + public void testSplitAndTransfer() throws Exception { + try (ListViewVector fromVector = ListViewVector.empty("sourceVector", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; + fromVector.addOrGetVector(FieldType.nullable(type.getType())); + + UnionListViewWriter listViewWriter = fromVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* populate data */ + listViewWriter.setPosition(0); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(10); + listViewWriter.bigInt().writeBigInt(11); + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.endListView(); + + listViewWriter.setPosition(1); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(13); + listViewWriter.bigInt().writeBigInt(14); + listViewWriter.endListView(); + + listViewWriter.setPosition(2); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(15); + listViewWriter.bigInt().writeBigInt(16); + listViewWriter.bigInt().writeBigInt(17); + listViewWriter.bigInt().writeBigInt(18); + listViewWriter.endListView(); + + listViewWriter.setPosition(3); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(19); + listViewWriter.endListView(); + + listViewWriter.setPosition(4); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(20); + listViewWriter.bigInt().writeBigInt(21); + listViewWriter.bigInt().writeBigInt(22); + listViewWriter.bigInt().writeBigInt(23); + listViewWriter.endListView(); + + fromVector.setValueCount(5); + + /* get offset buffer */ + final ArrowBuf offsetBuffer = fromVector.getOffsetBuffer(); + + /* get size buffer */ + final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); + + /* get dataVector */ + BigIntVector dataVector = (BigIntVector) 
fromVector.getDataVector(); + + /* check the vector output */ + + int index = 0; + int offset; + int size = 0; + Long actual; + + /* index 0 */ + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(10), actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(11), actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(12), actual); + assertEquals( + Integer.toString(3), + Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); + + /* index 1 */ + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(13), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(14), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); + + /* index 2 */ + size = 0; + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(5), Integer.toString(offset)); + size++; + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(15), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(16), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(17), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(18), actual); + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); + + /* index 3 */ + size = 0; + 
index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(9), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(19), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); + + /* index 4 */ + size = 0; + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(10), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(20), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(21), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(22), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(23), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); + + /* do split and transfer */ + try (ListViewVector toVector = ListViewVector.empty("toVector", allocator)) { + int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; + TransferPair transferPair = fromVector.makeTransferPair(toVector); + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); + } + } + } + } + + @Test + public void testOutOfOrderOffsetSplitAndTransfer() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + try (ListViewVector fromVector = ListViewVector.empty("fromVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + fromVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. 
+ + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); + Field field = new Field("child-vector", fieldType, null); + fromVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = fromVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + fromVector.setValidity(0, 1); + fromVector.setValidity(1, 0); + fromVector.setValidity(2, 1); + fromVector.setValidity(3, 1); + fromVector.setValidity(4, 1); + + fromVector.setOffset(0, 4); + fromVector.setOffset(1, 7); + fromVector.setOffset(2, 0); + fromVector.setOffset(3, 0); + fromVector.setOffset(4, 3); + + fromVector.setSize(0, 3); + fromVector.setSize(1, 0); + fromVector.setSize(2, 4); + fromVector.setSize(3, 0); + fromVector.setSize(4, 2); + + // Set value count using `setValueCount` method. 
+ fromVector.setValueCount(5); + + final ArrowBuf offSetBuffer = fromVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((BigIntVector) fromVector.getDataVector()).get(0)); + assertEquals(-127, ((BigIntVector) fromVector.getDataVector()).get(1)); + assertEquals(127, ((BigIntVector) fromVector.getDataVector()).get(2)); + assertEquals(50, ((BigIntVector) fromVector.getDataVector()).get(3)); + assertEquals(12, ((BigIntVector) fromVector.getDataVector()).get(4)); + assertEquals(-7, ((BigIntVector) fromVector.getDataVector()).get(5)); + assertEquals(25, ((BigIntVector) fromVector.getDataVector()).get(6)); + + // check values + Object result = fromVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(12), resultSet.get(0)); + assertEquals(Long.valueOf(-7), resultSet.get(1)); + assertEquals(Long.valueOf(25), resultSet.get(2)); + + assertTrue(fromVector.isNull(1)); + + result = fromVector.getObject(2); + resultSet = (ArrayList) result; + 
assertEquals(4, resultSet.size()); + assertEquals(Long.valueOf(0), resultSet.get(0)); + assertEquals(Long.valueOf(-127), resultSet.get(1)); + assertEquals(Long.valueOf(127), resultSet.get(2)); + assertEquals(Long.valueOf(50), resultSet.get(3)); + + assertTrue(fromVector.isEmpty(3)); + + result = fromVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(12), resultSet.get(1)); + + fromVector.validate(); + + /* do split and transfer */ + try (ListViewVector toVector = ListViewVector.empty("toVector", allocator)) { + int[][] transferLengths = {{2, 3}, {0, 1}, {0, 3}}; + TransferPair transferPair = fromVector.makeTransferPair(toVector); + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); + } + } + } + } + private void writeIntValues(UnionListViewWriter writer, int[] values) { - writer.startList(); + writer.startListView(); for (int v : values) { writer.integer().writeInt(v); } - writer.endList(); + writer.endListView(); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 40e55fce9bfa2..376ad3ec7504f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -47,9 +47,11 @@ import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.impl.NullableStructWriter; +import 
org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.holders.NullableIntHolder; import org.apache.arrow.vector.holders.NullableUInt4Holder; @@ -2935,6 +2937,29 @@ public void testListVectorSetNull() { } } + @Test + public void testListViewVectorSetNull() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writeListViewVector(writer, new int[] {1, 2}); + writeListViewVector(writer, new int[] {3, 4}); + writeListViewVector(writer, new int[] {5, 6}); + vector.setNull(3); + vector.setNull(4); + vector.setNull(5); + writer.setValueCount(6); + + assertEquals(vector.getObject(0), Arrays.asList(1, 2)); + assertEquals(vector.getObject(1), Arrays.asList(3, 4)); + assertEquals(vector.getObject(2), Arrays.asList(5, 6)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertTrue(vector.isNull(5)); + } + } + @Test public void testStructVectorEqualsWithNull() { @@ -3266,6 +3291,14 @@ private void writeListVector(UnionListWriter writer, int[] values) { writer.endList(); } + private void writeListViewVector(UnionListViewWriter writer, int[] values) { + writer.startListView(); + for (int v : values) { + writer.integer().writeInt(v); + } + writer.endListView(); + } + @Test public void testVariableVectorGetEndOffset() { try (final VarCharVector vector1 = new VarCharVector("v1", allocator); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 654940908bf38..2745386db4e22 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -47,6 +47,7 @@ import 
org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; @@ -57,6 +58,8 @@ import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; import org.apache.arrow.vector.complex.impl.SingleStructWriter; import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListViewReader; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionReader; @@ -116,6 +119,195 @@ public void terminate() throws Exception { allocator.close(); } + /* Test Utils */ + + private void checkNullableStruct(NonNullableStructVector structVector) { + StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root"); + for (int i = 0; i < COUNT; i++) { + rootReader.setPosition(i); + assertTrue(rootReader.isSet(), "index is set: " + i); + FieldReader struct = rootReader.reader("struct"); + if (i % 2 == 0) { + assertTrue(struct.isSet(), "index is set: " + i); + assertNotNull(struct.readObject(), "index is set: " + i); + assertEquals(i, struct.reader("nested").readLong().longValue()); + } else { + assertFalse(struct.isSet(), "index is not set: " + i); + assertNull(struct.readObject(), "index is not set: " + i); + } + } + } + + private void createListTypeVectorWithScalarType(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 2 == 0) { + writer.writeInt(j); + } else { + IntHolder holder = new IntHolder(); + holder.value = j; + writer.write(holder); + } + } + writer.endList(); + } 
+ } + + private void checkListTypeVectorWithScalarType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + assertEquals(j, reader.reader().readInteger().intValue()); + } + } + } + + private void createListTypeVectorWithScalarNull(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 2 == 0) { + writer.writeNull(); + } else { + IntHolder holder = new IntHolder(); + holder.value = j; + writer.write(holder); + } + } + writer.endList(); + } + } + + private void checkListTypeVectorWithScalarNull(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + if (j % 2 == 0) { + assertFalse(reader.reader().isSet(), "index is set: " + j); + } else { + assertTrue(reader.reader().isSet(), "index is not set: " + j); + assertEquals(j, reader.reader().readInteger().intValue()); + } + } + } + } + + private void createListTypeVectorWithDecimalType(FieldWriter writer, DecimalHolder holder) { + holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH); + ArrowType arrowType = new ArrowType.Decimal(10, 0, 128); + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 4 == 0) { + writer.writeDecimal(new BigDecimal(j)); + } else if (j % 4 == 1) { + DecimalUtility.writeBigDecimalToArrowBuf( + new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); + holder.start = 0; + holder.scale = 0; + holder.precision = 10; + writer.write(holder); + } else if (j % 4 == 2) { + DecimalUtility.writeBigDecimalToArrowBuf( + new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); + writer.writeDecimal(0, holder.buffer, arrowType); + } else { + byte[] value = BigDecimal.valueOf(j).unscaledValue().toByteArray(); + writer.writeBigEndianBytesToDecimal(value, arrowType); + } + } + writer.endList(); + } + } + + private 
void checkListTypeVectorWithDecimalType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + Object expected = new BigDecimal(j); + Object actual = reader.reader().readBigDecimal(); + assertEquals(expected, actual); + } + } + } + + private void createListTypeVectorWithTimeStampMilliTZType(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 2 == 0) { + writer.writeNull(); + } else { + TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); + holder.timezone = "FakeTimeZone"; + holder.value = j; + writer.timeStampMilliTZ().write(holder); + } + } + writer.endList(); + } + } + + private void checkListTypeVectorWithTimeStampMilliTZType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + if (j % 2 == 0) { + assertFalse(reader.reader().isSet(), "index is set: " + j); + } else { + NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder(); + reader.reader().read(actual); + assertEquals(j, actual.value); + assertEquals("FakeTimeZone", actual.timezone); + } + } + } + } + + private void createNullsWithListWriters(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + writer.setPosition(i); + if (i % 2 == 0) { + writer.startList(); + if (i % 4 == 0) { + writer.integer().writeNull(); + } else { + writer.integer().writeInt(i); + writer.integer().writeInt(i * 2); + } + writer.endList(); + } else { + writer.writeNull(); + } + } + } + + private void checkNullsWithListWriters(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + assertTrue(reader.isSet()); + reader.next(); + if (i % 4 == 0) { + assertNull(reader.reader().readInteger()); + } else { + assertEquals(i, reader.reader().readInteger().intValue()); + reader.next(); + assertEquals(i * 2, 
reader.reader().readInteger().intValue()); + } + } else { + assertFalse(reader.isSet()); + } + } + } + + /* Test Cases */ + @Test public void simpleNestedTypes() { NonNullableStructVector parent = populateStructVector(null); @@ -213,23 +405,6 @@ public void nullableStruct2() { } } - private void checkNullableStruct(NonNullableStructVector structVector) { - StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root"); - for (int i = 0; i < COUNT; i++) { - rootReader.setPosition(i); - assertTrue(rootReader.isSet(), "index is set: " + i); - FieldReader struct = rootReader.reader("struct"); - if (i % 2 == 0) { - assertTrue(struct.isSet(), "index is set: " + i); - assertNotNull(struct.readObject(), "index is set: " + i); - assertEquals(i, struct.reader("nested").readLong().longValue()); - } else { - assertFalse(struct.isSet(), "index is not set: " + i); - assertNull(struct.readObject(), "index is not set: " + i); - } - } - } - @Test public void testList() { try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { @@ -260,72 +435,259 @@ public void testList() { } } - @Test - public void listScalarType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); + private void createListTypeVectorWithDurationType(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 2 == 0) { + writer.writeNull(); + } else { + DurationHolder holder = new DurationHolder(); + holder.unit = TimeUnit.MICROSECOND; + holder.value = j; + writer.duration().write(holder); + } + } + writer.endList(); + } + } + + private void checkListTypeVectorWithDurationType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + if (j % 2 == 
0) { + assertFalse(reader.reader().isSet(), "index is set: " + j); + } else { + NullableDurationHolder actual = new NullableDurationHolder(); + reader.reader().read(actual); + assertEquals(TimeUnit.MICROSECOND, actual.unit); + assertEquals(j, actual.value); + } + } + } + } + + private void createScalarTypeVectorWithNullableType(FieldWriter writer) { + for (int i = 0; i < COUNT; i++) { + if (i % 2 == 0) { + writer.setPosition(i); + writer.startList(); for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - listWriter.writeInt(j); + writer.writeInt(j); + } + writer.endList(); + } + } + } + + private void checkScalarTypeVectorWithNullableType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + if (i % 2 == 0) { + assertTrue(reader.isSet(), "index is set: " + i); + assertEquals(i % 7, ((List) reader.readObject()).size(), "correct length at: " + i); + } else { + assertFalse(reader.isSet(), "index is not set: " + i); + assertNull(reader.readObject(), "index is not set: " + i); + } + } + } + + private void createListTypeVectorWithStructType( + FieldWriter fieldWriter, StructWriter structWriter) { + for (int i = 0; i < COUNT; i++) { + fieldWriter.startList(); + for (int j = 0; j < i % 7; j++) { + structWriter.start(); + structWriter.integer("int").writeInt(j); + structWriter.bigInt("bigInt").writeBigInt(j); + structWriter.end(); + } + fieldWriter.endList(); + } + } + + private void checkListTypeVectorWithStructType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + assertEquals(j, reader.reader().reader("int").readInteger().intValue(), "record: " + i); + assertEquals(j, reader.reader().reader("bigInt").readLong().longValue()); + } + } + } + + private void checkListOfListTypes(final FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + FieldReader innerListReader = 
reader.reader(); + for (int k = 0; k < i % 13; k++) { + innerListReader.next(); + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); + } + } + } + } + + private void checkUnionListType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + FieldReader innerListReader = reader.reader(); + for (int k = 0; k < i % 13; k++) { + innerListReader.next(); + if (k % 2 == 0) { + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); } else { - IntHolder holder = new IntHolder(); - holder.value = j; - listWriter.write(holder); + assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i); } } - listWriter.endList(); } - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - assertEquals(j, listReader.reader().readInteger().intValue()); + } + } + + private static void createListTypeVectorWithMapType(FieldWriter writer) { + MapWriter innerMapWriter = writer.map(true); + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + innerMapWriter.startMap(); + for (int k = 0; k < i % 13; k++) { + innerMapWriter.startEntry(); + innerMapWriter.key().integer().writeInt(k); + if (k % 2 == 0) { + innerMapWriter.value().bigInt().writeBigInt(k); + } + innerMapWriter.endEntry(); } + innerMapWriter.endMap(); } + writer.endList(); + } + } + + private void checkListTypeMap(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + UnionMapReader mapReader = (UnionMapReader) reader.reader(); + for (int k = 0; k < i % 13; k++) { + mapReader.next(); + assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i); + if (k % 2 == 0) { + 
assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i); + } else { + assertNull(mapReader.value().readLong(), "record value: " + i); + } + } + } + } + } + + /* Test Cases */ + + private void createListTypeVectorWithFixedSizeBinaryType( + FieldWriter writer, List buffers) { + for (int i = 0; i < COUNT; i++) { + writer.startList(); + for (int j = 0; j < i % 7; j++) { + if (j % 2 == 0) { + writer.writeNull(); + } else { + ArrowBuf buf = allocator.buffer(4); + buf.setInt(0, j); + FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder(); + holder.byteWidth = 4; + holder.buffer = buf; + writer.fixedSizeBinary().write(holder); + buffers.add(buf); + } + } + writer.endList(); + } + } + + private void checkListTypeVectorWithFixedSizeBinaryType(FieldReader reader) { + for (int i = 0; i < COUNT; i++) { + reader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + reader.next(); + if (j % 2 == 0) { + assertFalse(reader.reader().isSet(), "index is set: " + j); + } else { + NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder(); + reader.reader().read(actual); + assertEquals(j, actual.buffer.getInt(0)); + assertEquals(4, actual.byteWidth); + } + } + } + } + + @Test + public void listScalarType() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + createListTypeVectorWithScalarType(listWriter); + listWriter.setValueCount(COUNT); + UnionListReader listReader = new UnionListReader(listVector); + // validate + checkListTypeVectorWithScalarType(listReader); + } + } + + @Test + public void listViewScalarType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createListTypeVectorWithScalarType(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader 
listViewReader = new UnionListViewReader(listViewVector); + // validate + checkListTypeVectorWithScalarType(listViewReader); } } @Test public void testListScalarNull() { - /* Write to a integer list vector - * each list of size 8 and having it's data values alternating between null and a non-null. + /* Write to an integer list vector + * each list of size 8 + * and having its data values alternating between null and a non-null. * Read and verify */ try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - listWriter.writeNull(); - } else { - IntHolder holder = new IntHolder(); - holder.value = j; - listWriter.write(holder); - } - } - listWriter.endList(); - } + createListTypeVectorWithScalarNull(listWriter); listWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - if (j % 2 == 0) { - assertFalse(listReader.reader().isSet(), "index is set: " + j); - } else { - assertTrue(listReader.reader().isSet(), "index is not set: " + j); - assertEquals(j, listReader.reader().readInteger().intValue()); - } - } - } + checkListTypeVectorWithScalarNull(listReader); + } + } + + @Test + public void testListViewScalarNull() { + /* Write to an integer list vector + * each list of size 8 + * and having its data values alternating between null and a non-null. 
+ * Read and verify + */ + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createListTypeVectorWithScalarNull(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkListTypeVectorWithScalarNull(listViewReader); } } @@ -335,42 +697,24 @@ public void listDecimalType() { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); DecimalHolder holder = new DecimalHolder(); - holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH); - ArrowType arrowType = new ArrowType.Decimal(10, 0, 128); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 4 == 0) { - listWriter.writeDecimal(new BigDecimal(j)); - } else if (j % 4 == 1) { - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - holder.start = 0; - holder.scale = 0; - holder.precision = 10; - listWriter.write(holder); - } else if (j % 4 == 2) { - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - listWriter.writeDecimal(0, holder.buffer, arrowType); - } else { - byte[] value = BigDecimal.valueOf(j).unscaledValue().toByteArray(); - listWriter.writeBigEndianBytesToDecimal(value, arrowType); - } - } - listWriter.endList(); - } + createListTypeVectorWithDecimalType(listWriter, holder); listWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - Object expected = new BigDecimal(j); - Object actual = listReader.reader().readBigDecimal(); - assertEquals(expected, actual); - } - } + checkListTypeVectorWithDecimalType(listReader); + 
holder.buffer.close(); + } + } + + @Test + public void listViewDecimalType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + DecimalHolder holder = new DecimalHolder(); + createListTypeVectorWithDecimalType(listViewWriter, holder); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkListTypeVectorWithDecimalType(listViewReader); holder.buffer.close(); } } @@ -380,36 +724,22 @@ public void listTimeStampMilliTZType() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - listWriter.writeNull(); - } else { - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.timezone = "FakeTimeZone"; - holder.value = j; - listWriter.timeStampMilliTZ().write(holder); - } - } - listWriter.endList(); - } + createListTypeVectorWithTimeStampMilliTZType(listWriter); listWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - if (j % 2 == 0) { - assertFalse(listReader.reader().isSet(), "index is set: " + j); - } else { - NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder(); - listReader.reader().read(actual); - assertEquals(j, actual.value); - assertEquals("FakeTimeZone", actual.timezone); - } - } - } + checkListTypeVectorWithTimeStampMilliTZType(listReader); + } + } + + @Test + public void listViewTimeStampMilliTZType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + 
UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createListTypeVectorWithTimeStampMilliTZType(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkListTypeVectorWithTimeStampMilliTZType(listViewReader); } } @@ -418,80 +748,51 @@ public void listDurationType() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - listWriter.writeNull(); - } else { - DurationHolder holder = new DurationHolder(); - holder.unit = TimeUnit.MICROSECOND; - holder.value = j; - listWriter.duration().write(holder); - } - } - listWriter.endList(); - } + createListTypeVectorWithDurationType(listWriter); listWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - if (j % 2 == 0) { - assertFalse(listReader.reader().isSet(), "index is set: " + j); - } else { - NullableDurationHolder actual = new NullableDurationHolder(); - listReader.reader().read(actual); - assertEquals(TimeUnit.MICROSECOND, actual.unit); - assertEquals(j, actual.value); - } - } - } + checkListTypeVectorWithDurationType(listReader); + } + } + + @Test + public void listViewDurationType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createListTypeVectorWithDurationType(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkListTypeVectorWithDurationType(listReader); } } @Test public void 
listFixedSizeBinaryType() throws Exception { - List bufs = new ArrayList(); + List buffers = new ArrayList<>(); try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - listWriter.writeNull(); - } else { - ArrowBuf buf = allocator.buffer(4); - buf.setInt(0, j); - FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder(); - holder.byteWidth = 4; - holder.buffer = buf; - listWriter.fixedSizeBinary().write(holder); - bufs.add(buf); - } - } - listWriter.endList(); - } + createListTypeVectorWithFixedSizeBinaryType(listWriter, buffers); listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - if (j % 2 == 0) { - assertFalse(listReader.reader().isSet(), "index is set: " + j); - } else { - NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder(); - listReader.reader().read(actual); - assertEquals(j, actual.buffer.getInt(0)); - assertEquals(4, actual.byteWidth); - } - } - } + UnionListReader listReader = new UnionListReader(listVector); + checkListTypeVectorWithFixedSizeBinaryType(listReader); } - AutoCloseables.close(bufs); + AutoCloseables.close(buffers); + } + + @Test + public void listViewFixedSizeBinaryType() throws Exception { + List buffers = new ArrayList<>(); + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createListTypeVectorWithFixedSizeBinaryType(listViewWriter, buffers); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + 
checkListTypeVectorWithFixedSizeBinaryType(listReader); + } + AutoCloseables.close(buffers); } @Test @@ -499,29 +800,22 @@ public void listScalarTypeNullable() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - if (i % 2 == 0) { - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - listWriter.writeInt(j); - } - listWriter.endList(); - } - } + createScalarTypeVectorWithNullableType(listWriter); listWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - if (i % 2 == 0) { - assertTrue(listReader.isSet(), "index is set: " + i); - assertEquals( - i % 7, ((List) listReader.readObject()).size(), "correct length at: " + i); - } else { - assertFalse(listReader.isSet(), "index is not set: " + i); - assertNull(listReader.readObject(), "index is not set: " + i); - } - } + checkScalarTypeVectorWithNullableType(listReader); + } + } + + @Test + public void listViewScalarTypeNullable() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + createScalarTypeVectorWithNullableType(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkScalarTypeVectorWithNullableType(listReader); } } @@ -529,29 +823,25 @@ public void listScalarTypeNullable() { public void listStructType() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - StructWriter structWriter = listWriter.struct(); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { 
- structWriter.start(); - structWriter.integer("int").writeInt(j); - structWriter.bigInt("bigInt").writeBigInt(j); - structWriter.end(); - } - listWriter.endList(); - } - listWriter.setValueCount(COUNT); + UnionListWriter listViewWriter = new UnionListWriter(listVector); + StructWriter structWriter = listViewWriter.struct(); + createListTypeVectorWithStructType(listViewWriter, structWriter); + listViewWriter.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - assertEquals( - j, listReader.reader().reader("int").readInteger().intValue(), "record: " + i); - assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue()); - } - } + checkListTypeVectorWithStructType(listReader); + } + } + + @Test + public void listViewStructType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + StructWriter structWriter = listViewWriter.struct(); + createListTypeVectorWithStructType(listViewWriter, structWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkListTypeVectorWithStructType(listReader); } } @@ -573,7 +863,31 @@ public void listListType() { listWriter.endList(); } listWriter.setValueCount(COUNT); - checkListOfLists(listVector); + UnionListReader listReader = new UnionListReader(listVector); + checkListOfListTypes(listReader); + } + } + + @Test + public void listViewListType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + for (int i = 0; i < COUNT; i++) { + listViewWriter.startListView(); + for (int j = 0; j < i % 7; j++) { + 
ListWriter innerListWriter = listViewWriter.listView(); + innerListWriter.startListView(); + for (int k = 0; k < i % 13; k++) { + innerListWriter.integer().writeInt(k); + } + innerListWriter.endListView(); + } + listViewWriter.endListView(); + } + listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkListOfListTypes(listReader); } } @@ -587,7 +901,6 @@ public void listListType2() { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); ListWriter innerListWriter = listWriter.list(); - for (int i = 0; i < COUNT; i++) { listWriter.startList(); for (int j = 0; j < i % 7; j++) { @@ -600,22 +913,31 @@ public void listListType2() { listWriter.endList(); } listWriter.setValueCount(COUNT); - checkListOfLists(listVector); + UnionListReader listReader = new UnionListReader(listVector); + checkListOfListTypes(listReader); } } - private void checkListOfLists(final ListVector listVector) { - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - FieldReader innerListReader = listReader.reader(); - for (int k = 0; k < i % 13; k++) { - innerListReader.next(); - assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); + @Test + public void listViewListType2() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + ListWriter innerListWriter = listViewWriter.list(); + for (int i = 0; i < COUNT; i++) { + listViewWriter.startListView(); + for (int j = 0; j < i % 7; j++) { + innerListWriter.startListView(); + for (int k = 0; k < i % 13; k++) { + innerListWriter.integer().writeInt(k); + } + innerListWriter.endListView(); } + listViewWriter.endListView(); } + 
listViewWriter.setValueCount(COUNT); + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkListOfListTypes(listReader); } } @@ -641,7 +963,35 @@ public void unionListListType() { listWriter.endList(); } listWriter.setValueCount(COUNT); - checkUnionList(listVector); + UnionListReader listReader = new UnionListReader(listVector); + checkUnionListType(listReader); + } + } + + @Test + public void unionListViewListType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + for (int i = 0; i < COUNT; i++) { + listViewWriter.startList(); + for (int j = 0; j < i % 7; j++) { + ListWriter innerListWriter = listViewWriter.listView(); + innerListWriter.startListView(); + for (int k = 0; k < i % 13; k++) { + if (k % 2 == 0) { + innerListWriter.integer().writeInt(k); + } else { + innerListWriter.bigInt().writeBigInt(k); + } + } + innerListWriter.endListView(); + } + listViewWriter.endListView(); + } + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkUnionListType(listViewReader); } } @@ -654,8 +1004,7 @@ public void unionListListType2() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - ListWriter innerListWriter = listWriter.list(); - + ListWriter innerListWriter = listWriter.listView(); for (int i = 0; i < COUNT; i++) { listWriter.startList(); for (int j = 0; j < i % 7; j++) { @@ -672,26 +1021,39 @@ public void unionListListType2() { listWriter.endList(); } listWriter.setValueCount(COUNT); - checkUnionList(listVector); + UnionListReader listReader = new UnionListReader(listVector); + checkUnionListType(listReader); } } - private void checkUnionList(ListVector listVector) { - UnionListReader listReader = new 
UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - FieldReader innerListReader = listReader.reader(); - for (int k = 0; k < i % 13; k++) { - innerListReader.next(); - if (k % 2 == 0) { - assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); - } else { - assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i); + /** + * This test is similar to {@link #unionListViewListType()} but we get the inner list writer once + * at the beginning. + */ + @Test + public void unionListViewListType2() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + ListWriter innerListWriter = listViewWriter.listView(); + for (int i = 0; i < COUNT; i++) { + listViewWriter.startListView(); + for (int j = 0; j < i % 7; j++) { + innerListWriter.startListView(); + for (int k = 0; k < i % 13; k++) { + if (k % 2 == 0) { + innerListWriter.integer().writeInt(k); + } else { + innerListWriter.bigInt().writeBigInt(k); + } } + innerListWriter.endListView(); } + listViewWriter.endListView(); } + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkUnionListType(listViewReader); } } @@ -700,27 +1062,11 @@ public void testListMapType() { try (ListVector listVector = ListVector.empty("list", allocator)) { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); - MapWriter innerMapWriter = listWriter.map(true); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - innerMapWriter.startMap(); - for (int k = 0; k < i % 13; k++) { - innerMapWriter.startEntry(); - innerMapWriter.key().integer().writeInt(k); - if (k % 2 == 0) { - 
innerMapWriter.value().bigInt().writeBigInt(k); - } - innerMapWriter.endEntry(); - } - innerMapWriter.endMap(); - } - listWriter.endList(); - } + createListTypeVectorWithMapType(listWriter); listWriter.setValueCount(COUNT); - checkListMap(listVector); - + UnionListReader listReader = new UnionListReader(listVector); + checkListTypeMap(listReader); // Verify that the map vector has keysSorted = true MapVector mapVector = (MapVector) listVector.getDataVector(); ArrowType arrowType = mapVector.getField().getFieldType().getType(); @@ -728,23 +1074,20 @@ public void testListMapType() { } } - private void checkListMap(ListVector listVector) { - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - listReader.next(); - UnionMapReader mapReader = (UnionMapReader) listReader.reader(); - for (int k = 0; k < i % 13; k++) { - mapReader.next(); - assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i); - if (k % 2 == 0) { - assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i); - } else { - assertNull(mapReader.value().readLong(), "record value: " + i); - } - } - } + @Test + public void testListViewMapType() { + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + listViewVector.allocateNew(); + UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); + + createListTypeVectorWithMapType(listViewWriter); + listViewWriter.setValueCount(COUNT); + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + checkListTypeMap(listViewReader); + // Verify that the map vector has keysSorted = true + MapVector mapVector = (MapVector) listViewVector.getDataVector(); + ArrowType arrowType = mapVector.getField().getFieldType().getType(); + assertTrue(((ArrowType.Map) arrowType).getKeysSorted()); } } @@ -1212,6 +1555,7 @@ public void complexCopierWithList() { 
ComplexWriter writer = new ComplexWriterImpl("root", parent); StructWriter rootWriter = writer.rootAsStruct(); ListWriter listWriter = rootWriter.list("list"); + StructWriter innerStructWriter = listWriter.struct(); IntWriter outerIntWriter = listWriter.integer(); rootWriter.start(); @@ -1246,6 +1590,47 @@ public void complexCopierWithList() { } } + @Test + public void complexCopierWithListView() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + StructWriter rootWriter = writer.rootAsStruct(); + ListWriter listViewWriter = rootWriter.listView("listView"); + + StructWriter innerStructWriter = listViewWriter.struct(); + IntWriter outerIntWriter = listViewWriter.integer(); + rootWriter.start(); + listViewWriter.startListView(); + outerIntWriter.writeInt(1); + outerIntWriter.writeInt(2); + innerStructWriter.start(); + IntWriter intWriter = innerStructWriter.integer("a"); + intWriter.writeInt(1); + innerStructWriter.end(); + innerStructWriter.start(); + intWriter = innerStructWriter.integer("a"); + intWriter.writeInt(2); + innerStructWriter.end(); + listViewWriter.endListView(); + rootWriter.end(); + writer.setValueCount(1); + + StructVector structVector = (StructVector) parent.getChild("root"); + TransferPair tp = structVector.getTransferPair(allocator); + tp.splitAndTransfer(0, 1); + NonNullableStructVector toStructVector = (NonNullableStructVector) tp.getTo(); + JsonStringHashMap toMapValue = (JsonStringHashMap) toStructVector.getObject(0); + JsonStringArrayList object = (JsonStringArrayList) toMapValue.get("listView"); + assertEquals(1, object.get(0)); + assertEquals(2, object.get(1)); + JsonStringHashMap innerStruct = (JsonStringHashMap) object.get(2); + assertEquals(1, innerStruct.get("a")); + innerStruct = (JsonStringHashMap) object.get(3); + assertEquals(2, innerStruct.get("a")); + toStructVector.close(); + } + } + @Test public void 
testSingleStructWriter1() { /* initialize a SingleStructWriter with empty StructVector and then lazily @@ -1262,6 +1647,7 @@ public void testSingleStructWriter1() { Float4Writer float4Writer = singleStructWriter.float4("float4Field"); Float8Writer float8Writer = singleStructWriter.float8("float8Field"); ListWriter listWriter = singleStructWriter.list("listField"); + ListWriter listViewWriter = singleStructWriter.listView("listViewField"); MapWriter mapWriter = singleStructWriter.map("mapField", false); int intValue = 100; @@ -1285,6 +1671,14 @@ public void testSingleStructWriter1() { listWriter.integer().writeInt(intValue + i + 3); listWriter.endList(); + listViewWriter.setPosition(i); + listViewWriter.startListView(); + listViewWriter.integer().writeInt(intValue + i); + listViewWriter.integer().writeInt(intValue + i + 1); + listViewWriter.integer().writeInt(intValue + i + 2); + listViewWriter.integer().writeInt(intValue + i + 3); + listViewWriter.endListView(); + mapWriter.setPosition(i); mapWriter.startMap(); mapWriter.startEntry(); @@ -1323,6 +1717,8 @@ public void testSingleStructWriter1() { Float4Reader float4Reader = singleStructReader.reader("float4Field"); Float8Reader float8Reader = singleStructReader.reader("float8Field"); UnionListReader listReader = (UnionListReader) singleStructReader.reader("listField"); + UnionListViewReader listViewReader = + (UnionListViewReader) singleStructReader.reader("listViewField"); UnionMapReader mapReader = (UnionMapReader) singleStructReader.reader("mapField"); for (int i = 0; i < initialCapacity; i++) { @@ -1331,6 +1727,7 @@ public void testSingleStructWriter1() { float4Reader.setPosition(i); float8Reader.setPosition(i); listReader.setPosition(i); + listViewReader.setPosition(i); mapReader.setPosition(i); assertEquals(intValue + i, intReader.readInteger().intValue()); @@ -1343,6 +1740,11 @@ public void testSingleStructWriter1() { assertEquals(intValue + i + j, listReader.reader().readInteger().intValue()); } + for (int j 
= 0; j < 4; j++) { + listViewReader.next(); + assertEquals(intValue + i + j, listViewReader.reader().readInteger().intValue()); + } + for (int k = 0; k < 4; k += 2) { mapReader.next(); assertEquals(intValue + k + i, mapReader.key().readInteger().intValue()); @@ -1362,40 +1764,31 @@ public void testListWriterWithNulls() { UnionListWriter listWriter = listVector.getWriter(); // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...] - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.startList(); - if (i % 4 == 0) { - listWriter.integer().writeNull(); - } else { - listWriter.integer().writeInt(i); - listWriter.integer().writeInt(i * 2); - } - listWriter.endList(); - } else { - listWriter.writeNull(); - } - } + createNullsWithListWriters(listWriter); listVector.setValueCount(COUNT); UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - if (i % 2 == 0) { - assertTrue(listReader.isSet()); - listReader.next(); - if (i % 4 == 0) { - assertNull(listReader.reader().readInteger()); - } else { - assertEquals(i, listReader.reader().readInteger().intValue()); - listReader.next(); - assertEquals(i * 2, listReader.reader().readInteger().intValue()); - } - } else { - assertFalse(listReader.isSet()); - } - } + checkNullsWithListWriters(listReader); + } + } + + @Test + public void testListViewWriterWithNulls() { + try (ListViewVector listViewVector = ListViewVector.empty("listView", allocator)) { + listViewVector.setInitialCapacity(COUNT); + listViewVector.allocateNew(); + listViewVector + .getValidityBuffer() + .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); + + UnionListViewWriter listWriter = listViewVector.getWriter(); + + // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...] 
+ createNullsWithListWriters(listWriter); + listViewVector.setValueCount(COUNT); + + UnionListViewReader listReader = new UnionListViewReader(listViewVector); + checkNullsWithListWriters(listReader); } } @@ -1452,6 +1845,61 @@ public void testListOfListWriterWithNulls() { } } + @Test + public void testListViewOfListViewWriterWithNulls() { + try (ListViewVector listViewVector = ListViewVector.empty("listViewoflistView", allocator)) { + listViewVector.setInitialCapacity(COUNT); + listViewVector.allocateNew(); + listViewVector + .getValidityBuffer() + .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + // create list : [ [null], null, [[null, 2, 4]], null, [null], null, [[null, 6, 12]], ... ] + for (int i = 0; i < COUNT; i++) { + listViewWriter.setPosition(i); + if (i % 2 == 0) { + listViewWriter.startListView(); + if (i % 4 == 0) { + listViewWriter.listView().writeNull(); + } else { + listViewWriter.listView().startListView(); + listViewWriter.listView().integer().writeNull(); + listViewWriter.listView().integer().writeInt(i); + listViewWriter.listView().integer().writeInt(i * 2); + listViewWriter.listView().endListView(); + } + listViewWriter.endListView(); + } else { + listViewWriter.writeNull(); + } + } + listViewVector.setValueCount(COUNT); + + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + for (int i = 0; i < COUNT; i++) { + listViewReader.setPosition(i); + if (i % 2 == 0) { + assertTrue(listViewReader.isSet()); + listViewReader.next(); + if (i % 4 == 0) { + assertFalse(listViewReader.reader().isSet()); + } else { + listViewReader.reader().next(); + assertFalse(listViewReader.reader().reader().isSet()); + listViewReader.reader().next(); + assertEquals(i, listViewReader.reader().reader().readInteger().intValue()); + listViewReader.reader().next(); + assertEquals(i * 2, listViewReader.reader().reader().readInteger().intValue()); + } + } else { + 
assertFalse(listViewReader.isSet()); + } + } + } + } + @Test public void testListOfListOfListWriterWithNulls() { try (ListVector listVector = ListVector.empty("listoflistoflist", allocator)) { @@ -1515,6 +1963,72 @@ public void testListOfListOfListWriterWithNulls() { } } + @Test + public void testListViewOfListViewOfListViewWriterWithNulls() { + try (ListViewVector listViewVector = + ListViewVector.empty("listViewoflistViewoflistView", allocator)) { + listViewVector.setInitialCapacity(COUNT); + listViewVector.allocateNew(); + listViewVector + .getValidityBuffer() + .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + // create list : [ null, [null], [[null]], [[[null, 1, 2]]], null, [null], ... + for (int i = 0; i < COUNT; i++) { + listViewWriter.setPosition(i); + if (i % 4 == 0) { + listViewWriter.writeNull(); + } else { + listViewWriter.startListView(); + if (i % 4 == 1) { + listViewWriter.listView().writeNull(); + } else if (i % 4 == 2) { + listViewWriter.listView().startListView(); + listViewWriter.listView().listView().writeNull(); + listViewWriter.listView().endListView(); + } else { + listViewWriter.listView().startListView(); + listViewWriter.listView().listView().startListView(); + listViewWriter.listView().listView().integer().writeNull(); + listViewWriter.listView().listView().integer().writeInt(i); + listViewWriter.listView().listView().integer().writeInt(i * 2); + listViewWriter.listView().listView().endListView(); + listViewWriter.listView().endListView(); + } + listViewWriter.endListView(); + } + } + listViewVector.setValueCount(COUNT); + + UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); + for (int i = 0; i < COUNT; i++) { + listViewReader.setPosition(i); + if (i % 4 == 0) { + assertFalse(listViewReader.isSet()); + } else { + assertTrue(listViewReader.isSet()); + listViewReader.next(); + if (i % 4 == 1) { + 
assertFalse(listViewReader.reader().isSet()); + } else if (i % 4 == 2) { + listViewReader.reader().next(); + assertFalse(listViewReader.reader().reader().isSet()); + } else { + listViewReader.reader().next(); + listViewReader.reader().reader().next(); + assertFalse(listViewReader.reader().reader().reader().isSet()); + listViewReader.reader().reader().next(); + assertEquals(i, listViewReader.reader().reader().reader().readInteger().intValue()); + listViewReader.reader().reader().next(); + assertEquals(i * 2, listViewReader.reader().reader().reader().readInteger().intValue()); + } + } + } + } + } + @Test public void testStructOfList() { try (StructVector structVector = StructVector.empty("struct1", allocator)) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java index c18f6faeb548f..281f050dfb662 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java @@ -579,7 +579,7 @@ public void validateUnionData(int count, VectorSchemaRoot root) { FieldReader unionReader = root.getVector("union").getReader(); for (int i = 0; i < count; i++) { unionReader.setPosition(i); - switch (i % 4) { + switch (i % 5) { case 0: assertEquals(i, unionReader.readInteger().intValue()); break; @@ -590,6 +590,9 @@ public void validateUnionData(int count, VectorSchemaRoot root) { assertEquals(i % 3, unionReader.size()); break; case 3: + assertEquals(3, unionReader.size()); + break; + case 4: NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); unionReader.reader("timestamp").read(h); assertEquals(i, h.value); @@ -612,9 +615,10 @@ public void writeUnionData(int count, StructVector parent) { IntWriter intWriter = rootWriter.integer("union"); BigIntWriter bigIntWriter = rootWriter.bigInt("union"); ListWriter listWriter = rootWriter.list("union"); + ListWriter listViewWriter 
= rootWriter.listView("union"); StructWriter structWriter = rootWriter.struct("union"); for (int i = 0; i < count; i++) { - switch (i % 4) { + switch (i % 5) { case 0: intWriter.setPosition(i); intWriter.writeInt(i); @@ -632,6 +636,14 @@ public void writeUnionData(int count, StructVector parent) { listWriter.endList(); break; case 3: + listViewWriter.setPosition(i); + listViewWriter.startListView(); + for (int j = 0; j < i % 5; j++) { + listViewWriter.varChar().writeVarChar(0, 3, varchar); + } + listViewWriter.endListView(); + break; + case 4: structWriter.setPosition(i); structWriter.start(); structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 9b2c80ef181d1..69e16dc470351 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -61,9 +61,11 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VariableWidthFieldVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.holders.IntervalDayHolder; import org.apache.arrow.vector.types.Types; @@ -728,4 +730,34 @@ public static void setVector(StructVector vector, Map> val } vector.setValueCount(valueCount); } + + /** Populate values for {@link ListViewVector}. */ + public static void setVector(ListViewVector vector, List... 
values) { + vector.allocateNewSafe(); + Types.MinorType type = Types.MinorType.INT; + vector.addOrGetVector(FieldType.nullable(type.getType())); + + IntVector dataVector = (IntVector) vector.getDataVector(); + dataVector.allocateNew(); + + // set underlying vectors + int curPos = 0; + for (int i = 0; i < values.length; i++) { + vector.getOffsetBuffer().setInt((long) i * BaseRepeatedValueViewVector.OFFSET_WIDTH, curPos); + if (values[i] == null) { + BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); + } else { + BitVectorHelper.setBit(vector.getValidityBuffer(), i); + for (int value : values[i]) { + dataVector.setSafe(curPos, value); + curPos += 1; + } + } + vector + .getSizeBuffer() + .setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH, values[i] == null ? 0 : values[i].size()); + } + dataVector.setValueCount(curPos); + vector.setValueCount(values.length); + } }