Skip to content

Commit

Permalink
GH-33032: [C#] Support fixed-size lists (#35716)
Browse files Browse the repository at this point in the history
### What changes are included in this PR?

Support fixed-size lists in the C# implementation.
Adds Archery support for Lists, Structs and Fixed-size Lists in the C# implementation.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Fixed-size lists are now supported for C#.

* Closes: #33032
* Closes: #32886

Authored-by: Curt Hagenlocher <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
CurtHagenlocher authored Aug 21, 2023
1 parent 3262deb commit 37a9f27
Show file tree
Hide file tree
Showing 30 changed files with 667 additions and 31 deletions.
10 changes: 10 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ private class ArrayDataConcatenationVisitor :
IArrowTypeVisitor<BinaryType>,
IArrowTypeVisitor<StringType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>
{
public ArrayData Result { get; private set; }
Expand Down Expand Up @@ -100,6 +101,15 @@ public void Visit(ListType type)
Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child });
}

/// <summary>
/// Concatenates fixed-size list arrays. The result carries a single
/// (validity) buffer and a single child holding the concatenated values.
/// </summary>
/// <param name="type">The fixed-size list type shared by the arrays being concatenated.</param>
public void Visit(FixedSizeListType type)
{
    // NOTE(review): the literal 1 is presumably the expected buffer count
    // (validity only) - confirm against CheckData.
    CheckData(type, 1);

    ArrowBuffer validity = ConcatenateValidityBuffer();
    ArrayData concatenatedValues = Concatenate(SelectChildren(0), _allocator);

    ArrowBuffer[] buffers = new ArrowBuffer[] { validity };
    ArrayData[] children = new[] { concatenatedValues };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children);
}

public void Visit(StructType type)
{
CheckData(type, 1);
Expand Down
11 changes: 11 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ internal sealed class ArrayDataTypeComparer :
IArrowTypeVisitor<Time64Type>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>
{
private readonly IArrowType _expectedType;
Expand Down Expand Up @@ -102,6 +103,16 @@ public void Visit(ListType actualType)
}
}

/// <summary>
/// Flags a match when the expected type is also a fixed-size list with the
/// same list size and the nested comparison succeeds.
/// </summary>
/// <param name="actualType">The fixed-size list type being compared against the expected type.</param>
public void Visit(FixedSizeListType actualType)
{
    // Anything other than a fixed-size list can never match.
    if (!(_expectedType is FixedSizeListType expectedList))
    {
        return;
    }

    // The list size is part of the type identity.
    if (actualType.ListSize != expectedList.ListSize)
    {
        return;
    }

    if (CompareNested(expectedList, actualType))
    {
        _dataTypeMatch = true;
    }
}

public void Visit(StructType actualType)
{
if (_expectedType is StructType expectedType
Expand Down
2 changes: 2 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ internal static IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>>
return new Time64Array.Builder(dataType as Time64Type);
case ArrowTypeId.List:
return new ListArray.Builder(dataType as ListType);
case ArrowTypeId.FixedSizeList:
return new FixedSizeListArray.Builder(dataType as FixedSizeListType);
case ArrowTypeId.Decimal128:
return new Decimal128Array.Builder(dataType as Decimal128Type);
case ArrowTypeId.Decimal256:
Expand Down
2 changes: 2 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ public static IArrowArray BuildArray(ArrayData data)
#else
throw new NotSupportedException("Half-float arrays are not supported by this target framework.");
#endif
case ArrowTypeId.FixedSizeList:
return new FixedSizeListArray(data);
case ArrowTypeId.Interval:
case ArrowTypeId.Map:
default:
Expand Down
2 changes: 1 addition & 1 deletion csharp/src/Apache.Arrow/Arrays/DateArrayBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public TBuilder Append(ReadOnlySpan<DateOnly> span)
/// Append a null date to the array.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder AppendNull()
public override TBuilder AppendNull()
{
InnerBuilder.AppendNull();
return this as TBuilder;
Expand Down
6 changes: 6 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/DelegatingArrayBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,11 @@ public TBuilder Clear()
InnerBuilder.Clear();
return this as TBuilder;
}

/// <summary>
/// Appends a single null value to the array being built.
/// </summary>
/// <remarks>
/// Declared abstract, so every concrete builder deriving from this class
/// must supply its own implementation.
/// </remarks>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public abstract TBuilder AppendNull();
}
}
190 changes: 190 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/FixedSizeListArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// An Arrow array of fixed-size lists. Slot <c>i</c> maps to the
    /// <c>ListSize</c> consecutive entries of <see cref="Values"/> that start at
    /// <c>(i + Data.Offset) * ListSize</c>; no offsets buffer is stored, only a
    /// validity buffer.
    /// </summary>
    public class FixedSizeListArray : Array
    {
        /// <summary>
        /// Incrementally builds a <see cref="FixedSizeListArray"/>.
        /// </summary>
        /// <remarks>
        /// Open a slot with <see cref="Append"/> and then append exactly
        /// <c>ListSize</c> values to <see cref="ValueBuilder"/>. The child
        /// length is validated by <see cref="Append"/>, <see cref="AppendNull"/>,
        /// <see cref="Resize"/> and <see cref="Build"/>; a mismatch raises
        /// <see cref="ArgumentOutOfRangeException"/>.
        /// </remarks>
        public class Builder : IArrowArrayBuilder<FixedSizeListArray, Builder>
        {
            /// <summary>
            /// Builder for the child values; callers append the elements of each list here.
            /// </summary>
            public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }

            /// <summary>
            /// Number of list slots (valid or null) appended so far.
            /// </summary>
            public int Length { get; protected set; }

            private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }

            /// <summary>
            /// Number of null list slots, taken from the unset bits of the validity bitmap.
            /// </summary>
            public int NullCount => ValidityBufferBuilder.UnsetBitCount;

            private FixedSizeListType DataType { get; }

            // Child length required once every started slot has been completely filled.
            private int ExpectedValueLength => Length * DataType.ListSize;

            /// <summary>
            /// Creates a builder for lists of <paramref name="listSize"/> values of type
            /// <paramref name="valueDataType"/>.
            /// </summary>
            /// <param name="valueDataType">Type of the list elements.</param>
            /// <param name="listSize">Number of elements in every list.</param>
            public Builder(IArrowType valueDataType, int listSize) : this(new FixedSizeListType(valueDataType, listSize))
            {
            }

            /// <summary>
            /// Creates a builder for lists of <paramref name="listSize"/> values described by
            /// <paramref name="valueField"/>.
            /// </summary>
            /// <param name="valueField">Field describing the list elements.</param>
            /// <param name="listSize">Number of elements in every list.</param>
            public Builder(Field valueField, int listSize) : this(new FixedSizeListType(valueField, listSize))
            {
            }

            internal Builder(FixedSizeListType dataType)
            {
                ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
                ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
                DataType = dataType;
            }

            /// <summary>
            /// Start a new fixed-length list slot
            ///
            /// This function should be called before beginning to append elements to the
            /// value builder
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// The previously started list does not hold exactly <c>ListSize</c> values.
            /// </exception>
            public Builder Append()
            {
                ValidateChildLength();

                ValidityBufferBuilder.Append(true);
                Length++;

                return this;
            }

            /// <summary>
            /// Appends a null list slot.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// The previously started list does not hold exactly <c>ListSize</c> values.
            /// </exception>
            public Builder AppendNull()
            {
                ValidateChildLength();

                ValidityBufferBuilder.Append(false);

                // A null slot still occupies ListSize child entries, so pad the
                // child with nulls to keep later slots aligned.
                for (int i = 0; i < DataType.ListSize; i++)
                {
                    ValueBuilder.AppendNull();
                }
                Length++;

                return this;
            }

            /// <summary>
            /// Builds the array from everything appended so far.
            /// </summary>
            /// <param name="allocator">Allocator forwarded to the validity and value builders.</param>
            /// <returns>The newly built <see cref="FixedSizeListArray"/>.</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// The last started list does not hold exactly <c>ListSize</c> values.
            /// </exception>
            public FixedSizeListArray Build(MemoryAllocator allocator = default)
            {
                ValidateChildLength();

                // The validity bitmap is only materialized when there is at least one null.
                int nullCount = NullCount;
                ArrowBuffer validityBuffer = nullCount > 0
                    ? ValidityBufferBuilder.Build(allocator)
                    : ArrowBuffer.Empty;

                return new FixedSizeListArray(DataType, Length,
                    ValueBuilder.Build(allocator),
                    validityBuffer, nullCount, 0);
            }

            /// <summary>
            /// Reserves room for <paramref name="capacity"/> list slots and the
            /// corresponding <c>ListSize * capacity</c> child values.
            /// </summary>
            /// <param name="capacity">Number of list slots to reserve.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder Reserve(int capacity)
            {
                ValidityBufferBuilder.Reserve(capacity);
                ValueBuilder.Reserve(DataType.ListSize * capacity);
                return this;
            }

            /// <summary>
            /// Resizes the builder to <paramref name="length"/> list slots and the child
            /// builder to <c>ListSize * length</c> values.
            /// </summary>
            /// <param name="length">New number of list slots.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// The last started list does not hold exactly <c>ListSize</c> values.
            /// </exception>
            public Builder Resize(int length)
            {
                ValidateChildLength();

                ValidityBufferBuilder.Resize(length);
                ValueBuilder.Resize(DataType.ListSize * length);
                Length = length;
                return this;
            }

            /// <summary>
            /// Clears the child values and the validity bitmap, and resets <see cref="Length"/> to zero.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder Clear()
            {
                ValueBuilder.Clear();
                ValidityBufferBuilder.Clear();
                Length = 0;
                return this;
            }

            // Throws when the child builder does not hold exactly ListSize values
            // for every slot started so far.
            private void ValidateChildLength()
            {
                if (ValueBuilder.Length != ExpectedValueLength)
                {
                    // Number of values appended to the most recently started list.
                    int actualLength = ValueBuilder.Length - ExpectedValueLength + DataType.ListSize;

                    // The single-string constructor treats its argument as the parameter
                    // name; use the (paramName, message) overload so the text ends up in
                    // Message. No method parameter is at fault, hence the null name.
                    throw new ArgumentOutOfRangeException(
                        paramName: null,
                        message: $"Lists of length: {actualLength} do not conform to the fixed size: {DataType.ListSize}");
                }
            }
        }

        /// <summary>
        /// The child array holding the elements of all lists back to back.
        /// </summary>
        public IArrowArray Values { get; }

        /// <summary>
        /// Creates a fixed-size list array from its parts.
        /// </summary>
        /// <param name="dataType">The array type; must have type id <see cref="ArrowTypeId.FixedSizeList"/>.</param>
        /// <param name="length">Number of list slots.</param>
        /// <param name="values">Child array holding the list elements.</param>
        /// <param name="nullBitmapBuffer">Validity bitmap for the list slots.</param>
        /// <param name="nullCount">Number of null list slots.</param>
        /// <param name="offset">Offset, in list slots, of the first slot.</param>
        public FixedSizeListArray(IArrowType dataType, int length,
            IArrowArray values, ArrowBuffer nullBitmapBuffer,
            int nullCount = 0, int offset = 0)
            : this(new ArrayData(dataType, length, nullCount, offset,
                new[] { nullBitmapBuffer }, new[] { values.Data }),
                values)
        {
        }

        /// <summary>
        /// Creates a fixed-size list array over existing array data; the values
        /// array is built from the first child of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Array data with exactly one buffer and a fixed-size list type.</param>
        public FixedSizeListArray(ArrayData data)
            : this(data, ArrowArrayFactory.BuildArray(data.Children[0]))
        {
        }

        private FixedSizeListArray(ArrayData data, IArrowArray values) : base(data)
        {
            data.EnsureBufferCount(1);
            data.EnsureDataType(ArrowTypeId.FixedSizeList);
            Values = values;
        }

        /// <inheritdoc/>
        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        /// <summary>
        /// Returns the slice of <see cref="Values"/> that makes up the list at <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Zero-based list slot, relative to this array.</param>
        /// <returns>
        /// The list's values, or <c>null</c> when the slot is null or when
        /// <see cref="Values"/> is not an <see cref="Array"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than the array length.
        /// </exception>
        public IArrowArray GetSlicedValues(int index)
        {
            if (index < 0 || index >= Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            if (IsNull(index))
            {
                return null;
            }

            if (!(Values is Array array))
            {
                return default;
            }

            // Translate the logical index by this array's own offset before
            // indexing into the child values.
            index += Data.Offset;

            int length = ((FixedSizeListType)Data.DataType).ListSize;
            return array.Slice(index * length, length);
        }

        /// <summary>
        /// Disposes the child <see cref="Values"/> array (when disposing) and then the base array.
        /// </summary>
        /// <param name="disposing">True when called from Dispose rather than from a finalizer.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                Values?.Dispose();
            }
            base.Dispose(disposing);
        }
    }
}
2 changes: 1 addition & 1 deletion csharp/src/Apache.Arrow/Arrays/TimeArrayBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public TBuilder Append(ReadOnlySpan<TUnderlying> values)
/// Append a null time to the array.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder AppendNull()
public override TBuilder AppendNull()
{
InnerBuilder.AppendNull();
return this as TBuilder;
Expand Down
23 changes: 20 additions & 3 deletions csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type)
children = ProcessListChildren(cArray, ((ListType)type).ValueDataType);
buffers = ImportListBuffers(cArray);
break;
case ArrowTypeId.FixedSizeList:
children = ProcessListChildren(cArray, ((FixedSizeListType)type).ValueDataType);
buffers = ImportFixedSizeListBuffers(cArray);
break;
case ArrowTypeId.Struct:
children = ProcessStructChildren(cArray, ((StructType)type).Fields);
buffers = new ArrowBuffer[] { ImportValidityBuffer(cArray) };
Expand Down Expand Up @@ -236,7 +240,7 @@ private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray)
{
if (cArray->n_buffers != 3)
{
throw new InvalidOperationException("Byte arrays are expected to have exactly three child arrays");
throw new InvalidOperationException("Byte arrays are expected to have exactly three buffers");
}

int length = checked((int)cArray->length);
Expand All @@ -256,7 +260,7 @@ private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray)
{
if (cArray->n_buffers != 2)
{
throw new InvalidOperationException("List arrays are expected to have exactly two children");
throw new InvalidOperationException("List arrays are expected to have exactly two buffers");
}

int length = checked((int)cArray->length);
Expand All @@ -269,11 +273,24 @@ private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray)
return buffers;
}

/// <summary>
/// Imports the buffers of a fixed-size list array, which consist of the
/// validity buffer only.
/// </summary>
/// <param name="cArray">The C array being imported.</param>
/// <returns>A one-element array containing the imported validity buffer.</returns>
/// <exception cref="InvalidOperationException">
/// The C array does not report exactly one buffer.
/// </exception>
private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray* cArray)
{
    if (cArray->n_buffers == 1)
    {
        return new ArrowBuffer[] { ImportValidityBuffer(cArray) };
    }

    throw new InvalidOperationException("Fixed-size list arrays are expected to have exactly one buffer");
}

private ArrowBuffer[] ImportFixedWidthBuffers(CArrowArray* cArray, int bitWidth)
{
if (cArray->n_buffers != 2)
{
throw new InvalidOperationException("Arrays of fixed-width type are expected to have exactly two children");
throw new InvalidOperationException("Arrays of fixed-width type are expected to have exactly two buffers");
}

// validity, data
Expand Down
2 changes: 2 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ private static string GetFormat(IArrowType datatype)
return String.Format("ts{0}:{1}", FormatTimeUnit(timestampType.Unit), timestampType.Timezone);
// Nested
case ListType _: return "+l";
case FixedSizeListType fixedListType:
return $"+w:{fixedListType.ListSize}";
case StructType _: return "+s";
// Dictionary
case DictionaryType dictionaryType:
Expand Down
21 changes: 21 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,27 @@ public ArrowType GetAsType()

return new StructType(childFields);
}
else if (format.StartsWith("+w:"))
{
// Fixed-size list: the format string is "+w:" followed by the list size
int width = Int32.Parse(format.Substring(3));

if (_cSchema->n_children != 1)
{
throw new InvalidDataException("Expected fixed-length list type to have exactly one child.");
}
ImportedArrowSchema childSchema;
if (_cSchema->GetChild(0) == null)
{
throw new InvalidDataException("Expected fixed-length list type child to be non-null.");
}
childSchema = new ImportedArrowSchema(_cSchema->GetChild(0), isRoot: false);

Field childField = childSchema.GetAsField();

return new FixedSizeListType(childField, width);
}

// TODO: Map type and large list type

// Decimals
Expand Down
Loading

0 comments on commit 37a9f27

Please sign in to comment.