Skip to content

Commit

Permalink
Builders mostly working; tests mostly passing
Browse files Browse the repository at this point in the history
  • Loading branch information
CurtHagenlocher committed Dec 21, 2023
1 parent 737909e commit 8d23502
Show file tree
Hide file tree
Showing 12 changed files with 374 additions and 118 deletions.
89 changes: 27 additions & 62 deletions csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,37 +56,25 @@ public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte, T
{
protected IArrowType DataType { get; }
protected TBuilder Instance => this as TBuilder;
protected ArrowBuffer.Builder<int> ValueOffsets { get; }
protected ArrowBuffer.Builder<BinaryView> BinaryViews { get; }
protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }
protected int Offset { get; set; }
protected int NullCount => this.ValidityBuffer.UnsetBitCount;

protected BuilderBase(IArrowType dataType)
{
DataType = dataType;
ValueOffsets = new ArrowBuffer.Builder<int>();
BinaryViews = new ArrowBuffer.Builder<BinaryView>();
ValueBuffer = new ArrowBuffer.Builder<byte>();
ValidityBuffer = new ArrowBuffer.BitmapBuilder();

// From the docs:
//
// The offsets buffer contains length + 1 signed integers (either 32-bit or 64-bit, depending on the
// logical type), which encode the start position of each slot in the data buffer. The length of the
// value in each slot is computed using the difference between the offset at that slot’s index and the
// subsequent offset.
//
// In this builder, we choose to append the first offset (zero) upon construction, and each trailing
// offset is then added after each individual item has been appended.
ValueOffsets.Append(this.Offset);
}

protected abstract TArray Build(ArrayData data);

/// <summary>
/// Gets the length of the array built so far.
/// </summary>
public int Length => ValueOffsets.Length - 1;
public int Length => BinaryViews.Length - 1;

/// <summary>
/// Build an Arrow array from the appended contents so far.
Expand All @@ -98,7 +86,7 @@ public TArray Build(MemoryAllocator allocator = default)
var bufs = new[]
{
NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty,
ValueOffsets.Build(allocator),
BinaryViews.Build(allocator),
ValueBuffer.Build(allocator),
};
var data = new ArrayData(
Expand All @@ -120,7 +108,7 @@ public TBuilder AppendNull()
// Do not add to the value buffer in the case of a null.
// Note that we do not need to increment the offset as a result.
ValidityBuffer.Append(false);
ValueOffsets.Append(Offset);
BinaryViews.Append(default(BinaryView));
return Instance;
}

Expand All @@ -131,10 +119,9 @@ public TBuilder AppendNull()
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(byte value)
{
ValueBuffer.Append(value);
ValidityBuffer.Append(true);
Offset++;
ValueOffsets.Append(Offset);
Span<byte> buf = stackalloc[] { value };
BinaryViews.Append(new BinaryView(buf));
return Instance;
}

Expand All @@ -149,46 +136,25 @@ public TBuilder Append(byte value)
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(ReadOnlySpan<byte> span)
{
ValueBuffer.Append(span);
ValidityBuffer.Append(true);
Offset += span.Length;
ValueOffsets.Append(Offset);
return Instance;
}

/// <summary>
/// Append a value, consisting of an enumerable collection of bytes, to the array.
/// </summary>
/// <remarks>
/// Note that this method appends a single value, which may consist of arbitrarily many bytes. If multiple
/// values are to be added, use the <see cref="AppendRange(IEnumerable{byte})"/> method instead.
/// </remarks>
/// <param name="value">Enumerable collection of bytes to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(IEnumerable<byte> value)
{
if (value == null)
if (span.Length > BinaryView.MaxInlineLength)
{
return AppendNull();
int offset = ValueBuffer.Length;
ValueBuffer.Append(span);
BinaryViews.Append(new BinaryView(span.Length, span.Slice(0, 4), 0, offset));
}
else
{
BinaryViews.Append(new BinaryView(span));
}

// Note: by looking at the length of the value buffer before and after, we avoid having to iterate
// through the enumerable multiple times to get both length and contents.
int priorLength = ValueBuffer.Length;
ValueBuffer.AppendRange(value);
int valueLength = ValueBuffer.Length - priorLength;
Offset += valueLength;
ValidityBuffer.Append(true);
ValueOffsets.Append(Offset);
return Instance;
}

/// <summary>
/// Append an enumerable collection of single-byte values to the array.
/// </summary>
/// <remarks>
/// Note that this method appends multiple values, each of which is a single byte. If a single value is
/// to be added, use the <see cref="Append(IEnumerable{byte})"/> method instead.
/// Note that this method appends multiple values, each of which is a single byte.
/// </remarks>
/// <param name="values">Single-byte values to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
Expand Down Expand Up @@ -237,7 +203,7 @@ public TBuilder AppendRange(IEnumerable<byte[]> values)
public TBuilder Reserve(int capacity)
{
// TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way.
ValueOffsets.Reserve(capacity + 1);
BinaryViews.Reserve(capacity);
ValueBuffer.Reserve(capacity);
ValidityBuffer.Reserve(capacity);
return Instance;
Expand All @@ -246,16 +212,19 @@ public TBuilder Reserve(int capacity)
public TBuilder Resize(int length)
{
// TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`.
ValueOffsets.Resize(length + 1);
BinaryViews.Resize(length);
ValueBuffer.Resize(length);
ValidityBuffer.Resize(length);
return Instance;
}

public TBuilder Swap(int i, int j)
{
// TODO: Implement
throw new NotImplementedException();
ValidityBuffer.Swap(i, j);
BinaryView view = BinaryViews.Span[i];
BinaryViews.Span[i] = BinaryViews.Span[j];
BinaryViews.Span[j] = view;
return Instance;
}

public TBuilder Set(int index, byte value)
Expand All @@ -270,31 +239,27 @@ public TBuilder Set(int index, byte value)
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Clear()
{
ValueOffsets.Clear();
BinaryViews.Clear();
ValueBuffer.Clear();
ValidityBuffer.Clear();

// Always write the first offset before anything has been written.
Offset = 0;
ValueOffsets.Append(Offset);
return Instance;
}
}

public BinaryViewArray(IArrowType dataType, int length,
ArrowBuffer valueOffsetsBuffer,
ArrowBuffer binaryViewsBuffer,
ArrowBuffer dataBuffer,
ArrowBuffer nullBitmapBuffer,
int nullCount = 0, int offset = 0)
: this(new ArrayData(dataType, length, nullCount, offset,
new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
new[] { nullBitmapBuffer, binaryViewsBuffer, dataBuffer }))
{ }

public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

public ArrowBuffer ViewsBuffer => Data.Buffers[1];

public int BufferCount => Data.Buffers.Length - 2;
public int DataBufferCount => Data.Buffers.Length - 2;

public ArrowBuffer DataBuffer(int index) => Data.Buffers[index + 2];

Expand Down
54 changes: 42 additions & 12 deletions csharp/src/Apache.Arrow/Arrays/ListViewArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@ public class Builder : IArrowArrayBuilder<ListViewArray, Builder>

private ArrowBuffer.Builder<int> ValueOffsetsBufferBuilder { get; }

private ArrowBuffer.Builder<int> SizesBufferBuilder { get; }

private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }

public int NullCount { get; protected set; }

private IArrowType DataType { get; }

private int Start { get; set; }

public Builder(IArrowType valueDataType) : this(new ListViewType(valueDataType))
{
}
Expand All @@ -47,64 +51,86 @@ internal Builder(ListViewType dataType)
{
ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
SizesBufferBuilder = new ArrowBuffer.Builder<int>();
ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
DataType = dataType;
Start = -1;
}

/// <summary>
/// Start a new variable-length list slot
///
/// This function should be called before beginning to append elements to the
/// value builder
/// value builder. TODO: Consider adding builder APIs to support construction
/// of overlapping lists.
/// </summary>
/// <returns></returns>
public Builder Append()
{
ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
AppendPrevious();
ValidityBufferBuilder.Append(true);

return this;
}

public Builder AppendNull()
{
ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
AppendPrevious();
ValueOffsetsBufferBuilder.Append(Start);
SizesBufferBuilder.Append(0);
ValidityBufferBuilder.Append(false);
NullCount++;

return this;
}

/// <summary>
/// Closes the list slot that is currently open, if any, by writing its starting offset and its
/// size, and then marks the current end of the child values as the start of the next slot.
/// </summary>
/// <remarks>
/// <c>Start</c> is negative until the first slot has been opened. Offsets and sizes are measured
/// in elements of the child value builder, so both are derived from <c>ValueBuilder.Length</c>
/// rather than from the number of offsets written so far.
/// </remarks>
private void AppendPrevious()
{
    if (Start < 0)
    {
        // No slot is open yet; the first list begins at value index zero.
        Start = 0;
    }
    else
    {
        int end = ValueBuilder.Length;

        ValueOffsetsBufferBuilder.Append(Start);
        SizesBufferBuilder.Append(end - Start);
        Start = end;
    }
}

public ListViewArray Build(MemoryAllocator allocator = default)
{
ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
AppendPrevious();

ArrowBuffer validityBuffer = NullCount > 0
? ValidityBufferBuilder.Build(allocator)
: ArrowBuffer.Empty;

return new ListViewArray(DataType, Length - 1,
ValueOffsetsBufferBuilder.Build(allocator), ValueBuilder.Build(allocator),
return new ListViewArray(DataType, Length,
ValueOffsetsBufferBuilder.Build(allocator), SizesBufferBuilder.Build(allocator),
ValueBuilder.Build(allocator),
validityBuffer, NullCount, 0);
}

public Builder Reserve(int capacity)
{
ValueOffsetsBufferBuilder.Reserve(capacity + 1);
ValueOffsetsBufferBuilder.Reserve(capacity);
SizesBufferBuilder.Reserve(capacity);
ValidityBufferBuilder.Reserve(capacity);
return this;
}

public Builder Resize(int length)
{
ValueOffsetsBufferBuilder.Resize(length + 1);
ValueOffsetsBufferBuilder.Resize(length);
SizesBufferBuilder.Resize(length);
ValidityBufferBuilder.Resize(length);
return this;
}

public Builder Clear()
{
ValueOffsetsBufferBuilder.Clear();
SizesBufferBuilder.Clear();
ValueBuilder.Clear();
ValidityBufferBuilder.Clear();
return this;
Expand All @@ -118,11 +144,15 @@ public Builder Clear()

/// <summary>
/// The offsets buffer viewed as 32-bit integers, sliced from <c>Offset</c> for <c>Length + 1</c> entries.
/// </summary>
// NOTE(review): a list-view layout stores one offset per slot, so the extra "+ 1" entry looks
// inherited from the offsets-only list layout; confirm no caller reads index Length before trimming.
public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1);

/// <summary>
/// The buffer at index 2 of the underlying array data, which backs the <c>Sizes</c> span.
/// </summary>
public ArrowBuffer SizesBuffer => Data.Buffers[2];

/// <summary>
/// The sizes buffer viewed as 32-bit integers: one element count per logical slot, starting at
/// <c>Offset</c> and spanning exactly <c>Length</c> entries.
/// </summary>
/// <remarks>
/// Unlike an offsets-only list layout, there is no trailing sentinel entry, so the slice must not
/// extend to <c>Length + 1</c>.
/// </remarks>
public ReadOnlySpan<int> Sizes => SizesBuffer.Span.CastTo<int>().Slice(Offset, Length);

public ListViewArray(IArrowType dataType, int length,
ArrowBuffer valueOffsetsBuffer, IArrowArray values,
ArrowBuffer valueOffsetsBuffer, ArrowBuffer sizesBuffer, IArrowArray values,
ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
: this(new ArrayData(dataType, length, nullCount, offset,
new[] { nullBitmapBuffer, valueOffsetsBuffer }, new[] { values.Data }),
new[] { nullBitmapBuffer, valueOffsetsBuffer, sizesBuffer }, new[] { values.Data }),
values)
{
}
Expand All @@ -135,7 +165,7 @@ public ListViewArray(ArrayData data)
private ListViewArray(ArrayData data, IArrowArray values) : base(data)
{
data.EnsureBufferCount(3);
data.EnsureDataType(ArrowTypeId.List);
data.EnsureDataType(ArrowTypeId.ListView);
Values = values;
}

Expand Down
22 changes: 21 additions & 1 deletion csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@


using System;
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow.C
{
Expand Down Expand Up @@ -121,7 +123,16 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr
cArray->buffers = null;
if (cArray->n_buffers > 0)
{
cArray->buffers = (byte**)sharedOwner.Allocate(array.Buffers.Length * IntPtr.Size);
int* lengths = null;
int bufferCount = array.Buffers.Length;
if (array.DataType.TypeId == ArrowTypeId.BinaryView || array.DataType.TypeId == ArrowTypeId.StringView)
{
lengths = (int*)sharedOwner.Allocate(4 * bufferCount); // overallocation to avoid edge case
bufferCount++;
cArray->n_buffers++;
}

cArray->buffers = (byte**)sharedOwner.Allocate(bufferCount * IntPtr.Size);
for (int i = 0; i < array.Buffers.Length; i++)
{
ArrowBuffer buffer = array.Buffers[i];
Expand All @@ -131,6 +142,15 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr
throw new NotSupportedException($"An ArrowArray of type {array.DataType.TypeId} could not be exported: failed on buffer #{i}");
}
cArray->buffers[i] = (byte*)ptr;
if (lengths != null && i >= 2)
{
lengths[i - 2] = array.Buffers[i].Length;
}
}

if (lengths != null)
{
cArray->buffers[array.Buffers.Length] = (byte*)lengths;
}
}

Expand Down
Loading

0 comments on commit 8d23502

Please sign in to comment.