Skip to content

Commit

Permalink
Fixes and additional tests
Browse files Browse the repository at this point in the history
  • Loading branch information
CurtHagenlocher committed Dec 22, 2023
1 parent 8b82c0d commit 124aee8
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 49 deletions.
27 changes: 21 additions & 6 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,25 @@ public void Visit(ListViewType type)
{
CheckData(type, 3);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();

var offsetsBuilder = new ArrowBuffer.Builder<int>(_totalLength);
int baseOffset = 0;

foreach (ArrayData arrayData in _arrayDataList)
{
if (arrayData.Length > 0)
{
ReadOnlySpan<int> span = arrayData.Buffers[1].Span.CastTo<int>().Slice(0, arrayData.Length);
foreach (int offset in span)
{
offsetsBuilder.Append(baseOffset + offset);
}
}

baseOffset += arrayData.Children[0].Length;
}

ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator);
ArrowBuffer sizesBuffer = ConcatenateFixedWidthTypeValueBuffer(2, Int32Type.Default);
ArrayData child = Concatenate(SelectChildren(0), _allocator);

Expand Down Expand Up @@ -207,7 +225,7 @@ private void ConcatenateBinaryViewArrayData(IArrowType type)
ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicBufferCount];
buffers[0] = validityBuffer;
buffers[1] = viewBuffer;
int index = 0;
int index = 2;
foreach (ArrayData arrayData in _arrayDataList)
{
for (int i = 2; i < arrayData.Buffers.Length; i++)
Expand Down Expand Up @@ -324,10 +342,7 @@ private ArrowBuffer ConcatenateViewBuffer(out int variadicBufferCount)
continue;
}

// The first offset is always 0.
// It should be skipped because it duplicates the last offset already in the builder.
ReadOnlySpan<BinaryView> span = arrayData.Buffers[1].Span.CastTo<BinaryView>().Slice(1, arrayData.Length);

ReadOnlySpan<BinaryView> span = arrayData.Buffers[1].Span.CastTo<BinaryView>().Slice(0, arrayData.Length);
foreach (BinaryView view in span)
{
if (view.Length > BinaryView.MaxInlineLength)
Expand Down
14 changes: 7 additions & 7 deletions csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ protected BuilderBase(IArrowType dataType)
/// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
public TArray Build(MemoryAllocator allocator = default)
{
var bufs = new[]
{
NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty,
BinaryViews.Build(allocator),
ValueBuffer.Build(allocator),
};
bool hasValues = ValueBuffer.Length > 0;
var bufs = new ArrowBuffer[hasValues ? 3 : 2];
bufs[0] = NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty;
bufs[1] = BinaryViews.Build(allocator);
if (hasValues) { bufs[2] = ValueBuffer.Build(allocator); }

var data = new ArrayData(
DataType,
length: Length,
Expand Down Expand Up @@ -325,7 +325,7 @@ public ReadOnlySpan<byte> GetBytes(int index, out bool isNull)
return ViewsBuffer.Span.Slice(16 * index + 4, binaryView.Length);
}

return DataBuffer(binaryView.BufferIndex).Span.Slice(binaryView.Offset, binaryView.Length);
return DataBuffer(binaryView._bufferIndex).Span.Slice(binaryView._bufferOffset, binaryView.Length);
}

int IReadOnlyCollection<byte[]>.Count => Length;
Expand Down
22 changes: 10 additions & 12 deletions csharp/src/Apache.Arrow/Arrays/ListViewArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ internal Builder(ListViewType dataType)
public Builder Append()
{
AppendPrevious();

ValidityBufferBuilder.Append(true);

return this;
Expand All @@ -75,26 +76,24 @@ public Builder Append()
/// <summary>
/// Appends a null list entry: one cleared validity bit, one offset and a size of zero.
/// </summary>
/// <returns>This builder, to allow call chaining.</returns>
public Builder AppendNull()
{
    // Flush the entry that was still open, if any, before recording the null.
    AppendPrevious();

    // Exactly one validity bit per entry; appending it twice would make the
    // validity buffer longer than the offsets and sizes buffers.
    ValidityBufferBuilder.Append(false);
    ValueOffsetsBufferBuilder.Append(Start);
    SizesBufferBuilder.Append(0);
    NullCount++;

    // The null entry is already complete; a negative Start tells AppendPrevious
    // that there is no pending entry left to emit.
    Start = -1;

    return this;
}

/// <summary>
/// Emits the offset and size of the entry opened by the previous append call (if one is
/// still pending) and marks the current end of the value builder as the start of the next entry.
/// </summary>
private void AppendPrevious()
{
    if (Start >= 0)
    {
        // One offset/size pair per pending entry. The size is the number of values
        // added to the value builder since the entry was opened.
        ValueOffsetsBufferBuilder.Append(Start);
        SizesBufferBuilder.Append(ValueBuilder.Length - Start);
    }

    // The next entry starts where the values currently end.
    Start = ValueBuilder.Length;
}

public ListViewArray Build(MemoryAllocator allocator = default)
Expand Down Expand Up @@ -142,11 +141,11 @@ public Builder Clear()

/// <summary>Buffer 1 of the array data: the per-entry start offsets.</summary>
public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];

/// <summary>
/// The start offsets of this array's entries, one per entry (<c>Length</c> items starting at <c>Offset</c>).
/// </summary>
public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length);

/// <summary>Buffer 2 of the array data: the per-entry sizes.</summary>
public ArrowBuffer SizesBuffer => Data.Buffers[2];

/// <summary>
/// The sizes of this array's entries, one per entry (<c>Length</c> items starting at <c>Offset</c>).
/// </summary>
public ReadOnlySpan<int> Sizes => SizesBuffer.Span.CastTo<int>().Slice(Offset, Length);

public ListViewArray(IArrowType dataType, int length,
ArrowBuffer valueOffsetsBuffer, ArrowBuffer sizesBuffer, IArrowArray values,
Expand Down Expand Up @@ -183,8 +182,7 @@ public int GetValueLength(int index)
return 0;
}

ReadOnlySpan<int> offsets = ValueOffsets;
return offsets[index + 1] - offsets[index];
return Sizes[index];
}

public IArrowArray GetSlicedValues(int index)
Expand Down
66 changes: 45 additions & 21 deletions csharp/src/Apache.Arrow/Scalars/BinaryView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,74 +14,98 @@
// limitations under the License.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Apache.Arrow.Scalars
{
    /// <summary>
    /// A 16-byte view over a binary value. The length is stored at offset 0. The remaining
    /// 12 bytes hold either the value itself (when it fits inline) or a 4-byte prefix
    /// followed by a buffer index and an offset into that buffer.
    /// </summary>
    [StructLayout(LayoutKind.Explicit)]
    public unsafe struct BinaryView : IEquatable<BinaryView>
    {
        /// <summary>Number of prefix bytes stored in an out-of-line view.</summary>
        public const int PrefixLength = 4;

        /// <summary>Largest value length that is stored inline.</summary>
        public const int MaxInlineLength = 12;

        [FieldOffset(0)]
        public readonly int Length;

        [FieldOffset(4)]
        internal readonly int _prefix;

        [FieldOffset(8)]
        internal readonly int _bufferIndex;

        [FieldOffset(12)]
        internal readonly int _bufferOffset;

        // Overlaps _prefix, _bufferIndex and _bufferOffset: the same 12 bytes carry the inline value.
        [FieldOffset(4)]
        internal fixed byte _inline[MaxInlineLength];

        /// <summary>Creates an inline view holding a copy of <paramref name="inline"/>.</summary>
        /// <param name="inline">The value bytes; at most <see cref="MaxInlineLength"/> bytes.</param>
        /// <exception cref="ArgumentException"><paramref name="inline"/> is longer than <see cref="MaxInlineLength"/>.</exception>
        public unsafe BinaryView(ReadOnlySpan<byte> inline) : this()
        {
            if (inline.Length > MaxInlineLength)
            {
                throw new ArgumentException("invalid inline data length", nameof(inline));
            }

            Length = inline.Length;
            fixed (byte* dest = _inline)
            fixed (byte* src = inline)
            {
                Buffer.MemoryCopy(src, dest, MaxInlineLength, inline.Length);
            }
        }

        /// <summary>Creates an out-of-line view that refers to bytes stored in a separate data buffer.</summary>
        /// <param name="length">Length of the value; must be greater than <see cref="MaxInlineLength"/>.</param>
        /// <param name="prefix">The first <see cref="PrefixLength"/> bytes of the value.</param>
        /// <param name="bufferIndex">Index of the data buffer holding the value.</param>
        /// <param name="bufferOffset">Offset of the value within that buffer.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="length"/> would be treated as inline, or <paramref name="prefix"/> is not exactly
        /// <see cref="PrefixLength"/> bytes long.
        /// </exception>
        public BinaryView(int length, ReadOnlySpan<byte> prefix, int bufferIndex, int bufferOffset)
        {
            // IsInline is `Length <= MaxInlineLength`, so a length of exactly MaxInlineLength
            // must be rejected too; otherwise the view would report itself as inline and
            // its buffer index/offset would be reinterpreted as value bytes.
            if (length <= MaxInlineLength)
            {
                throw new ArgumentException("invalid length", nameof(length));
            }
            if (prefix.Length != PrefixLength)
            {
                throw new ArgumentException("invalid prefix length", nameof(prefix));
            }

            Length = length;
            _bufferIndex = bufferIndex;
            _bufferOffset = bufferOffset;
            // Reinterpret the four prefix bytes as a single int in native byte order.
            _prefix = MemoryMarshal.Read<int>(prefix);
        }

        private BinaryView(int length, int prefix, int bufferIndex, int offset)
        {
            Length = length;
            _prefix = prefix;
            _bufferIndex = bufferIndex;
            _bufferOffset = offset;
        }

        /// <summary>True when the value bytes are stored in the view itself.</summary>
        public bool IsInline => Length <= MaxInlineLength;

        /// <summary>
        /// The bytes held directly in the view: the whole value when inline, otherwise the prefix.
        /// NOTE(review): the span points into this struct instance's own storage, so it is
        /// presumably only valid while that instance stays alive and in place; confirm at call sites.
        /// </summary>
#if NET5_0_OR_GREATER
        public ReadOnlySpan<byte> Bytes => MemoryMarshal.CreateReadOnlySpan<byte>(ref Unsafe.AsRef(_inline[0]), IsInline ? Length : PrefixLength);
#else
        public unsafe ReadOnlySpan<byte> Bytes => new ReadOnlySpan<byte>(Unsafe.AsPointer(ref _inline[0]), IsInline ? Length : PrefixLength);
#endif

        /// <summary>Index of the data buffer holding the value, or -1 for an inline view.</summary>
        public int BufferIndex => IsInline ? -1 : _bufferIndex;

        /// <summary>Offset of the value within its data buffer, or -1 for an inline view.</summary>
        public int BufferOffset => IsInline ? -1 : _bufferOffset;

        public override int GetHashCode() => Length ^ _prefix ^ _bufferIndex ^ _bufferOffset;

        public override bool Equals(object obj) => obj is BinaryView other && Equals(other);

        /// <summary>Compares all 16 bytes of the two views (length plus the three 4-byte words).</summary>
        public bool Equals(BinaryView other) =>
            Length == other.Length && _prefix == other._prefix && _bufferIndex == other._bufferIndex && _bufferOffset == other._bufferOffset;

        /// <summary>Returns a copy of this view whose buffer index is shifted by <paramref name="bufferOffset"/>.</summary>
        internal BinaryView AdjustBufferIndex(int bufferOffset)
        {
            return new BinaryView(Length, _prefix, _bufferIndex + bufferOffset, _bufferOffset);
        }
    }
}
86 changes: 83 additions & 3 deletions csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,16 @@ private static IEnumerable<Tuple<List<IArrowArray>, IArrowArray>> GenerateTestDa
FloatType.Default,
DoubleType.Default,
BinaryType.Default,
BinaryViewType.Default,
StringType.Default,
StringViewType.Default,
Date32Type.Default,
Date64Type.Default,
TimestampType.Default,
new Decimal128Type(14, 10),
new Decimal256Type(14,10),
new ListType(Int64Type.Default),
new ListViewType(Int64Type.Default),
new StructType(new List<Field>{
new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
Expand Down Expand Up @@ -371,7 +374,33 @@ public void Visit(BinaryType type)
ExpectedArray = resultBuilder.Build();
}

/// <summary>
/// Builds the binary-view test data: one input array per base data list, plus the
/// expected array that holds the same entries in the same order.
/// </summary>
public void Visit(BinaryViewType type)
{
    BinaryViewArray.Builder resultBuilder = new BinaryViewArray.Builder().Reserve(_baseDataTotalElementCount);

    for (int i = 0; i < _baseDataListCount; i++)
    {
        List<int?> dataList = _baseData[i];
        BinaryViewArray.Builder builder = new BinaryViewArray.Builder().Reserve(dataList.Count);

        // The explicit iteration type converts each int? element to byte?.
        foreach (byte? value in dataList)
        {
            // Every entry goes to both the per-list builder and the expected-result builder.
            if (value.HasValue)
            {
                builder.Append(value.Value);
                resultBuilder.Append(value.Value);
            }
            else
            {
                builder.AppendNull();
                resultBuilder.AppendNull();
            }
        }
        TestTargetArrayList.Add(builder.Build());
    }

    ExpectedArray = resultBuilder.Build();
}

public void Visit(StringType type)
{
Expand All @@ -393,7 +422,25 @@ public void Visit(StringType type)
ExpectedArray = resultBuilder.Build();
}

/// <summary>
/// Builds the string-view test data: one input array per base data list, plus the
/// expected array that holds the same entries in the same order.
/// </summary>
public void Visit(StringViewType type)
{
    StringViewArray.Builder resultBuilder = new StringViewArray.Builder().Reserve(_baseDataTotalElementCount);

    for (int i = 0; i < _baseDataListCount; i++)
    {
        List<int?> dataList = _baseData[i];
        StringViewArray.Builder builder = new StringViewArray.Builder().Reserve(dataList.Count);

        // Nullable<int>.ToString() yields an empty string for a null element, so the
        // `?? null` never takes effect and null entries are appended as empty strings.
        // NOTE(review): if null strings are meant to be exercised here, `_?.ToString()`
        // would be needed; confirm the builder accepts a null string before changing it.
        foreach (string value in dataList.Select(_ => _.ToString() ?? null))
        {
            // Every entry goes to both the per-list builder and the expected-result builder.
            builder.Append(value);
            resultBuilder.Append(value);
        }
        TestTargetArrayList.Add(builder.Build());
    }

    ExpectedArray = resultBuilder.Build();
}

public void Visit(ListType type)
{
Expand Down Expand Up @@ -430,7 +477,40 @@ public void Visit(ListType type)
ExpectedArray = resultBuilder.Build();
}

/// <summary>
/// Builds the list-view test data: one input array per base data list, plus the
/// expected array that holds the same entries in the same order. Each non-null base
/// value becomes a list entry containing that single value; each null becomes a null entry.
/// </summary>
public void Visit(ListViewType type)
{
    // NOTE(review): the result builder is created from type.ValueDataType while the
    // per-list builders below use type.ValueField; presumably both overloads are
    // equivalent for this test, confirm.
    ListViewArray.Builder resultBuilder = new ListViewArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount);
    Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount);

    for (int i = 0; i < _baseDataListCount; i++)
    {
        List<int?> dataList = _baseData[i];

        ListViewArray.Builder builder = new ListViewArray.Builder(type.ValueField).Reserve(dataList.Count);
        Int64Array.Builder valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(dataList.Count);

        // The explicit iteration type converts each int? element to long?.
        foreach (long? value in dataList)
        {
            if (value.HasValue)
            {
                // Open a new list entry first, then add its single value.
                builder.Append();
                resultBuilder.Append();

                valueBuilder.Append(value.Value);
                resultValueBuilder.Append(value.Value);
            }
            else
            {
                builder.AppendNull();
                resultBuilder.AppendNull();
            }
        }

        TestTargetArrayList.Add(builder.Build());
    }

    ExpectedArray = resultBuilder.Build();
}

public void Visit(FixedSizeListType type)
{
Expand Down
Loading

0 comments on commit 124aee8

Please sign in to comment.