Skip to content

Commit

Permalink
Support blank column names and enable more integration tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
CurtHagenlocher committed Dec 10, 2023
1 parent 7415ce6 commit ee8fe2f
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 18 deletions.
3 changes: 1 addition & 2 deletions csharp/src/Apache.Arrow/Field.Builder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,13 @@ public class Builder

public Builder()
{
_name = string.Empty;
_type = NullType.Default;
_nullable = true;
}

public Builder Name(string value)
{
if (string.IsNullOrWhiteSpace(value))
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
Expand Down
11 changes: 5 additions & 6 deletions csharp/src/Apache.Arrow/Field.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,23 @@ public partial class Field

public Field(string name, IArrowType dataType, bool nullable,
IEnumerable<KeyValuePair<string, string>> metadata = default)
: this(name, dataType, nullable, false)
: this(name, dataType, nullable)
{
Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);

}

internal Field(string name, IArrowType dataType, bool nullable,
IReadOnlyDictionary<string, string> metadata, bool copyCollections, bool allowBlankName)
: this(name, dataType, nullable, allowBlankName)
IReadOnlyDictionary<string, string> metadata, bool copyCollections)
: this(name, dataType, nullable)
{
Debug.Assert(copyCollections == false, "This internal constructor is to not copy the collections.");

Metadata = metadata;
}

private Field(string name, IArrowType dataType, bool nullable, bool allowBlankName)
private Field(string name, IArrowType dataType, bool nullable)
{
if (name == null || (!allowBlankName && string.IsNullOrWhiteSpace(name)))
if (name == null)
{
throw new ArgumentNullException(nameof(name));
}
Expand Down
9 changes: 4 additions & 5 deletions csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ internal static Schema GetSchema(Flatbuf.Schema schema, ref DictionaryMemo dicti
for (int i = 0; i < schema.FieldsLength; i++)
{
Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo, allowBlankName: false));
fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo));
}

Dictionary<string, string> metadata = schema.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
Expand All @@ -73,14 +73,13 @@ internal static Schema GetSchema(Flatbuf.Schema schema, ref DictionaryMemo dicti
return new Schema(fields, metadata, copyCollections: false);
}

private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref DictionaryMemo dictionaryMemo, bool allowBlankName)
private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref DictionaryMemo dictionaryMemo)
{
bool allowBlankNameChild = flatbufField.ChildrenLength == 1 && flatbufField.TypeType == Flatbuf.Type.FixedSizeList;
Field[] childFields = flatbufField.ChildrenLength > 0 ? new Field[flatbufField.ChildrenLength] : null;
for (int i = 0; i < flatbufField.ChildrenLength; i++)
{
Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, ref dictionaryMemo, allowBlankNameChild);
childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, ref dictionaryMemo);
}

Flatbuf.DictionaryEncoding? dictionaryEncoding = flatbufField.Dictionary;
Expand All @@ -104,7 +103,7 @@ private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref Diction
metadata[keyValue.Key] = keyValue.Value;
}

var arrowField = new Field(flatbufField.Name, type, flatbufField.Nullable, metadata, copyCollections: false, allowBlankName);
var arrowField = new Field(flatbufField.Name, type, flatbufField.Nullable, metadata, copyCollections: false);

if (dictionaryEncoding.HasValue)
{
Expand Down
6 changes: 6 additions & 0 deletions csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ private static Schema CreateSchema(JsonSchema jsonSchema, Dictionary<DictionaryT
{
builder.Field(f => CreateField(f, jsonSchema.Fields[i], dictionaryIndexes));
}

if (jsonSchema.Metadata != null)
{
builder.Metadata(jsonSchema.Metadata);
}

return builder.Build();
}

Expand Down
44 changes: 44 additions & 0 deletions csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,50 @@ public unsafe void ExportArrayStream()
CArrowArrayStream.Free(cArrayStream);
}

[SkippableFact]
public unsafe void ImportRecordBatchFromBuffer()
{
using (Py.GIL())
{
dynamic pa = Py.Import("pyarrow");
dynamic batch = pa.record_batch(
new PyList(new PyObject[]
{
pa.array(new long?[] { 1, 2, 3, null, 5 }),
pa.array(new[] { "hello", "world", null, "foo", "bar" }),
pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }),
pa.array(new bool?[] { true, false, null, false, true }),
}),
new[] { null, "", "column", "column" });

dynamic sink = pa.BufferOutputStream();
dynamic writer = pa.ipc.new_stream(sink, batch.schema);
writer.write_batch(batch);
((IDisposable)writer).Dispose();

dynamic buf = sink.getvalue();
// buf.address, buf.size

IntPtr address = (IntPtr)(long)buf.address;
int size = buf.size;
byte[] buffer = new byte[size];
byte* ptr = (byte*)address.ToPointer();
for (int i = 0; i < size; i++)
{
buffer[i] = ptr[i];
}

using (ArrowStreamReader reader = new ArrowStreamReader(new ReadOnlyMemory<byte>(buffer)))
{
RecordBatch batch2 = reader.ReadNextRecordBatch();
Assert.Equal("None", batch2.Schema.FieldsList[0].Name);
Assert.Equal("", batch2.Schema.FieldsList[1].Name);
Assert.Equal("column", batch2.Schema.FieldsList[2].Name);
Assert.Equal("column", batch2.Schema.FieldsList[3].Name);
}
}
}

private static PyObject List(params int?[] values)
{
return new PyList(values.Select(i => i == null ? PyObject.None : new PyInt(i.Value)).ToArray());
Expand Down
6 changes: 1 addition & 5 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,6 @@ def _temp_path():
generate_map_case(),

generate_non_canonical_map_case()
.skip_tester('C#')
.skip_tester('Java') # TODO(ARROW-8715)
# Canonical map names are restored on import, so the schemas are unequal
.skip_format(SKIP_C_SCHEMA, 'C++'),
Expand All @@ -1827,11 +1826,9 @@ def _temp_path():

generate_unions_case(),

generate_custom_metadata_case()
.skip_tester('C#'),
generate_custom_metadata_case(),

generate_duplicate_fieldnames_case()
.skip_tester('C#')
.skip_tester('JS'),

generate_dictionary_case(),
Expand All @@ -1856,7 +1853,6 @@ def _temp_path():
.skip_tester('Rust'),

generate_extension_case()
.skip_tester('C#')
# TODO: ensure the extension is registered in the C++ entrypoint
.skip_format(SKIP_C_SCHEMA, 'C++')
.skip_format(SKIP_C_ARRAY, 'C++'),
Expand Down

0 comments on commit ee8fe2f

Please sign in to comment.