diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index eaedbf1e38580..8bddc34e1000b 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1174,7 +1174,13 @@ cdef class Array(_PandasConvertible):
                              "({0}) did not match the passed number "
                              "({1}).".format(type.num_fields, len(children)))
 
-        if type.num_buffers != len(buffers):
+        # Variadic layouts fix only a minimum buffer count; extras are allowed.
+        if type.has_variadic_buffers:
+            if type.num_buffers > len(buffers):
+                raise ValueError("Type's expected number of buffers is at least "
+                                 "{0}, but the passed number is "
+                                 "{1}.".format(type.num_buffers, len(buffers)))
+        elif type.num_buffers != len(buffers):
             raise ValueError("Type's expected number of buffers "
                              "({0}) did not match the passed number "
                              "({1}).".format(type.num_buffers, len(buffers)))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index a70cb91873e45..8bf61b73cc211 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -158,6 +158,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CDataTypeLayout" arrow::DataTypeLayout":
         vector[CBufferSpec] buffers
+        optional[CBufferSpec] variadic_spec
         c_bool has_dictionary
 
     cdef cppclass CDataType" arrow::DataType":
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 4160d64829483..885442b079c5b 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -651,6 +651,34 @@ def test_string_binary_from_buffers():
     assert copied.null_count == 0
 
 
+def test_string_view_from_buffers():
+    array = pa.array(
+        [
+            "String longer than 12 characters",
+            None,
+            "short",
+            "Length is 12"
+        ], type=pa.string_view())
+
+    # Rebuild the array from its own buffers; it must validate and round-trip.
+    buffers = array.buffers()
+    copied = pa.StringViewArray.from_buffers(
+        pa.string_view(), len(array), buffers)
+    copied.validate(full=True)
+    assert copied.to_pylist() == [
+        "String longer than 12 characters",
+        None,
+        "short",
+        "Length is 12"
+    ]
+
+    # Passing fewer buffers than the type's fixed minimum must be rejected.
+    match = r"number of buffers is at least 2"
+    with pytest.raises(ValueError, match=match):
+        pa.StringViewArray.from_buffers(
+            pa.string_view(), len(array), buffers[0:1])
+
+
 @pytest.mark.parametrize('list_type_factory', [
     pa.list_, pa.large_list, pa.list_view, pa.large_list_view])
 def test_list_from_buffers(list_type_factory):
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index fef350d5de958..de439b6bb8cd7 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -887,6 +887,15 @@ def test_types_weakref():
     assert wr() is None  # not a singleton
 
 
+def test_types_has_variadic_buffers():
+    # Among the sampled types, only the view types are expected to be variadic.
+    for ty in get_many_types():
+        if ty in (pa.string_view(), pa.binary_view()):
+            assert ty.has_variadic_buffers
+        else:
+            assert not ty.has_variadic_buffers
+
+
 def test_fields_hashable():
     in_dict = {}
     fields = [pa.field('a', pa.int32()),
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 4aa8238556a9c..0d6787cf2a049 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -326,6 +326,24 @@ cdef class DataType(_Weakrefable):
         """
         return self.type.layout().buffers.size()
 
+    @property
+    def has_variadic_buffers(self):
+        """
+        Whether the type's layout allows a variable number of buffers.
+
+        If True, the number of expected buffers is only
+        lower-bounded by num_buffers.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> pa.int64().has_variadic_buffers
+        False
+        >>> pa.string_view().has_variadic_buffers
+        True
+        """
+        return self.type.layout().variadic_spec.has_value()
+
     def __str__(self):
         return frombytes(self.type.ToString(), safe=True)
 