From 287e06210d216d6b8ca1b52983bbff7f90f2b5f9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 Oct 2024 18:47:45 -0400 Subject: [PATCH 1/2] refactor: Replace Python bootstrap script with explicit Cython declarations --- python/bootstrap.py | 282 ------------------ python/generate_dist.py | 10 - python/src/nanoarrow/meson.build | 30 +- python/src/nanoarrow/nanoarrow_c.pxd | 310 ++++++++++++++++++++ python/src/nanoarrow/nanoarrow_device_c.pxd | 78 +++++ 5 files changed, 389 insertions(+), 321 deletions(-) delete mode 100644 python/bootstrap.py create mode 100644 python/src/nanoarrow/nanoarrow_c.pxd create mode 100644 python/src/nanoarrow/nanoarrow_device_c.pxd diff --git a/python/bootstrap.py b/python/bootstrap.py deleted file mode 100644 index 19729e07b..000000000 --- a/python/bootstrap.py +++ /dev/null @@ -1,282 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import argparse -import pathlib -import re -import subprocess -import sys - - -# Generate the nanoarrow_c.pxd file used by the Cython extensions -class PxdGenerator: - def __init__(self): - self._define_regexes() - - def generate_pxd(self, file_in, file_out): - file_in_name = pathlib.Path(file_in).name - - # Read the header - content = None - with open(file_in, "r") as input: - content = input.read() - - # Strip comments - content = self.re_comment.sub("", content) - - # Replace NANOARROW_MAX_FIXED_BUFFERS with its value - content = self._preprocess_content(content) - - # Find typedefs, types, and function definitions - typedefs = self._find_typedefs(content) - types = self._find_types(content) - func_defs = self._find_func_defs(content) - - # Make corresponding cython definitions - typedefs_cython = [self._typdef_to_cython(t, " ") for t in typedefs] - types_cython = [self._type_to_cython(t, " ") for t in types] - func_defs_cython = [self._func_def_to_cython(d, " ") for d in func_defs] - - # Unindent the header - header = self.re_newline_plus_indent.sub("\n", self._pxd_header()) - - # Write nanoarrow_c.pxd - with open(file_out, "wb") as output: - output.write(header.encode("UTF-8")) - - output.write( - f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8") - ) - - # A few things we add in manually - self._write_defs(output) - - for type in types_cython: - output.write(type.encode("UTF-8")) - output.write(b"\n\n") - - for typedef in typedefs_cython: - output.write(typedef.encode("UTF-8")) - output.write(b"\n") - - output.write(b"\n") - - for func_def in func_defs_cython: - output.write(func_def.encode("UTF-8")) - output.write(b"\n") - - def _preprocess_content(self, content): - return content - - def _write_defs(self, output): - pass - - def _define_regexes(self): - self.re_comment = re.compile(r"\s*//[^\n]*") - self.re_typedef = re.compile(r"typedef(?P[^;]+)") - self.re_type = re.compile( - r"(?Pstruct|union|enum) (?PArrow[^ ]+) {(?P[^}]*)}" - ) - 
self.re_func_def = re.compile( - r"\n(static inline )?(?Pconst )?(struct |enum )?" - r"(?P[A-Za-z0-9_*]+) " - r"(?PArrow[A-Za-z0-9]+)\((?P[^\)]*)\);" - ) - self.re_tagged_type = re.compile( - r"(?Pstruct|union|enum) (?PArrow[A-Za-z]+)" - ) - self.re_struct_delim = re.compile(r";\s*") - self.re_enum_delim = re.compile(r",\s*") - self.re_whitespace = re.compile(r"\s+") - self.re_newline_plus_indent = re.compile(r"\n +") - - def _strip_comments(self, content): - return self.re_comment.sub("", content) - - def _find_typedefs(self, content): - return [m.groupdict() for m in self.re_typedef.finditer(content)] - - def _find_types(self, content): - return [m.groupdict() for m in self.re_type.finditer(content)] - - def _find_func_defs(self, content): - return [m.groupdict() for m in self.re_func_def.finditer(content)] - - def _typdef_to_cython(self, t, indent=""): - typedef = t["typedef"] - typedef = self.re_tagged_type.sub(r"\2", typedef) - return f"{indent}ctypedef {typedef}" - - def _type_to_cython(self, t, indent=""): - type = t["type"] - name = t["name"] - body = self.re_tagged_type.sub(r"\2", t["body"].strip()) - if type == "enum": - items = [item for item in self.re_enum_delim.split(body) if item] - else: - items = [item for item in self.re_struct_delim.split(body) if item] - - cython_body = f"\n{indent} ".join([""] + items) - return f"{indent}{type} {name}:{cython_body}" - - def _func_def_to_cython(self, d, indent=""): - return_type = d["return_type"].strip() - if d["const"]: - return_type = "const " + return_type - name = d["name"] - args = re.sub(r"\s+", " ", d["args"].strip()) - args = self.re_tagged_type.sub(r"\2", args) - - # Cython doesn't do (void) - if args == "void": - args = "" - - return f"{indent}{return_type} {name}({args})" - - def _pxd_header(self): - return """ - # Licensed to the Apache Software Foundation (ASF) under one - # or more contributor license agreements. 
See the NOTICE file - # distributed with this work for additional information - # regarding copyright ownership. The ASF licenses this file - # to you under the Apache License, Version 2.0 (the - # "License"); you may not use this file except in compliance - # with the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, - # software distributed under the License is distributed on an - # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - # KIND, either express or implied. See the License for the - # specific language governing permissions and limitations - # under the License. - - # cython: language_level = 3 - - """ - - -class NanoarrowPxdGenerator(PxdGenerator): - def _preprocess_content(self, content): - content = re.sub(r"NANOARROW_MAX_FIXED_BUFFERS", "3", content) - content = re.sub(r"NANOARROW_BINARY_VIEW_INLINE_SIZE", "12", content) - content = re.sub(r"NANOARROW_BINARY_VIEW_PREFIX_SIZE", "4", content) - return content - - def _pxd_header(self): - return ( - super()._pxd_header() - + """ - from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t - from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t - """ - ) - - def _write_defs(self, output): - output.write(b"\n") - output.write(b" cdef int NANOARROW_OK\n") - output.write(b" cdef int NANOARROW_MAX_FIXED_BUFFERS\n") - output.write(b" cdef int ARROW_FLAG_DICTIONARY_ORDERED\n") - output.write(b" cdef int ARROW_FLAG_NULLABLE\n") - output.write(b" cdef int ARROW_FLAG_MAP_KEYS_SORTED\n") - output.write(b"\n") - - -class NanoarrowDevicePxdGenerator(PxdGenerator): - def _preprocess_content(self, content): - self.device_names = re.findall("#define (ARROW_DEVICE_[A-Z0-9_]+)", content) - return super()._preprocess_content(content) - - def _find_typedefs(self, content): - return [] - - def _pxd_header(self): - return ( - super()._pxd_header() - + """ - from libc.stdint cimport int32_t, 
int64_t - from nanoarrow_c cimport * - """ - ) - - def _write_defs(self, output): - output.write(b"\n") - output.write(b" ctypedef int32_t ArrowDeviceType\n") - output.write(b"\n") - for name in self.device_names: - output.write(f" cdef ArrowDeviceType {name}\n".encode()) - output.write(b"\n") - - -def copy_or_generate_nanoarrow_c(target_dir: pathlib.Path): - vendored_files = [ - "nanoarrow.h", - "nanoarrow.c", - "nanoarrow_ipc.h", - "nanoarrow_ipc.c", - "nanoarrow_device.h", - "nanoarrow_device.c", - ] - dst = {name: target_dir / name for name in vendored_files} - - this_dir = pathlib.Path(__file__).parent.resolve() - arrow_proj_dir = this_dir / "subprojects" / "arrow-nanoarrow" - - subprocess.run( - [ - sys.executable, - arrow_proj_dir / "ci" / "scripts" / "bundle.py", - "--symbol-namespace", - "PythonPkg", - "--header-namespace", - "", - "--source-output-dir", - target_dir, - "--include-output-dir", - target_dir, - "--with-device", - "--with-ipc", - ], - ) - - if not dst["nanoarrow.h"].exists(): - raise ValueError("Attempt to vendor nanoarrow.c/h failed") - - -# Runs the pxd generator with some information about the file name -def generate_nanoarrow_pxds(target_dir: pathlib.Path): - NanoarrowPxdGenerator().generate_pxd( - target_dir / "nanoarrow.h", target_dir / "nanoarrow_c.pxd" - ) - NanoarrowDevicePxdGenerator().generate_pxd( - target_dir / "nanoarrow_device.h", - target_dir / "nanoarrow_device_c.pxd", - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--output-dir", help="Target directory where files should be written" - ) - - args = parser.parse_args() - target_dir = pathlib.Path(args.output_dir).resolve() - - copy_or_generate_nanoarrow_c(target_dir) - generate_nanoarrow_pxds(target_dir) diff --git a/python/generate_dist.py b/python/generate_dist.py index abf4104bd..afaddf9e3 100644 --- a/python/generate_dist.py +++ b/python/generate_dist.py @@ -44,16 +44,6 @@ def main(): target_src_dir = subproj_dir / "src" 
shutil.copytree(src_dir / "src", target_src_dir) - # CMake isn't actually required for building, but the bundle.py script reads from - # its configuration - shutil.copy(src_dir / "CMakeLists.txt", subproj_dir / "CMakeLists.txt") - - subproj_ci_scripts_dir = subproj_dir / "ci" / "scripts" - subproj_ci_scripts_dir.mkdir(parents=True) - shutil.copy( - src_dir / "ci" / "scripts" / "bundle.py", subproj_ci_scripts_dir / "bundle.py" - ) - if __name__ == "__main__": main() diff --git a/python/src/nanoarrow/meson.build b/python/src/nanoarrow/meson.build index 452cf37a2..d6eb0a38f 100644 --- a/python/src/nanoarrow/meson.build +++ b/python/src/nanoarrow/meson.build @@ -23,32 +23,6 @@ nanoarrow_device_dep = nanoarrow_proj.get_variable('nanoarrow_device_dep') py = import('python').find_installation(pure: false) -vendored_files = custom_target( - 'generate-pyx', - output: [ - 'nanoarrow.c', - 'nanoarrow_c.pxd', - 'nanoarrow_device.c', - 'nanoarrow_device_c.pxd', - 'nanoarrow_device.h', - 'nanoarrow_device.hpp', - 'nanoarrow.h', - 'nanoarrow.hpp', - 'nanoarrow_ipc.c', - 'nanoarrow_ipc.h', - 'nanoarrow_ipc.hpp', - ], - command: [ - py, - meson.current_source_dir() + '/../../bootstrap.py', - '--output-dir', meson.current_build_dir() - ], -) - -nanoarrow_pyx_dep = declare_dependency( - sources: vendored_files[1], -) - cyfiles = [ '_array.pyx', '_array_stream.pyx', @@ -63,8 +37,6 @@ cyfiles = [ cython_args = [ '--include-dir', meson.current_source_dir(), - '--include-dir', - meson.current_build_dir(), ] if get_option('buildtype') in ['debug', 'debugoptimized'] cython_args += ['--gdb'] @@ -72,7 +44,7 @@ endif fs = import('fs') foreach cyf : cyfiles - cyfile_deps = [nanoarrow_pyx_dep, nanoarrow_dep] + cyfile_deps = [nanoarrow_dep] stem = fs.stem(cyf) if stem in ['_array', '_device'] diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd new file mode 100644 index 000000000..3cb98f435 --- /dev/null +++ b/python/src/nanoarrow/nanoarrow_c.pxd @@ -0,0 
+1,310 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: language_level = 3 + + +from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t +from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t + +cdef extern from "nanoarrow/nanoarrow.h" nogil: + + cdef int NANOARROW_OK + cdef int NANOARROW_MAX_FIXED_BUFFERS + cdef int ARROW_FLAG_DICTIONARY_ORDERED + cdef int ARROW_FLAG_NULLABLE + cdef int ARROW_FLAG_MAP_KEYS_SORTED + + struct ArrowSchema: + const char* format + const char* name + const char* metadata + int64_t flags + int64_t n_children + ArrowSchema** children + ArrowSchema* dictionary + void (*release)(ArrowSchema*) + void* private_data + + struct ArrowArray: + int64_t length + int64_t null_count + int64_t offset + int64_t n_buffers + int64_t n_children + const void** buffers + ArrowArray** children + ArrowArray* dictionary + void (*release)(ArrowArray*) + void* private_data + + struct ArrowArrayStream: + int (*get_schema)(ArrowArrayStream*, ArrowSchema* out) + int (*get_next)(ArrowArrayStream*, ArrowArray* out) + const char* (*get_last_error)(ArrowArrayStream*) + void (*release)(ArrowArrayStream*) + void* private_data + + struct ArrowError: + char message[1024] + + enum ArrowType: + 
NANOARROW_TYPE_UNINITIALIZED = 0 + NANOARROW_TYPE_NA = 1 + NANOARROW_TYPE_BOOL + NANOARROW_TYPE_UINT8 + NANOARROW_TYPE_INT8 + NANOARROW_TYPE_UINT16 + NANOARROW_TYPE_INT16 + NANOARROW_TYPE_UINT32 + NANOARROW_TYPE_INT32 + NANOARROW_TYPE_UINT64 + NANOARROW_TYPE_INT64 + NANOARROW_TYPE_HALF_FLOAT + NANOARROW_TYPE_FLOAT + NANOARROW_TYPE_DOUBLE + NANOARROW_TYPE_STRING + NANOARROW_TYPE_BINARY + NANOARROW_TYPE_FIXED_SIZE_BINARY + NANOARROW_TYPE_DATE32 + NANOARROW_TYPE_DATE64 + NANOARROW_TYPE_TIMESTAMP + NANOARROW_TYPE_TIME32 + NANOARROW_TYPE_TIME64 + NANOARROW_TYPE_INTERVAL_MONTHS + NANOARROW_TYPE_INTERVAL_DAY_TIME + NANOARROW_TYPE_DECIMAL128 + NANOARROW_TYPE_DECIMAL256 + NANOARROW_TYPE_LIST + NANOARROW_TYPE_STRUCT + NANOARROW_TYPE_SPARSE_UNION + NANOARROW_TYPE_DENSE_UNION + NANOARROW_TYPE_DICTIONARY + NANOARROW_TYPE_MAP + NANOARROW_TYPE_EXTENSION + NANOARROW_TYPE_FIXED_SIZE_LIST + NANOARROW_TYPE_DURATION + NANOARROW_TYPE_LARGE_STRING + NANOARROW_TYPE_LARGE_BINARY + NANOARROW_TYPE_LARGE_LIST + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO + NANOARROW_TYPE_RUN_END_ENCODED + NANOARROW_TYPE_BINARY_VIEW + NANOARROW_TYPE_STRING_VIEW + + enum ArrowTimeUnit: + NANOARROW_TIME_UNIT_SECOND = 0 + NANOARROW_TIME_UNIT_MILLI = 1 + NANOARROW_TIME_UNIT_MICRO = 2 + NANOARROW_TIME_UNIT_NANO = 3 + + enum ArrowValidationLevel: + NANOARROW_VALIDATION_LEVEL_NONE = 0 + NANOARROW_VALIDATION_LEVEL_MINIMAL = 1 + NANOARROW_VALIDATION_LEVEL_DEFAULT = 2 + NANOARROW_VALIDATION_LEVEL_FULL = 3 + + enum ArrowBufferType: + NANOARROW_BUFFER_TYPE_NONE + NANOARROW_BUFFER_TYPE_VALIDITY + NANOARROW_BUFFER_TYPE_TYPE_ID + NANOARROW_BUFFER_TYPE_UNION_OFFSET + NANOARROW_BUFFER_TYPE_DATA_OFFSET + NANOARROW_BUFFER_TYPE_DATA + NANOARROW_BUFFER_TYPE_VARIADIC_DATA + NANOARROW_BUFFER_TYPE_VARIADIC_SIZE + + struct ArrowStringView: + const char* data + int64_t size_bytes + + union ArrowBufferViewData: + const void* data + const int8_t* as_int8 + const uint8_t* as_uint8 + const int16_t* as_int16 + const uint16_t* as_uint16 + const 
int32_t* as_int32 + const uint32_t* as_uint32 + const int64_t* as_int64 + const uint64_t* as_uint64 + const double* as_double + const float* as_float + const char* as_char + const ArrowBinaryView* as_binary_view + + struct ArrowBufferView: + ArrowBufferViewData data + int64_t size_bytes + + struct ArrowBufferAllocator: + uint8_t* (*reallocate)(ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t old_size, int64_t new_size) + void (*free)(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) + void* private_data + + struct ArrowBuffer: + uint8_t* data + int64_t size_bytes + int64_t capacity_bytes + ArrowBufferAllocator allocator + + struct ArrowBitmap: + ArrowBuffer buffer + int64_t size_bits + + struct ArrowLayout: + ArrowBufferType buffer_type[3] + ArrowType buffer_data_type[3] + int64_t element_size_bits[3] + int64_t child_size_elements + + struct ArrowArrayView: + const ArrowArray* array + int64_t offset + int64_t length + int64_t null_count + ArrowType storage_type + ArrowLayout layout + ArrowBufferView buffer_views[3] + int64_t n_children + ArrowArrayView** children + ArrowArrayView* dictionary + int8_t* union_type_id_map + int32_t n_variadic_buffers + const void** variadic_buffers + int64_t* variadic_buffer_sizes + + struct ArrowMetadataReader: + const char* metadata + int64_t offset + int32_t remaining_keys + + struct ArrowSchemaView: + const ArrowSchema* schema + ArrowType type + ArrowType storage_type + ArrowLayout layout + ArrowStringView extension_name + ArrowStringView extension_metadata + int32_t fixed_size + int32_t decimal_bitwidth + int32_t decimal_precision + int32_t decimal_scale + ArrowTimeUnit time_unit + const char* timezone + const char* union_type_ids + + struct ArrowBinaryViewInlined: + int32_t size + uint8_t data[12] + + struct ArrowBinaryViewRef: + int32_t size + uint8_t prefix[4] + int32_t buffer_index + int32_t offset + + union ArrowBinaryView: + ArrowBinaryViewInlined inlined + ArrowBinaryViewRef ref + int64_t alignment_dummy 
+ + ctypedef int ArrowErrorCode + ctypedef void (*ArrowBufferDeallocatorCallback)(ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size) + + const char* ArrowTypeString(ArrowType type) + const char* ArrowTimeUnitString(ArrowTimeUnit time_unit) + + void* ArrowMalloc(int64_t size) + void ArrowFree(void* ptr) + ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback, void* private_data) + void ArrowSchemaMove(ArrowSchema* src, ArrowSchema* dst) + void ArrowSchemaRelease(ArrowSchema* schema) + void ArrowArrayMove(ArrowArray* src, ArrowArray* dst) + void ArrowArrayStreamMove(ArrowArrayStream* src, ArrowArrayStream* dst) + ArrowErrorCode ArrowArrayStreamGetSchema(ArrowArrayStream* array_stream, ArrowSchema* out, ArrowError* error) + ArrowErrorCode ArrowArrayStreamGetNext(ArrowArrayStream* array_stream, ArrowArray* out, ArrowError* error) + void ArrowSchemaRelease(ArrowSchema* schema) + void ArrowArrayMove(ArrowArray* src, ArrowArray* dst) + void ArrowArrayRelease(ArrowArray* array) + void ArrowArrayStreamRelease(ArrowArrayStream* array_stream) + const char* ArrowNanoarrowVersion() + int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, int64_t lo, int64_t hi) + void ArrowSchemaInit(ArrowSchema* schema) + ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type) + int64_t ArrowSchemaToString(const ArrowSchema* schema, char* out, int64_t n, char recursive) + ArrowErrorCode ArrowSchemaSetType(ArrowSchema* schema, ArrowType type) + ArrowErrorCode ArrowSchemaSetTypeFixedSize(ArrowSchema* schema, ArrowType type, int32_t fixed_size) + ArrowErrorCode ArrowSchemaSetTypeDecimal(ArrowSchema* schema, ArrowType type, int32_t decimal_precision, int32_t decimal_scale) + ArrowErrorCode ArrowSchemaSetTypeDateTime(ArrowSchema* schema, ArrowType type, ArrowTimeUnit time_unit, const char* timezone) + ArrowErrorCode ArrowSchemaSetFormat(ArrowSchema* schema, const char* format) + ArrowErrorCode ArrowSchemaSetName(ArrowSchema* schema, 
const char* name) + ArrowErrorCode ArrowSchemaSetMetadata(ArrowSchema* schema, const char* metadata) + ArrowErrorCode ArrowSchemaDeepCopy(const ArrowSchema* schema, ArrowSchema* schema_out) + ArrowErrorCode ArrowSchemaAllocateChildren(ArrowSchema* schema, int64_t n_children) + ArrowErrorCode ArrowSchemaAllocateDictionary(ArrowSchema* schema) + ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader, const char* metadata) + ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader, const char* metadata) + ArrowErrorCode ArrowMetadataReaderRead(ArrowMetadataReader* reader, ArrowStringView* key_out, ArrowStringView* value_out) + ArrowErrorCode ArrowMetadataBuilderInit(ArrowBuffer* buffer, const char* metadata) + ArrowErrorCode ArrowMetadataBuilderAppend(ArrowBuffer* buffer, ArrowStringView key, ArrowStringView value) + ArrowErrorCode ArrowSchemaViewInit(ArrowSchemaView* schema_view, const ArrowSchema* schema, ArrowError* error) + void ArrowBufferInit(ArrowBuffer* buffer) + void ArrowBufferReset(ArrowBuffer* buffer) + void ArrowBufferMove(ArrowBuffer* src, ArrowBuffer* dst) + ArrowErrorCode ArrowBufferReserve(ArrowBuffer* buffer, int64_t additional_size_bytes) + ArrowErrorCode ArrowBufferAppendFill(ArrowBuffer* buffer, uint8_t value, int64_t size_bytes) + ArrowErrorCode ArrowBufferAppendInt8(ArrowBuffer* buffer, int8_t value) + ArrowErrorCode ArrowBufferAppendInt64(ArrowBuffer* buffer, int64_t value) + int8_t ArrowBitGet(const uint8_t* bits, int64_t i) + int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to) + void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out) + void ArrowBitmapInit(ArrowBitmap* bitmap) + ArrowErrorCode ArrowBitmapReserve(ArrowBitmap* bitmap, int64_t additional_size_bits) + ArrowErrorCode ArrowBitmapAppend(ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) + void ArrowBitmapAppendUnsafe(ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) + void 
ArrowBitmapReset(ArrowBitmap* bitmap) + ArrowErrorCode ArrowArrayInitFromType(ArrowArray* array, ArrowType storage_type) + ArrowErrorCode ArrowArrayInitFromSchema(ArrowArray* array, const ArrowSchema* schema, ArrowError* error) + ArrowErrorCode ArrowArrayAllocateChildren(ArrowArray* array, int64_t n_children) + ArrowErrorCode ArrowArrayAllocateDictionary(ArrowArray* array) + ArrowBuffer* ArrowArrayBuffer(ArrowArray* array, int64_t i) + ArrowErrorCode ArrowArrayStartAppending(ArrowArray* array) + ArrowErrorCode ArrowArrayAppendNull(ArrowArray* array, int64_t n) + ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array, ArrowBufferView value) + ArrowErrorCode ArrowArrayAppendString(ArrowArray* array, ArrowStringView value) + ArrowErrorCode ArrowArrayFinishBuilding(ArrowArray* array, ArrowValidationLevel validation_level, ArrowError* error) + void ArrowArrayViewInitFromType(ArrowArrayView* array_view, ArrowType storage_type) + ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, const ArrowSchema* schema, ArrowError* error) + ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, const ArrowSchema* schema, ArrowError* error) + ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, const ArrowArray* array, ArrowError* error) + ArrowErrorCode ArrowArrayViewSetArrayMinimal(ArrowArrayView* array_view, const ArrowArray* array, ArrowError* error) + int64_t ArrowArrayViewGetNumBuffers(ArrowArrayView* array_view) + ArrowBufferView ArrowArrayViewGetBufferView(ArrowArrayView* array_view, int64_t i) + ArrowBufferType ArrowArrayViewGetBufferType(ArrowArrayView* array_view, int64_t i) + ArrowType ArrowArrayViewGetBufferDataType(ArrowArrayView* array_view, int64_t i) + int64_t ArrowArrayViewGetBufferElementSizeBits(ArrowArrayView* array_view, int64_t i) + void ArrowArrayViewReset(ArrowArrayView* array_view) + int8_t ArrowArrayViewIsNull(const ArrowArrayView* array_view, int64_t i) + int64_t ArrowArrayViewComputeNullCount(const 
ArrowArrayView* array_view) + ArrowStringView ArrowArrayViewGetStringUnsafe(const ArrowArrayView* array_view, int64_t i) + ArrowBufferView ArrowArrayViewGetBytesUnsafe(const ArrowArrayView* array_view, int64_t i) + ArrowErrorCode ArrowBasicArrayStreamInit(ArrowArrayStream* array_stream, ArrowSchema* schema, int64_t n_arrays) + void ArrowBasicArrayStreamSetArray(ArrowArrayStream* array_stream, int64_t i, ArrowArray* array) + ArrowErrorCode ArrowBasicArrayStreamValidate(const ArrowArrayStream* array_stream, ArrowError* error) diff --git a/python/src/nanoarrow/nanoarrow_device_c.pxd b/python/src/nanoarrow/nanoarrow_device_c.pxd new file mode 100644 index 000000000..0b6fab087 --- /dev/null +++ b/python/src/nanoarrow/nanoarrow_device_c.pxd @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# cython: language_level = 3 + + +from libc.stdint cimport int32_t, int64_t +from nanoarrow_c cimport * + +cdef extern from "nanoarrow/nanoarrow_device.h" nogil: + + ctypedef int32_t ArrowDeviceType + + cdef ArrowDeviceType ARROW_DEVICE_CPU + cdef ArrowDeviceType ARROW_DEVICE_CUDA + cdef ArrowDeviceType ARROW_DEVICE_CUDA_HOST + cdef ArrowDeviceType ARROW_DEVICE_OPENCL + cdef ArrowDeviceType ARROW_DEVICE_VULKAN + cdef ArrowDeviceType ARROW_DEVICE_METAL + cdef ArrowDeviceType ARROW_DEVICE_VPI + cdef ArrowDeviceType ARROW_DEVICE_ROCM + cdef ArrowDeviceType ARROW_DEVICE_ROCM_HOST + cdef ArrowDeviceType ARROW_DEVICE_EXT_DEV + cdef ArrowDeviceType ARROW_DEVICE_CUDA_MANAGED + cdef ArrowDeviceType ARROW_DEVICE_ONEAPI + cdef ArrowDeviceType ARROW_DEVICE_WEBGPU + cdef ArrowDeviceType ARROW_DEVICE_HEXAGON + + struct ArrowDeviceArray: + ArrowArray array + int64_t device_id + ArrowDeviceType device_type + void* sync_event + int64_t reserved[3] + + struct ArrowDevice: + ArrowDeviceType device_type + int64_t device_id + ArrowErrorCode (*array_init)(ArrowDevice* device, + ArrowDeviceArray* device_array, + ArrowArray* array, void* sync_event, void* stream) + ArrowErrorCode (*array_move)(ArrowDevice* device_src, + ArrowDeviceArray* src, + ArrowDevice* device_dst, + ArrowDeviceArray* dst) + ArrowErrorCode (*buffer_init)(ArrowDevice* device_src, + ArrowBufferView src, + ArrowDevice* device_dst, ArrowBuffer* dst, + void* stream) + ArrowErrorCode (*buffer_move)(ArrowDevice* device_src, ArrowBuffer* src, + ArrowDevice* device_dst, ArrowBuffer* dst) + ArrowErrorCode (*buffer_copy)(ArrowDevice* device_src, + ArrowBufferView src, + ArrowDevice* device_dst, + ArrowBufferView dst, void* stream) + ArrowErrorCode (*synchronize_event)(ArrowDevice* device, void* sync_event, + void* stream, ArrowError* error) + void (*release)(ArrowDevice* device) + void* private_data + + + ArrowDevice* ArrowDeviceCpu() + ArrowDevice* ArrowDeviceResolve(ArrowDeviceType device_type, int64_t device_id) + 
ArrowErrorCode ArrowDeviceArrayInit(ArrowDevice* device, ArrowDeviceArray* device_array, ArrowArray* array, void* sync_event) From 9299d5c1e2ce142e3a6ab9cd72a4275d6006bf18 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 31 Oct 2024 17:01:07 -0400 Subject: [PATCH 2/2] Try autopxd2 --- python/meson.build | 4 +- python/pyproject.toml | 1 + python/src/nanoarrow/_array.pyx | 5 +- python/src/nanoarrow/_buffer.pyx | 7 +- python/src/nanoarrow/_device.pyx | 10 +- python/src/nanoarrow/_ipc_lib.pyx | 3 +- python/src/nanoarrow/_schema.pyx | 11 +- python/src/nanoarrow/_utils.pyx | 3 +- python/src/nanoarrow/meson.build | 42 ++- python/src/nanoarrow/nanoarrow_c.pxd | 310 -------------------- python/src/nanoarrow/nanoarrow_device_c.pxd | 78 ----- python/src/nanoarrow/nanoarrow_macros.pxd | 51 ++++ 12 files changed, 122 insertions(+), 403 deletions(-) delete mode 100644 python/src/nanoarrow/nanoarrow_c.pxd delete mode 100644 python/src/nanoarrow/nanoarrow_device_c.pxd create mode 100644 python/src/nanoarrow/nanoarrow_macros.pxd diff --git a/python/meson.build b/python/meson.build index 3d1f9d394..282969f95 100644 --- a/python/meson.build +++ b/python/meson.build @@ -29,7 +29,9 @@ project( # due to https://github.com/mesonbuild/meson/issues/6728 'arrow-nanoarrow:ipc=true', 'arrow-nanoarrow:device=true', - 'arrow-nanoarrow:namespace=PythonPkg', + # Adding this namespace doesn't work with the autopxd generation + # is that a problem? 
+ #'arrow-nanoarrow:namespace=PythonPkg', ], ) diff --git a/python/pyproject.toml b/python/pyproject.toml index acb0398d0..3f1f07312 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -38,6 +38,7 @@ Changelog = "https://github.com/apache/arrow-nanoarrow/blob/main/CHANGELOG.md" [build-system] requires = [ + "autopxd2", "meson>=1.3.0", "meson-python", "Cython" diff --git a/python/src/nanoarrow/_array.pyx b/python/src/nanoarrow/_array.pyx index 869087b57..3d6bf02d0 100644 --- a/python/src/nanoarrow/_array.pyx +++ b/python/src/nanoarrow/_array.pyx @@ -76,11 +76,14 @@ from nanoarrow_c cimport ( NANOARROW_VALIDATION_LEVEL_FULL, NANOARROW_VALIDATION_LEVEL_MINIMAL, NANOARROW_VALIDATION_LEVEL_NONE, +) + +from nanoarrow_macros cimport ( NANOARROW_OK, + ARROW_DEVICE_CPU, ) from nanoarrow_device_c cimport ( - ARROW_DEVICE_CPU, ArrowDeviceType, ArrowDeviceArray, ArrowDeviceArrayInit, diff --git a/python/src/nanoarrow/_buffer.pyx b/python/src/nanoarrow/_buffer.pyx index 14b63b853..f597dbe74 100644 --- a/python/src/nanoarrow/_buffer.pyx +++ b/python/src/nanoarrow/_buffer.pyx @@ -35,7 +35,6 @@ from cpython cimport ( from cpython.ref cimport Py_INCREF, Py_DECREF from nanoarrow_c cimport ( - NANOARROW_OK, ArrowMalloc, ArrowFree, ArrowType, @@ -55,12 +54,14 @@ from nanoarrow_c cimport ( ArrowBufferMove, ) -from nanoarrow_device_c cimport ( +from nanoarrow_macros cimport ( + NANOARROW_OK, ARROW_DEVICE_CPU, ARROW_DEVICE_CUDA, - ArrowDevice, ) +from nanoarrow_device_c cimport ArrowDevice + from nanoarrow_dlpack cimport ( DLDataType, DLDevice, diff --git a/python/src/nanoarrow/_device.pyx b/python/src/nanoarrow/_device.pyx index 2ef383fc1..8f3b0e5a1 100644 --- a/python/src/nanoarrow/_device.pyx +++ b/python/src/nanoarrow/_device.pyx @@ -20,6 +20,13 @@ from libc.stdint cimport uintptr_t, int64_t from nanoarrow_device_c cimport ( + ArrowDevice, + ArrowDeviceCpu, + ArrowDeviceResolve +) + +from nanoarrow_macros cimport ( + NANOARROW_OK, ARROW_DEVICE_CPU, ARROW_DEVICE_CUDA, 
ARROW_DEVICE_CUDA_HOST, @@ -34,9 +41,6 @@ from nanoarrow_device_c cimport ( ARROW_DEVICE_ONEAPI, ARROW_DEVICE_WEBGPU, ARROW_DEVICE_HEXAGON, - ArrowDevice, - ArrowDeviceCpu, - ArrowDeviceResolve ) from nanoarrow._utils cimport Error diff --git a/python/src/nanoarrow/_ipc_lib.pyx b/python/src/nanoarrow/_ipc_lib.pyx index 163644d9e..fa1655bde 100644 --- a/python/src/nanoarrow/_ipc_lib.pyx +++ b/python/src/nanoarrow/_ipc_lib.pyx @@ -30,9 +30,10 @@ from nanoarrow_c cimport ( ArrowSchema, ArrowErrorCode, ArrowError, - NANOARROW_OK, ) +from nanoarrow_macros cimport NANOARROW_OK + from nanoarrow._schema cimport CSchema from nanoarrow._array cimport CArrayView from nanoarrow._utils cimport Error diff --git a/python/src/nanoarrow/_schema.pyx b/python/src/nanoarrow/_schema.pyx index 2717c529e..d2ff1a106 100644 --- a/python/src/nanoarrow/_schema.pyx +++ b/python/src/nanoarrow/_schema.pyx @@ -22,9 +22,6 @@ from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString, PyBytes_ from cpython.pycapsule cimport PyCapsule_GetPointer from nanoarrow_c cimport ( - ARROW_FLAG_DICTIONARY_ORDERED, - ARROW_FLAG_MAP_KEYS_SORTED, - ARROW_FLAG_NULLABLE, ArrowFree, ArrowLayout, ArrowMalloc, @@ -54,13 +51,19 @@ from nanoarrow_c cimport ( ArrowType, ArrowTypeString, NANOARROW_BUFFER_TYPE_NONE, - NANOARROW_MAX_FIXED_BUFFERS, NANOARROW_TIME_UNIT_SECOND, NANOARROW_TIME_UNIT_MILLI, NANOARROW_TIME_UNIT_MICRO, NANOARROW_TIME_UNIT_NANO, ) +from nanoarrow_macros cimport ( + ARROW_FLAG_DICTIONARY_ORDERED, + ARROW_FLAG_MAP_KEYS_SORTED, + ARROW_FLAG_NULLABLE, + NANOARROW_MAX_FIXED_BUFFERS, +) + from nanoarrow cimport _types from nanoarrow._buffer cimport CBuffer from nanoarrow._utils cimport alloc_c_schema, Error diff --git a/python/src/nanoarrow/_utils.pyx b/python/src/nanoarrow/_utils.pyx index b261fb294..0f74a82fd 100644 --- a/python/src/nanoarrow/_utils.pyx +++ b/python/src/nanoarrow/_utils.pyx @@ -54,10 +54,11 @@ from nanoarrow_c cimport ( ArrowNanoarrowVersion, ArrowSchema, 
ArrowSchemaRelease, - NANOARROW_OK, NANOARROW_TYPE_UNINITIALIZED ) +from nanoarrow_macros cimport NANOARROW_OK + from nanoarrow_device_c cimport ( ArrowDeviceArray ) diff --git a/python/src/nanoarrow/meson.build b/python/src/nanoarrow/meson.build index d6eb0a38f..dec19524f 100644 --- a/python/src/nanoarrow/meson.build +++ b/python/src/nanoarrow/meson.build @@ -23,6 +23,43 @@ nanoarrow_device_dep = nanoarrow_proj.get_variable('nanoarrow_device_dep') py = import('python').find_installation(pure: false) +nanoarrow_c = custom_target( + 'nanoarrow_c', + output: 'nanoarrow_c.pxd', + command: [ + 'autopxd', + '-I', + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/', + '-I', + meson.project_build_root() / 'subprojects/arrow-nanoarrow/src/', + # ideally we could use @INPUT@ but that will throw a sandboxing violation + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/nanoarrow/nanoarrow.h', + '@OUTPUT@', + ], +) +nanoarrow_c_dep = declare_dependency( + sources: nanoarrow_c, +) + +nanoarrow_device_c = custom_target( + 'nanoarrow_device_c', + output: 'nanoarrow_device_c.pxd', + command: [ + 'autopxd', + '-I', + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/', + '-I', + meson.project_build_root() / 'subprojects/arrow-nanoarrow/src/', + # ideally we could use @INPUT@ but that will throw a sandboxing violation + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/nanoarrow/nanoarrow_device.h', + '@OUTPUT@', + ], +) + +nanoarrow_device_c_dep = declare_dependency( + sources: nanoarrow_device_c, +) + cyfiles = [ '_array.pyx', '_array_stream.pyx', @@ -37,6 +74,8 @@ cyfiles = [ cython_args = [ '--include-dir', meson.current_source_dir(), + '--include-dir', + meson.current_build_dir(), ] if get_option('buildtype') in ['debug', 'debugoptimized'] cython_args += ['--gdb'] @@ -44,7 +83,7 @@ endif fs = import('fs') foreach cyf : cyfiles - cyfile_deps = [nanoarrow_dep] + cyfile_deps = [nanoarrow_dep, nanoarrow_c_dep, 
nanoarrow_device_c_dep] stem = fs.stem(cyf) if stem in ['_array', '_device'] @@ -76,6 +115,7 @@ py_sources = [ 'device.py', 'ipc.py', 'iterator.py', + 'nanoarrow_macros.pxd', '_repr_utils.py', 'schema.py', 'visitor.py', diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd deleted file mode 100644 index 3cb98f435..000000000 --- a/python/src/nanoarrow/nanoarrow_c.pxd +++ /dev/null @@ -1,310 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# cython: language_level = 3 - - -from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t -from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t - -cdef extern from "nanoarrow/nanoarrow.h" nogil: - - cdef int NANOARROW_OK - cdef int NANOARROW_MAX_FIXED_BUFFERS - cdef int ARROW_FLAG_DICTIONARY_ORDERED - cdef int ARROW_FLAG_NULLABLE - cdef int ARROW_FLAG_MAP_KEYS_SORTED - - struct ArrowSchema: - const char* format - const char* name - const char* metadata - int64_t flags - int64_t n_children - ArrowSchema** children - ArrowSchema* dictionary - void (*release)(ArrowSchema*) - void* private_data - - struct ArrowArray: - int64_t length - int64_t null_count - int64_t offset - int64_t n_buffers - int64_t n_children - const void** buffers - ArrowArray** children - ArrowArray* dictionary - void (*release)(ArrowArray*) - void* private_data - - struct ArrowArrayStream: - int (*get_schema)(ArrowArrayStream*, ArrowSchema* out) - int (*get_next)(ArrowArrayStream*, ArrowArray* out) - const char* (*get_last_error)(ArrowArrayStream*) - void (*release)(ArrowArrayStream*) - void* private_data - - struct ArrowError: - char message[1024] - - enum ArrowType: - NANOARROW_TYPE_UNINITIALIZED = 0 - NANOARROW_TYPE_NA = 1 - NANOARROW_TYPE_BOOL - NANOARROW_TYPE_UINT8 - NANOARROW_TYPE_INT8 - NANOARROW_TYPE_UINT16 - NANOARROW_TYPE_INT16 - NANOARROW_TYPE_UINT32 - NANOARROW_TYPE_INT32 - NANOARROW_TYPE_UINT64 - NANOARROW_TYPE_INT64 - NANOARROW_TYPE_HALF_FLOAT - NANOARROW_TYPE_FLOAT - NANOARROW_TYPE_DOUBLE - NANOARROW_TYPE_STRING - NANOARROW_TYPE_BINARY - NANOARROW_TYPE_FIXED_SIZE_BINARY - NANOARROW_TYPE_DATE32 - NANOARROW_TYPE_DATE64 - NANOARROW_TYPE_TIMESTAMP - NANOARROW_TYPE_TIME32 - NANOARROW_TYPE_TIME64 - NANOARROW_TYPE_INTERVAL_MONTHS - NANOARROW_TYPE_INTERVAL_DAY_TIME - NANOARROW_TYPE_DECIMAL128 - NANOARROW_TYPE_DECIMAL256 - NANOARROW_TYPE_LIST - NANOARROW_TYPE_STRUCT - NANOARROW_TYPE_SPARSE_UNION - NANOARROW_TYPE_DENSE_UNION - NANOARROW_TYPE_DICTIONARY - NANOARROW_TYPE_MAP - 
NANOARROW_TYPE_EXTENSION - NANOARROW_TYPE_FIXED_SIZE_LIST - NANOARROW_TYPE_DURATION - NANOARROW_TYPE_LARGE_STRING - NANOARROW_TYPE_LARGE_BINARY - NANOARROW_TYPE_LARGE_LIST - NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO - NANOARROW_TYPE_RUN_END_ENCODED - NANOARROW_TYPE_BINARY_VIEW - NANOARROW_TYPE_STRING_VIEW - - enum ArrowTimeUnit: - NANOARROW_TIME_UNIT_SECOND = 0 - NANOARROW_TIME_UNIT_MILLI = 1 - NANOARROW_TIME_UNIT_MICRO = 2 - NANOARROW_TIME_UNIT_NANO = 3 - - enum ArrowValidationLevel: - NANOARROW_VALIDATION_LEVEL_NONE = 0 - NANOARROW_VALIDATION_LEVEL_MINIMAL = 1 - NANOARROW_VALIDATION_LEVEL_DEFAULT = 2 - NANOARROW_VALIDATION_LEVEL_FULL = 3 - - enum ArrowBufferType: - NANOARROW_BUFFER_TYPE_NONE - NANOARROW_BUFFER_TYPE_VALIDITY - NANOARROW_BUFFER_TYPE_TYPE_ID - NANOARROW_BUFFER_TYPE_UNION_OFFSET - NANOARROW_BUFFER_TYPE_DATA_OFFSET - NANOARROW_BUFFER_TYPE_DATA - NANOARROW_BUFFER_TYPE_VARIADIC_DATA - NANOARROW_BUFFER_TYPE_VARIADIC_SIZE - - struct ArrowStringView: - const char* data - int64_t size_bytes - - union ArrowBufferViewData: - const void* data - const int8_t* as_int8 - const uint8_t* as_uint8 - const int16_t* as_int16 - const uint16_t* as_uint16 - const int32_t* as_int32 - const uint32_t* as_uint32 - const int64_t* as_int64 - const uint64_t* as_uint64 - const double* as_double - const float* as_float - const char* as_char - const ArrowBinaryView* as_binary_view - - struct ArrowBufferView: - ArrowBufferViewData data - int64_t size_bytes - - struct ArrowBufferAllocator: - uint8_t* (*reallocate)(ArrowBufferAllocator* allocator, uint8_t* ptr, - int64_t old_size, int64_t new_size) - void (*free)(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) - void* private_data - - struct ArrowBuffer: - uint8_t* data - int64_t size_bytes - int64_t capacity_bytes - ArrowBufferAllocator allocator - - struct ArrowBitmap: - ArrowBuffer buffer - int64_t size_bits - - struct ArrowLayout: - ArrowBufferType buffer_type[3] - ArrowType buffer_data_type[3] - int64_t 
element_size_bits[3] - int64_t child_size_elements - - struct ArrowArrayView: - const ArrowArray* array - int64_t offset - int64_t length - int64_t null_count - ArrowType storage_type - ArrowLayout layout - ArrowBufferView buffer_views[3] - int64_t n_children - ArrowArrayView** children - ArrowArrayView* dictionary - int8_t* union_type_id_map - int32_t n_variadic_buffers - const void** variadic_buffers - int64_t* variadic_buffer_sizes - - struct ArrowMetadataReader: - const char* metadata - int64_t offset - int32_t remaining_keys - - struct ArrowSchemaView: - const ArrowSchema* schema - ArrowType type - ArrowType storage_type - ArrowLayout layout - ArrowStringView extension_name - ArrowStringView extension_metadata - int32_t fixed_size - int32_t decimal_bitwidth - int32_t decimal_precision - int32_t decimal_scale - ArrowTimeUnit time_unit - const char* timezone - const char* union_type_ids - - struct ArrowBinaryViewInlined: - int32_t size - uint8_t data[12] - - struct ArrowBinaryViewRef: - int32_t size - uint8_t prefix[4] - int32_t buffer_index - int32_t offset - - union ArrowBinaryView: - ArrowBinaryViewInlined inlined - ArrowBinaryViewRef ref - int64_t alignment_dummy - - ctypedef int ArrowErrorCode - ctypedef void (*ArrowBufferDeallocatorCallback)(ArrowBufferAllocator* allocator, - uint8_t* ptr, int64_t size) - - const char* ArrowTypeString(ArrowType type) - const char* ArrowTimeUnitString(ArrowTimeUnit time_unit) - - void* ArrowMalloc(int64_t size) - void ArrowFree(void* ptr) - ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback, void* private_data) - void ArrowSchemaMove(ArrowSchema* src, ArrowSchema* dst) - void ArrowSchemaRelease(ArrowSchema* schema) - void ArrowArrayMove(ArrowArray* src, ArrowArray* dst) - void ArrowArrayStreamMove(ArrowArrayStream* src, ArrowArrayStream* dst) - ArrowErrorCode ArrowArrayStreamGetSchema(ArrowArrayStream* array_stream, ArrowSchema* out, ArrowError* error) - ArrowErrorCode 
ArrowArrayStreamGetNext(ArrowArrayStream* array_stream, ArrowArray* out, ArrowError* error) - void ArrowSchemaRelease(ArrowSchema* schema) - void ArrowArrayMove(ArrowArray* src, ArrowArray* dst) - void ArrowArrayRelease(ArrowArray* array) - void ArrowArrayStreamRelease(ArrowArrayStream* array_stream) - const char* ArrowNanoarrowVersion() - int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, int64_t lo, int64_t hi) - void ArrowSchemaInit(ArrowSchema* schema) - ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type) - int64_t ArrowSchemaToString(const ArrowSchema* schema, char* out, int64_t n, char recursive) - ArrowErrorCode ArrowSchemaSetType(ArrowSchema* schema, ArrowType type) - ArrowErrorCode ArrowSchemaSetTypeFixedSize(ArrowSchema* schema, ArrowType type, int32_t fixed_size) - ArrowErrorCode ArrowSchemaSetTypeDecimal(ArrowSchema* schema, ArrowType type, int32_t decimal_precision, int32_t decimal_scale) - ArrowErrorCode ArrowSchemaSetTypeDateTime(ArrowSchema* schema, ArrowType type, ArrowTimeUnit time_unit, const char* timezone) - ArrowErrorCode ArrowSchemaSetFormat(ArrowSchema* schema, const char* format) - ArrowErrorCode ArrowSchemaSetName(ArrowSchema* schema, const char* name) - ArrowErrorCode ArrowSchemaSetMetadata(ArrowSchema* schema, const char* metadata) - ArrowErrorCode ArrowSchemaDeepCopy(const ArrowSchema* schema, ArrowSchema* schema_out) - ArrowErrorCode ArrowSchemaAllocateChildren(ArrowSchema* schema, int64_t n_children) - ArrowErrorCode ArrowSchemaAllocateDictionary(ArrowSchema* schema) - ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader, const char* metadata) - ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader, const char* metadata) - ArrowErrorCode ArrowMetadataReaderRead(ArrowMetadataReader* reader, ArrowStringView* key_out, ArrowStringView* value_out) - ArrowErrorCode ArrowMetadataBuilderInit(ArrowBuffer* buffer, const char* metadata) - ArrowErrorCode 
ArrowMetadataBuilderAppend(ArrowBuffer* buffer, ArrowStringView key, ArrowStringView value) - ArrowErrorCode ArrowSchemaViewInit(ArrowSchemaView* schema_view, const ArrowSchema* schema, ArrowError* error) - void ArrowBufferInit(ArrowBuffer* buffer) - void ArrowBufferReset(ArrowBuffer* buffer) - void ArrowBufferMove(ArrowBuffer* src, ArrowBuffer* dst) - ArrowErrorCode ArrowBufferReserve(ArrowBuffer* buffer, int64_t additional_size_bytes) - ArrowErrorCode ArrowBufferAppendFill(ArrowBuffer* buffer, uint8_t value, int64_t size_bytes) - ArrowErrorCode ArrowBufferAppendInt8(ArrowBuffer* buffer, int8_t value) - ArrowErrorCode ArrowBufferAppendInt64(ArrowBuffer* buffer, int64_t value) - int8_t ArrowBitGet(const uint8_t* bits, int64_t i) - int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to) - void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out) - void ArrowBitmapInit(ArrowBitmap* bitmap) - ArrowErrorCode ArrowBitmapReserve(ArrowBitmap* bitmap, int64_t additional_size_bits) - ArrowErrorCode ArrowBitmapAppend(ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) - void ArrowBitmapAppendUnsafe(ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) - void ArrowBitmapReset(ArrowBitmap* bitmap) - ArrowErrorCode ArrowArrayInitFromType(ArrowArray* array, ArrowType storage_type) - ArrowErrorCode ArrowArrayInitFromSchema(ArrowArray* array, const ArrowSchema* schema, ArrowError* error) - ArrowErrorCode ArrowArrayAllocateChildren(ArrowArray* array, int64_t n_children) - ArrowErrorCode ArrowArrayAllocateDictionary(ArrowArray* array) - ArrowBuffer* ArrowArrayBuffer(ArrowArray* array, int64_t i) - ArrowErrorCode ArrowArrayStartAppending(ArrowArray* array) - ArrowErrorCode ArrowArrayAppendNull(ArrowArray* array, int64_t n) - ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array, ArrowBufferView value) - ArrowErrorCode ArrowArrayAppendString(ArrowArray* array, ArrowStringView value) - ArrowErrorCode 
ArrowArrayFinishBuilding(ArrowArray* array, ArrowValidationLevel validation_level, ArrowError* error) - void ArrowArrayViewInitFromType(ArrowArrayView* array_view, ArrowType storage_type) - ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, const ArrowSchema* schema, ArrowError* error) - ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, const ArrowSchema* schema, ArrowError* error) - ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, const ArrowArray* array, ArrowError* error) - ArrowErrorCode ArrowArrayViewSetArrayMinimal(ArrowArrayView* array_view, const ArrowArray* array, ArrowError* error) - int64_t ArrowArrayViewGetNumBuffers(ArrowArrayView* array_view) - ArrowBufferView ArrowArrayViewGetBufferView(ArrowArrayView* array_view, int64_t i) - ArrowBufferType ArrowArrayViewGetBufferType(ArrowArrayView* array_view, int64_t i) - ArrowType ArrowArrayViewGetBufferDataType(ArrowArrayView* array_view, int64_t i) - int64_t ArrowArrayViewGetBufferElementSizeBits(ArrowArrayView* array_view, int64_t i) - void ArrowArrayViewReset(ArrowArrayView* array_view) - int8_t ArrowArrayViewIsNull(const ArrowArrayView* array_view, int64_t i) - int64_t ArrowArrayViewComputeNullCount(const ArrowArrayView* array_view) - ArrowStringView ArrowArrayViewGetStringUnsafe(const ArrowArrayView* array_view, int64_t i) - ArrowBufferView ArrowArrayViewGetBytesUnsafe(const ArrowArrayView* array_view, int64_t i) - ArrowErrorCode ArrowBasicArrayStreamInit(ArrowArrayStream* array_stream, ArrowSchema* schema, int64_t n_arrays) - void ArrowBasicArrayStreamSetArray(ArrowArrayStream* array_stream, int64_t i, ArrowArray* array) - ArrowErrorCode ArrowBasicArrayStreamValidate(const ArrowArrayStream* array_stream, ArrowError* error) diff --git a/python/src/nanoarrow/nanoarrow_device_c.pxd b/python/src/nanoarrow/nanoarrow_device_c.pxd deleted file mode 100644 index 0b6fab087..000000000 --- a/python/src/nanoarrow/nanoarrow_device_c.pxd +++ /dev/null @@ 
-1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# cython: language_level = 3 - - -from libc.stdint cimport int32_t, int64_t -from nanoarrow_c cimport * - -cdef extern from "nanoarrow/nanoarrow_device.h" nogil: - - ctypedef int32_t ArrowDeviceType - - cdef ArrowDeviceType ARROW_DEVICE_CPU - cdef ArrowDeviceType ARROW_DEVICE_CUDA - cdef ArrowDeviceType ARROW_DEVICE_CUDA_HOST - cdef ArrowDeviceType ARROW_DEVICE_OPENCL - cdef ArrowDeviceType ARROW_DEVICE_VULKAN - cdef ArrowDeviceType ARROW_DEVICE_METAL - cdef ArrowDeviceType ARROW_DEVICE_VPI - cdef ArrowDeviceType ARROW_DEVICE_ROCM - cdef ArrowDeviceType ARROW_DEVICE_ROCM_HOST - cdef ArrowDeviceType ARROW_DEVICE_EXT_DEV - cdef ArrowDeviceType ARROW_DEVICE_CUDA_MANAGED - cdef ArrowDeviceType ARROW_DEVICE_ONEAPI - cdef ArrowDeviceType ARROW_DEVICE_WEBGPU - cdef ArrowDeviceType ARROW_DEVICE_HEXAGON - - struct ArrowDeviceArray: - ArrowArray array - int64_t device_id - ArrowDeviceType device_type - void* sync_event - int64_t reserved[3] - - struct ArrowDevice: - ArrowDeviceType device_type - int64_t device_id - ArrowErrorCode (*array_init)(ArrowDevice* device, - ArrowDeviceArray* device_array, - ArrowArray* array, void* sync_event, void* stream) - ArrowErrorCode 
(*array_move)(ArrowDevice* device_src, - ArrowDeviceArray* src, - ArrowDevice* device_dst, - ArrowDeviceArray* dst) - ArrowErrorCode (*buffer_init)(ArrowDevice* device_src, - ArrowBufferView src, - ArrowDevice* device_dst, ArrowBuffer* dst, - void* stream) - ArrowErrorCode (*buffer_move)(ArrowDevice* device_src, ArrowBuffer* src, - ArrowDevice* device_dst, ArrowBuffer* dst) - ArrowErrorCode (*buffer_copy)(ArrowDevice* device_src, - ArrowBufferView src, - ArrowDevice* device_dst, - ArrowBufferView dst, void* stream) - ArrowErrorCode (*synchronize_event)(ArrowDevice* device, void* sync_event, - void* stream, ArrowError* error) - void (*release)(ArrowDevice* device) - void* private_data - - - ArrowDevice* ArrowDeviceCpu() - ArrowDevice* ArrowDeviceResolve(ArrowDeviceType device_type, int64_t device_id) - ArrowErrorCode ArrowDeviceArrayInit(ArrowDevice* device, ArrowDeviceArray* device_array, ArrowArray* array, void* sync_event) diff --git a/python/src/nanoarrow/nanoarrow_macros.pxd b/python/src/nanoarrow/nanoarrow_macros.pxd new file mode 100644 index 000000000..f4b398ae0 --- /dev/null +++ b/python/src/nanoarrow/nanoarrow_macros.pxd @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# cython: language_level = 3 + +# Macro constants are not generated by autopxd2 +# https://github.com/elijahr/python-autopxd2/issues/37 + +from libc.stdint cimport int32_t + +cdef extern from "nanoarrow/nanoarrow.h" nogil: + + cdef const int NANOARROW_OK + cdef const int NANOARROW_MAX_FIXED_BUFFERS + cdef const int ARROW_FLAG_DICTIONARY_ORDERED + cdef const int ARROW_FLAG_NULLABLE + cdef const int ARROW_FLAG_MAP_KEYS_SORTED + + +cdef extern from "nanoarrow/nanoarrow_device.h" nogil: + + ctypedef int32_t ArrowDeviceType + + cdef ArrowDeviceType ARROW_DEVICE_CPU + cdef ArrowDeviceType ARROW_DEVICE_CUDA + cdef ArrowDeviceType ARROW_DEVICE_CUDA_HOST + cdef ArrowDeviceType ARROW_DEVICE_OPENCL + cdef ArrowDeviceType ARROW_DEVICE_VULKAN + cdef ArrowDeviceType ARROW_DEVICE_METAL + cdef ArrowDeviceType ARROW_DEVICE_VPI + cdef ArrowDeviceType ARROW_DEVICE_ROCM + cdef ArrowDeviceType ARROW_DEVICE_ROCM_HOST + cdef ArrowDeviceType ARROW_DEVICE_EXT_DEV + cdef ArrowDeviceType ARROW_DEVICE_CUDA_MANAGED + cdef ArrowDeviceType ARROW_DEVICE_ONEAPI + cdef ArrowDeviceType ARROW_DEVICE_WEBGPU + cdef ArrowDeviceType ARROW_DEVICE_HEXAGON