diff --git a/python/bootstrap.py b/python/bootstrap.py deleted file mode 100644 index 19729e07b..000000000 --- a/python/bootstrap.py +++ /dev/null @@ -1,282 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import argparse -import pathlib -import re -import subprocess -import sys - - -# Generate the nanoarrow_c.pxd file used by the Cython extensions -class PxdGenerator: - def __init__(self): - self._define_regexes() - - def generate_pxd(self, file_in, file_out): - file_in_name = pathlib.Path(file_in).name - - # Read the header - content = None - with open(file_in, "r") as input: - content = input.read() - - # Strip comments - content = self.re_comment.sub("", content) - - # Replace NANOARROW_MAX_FIXED_BUFFERS with its value - content = self._preprocess_content(content) - - # Find typedefs, types, and function definitions - typedefs = self._find_typedefs(content) - types = self._find_types(content) - func_defs = self._find_func_defs(content) - - # Make corresponding cython definitions - typedefs_cython = [self._typdef_to_cython(t, " ") for t in typedefs] - types_cython = [self._type_to_cython(t, " ") for t in types] - func_defs_cython = [self._func_def_to_cython(d, " ") for d in func_defs] - - # Unindent the header - header = self.re_newline_plus_indent.sub("\n", self._pxd_header()) - - # Write nanoarrow_c.pxd - with open(file_out, "wb") as output: - output.write(header.encode("UTF-8")) - - output.write( - f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8") - ) - - # A few things we add in manually - self._write_defs(output) - - for type in types_cython: - output.write(type.encode("UTF-8")) - output.write(b"\n\n") - - for typedef in typedefs_cython: - output.write(typedef.encode("UTF-8")) - output.write(b"\n") - - output.write(b"\n") - - for func_def in func_defs_cython: - output.write(func_def.encode("UTF-8")) - output.write(b"\n") - - def _preprocess_content(self, content): - return content - - def _write_defs(self, output): - pass - - def _define_regexes(self): - self.re_comment = re.compile(r"\s*//[^\n]*") - self.re_typedef = re.compile(r"typedef(?P[^;]+)") - self.re_type = re.compile( - r"(?Pstruct|union|enum) (?PArrow[^ ]+) {(?P[^}]*)}" - ) - self.re_func_def = re.compile( - r"\n(static inline )?(?Pconst )?(struct |enum )?" - r"(?P[A-Za-z0-9_*]+) " - r"(?PArrow[A-Za-z0-9]+)\((?P[^\)]*)\);" - ) - self.re_tagged_type = re.compile( - r"(?Pstruct|union|enum) (?PArrow[A-Za-z]+)" - ) - self.re_struct_delim = re.compile(r";\s*") - self.re_enum_delim = re.compile(r",\s*") - self.re_whitespace = re.compile(r"\s+") - self.re_newline_plus_indent = re.compile(r"\n +") - - def _strip_comments(self, content): - return self.re_comment.sub("", content) - - def _find_typedefs(self, content): - return [m.groupdict() for m in self.re_typedef.finditer(content)] - - def _find_types(self, content): - return [m.groupdict() for m in self.re_type.finditer(content)] - - def _find_func_defs(self, content): - return [m.groupdict() for m in self.re_func_def.finditer(content)] - - def _typdef_to_cython(self, t, indent=""): - typedef = t["typedef"] - typedef = self.re_tagged_type.sub(r"\2", typedef) - return f"{indent}ctypedef {typedef}" - - def _type_to_cython(self, t, indent=""): - type = t["type"] - name = t["name"] - body = self.re_tagged_type.sub(r"\2", t["body"].strip()) - if type == "enum": - items = [item for item in self.re_enum_delim.split(body) if item] - else: - items = [item for item in self.re_struct_delim.split(body) if item] - - cython_body = f"\n{indent} ".join([""] + items) - return f"{indent}{type} {name}:{cython_body}" - - def _func_def_to_cython(self, d, indent=""): - return_type = d["return_type"].strip() - if d["const"]: - return_type = "const " + return_type - name = d["name"] - args = re.sub(r"\s+", " ", d["args"].strip()) - args = self.re_tagged_type.sub(r"\2", args) - - # Cython doesn't do (void) - if args == "void": - args = "" - - return f"{indent}{return_type} {name}({args})" - - def _pxd_header(self): - return """ - # Licensed to the Apache Software Foundation (ASF) under one - # or more contributor license agreements. See the NOTICE file - # distributed with this work for additional information - # regarding copyright ownership. The ASF licenses this file - # to you under the Apache License, Version 2.0 (the - # "License"); you may not use this file except in compliance - # with the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, - # software distributed under the License is distributed on an - # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - # KIND, either express or implied. See the License for the - # specific language governing permissions and limitations - # under the License. - - # cython: language_level = 3 - - """ - - -class NanoarrowPxdGenerator(PxdGenerator): - def _preprocess_content(self, content): - content = re.sub(r"NANOARROW_MAX_FIXED_BUFFERS", "3", content) - content = re.sub(r"NANOARROW_BINARY_VIEW_INLINE_SIZE", "12", content) - content = re.sub(r"NANOARROW_BINARY_VIEW_PREFIX_SIZE", "4", content) - return content - - def _pxd_header(self): - return ( - super()._pxd_header() - + """ - from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t - from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t - """ - ) - - def _write_defs(self, output): - output.write(b"\n") - output.write(b" cdef int NANOARROW_OK\n") - output.write(b" cdef int NANOARROW_MAX_FIXED_BUFFERS\n") - output.write(b" cdef int ARROW_FLAG_DICTIONARY_ORDERED\n") - output.write(b" cdef int ARROW_FLAG_NULLABLE\n") - output.write(b" cdef int ARROW_FLAG_MAP_KEYS_SORTED\n") - output.write(b"\n") - - -class NanoarrowDevicePxdGenerator(PxdGenerator): - def _preprocess_content(self, content): - self.device_names = re.findall("#define (ARROW_DEVICE_[A-Z0-9_]+)", content) - return super()._preprocess_content(content) - - def _find_typedefs(self, content): - return [] - - def _pxd_header(self): - return ( - super()._pxd_header() - + """ - from libc.stdint cimport int32_t, int64_t - from nanoarrow_c cimport * - """ - ) - - def _write_defs(self, output): - output.write(b"\n") - output.write(b" ctypedef int32_t ArrowDeviceType\n") - output.write(b"\n") - for name in self.device_names: - output.write(f" cdef ArrowDeviceType {name}\n".encode()) - output.write(b"\n") - - -def copy_or_generate_nanoarrow_c(target_dir: pathlib.Path): - vendored_files = [ - "nanoarrow.h", - "nanoarrow.c", - "nanoarrow_ipc.h", - "nanoarrow_ipc.c", - "nanoarrow_device.h", - "nanoarrow_device.c", - ] - dst = {name: target_dir / name for name in vendored_files} - - this_dir = pathlib.Path(__file__).parent.resolve() - arrow_proj_dir = this_dir / "subprojects" / "arrow-nanoarrow" - - subprocess.run( - [ - sys.executable, - arrow_proj_dir / "ci" / "scripts" / "bundle.py", - "--symbol-namespace", - "PythonPkg", - "--header-namespace", - "", - "--source-output-dir", - target_dir, - "--include-output-dir", - target_dir, - "--with-device", - "--with-ipc", - ], - ) - - if not dst["nanoarrow.h"].exists(): - raise ValueError("Attempt to vendor nanoarrow.c/h failed") - - -# Runs the pxd generator with some information about the file name -def generate_nanoarrow_pxds(target_dir: pathlib.Path): - NanoarrowPxdGenerator().generate_pxd( - target_dir / "nanoarrow.h", target_dir / "nanoarrow_c.pxd" - ) - NanoarrowDevicePxdGenerator().generate_pxd( - target_dir / "nanoarrow_device.h", - target_dir / "nanoarrow_device_c.pxd", - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--output-dir", help="Target directory where files should be written" - ) - - args = parser.parse_args() - target_dir = pathlib.Path(args.output_dir).resolve() - - copy_or_generate_nanoarrow_c(target_dir) - generate_nanoarrow_pxds(target_dir) diff --git a/python/generate_dist.py b/python/generate_dist.py index abf4104bd..afaddf9e3 100644 --- a/python/generate_dist.py +++ b/python/generate_dist.py @@ -44,16 +44,6 @@ def main(): target_src_dir = subproj_dir / "src" shutil.copytree(src_dir / "src", target_src_dir) - # CMake isn't actually required for building, but the bundle.py script reads from - # its configuration - shutil.copy(src_dir / "CMakeLists.txt", subproj_dir / "CMakeLists.txt") - - subproj_ci_scripts_dir = subproj_dir / "ci" / "scripts" - subproj_ci_scripts_dir.mkdir(parents=True) - shutil.copy( - src_dir / "ci" / "scripts" / "bundle.py", subproj_ci_scripts_dir / "bundle.py" - ) - if __name__ == "__main__": main() diff --git a/python/meson.build b/python/meson.build index 3d1f9d394..282969f95 100644 --- a/python/meson.build +++ b/python/meson.build @@ -29,7 +29,9 @@ project( # due to https://github.com/mesonbuild/meson/issues/6728 'arrow-nanoarrow:ipc=true', 'arrow-nanoarrow:device=true', - 'arrow-nanoarrow:namespace=PythonPkg', + # Adding this namespace doesn't work with the autopxd generation + # is that a problem? + #'arrow-nanoarrow:namespace=PythonPkg', ], ) diff --git a/python/pyproject.toml b/python/pyproject.toml index acb0398d0..3f1f07312 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -38,6 +38,7 @@ Changelog = "https://github.com/apache/arrow-nanoarrow/blob/main/CHANGELOG.md" [build-system] requires = [ + "autopxd2", "meson>=1.3.0", "meson-python", "Cython" diff --git a/python/src/nanoarrow/_array.pyx b/python/src/nanoarrow/_array.pyx index 869087b57..3d6bf02d0 100644 --- a/python/src/nanoarrow/_array.pyx +++ b/python/src/nanoarrow/_array.pyx @@ -76,11 +76,14 @@ from nanoarrow_c cimport ( NANOARROW_VALIDATION_LEVEL_FULL, NANOARROW_VALIDATION_LEVEL_MINIMAL, NANOARROW_VALIDATION_LEVEL_NONE, +) + +from nanoarrow_macros cimport ( NANOARROW_OK, + ARROW_DEVICE_CPU, ) from nanoarrow_device_c cimport ( - ARROW_DEVICE_CPU, ArrowDeviceType, ArrowDeviceArray, ArrowDeviceArrayInit, diff --git a/python/src/nanoarrow/_buffer.pyx b/python/src/nanoarrow/_buffer.pyx index 14b63b853..f597dbe74 100644 --- a/python/src/nanoarrow/_buffer.pyx +++ b/python/src/nanoarrow/_buffer.pyx @@ -35,7 +35,6 @@ from cpython cimport ( from cpython.ref cimport Py_INCREF, Py_DECREF from nanoarrow_c cimport ( - NANOARROW_OK, ArrowMalloc, ArrowFree, ArrowType, @@ -55,12 +54,14 @@ from nanoarrow_c cimport ( ArrowBufferMove, ) -from nanoarrow_device_c cimport ( +from nanoarrow_macros cimport ( + NANOARROW_OK, ARROW_DEVICE_CPU, ARROW_DEVICE_CUDA, - ArrowDevice, ) +from nanoarrow_device_c cimport ArrowDevice + from nanoarrow_dlpack cimport ( DLDataType, DLDevice, diff --git a/python/src/nanoarrow/_device.pyx b/python/src/nanoarrow/_device.pyx index 2ef383fc1..8f3b0e5a1 100644 --- a/python/src/nanoarrow/_device.pyx +++ b/python/src/nanoarrow/_device.pyx @@ -20,6 +20,13 @@ from libc.stdint cimport uintptr_t, int64_t from nanoarrow_device_c cimport ( + ArrowDevice, + ArrowDeviceCpu, + ArrowDeviceResolve +) + +from nanoarrow_macros cimport ( + NANOARROW_OK, ARROW_DEVICE_CPU, ARROW_DEVICE_CUDA, ARROW_DEVICE_CUDA_HOST, @@ -34,9 +41,6 @@ from nanoarrow_device_c cimport ( ARROW_DEVICE_ONEAPI, ARROW_DEVICE_WEBGPU, ARROW_DEVICE_HEXAGON, - ArrowDevice, - ArrowDeviceCpu, - ArrowDeviceResolve ) from nanoarrow._utils cimport Error diff --git a/python/src/nanoarrow/_ipc_lib.pyx b/python/src/nanoarrow/_ipc_lib.pyx index 163644d9e..fa1655bde 100644 --- a/python/src/nanoarrow/_ipc_lib.pyx +++ b/python/src/nanoarrow/_ipc_lib.pyx @@ -30,9 +30,10 @@ from nanoarrow_c cimport ( ArrowSchema, ArrowErrorCode, ArrowError, - NANOARROW_OK, ) +from nanoarrow_macros cimport NANOARROW_OK + from nanoarrow._schema cimport CSchema from nanoarrow._array cimport CArrayView from nanoarrow._utils cimport Error diff --git a/python/src/nanoarrow/_schema.pyx b/python/src/nanoarrow/_schema.pyx index 2717c529e..d2ff1a106 100644 --- a/python/src/nanoarrow/_schema.pyx +++ b/python/src/nanoarrow/_schema.pyx @@ -22,9 +22,6 @@ from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString, PyBytes_ from cpython.pycapsule cimport PyCapsule_GetPointer from nanoarrow_c cimport ( - ARROW_FLAG_DICTIONARY_ORDERED, - ARROW_FLAG_MAP_KEYS_SORTED, - ARROW_FLAG_NULLABLE, ArrowFree, ArrowLayout, ArrowMalloc, @@ -54,13 +51,19 @@ from nanoarrow_c cimport ( ArrowType, ArrowTypeString, NANOARROW_BUFFER_TYPE_NONE, - NANOARROW_MAX_FIXED_BUFFERS, NANOARROW_TIME_UNIT_SECOND, NANOARROW_TIME_UNIT_MILLI, NANOARROW_TIME_UNIT_MICRO, NANOARROW_TIME_UNIT_NANO, ) +from nanoarrow_macros cimport ( + ARROW_FLAG_DICTIONARY_ORDERED, + ARROW_FLAG_MAP_KEYS_SORTED, + ARROW_FLAG_NULLABLE, + NANOARROW_MAX_FIXED_BUFFERS, +) + from nanoarrow cimport _types from nanoarrow._buffer cimport CBuffer from nanoarrow._utils cimport alloc_c_schema, Error diff --git a/python/src/nanoarrow/_utils.pyx b/python/src/nanoarrow/_utils.pyx index b261fb294..0f74a82fd 100644 --- a/python/src/nanoarrow/_utils.pyx +++ b/python/src/nanoarrow/_utils.pyx @@ -54,10 +54,11 @@ from nanoarrow_c cimport ( ArrowNanoarrowVersion, ArrowSchema, ArrowSchemaRelease, - NANOARROW_OK, NANOARROW_TYPE_UNINITIALIZED ) +from nanoarrow_macros cimport NANOARROW_OK + from nanoarrow_device_c cimport ( ArrowDeviceArray ) diff --git a/python/src/nanoarrow/meson.build b/python/src/nanoarrow/meson.build index 452cf37a2..dec19524f 100644 --- a/python/src/nanoarrow/meson.build +++ b/python/src/nanoarrow/meson.build @@ -23,30 +23,41 @@ nanoarrow_device_dep = nanoarrow_proj.get_variable('nanoarrow_device_dep') py = import('python').find_installation(pure: false) -vendored_files = custom_target( - 'generate-pyx', - output: [ - 'nanoarrow.c', - 'nanoarrow_c.pxd', - 'nanoarrow_device.c', - 'nanoarrow_device_c.pxd', - 'nanoarrow_device.h', - 'nanoarrow_device.hpp', - 'nanoarrow.h', - 'nanoarrow.hpp', - 'nanoarrow_ipc.c', - 'nanoarrow_ipc.h', - 'nanoarrow_ipc.hpp', +nanoarrow_c = custom_target( + 'nanoarrow_c', + output: 'nanoarrow_c.pxd', + command: [ + 'autopxd', + '-I', + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/', + '-I', + meson.project_build_root() / 'subprojects/arrow-nanoarrow/src/', + # ideally we could use @INPUT@ but that will throw a sandboxing violation + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/nanoarrow/nanoarrow.h', + '@OUTPUT@', ], +) +nanoarrow_c_dep = declare_dependency( + sources: nanoarrow_c, +) + +nanoarrow_device_c = custom_target( + 'nanoarrow_device_c', + output: 'nanoarrow_device_c.pxd', command: [ - py, - meson.current_source_dir() + '/../../bootstrap.py', - '--output-dir', meson.current_build_dir() + 'autopxd', + '-I', + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/', + '-I', + meson.project_build_root() / 'subprojects/arrow-nanoarrow/src/', + # ideally we could use @INPUT@ but that will throw a sandboxing violation + meson.project_source_root() / 'subprojects/arrow-nanoarrow/src/nanoarrow/nanoarrow_device.h', + '@OUTPUT@', ], ) -nanoarrow_pyx_dep = declare_dependency( - sources: vendored_files[1], +nanoarrow_device_c_dep = declare_dependency( + sources: nanoarrow_device_c, ) cyfiles = [ @@ -72,7 +83,7 @@ endif fs = import('fs') foreach cyf : cyfiles - cyfile_deps = [nanoarrow_pyx_dep, nanoarrow_dep] + cyfile_deps = [nanoarrow_dep, nanoarrow_c_dep, nanoarrow_device_c_dep] stem = fs.stem(cyf) if stem in ['_array', '_device'] @@ -104,6 +115,7 @@ py_sources = [ 'device.py', 'ipc.py', 'iterator.py', + 'nanoarrow_macros.pxd', '_repr_utils.py', 'schema.py', 'visitor.py', diff --git a/python/src/nanoarrow/nanoarrow_macros.pxd b/python/src/nanoarrow/nanoarrow_macros.pxd new file mode 100644 index 000000000..f4b398ae0 --- /dev/null +++ b/python/src/nanoarrow/nanoarrow_macros.pxd @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: language_level = 3 + +# Macro constants are not generated by autopxd2 +# https://github.com/elijahr/python-autopxd2/issues/37 + +from libc.stdint cimport int32_t + +cdef extern from "nanoarrow/nanoarrow.h" nogil: + + cdef const int NANOARROW_OK + cdef const int NANOARROW_MAX_FIXED_BUFFERS + cdef const int ARROW_FLAG_DICTIONARY_ORDERED + cdef const int ARROW_FLAG_NULLABLE + cdef const int ARROW_FLAG_MAP_KEYS_SORTED + + +cdef extern from "nanoarrow/nanoarrow_device.h" nogil: + + ctypedef int32_t ArrowDeviceType + + cdef ArrowDeviceType ARROW_DEVICE_CPU + cdef ArrowDeviceType ARROW_DEVICE_CUDA + cdef ArrowDeviceType ARROW_DEVICE_CUDA_HOST + cdef ArrowDeviceType ARROW_DEVICE_OPENCL + cdef ArrowDeviceType ARROW_DEVICE_VULKAN + cdef ArrowDeviceType ARROW_DEVICE_METAL + cdef ArrowDeviceType ARROW_DEVICE_VPI + cdef ArrowDeviceType ARROW_DEVICE_ROCM + cdef ArrowDeviceType ARROW_DEVICE_ROCM_HOST + cdef ArrowDeviceType ARROW_DEVICE_EXT_DEV + cdef ArrowDeviceType ARROW_DEVICE_CUDA_MANAGED + cdef ArrowDeviceType ARROW_DEVICE_ONEAPI + cdef ArrowDeviceType ARROW_DEVICE_WEBGPU + cdef ArrowDeviceType ARROW_DEVICE_HEXAGON