From fe17e09caeebefab103dac0913d73130676c3420 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 26 Sep 2019 13:51:16 -0500 Subject: [PATCH] Implemented ListArray and ListOffsetArray's __getitem__. (#11) * Start by refreshing RawArray class. * Reinstate the RawArray tests. * RawArray now has the infrastructure for getitem. * Maybe I shouldn't be working on RawArray; I don't see how this will fit into ListOffsetArray. * Work on ListArray instead. * Cleaned up setid. * Ready to work on ListArray. * More cleaning up; working on making getitem universal. * Cleaned up a lot of duplication in pyawkward.cpp. * Rename Content::get (and others) to ::getitem_at and ::slice to ::getitem_range. * Start on ListArray tests. * Tested ListArray::getitem_at and ListArray::getitem_range. * Ready to work on ListArray::getitem_next. * Implemented basic (not entirely correct) ListArray::getitem for SliceArray. * more correct * Very nearly have recursive ListArray::getitem((array, array)). * it works * Split ListArray::getitem(array) into advanced and non-advanced cases. * [skip ci] calling NumpyArray::getitem_next from getitem_next isn't looking promising * It looks like NumpyArray::getitem_next(3 args) can be a simple call to NumpyArray::getitem_next(6 args) * ListArray::getitem_next slice and array tests work; need to clean up. * Cleaned up. * Solved ListArray::getitem_next(SliceAt). * Cleaned up ListArray::getitem_next(SliceAt). * ListArray::getitem_next(SliceEllipsis), but SliceNewAxis will have to wait for RegularArray. * ListOffsetArray can do everything ListArray can do. * Started converting cases that create ListArrays into creating ListOffsetArrays. * Continuing to convert cases that create ListArrays into creating ListOffsetArrays. * ListArray and ListOffsetArray now share an entry getitem_next. * Fix problems in compilation. * Fix more problems in compilation. * If *not* py27. * Implemented a new setid for ListOffsetArray. * Finished up PR #11. 
* Fix warnings on Windows and MacOS. --- .gitignore | 3 + CMakeLists.txt | 2 +- README.md | 1 + VERSION_INFO | 2 +- awkward1/__init__.py | 2 +- awkward1/operations/{format.py => convert.py} | 3 +- awkward1/util.py | 11 + include/awkward/Content.h | 10 +- include/awkward/Identity.h | 10 +- include/awkward/Index.h | 6 +- include/awkward/Iterator.h | 2 +- include/awkward/ListArray.h | 50 + include/awkward/ListOffsetArray.h | 12 +- include/awkward/NumpyArray.h | 10 +- include/awkward/RawArray.h | 139 ++- include/awkward/Slice.h | 3 +- include/awkward/cpu-kernels/getitem.h | 30 +- include/awkward/cpu-kernels/identity.h | 13 +- include/awkward/cpu-kernels/util.h | 9 +- src/cpu-kernels/getitem.cpp | 212 +++- src/cpu-kernels/identity.cpp | 52 +- src/libawkward/Content.cpp | 48 +- src/libawkward/Identity.cpp | 164 +-- src/libawkward/Index.cpp | 110 +- src/libawkward/Iterator.cpp | 22 +- src/libawkward/ListArray.cpp | 524 ++++++++ src/libawkward/ListOffsetArray.cpp | 567 ++++++++- src/libawkward/NumpyArray.cpp | 1091 +++++++++-------- src/libawkward/Slice.cpp | 412 ++++--- src/pyawkward.cpp | 343 +++--- tests/test_PR10_rawarray_getitem.cpp | 58 + tests/test_PR11_listarray.py | 149 +++ tests/test_PR8_rawarray_and_slices.cpp | 38 - 33 files changed, 2872 insertions(+), 1236 deletions(-) rename awkward1/operations/{format.py => convert.py} (86%) create mode 100644 awkward1/util.py create mode 100644 include/awkward/ListArray.h create mode 100644 src/libawkward/ListArray.cpp create mode 100644 tests/test_PR10_rawarray_getitem.cpp create mode 100644 tests/test_PR11_listarray.py delete mode 100644 tests/test_PR8_rawarray_and_slices.cpp diff --git a/.gitignore b/.gitignore index 3c10430216..a9a42fbfcb 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,9 @@ docs/_build/ ############################################################# C and C++ +# ctest +Testing/ + # Prerequisites *.d diff --git a/CMakeLists.txt b/CMakeLists.txt index 533d9c3cb1..721edc2c70 100644 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,7 +48,7 @@ add_library(awkward-static STATIC $) add_library(awkward SHARED $) target_link_libraries(awkward-static PRIVATE awkward-cpu-kernels-static) target_link_libraries(awkward PRIVATE awkward-cpu-kernels-static) -addtest(test-PR8-rawarray "tests/test_PR8_rawarray_and_slices.cpp") +addtest(test-PR10 "tests/test_PR10_rawarray_getitem.cpp") pybind11_add_module(layout src/pyawkward.cpp) target_link_libraries(layout PRIVATE awkward-static) diff --git a/README.md b/README.md index 950fce1ea1..a9f8c643e2 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ The following features of awkward 0.x will be features of awkward 1.x. * 2019-09-02 (PR [#7](../../pull/7)): refactored `Index`, `Identity`, and `ListOffsetArray` (and any other array types with `Index`, which is nearly all of them) to have a 32-bit and a 64-bit version. My original plan to only support 64-bit in "chunked arrays" with 32-bit everywhere else is hereby scrapped—both bit widths will be supported on all indexes. Non-native endian, non-trivial strides, and multidimensional `Index`/`Identity` are not supported, though all of these features are allowed for `NumpyArray` (which is _content_, not an _index_). The only limitation on `NumpyArray` is that data must be C-ordered, not Fortran-ordered. * 2019-09-21 (PR [#8](../../pull/8)): C++ NumpyArray::getitem is done, setting the pattern for other classes (external C functions). The Numba and Identity extensions are not done, which would be necessary to fully set the pattern. This involved a lot of investigation (see [studies/getitem.py](https://github.com/jpivarski/awkward-1.0/blob/master/studies/getitem.py)). * 2019-09-21 (PR [#9](../../pull/9)): `Identity` is correctly passed through `NumpyArray` slices and `__getitem__` uses `get`, `slice`, or the full `getitem`, depending on argument complexity. + * 2019-09-26 (PR [#11](../../pull/11)): fully implemented `ListArray` and `ListOffsetArray`'s `__getitem__`. 
## Roadmap diff --git a/VERSION_INFO b/VERSION_INFO index b1e80bb248..845639eef2 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.1.3 +0.1.4 diff --git a/awkward1/__init__.py b/awkward1/__init__.py index 2f77eddc07..1b2e64c890 100644 --- a/awkward1/__init__.py +++ b/awkward1/__init__.py @@ -2,6 +2,6 @@ import awkward1.layout import awkward1._numba -from awkward1.operations.format import * +from awkward1.operations.convert import * __version__ = awkward1.layout.__version__ diff --git a/awkward1/operations/format.py b/awkward1/operations/convert.py similarity index 86% rename from awkward1/operations/format.py rename to awkward1/operations/convert.py index 4623d2f3bb..99750f1411 100644 --- a/awkward1/operations/format.py +++ b/awkward1/operations/convert.py @@ -4,6 +4,7 @@ import numpy +import awkward1.util import awkward1.layout def tolist(array): @@ -16,7 +17,7 @@ def tolist(array): elif isinstance(array, awkward1.layout.NumpyArray): return numpy.asarray(array).tolist() - elif isinstance(array, (awkward1.layout.ListOffsetArray32, awkward1.layout.ListOffsetArray64)): + elif isinstance(array, awkward1.util.anycontent): return [tolist(x) for x in array] else: diff --git a/awkward1/util.py b/awkward1/util.py new file mode 100644 index 0000000000..02c6898ddd --- /dev/null +++ b/awkward1/util.py @@ -0,0 +1,11 @@ +# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +import awkward1.layout + +anycontent = ( + awkward1.layout.NumpyArray, + awkward1.layout.ListArray32, + awkward1.layout.ListArray64, + awkward1.layout.ListOffsetArray32, + awkward1.layout.ListOffsetArray64, + ) diff --git a/include/awkward/Content.h b/include/awkward/Content.h index 746f1bbd29..fd6ae010ef 100644 --- a/include/awkward/Content.h +++ b/include/awkward/Content.h @@ -5,6 +5,7 @@ #include "awkward/cpu-kernels/util.h" #include "awkward/Identity.h" +#include "awkward/Slice.h" namespace awkward { class Content { @@ -15,11 +16,16 @@ namespace awkward { 
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; virtual int64_t length() const = 0; virtual const std::shared_ptr shallow_copy() const = 0; - virtual const std::shared_ptr get(int64_t at) const = 0; - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const = 0; + virtual const std::shared_ptr getitem_at(int64_t at) const = 0; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const = 0; + virtual const std::shared_ptr getitem(const Slice& where) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const = 0; + virtual const std::shared_ptr carry(const Index64& carry) const = 0; virtual const std::pair minmax_depth() const = 0; const std::string tostring() const; + const std::shared_ptr getitem_ellipsis(const Slice& tail, const Index64& advanced) const; + const std::shared_ptr getitem_newaxis(const Slice& tail, const Index64& advanced) const; }; } diff --git a/include/awkward/Identity.h b/include/awkward/Identity.h index 83170043fb..482ee2d617 100644 --- a/include/awkward/Identity.h +++ b/include/awkward/Identity.h @@ -32,10 +32,11 @@ namespace awkward { const int64_t width() const { return width_; } const int64_t length() const { return length_; } + virtual const std::shared_ptr to64() const = 0; virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const = 0; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const = 0; virtual const std::shared_ptr shallow_copy() const = 0; - virtual const std::shared_ptr getitem_carry_64(Index64& carry) const = 0; + virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const = 0; protected: const Ref ref_; @@ -57,10 +58,11 @@ namespace awkward { const std::shared_ptr ptr() 
const { return ptr_; } + virtual const std::shared_ptr to64() const; virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr shallow_copy() const; - virtual const std::shared_ptr getitem_carry_64(Index64& carry) const; + virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const; const std::string tostring() const; const std::vector get(int64_t at) const; diff --git a/include/awkward/Index.h b/include/awkward/Index.h index 0211c873ae..3293a9c717 100644 --- a/include/awkward/Index.h +++ b/include/awkward/Index.h @@ -18,7 +18,7 @@ namespace awkward { class IndexOf: public Index { public: IndexOf(int64_t length) - : ptr_(std::shared_ptr(new T[(size_t)length], awkward::util::array_deleter())) + : ptr_(std::shared_ptr(length == 0 ? 
nullptr : new T[(size_t)length], awkward::util::array_deleter())) , offset_(0) , length_(length) { } IndexOf(const std::shared_ptr ptr, int64_t offset, int64_t length) @@ -32,8 +32,8 @@ namespace awkward { const std::string tostring() const; const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - T get(int64_t at) const; - IndexOf slice(int64_t start, int64_t stop) const; + T getitem_at(int64_t at) const; + IndexOf getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr shallow_copy() const; private: diff --git a/include/awkward/Iterator.h b/include/awkward/Iterator.h index 1bdb35120f..2082c647dd 100644 --- a/include/awkward/Iterator.h +++ b/include/awkward/Iterator.h @@ -17,7 +17,7 @@ namespace awkward { const int64_t where() const { return where_; } const bool isdone() const { return where_ >= content_.get()->length(); } - const std::shared_ptr next() { return content_.get()->get(where_++); } + const std::shared_ptr next() { return content_.get()->getitem_at(where_++); } const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; const std::string tostring() const; diff --git a/include/awkward/ListArray.h b/include/awkward/ListArray.h new file mode 100644 index 0000000000..a38f9935b1 --- /dev/null +++ b/include/awkward/ListArray.h @@ -0,0 +1,50 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_LISTARRAY_H_ +#define AWKWARD_LISTARRAY_H_ + +#include + +#include "awkward/cpu-kernels/util.h" +#include "awkward/Index.h" +#include "awkward/Identity.h" +#include "awkward/Content.h" + +namespace awkward { + template + class ListArrayOf: public Content { + public: + ListArrayOf(const std::shared_ptr id, const IndexOf starts, const IndexOf stops, const std::shared_ptr content) + : id_(id) + , starts_(starts) + , stops_(stops) + , content_(content) { } + + const IndexOf starts() 
const { return starts_; } + const IndexOf stops() const { return stops_; } + const std::shared_ptr content() const { return content_.get()->shallow_copy(); } + + virtual const std::shared_ptr id() const { return id_; } + virtual void setid(); + virtual void setid(const std::shared_ptr id); + virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual int64_t length() const; + virtual const std::shared_ptr shallow_copy() const; + virtual const std::shared_ptr getitem_at(int64_t at) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr carry(const Index64& carry) const; + virtual const std::pair minmax_depth() const; + + private: + std::shared_ptr id_; + const IndexOf starts_; + const IndexOf stops_; + const std::shared_ptr content_; + }; + + typedef ListArrayOf ListArray32; + typedef ListArrayOf ListArray64; +} + +#endif // AWKWARD_LISTARRAY_H_ diff --git a/include/awkward/ListOffsetArray.h b/include/awkward/ListOffsetArray.h index a2c222e9cf..8aa7f772d4 100644 --- a/include/awkward/ListOffsetArray.h +++ b/include/awkward/ListOffsetArray.h @@ -1,7 +1,7 @@ // BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE -#ifndef AWKWARD_LISTOFFSETARRAYCONTENT_H_ -#define AWKWARD_LISTOFFSETARRAYCONTENT_H_ +#ifndef AWKWARD_LISTOFFSETARRAY_H_ +#define AWKWARD_LISTOFFSETARRAY_H_ #include @@ -28,8 +28,10 @@ namespace awkward { virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; virtual int64_t length() const; virtual const std::shared_ptr shallow_copy() const; - virtual const std::shared_ptr get(int64_t at) const; - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const; + virtual const std::shared_ptr 
getitem_at(int64_t at) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; private: @@ -42,4 +44,4 @@ namespace awkward { typedef ListOffsetArrayOf ListOffsetArray64; } -#endif // AWKWARD_LISTOFFSETARRAYCONTENT_H_ +#endif // AWKWARD_LISTOFFSETARRAY_H_ diff --git a/include/awkward/NumpyArray.h b/include/awkward/NumpyArray.h index d95850bacd..e6a5246cec 100644 --- a/include/awkward/NumpyArray.h +++ b/include/awkward/NumpyArray.h @@ -46,17 +46,19 @@ namespace awkward { virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; virtual int64_t length() const; virtual const std::shared_ptr shallow_copy() const; - virtual const std::shared_ptr get(int64_t at) const; - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_at(int64_t at) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem(const Slice& where) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; bool iscontiguous() const; void become_contiguous(); const NumpyArray contiguous() const; const NumpyArray contiguous_next(Index64 bytepos) const; - const std::shared_ptr getitem(const Slice& slice) const; const NumpyArray getitem_bystrides(const std::shared_ptr& head, const Slice& tail, int64_t length) const; - const NumpyArray getitem_next(const std::shared_ptr head, const Slice& tail, Index64& carry, Index64& advanced, int64_t length, int64_t stride) const; + const 
NumpyArray getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride) const; private: std::shared_ptr id_; diff --git a/include/awkward/RawArray.h b/include/awkward/RawArray.h index 69d53070f3..358269a751 100644 --- a/include/awkward/RawArray.h +++ b/include/awkward/RawArray.h @@ -14,6 +14,7 @@ #include "awkward/cpu-kernels/util.h" #include "awkward/cpu-kernels/identity.h" +#include "awkward/cpu-kernels/getitem.h" #include "awkward/util.h" #include "awkward/Slice.h" #include "awkward/Content.h" @@ -22,13 +23,13 @@ namespace awkward { template class RawArrayOf: public Content { public: - RawArrayOf(const std::shared_ptr id, const std::shared_ptr ptr, const int64_t offset, const int64_t length, const int64_t stride) + RawArrayOf(const std::shared_ptr id, const std::shared_ptr ptr, const int64_t offset, const int64_t length, const int64_t itemsize) : id_(id) , ptr_(ptr) , offset_(offset) , length_(length) - , stride_(stride) { - assert(sizeof(T) <= stride); + , itemsize_(itemsize) { + assert(sizeof(T) == itemsize); } RawArrayOf(const std::shared_ptr id, const std::shared_ptr ptr, const int64_t length) @@ -36,37 +37,51 @@ namespace awkward { , ptr_(ptr) , offset_(0) , length_(length) - , stride_(sizeof(T)) { } + , itemsize_(sizeof(T)) { } RawArrayOf(const std::shared_ptr id, const int64_t length) : id_(id) , ptr_(std::shared_ptr(new T[(size_t)length], awkward::util::array_deleter())) , offset_(0) , length_(length) - , stride_(sizeof(T)) { } + , itemsize_(sizeof(T)) { } const std::shared_ptr ptr() const { return ptr_; } const int64_t offset() const { return offset_; } - const int64_t stride() const { return stride_; } + const int64_t itemsize() const { return itemsize_; } bool isempty() const { return length_ == 0; } - bool iscompact() const { return sizeof(T) == stride_; } - ssize_t byteoffset() const { return (ssize_t)stride_*(ssize_t)offset_; } - void* byteptr() const { return 
reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset()); } - ssize_t bytelength() const { return (ssize_t)stride_*(ssize_t)length_; } + ssize_t byteoffset() const { return (ssize_t)itemsize_*(ssize_t)offset_; } + uint8_t* byteptr() const { return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset()); } + ssize_t bytelength() const { return (ssize_t)itemsize_*(ssize_t)length_; } uint8_t getbyte(ssize_t at) const { return *reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)(byteoffset() + at)); } + T* borrow() const { return borrow(0); } + T* borrow(int64_t at) const { return reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)itemsize_*(ssize_t)(offset_ + at)); } virtual const std::shared_ptr id() const { return id_; } virtual void setid() { - Identity32* id32 = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(id32); - awkward_identity_new32(length(), id32->ptr().get()); - setid(newid); + if (length() <= kMaxInt32) { + Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity32(rawid->ptr().get(), length()); + setid(newid); + } + else { + Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity64(rawid->ptr().get(), length()); + setid(newid); + } + } + virtual void setid(const std::shared_ptr id) { + if (id.get() != nullptr && length() != id.get()->length()) { + throw std::invalid_argument("content and its id must have the same length"); + } + id_ = id; } - virtual void setid(const std::shared_ptr id) { id_ = id; } virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const { std::stringstream out; - out << indent << pre << " shallow_copy() const { return std::shared_ptr(new RawArrayOf(id_, ptr_, offset_, length_, stride_)); } - virtual const std::shared_ptr 
get(int64_t at) const { return slice(at, at + 1); } - virtual const std::shared_ptr slice(int64_t start, int64_t stop) const { + virtual const std::shared_ptr shallow_copy() const { return std::shared_ptr(new RawArrayOf(id_, ptr_, offset_, length_, itemsize_)); } + virtual const std::shared_ptr getitem_at(int64_t at) const { return getitem_range(at, at + 1); } + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const { + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), length_); std::shared_ptr id(nullptr); if (id_.get() != nullptr) { - id = id_.get()->slice(start, stop); + if (regular_stop > id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->getitem_range(regular_start, regular_stop); } - return std::shared_ptr(new RawArrayOf(id, ptr_, offset_ + start, stop - start, stride_)); + return std::shared_ptr(new RawArrayOf(id, ptr_, offset_ + regular_start, regular_stop - regular_start, itemsize_)); } - virtual const std::pair minmax_depth() const { return std::pair(1, 1); } + virtual const std::shared_ptr getitem(const Slice& where) const { + std::shared_ptr nexthead = where.head(); + Slice nexttail = where.tail(); + Index64 nextadvanced(0); + return getitem_next(nexthead, nexttail, nextadvanced); + } + const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + if (tail.length() != 0) { + throw std::invalid_argument("too many indexes for array"); + } + + if (head.get() == nullptr) { + throw std::runtime_error("null"); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + return getitem_at(at->at()); + } + + else if (SliceRange* range = dynamic_cast(head.get())) { + if (range->step() == Slice::none() || range->step() == 1) { + return getitem_range(range->start(), range->stop()); + } + else { + int64_t 
start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + else if (step == 0) { + throw std::invalid_argument("slice step must not be 0"); + } + awkward_regularize_rangeslice(start, stop, step > 0, start != Slice::none(), stop != Slice::none(), length_); - T* borrow(int64_t at) const { return reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)stride_*(ssize_t)(offset_ + at)); } + throw std::runtime_error("stop here for now"); + + + + // int64_t regular_start = start; + // int64_t regular_stop = stop; + // awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), length_); + // std::shared_ptr id(nullptr); + // if (id_.get() != nullptr) { + // if (regular_stop > id_.get()->length()) { + // throw std::invalid_argument("index out of range for identity"); + // } + // id = id_.get()->slice(regular_start, regular_stop); + // } + // return std::shared_ptr(new RawArrayOf(id, ptr_, offset_ + regular_start, regular_stop - regular_start, itemsize_)); + } + } + + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + throw std::runtime_error("ellipsis"); + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + throw std::runtime_error("newaxis"); + } + + else if (SliceArray64* array = dynamic_cast(head.get())) { + throw std::runtime_error("array"); + } + + else { + throw std::runtime_error("unrecognized slice item type"); + } + } + virtual const std::shared_ptr carry(const Index64& carry) const { + throw std::runtime_error("RawArray::carry"); + } + virtual const std::pair minmax_depth() const { return std::pair(1, 1); } private: std::shared_ptr id_; const std::shared_ptr ptr_; const int64_t offset_; const int64_t length_; - const int64_t stride_; + const int64_t itemsize_; }; } diff --git a/include/awkward/Slice.h b/include/awkward/Slice.h index 85ee09daf2..ea9ea85678 100644 --- a/include/awkward/Slice.h +++ 
b/include/awkward/Slice.h @@ -14,7 +14,7 @@ namespace awkward { class SliceItem { public: - static int64_t none() { return kMaxInt64 + 1; } + static int64_t none() { return kSliceNone; } virtual const std::string tostring() const = 0; }; @@ -31,7 +31,6 @@ namespace awkward { class SliceRange: public SliceItem { public: SliceRange(int64_t start, int64_t stop, int64_t step): start_(start), stop_(stop), step_(step) { - assert(step_ != none()); assert(step_ != 0); } int64_t start() const { return start_; } diff --git a/include/awkward/cpu-kernels/getitem.h b/include/awkward/cpu-kernels/getitem.h index 78e1a1b299..4ff95b397a 100644 --- a/include/awkward/cpu-kernels/getitem.h +++ b/include/awkward/cpu-kernels/getitem.h @@ -11,6 +11,8 @@ extern "C" { void awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides); + void awkward_carry_arange_64(int64_t* toptr, int64_t length); + Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); @@ -19,10 +21,34 @@ extern "C" { void awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride); void awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); void awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at); - void awkward_numpyarray_getitem_next_slice_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); - void 
awkward_numpyarray_getitem_next_slice_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); + void awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); + void awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); void awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip); void awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip); + + Error awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); + Error awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at); + + void awkward_listarray32_getitem_next_range_carrylength(int64_t& carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + void awkward_listarray64_getitem_next_range_carrylength(int64_t& carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + + void awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, 
const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + void awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step); + + void awkward_listarray32_getitem_next_range_counts_64(int64_t& total, const int32_t* fromoffsets, int64_t lenstarts); + void awkward_listarray64_getitem_next_range_counts_64(int64_t& total, const int64_t* fromoffsets, int64_t lenstarts); + + void awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, int64_t lenstarts); + void awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts); + + Error awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + Error awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + + Error awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + Error awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const 
int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent); + + void awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lencarry); + void awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lencarry); } #endif // AWKWARDCPU_GETITEM_H_ diff --git a/include/awkward/cpu-kernels/identity.h b/include/awkward/cpu-kernels/identity.h index 064b1a71d5..f7f37d1d84 100644 --- a/include/awkward/cpu-kernels/identity.h +++ b/include/awkward/cpu-kernels/identity.h @@ -6,11 +6,14 @@ #include "awkward/cpu-kernels/util.h" extern "C" { - void awkward_identity_new32(int64_t length, int32_t* to); - void awkward_identity_new64(int64_t length, int32_t* to); - void awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to); - void awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to); - void awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to); + void awkward_new_identity32(int32_t* toptr, int64_t length); + void awkward_new_identity64(int64_t* toptr, int64_t length); + + void awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length); + + Error awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); + Error awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const 
int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); + Error awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); } #endif // AWKWARDCPU_IDENTITY_H_ diff --git a/include/awkward/cpu-kernels/util.h b/include/awkward/cpu-kernels/util.h index 99130e7255..36f560dc12 100644 --- a/include/awkward/cpu-kernels/util.h +++ b/include/awkward/cpu-kernels/util.h @@ -24,10 +24,11 @@ extern "C" { typedef const char* Error; const Error kNoError = nullptr; - const int8_t kMaxInt8 = 127; // 2**7 - 1 - const uint8_t kMaxUInt8 = 255; // 2**8 - 1 - const int32_t kMaxInt32 = 2147483647; // 2**31 - 1 - const int64_t kMaxInt64 = 9223372036854775806; // 2**63 - 2: kMaxInt64 + 1 is Slice::none() + const int8_t kMaxInt8 = 127; // 2**7 - 1 + const uint8_t kMaxUInt8 = 255; // 2**8 - 1 + const int32_t kMaxInt32 = 2147483647; // 2**31 - 1 + const int64_t kMaxInt64 = 9223372036854775806; // 2**63 - 2: see below + const int64_t kSliceNone = kMaxInt64 + 1; // reserved for Slice::none() } #endif // AWKWARDCPU_UTIL_H_ diff --git a/src/cpu-kernels/getitem.cpp b/src/cpu-kernels/getitem.cpp index 06603cc4f2..59e2363d49 100644 --- a/src/cpu-kernels/getitem.cpp +++ b/src/cpu-kernels/getitem.cpp @@ -65,6 +65,16 @@ void awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t awkward_slicearray_ravel(toptr, fromptr, ndim, shape, strides); } +template +void awkward_carry_arange(T* toptr, int64_t length) { + for (int64_t i = 0; i < length; i++) { + toptr[i] = i; + } +} +void awkward_carry_arange_64(int64_t* toptr, int64_t length) { + awkward_carry_arange(toptr, length); +} + template Error awkward_identity_getitem_carry(ID* newidentityptr, const ID* identityptr, const T* 
carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { for (int64_t i = 0; i < lencarry; i++) { @@ -137,19 +147,19 @@ void awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* } template -void awkward_numpyarray_getitem_next_slice(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +void awkward_numpyarray_getitem_next_range(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { for (int64_t i = 0; i < lencarry; i++) { for (int64_t j = 0; j < lenhead; j++) { nextcarryptr[i*lenhead + j] = skip*carryptr[i] + start + j*step; } } } -void awkward_numpyarray_getitem_next_slice_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { - awkward_numpyarray_getitem_next_slice(nextcarryptr, carryptr, lencarry, lenhead, skip, start, step); +void awkward_numpyarray_getitem_next_range_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { + awkward_numpyarray_getitem_next_range(nextcarryptr, carryptr, lencarry, lenhead, skip, start, step); } template -void awkward_numpyarray_getitem_next_slice_advanced(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { +void awkward_numpyarray_getitem_next_range_advanced(T* nextcarryptr, T* nextadvancedptr, const T* carryptr, const T* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { for (int64_t i = 0; i < lencarry; i++) { for (int64_t j = 0; j < lenhead; j++) { nextcarryptr[i*lenhead + j] = skip*carryptr[i] + start + j*step; @@ -157,8 +167,8 @@ void awkward_numpyarray_getitem_next_slice_advanced(T* nextcarryptr, T* nextadva } } } -void 
awkward_numpyarray_getitem_next_slice_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { - awkward_numpyarray_getitem_next_slice_advanced(nextcarryptr, nextadvancedptr, carryptr, advancedptr, lencarry, lenhead, skip, start, step); +void awkward_numpyarray_getitem_next_range_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { + awkward_numpyarray_getitem_next_range_advanced(nextcarryptr, nextadvancedptr, carryptr, advancedptr, lencarry, lenhead, skip, start, step); } template @@ -183,3 +193,193 @@ void awkward_numpyarray_getitem_next_array_advanced(T* nextcarryptr, const T* ca void awkward_numpyarray_getitem_next_array_advanced_64(int64_t* nextcarryptr, const int64_t* carryptr, const int64_t* advancedptr, const int64_t* flatheadptr, int64_t lencarry, int64_t skip) { awkward_numpyarray_getitem_next_array_advanced(nextcarryptr, carryptr, advancedptr, flatheadptr, lencarry, skip); } + +template +Error awkward_listarray_getitem_next_at(T* tocarry, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { + for (int64_t i = 0; i < lenstarts; i++) { + int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; + int64_t regular_at = at; + if (regular_at < 0) { + regular_at += length; + } + if (!(0 <= regular_at && regular_at < length)) { + return "index out of range"; + } + tocarry[i] = fromstarts[startsoffset + i] + regular_at; + } + return kNoError; +} +Error awkward_listarray32_getitem_next_at_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { + return awkward_listarray_getitem_next_at(tocarry, fromstarts, fromstops, 
lenstarts, startsoffset, stopsoffset, at); +} +Error awkward_listarray64_getitem_next_at_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t at) { + return awkward_listarray_getitem_next_at(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, at); +} + +template +void awkward_listarray_getitem_next_range_carrylength(int64_t& carrylength, const C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + carrylength = 0; + for (int64_t i = 0; i < lenstarts; i++) { + int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, step > 0, start != kSliceNone, stop != kSliceNone, length); + if (step > 0) { + for (int64_t j = regular_start; j < regular_stop; j += step) { + carrylength++; + } + } + else { + for (int64_t j = regular_start; j > regular_stop; j += step) { + carrylength++; + } + } + } +} +void awkward_listarray32_getitem_next_range_carrylength(int64_t& carrylength, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + awkward_listarray_getitem_next_range_carrylength(carrylength, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); +} +void awkward_listarray64_getitem_next_range_carrylength(int64_t& carrylength, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + awkward_listarray_getitem_next_range_carrylength(carrylength, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); +} + + +template +void awkward_listarray_getitem_next_range(C* tooffsets, T* tocarry, const 
C* fromstarts, const C* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + int64_t k = 0; + tooffsets[0] = 0; + for (int64_t i = 0; i < lenstarts; i++) { + int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, step > 0, start != kSliceNone, stop != kSliceNone, length); + if (step > 0) { + for (int64_t j = regular_start; j < regular_stop; j += step) { + tocarry[k] = fromstarts[startsoffset + i] + j; + k++; + } + } + else { + for (int64_t j = regular_start; j > regular_stop; j += step) { + tocarry[k] = fromstarts[startsoffset + i] + j; + k++; + } + } + tooffsets[i + 1] = (C)k; + } +} +void awkward_listarray32_getitem_next_range_64(int32_t* tooffsets, int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + awkward_listarray_getitem_next_range(tooffsets, tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); +} +void awkward_listarray64_getitem_next_range_64(int64_t* tooffsets, int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset, int64_t start, int64_t stop, int64_t step) { + awkward_listarray_getitem_next_range(tooffsets, tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset, start, stop, step); +} + +template +void awkward_listarray_getitem_next_range_counts(int64_t& total, const C* fromoffsets, int64_t lenstarts) { + total = 0; + for (int64_t i = 0; i < lenstarts; i++) { + total += fromoffsets[i + 1] - fromoffsets[i]; + } +} +void awkward_listarray32_getitem_next_range_counts_64(int64_t& total, const int32_t* fromoffsets, int64_t lenstarts) { + awkward_listarray_getitem_next_range_counts(total, 
fromoffsets, lenstarts); +} +void awkward_listarray64_getitem_next_range_counts_64(int64_t& total, const int64_t* fromoffsets, int64_t lenstarts) { + awkward_listarray_getitem_next_range_counts(total, fromoffsets, lenstarts); +} + +template +void awkward_listarray_getitem_next_range_spreadadvanced(T* toadvanced, const T* fromadvanced, const C* fromoffsets, int64_t lenstarts) { + for (int64_t i = 0; i < lenstarts; i++) { + C count = fromoffsets[i + 1] - fromoffsets[i]; + for (int64_t j = 0; j < count; j++) { + toadvanced[fromoffsets[i] + j] = fromadvanced[i]; + } + } +} +void awkward_listarray32_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int32_t* fromoffsets, int64_t lenstarts) { + awkward_listarray_getitem_next_range_spreadadvanced(toadvanced, fromadvanced, fromoffsets, lenstarts); +} +void awkward_listarray64_getitem_next_range_spreadadvanced_64(int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromoffsets, int64_t lenstarts) { + awkward_listarray_getitem_next_range_spreadadvanced(toadvanced, fromadvanced, fromoffsets, lenstarts); +} + +template +Error awkward_listarray_getitem_next_array(C* tooffsets, T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + tooffsets[0] = 0; + for (int64_t i = 0; i < lenstarts; i++) { + if (fromstops[stopsoffset + i] < fromstarts[startsoffset + i]) { + return "stops[i] < starts[i]"; + } + if (fromstarts[startsoffset + i] != fromstops[stopsoffset + i] && fromstops[stopsoffset + i] > lencontent) { + return "stops[i] > len(content)"; + } + int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; + for (int64_t j = 0; j < lenarray; j++) { + int64_t regular_at = fromarray[j]; + if (regular_at < 0) { + regular_at += length; + } + if (!(0 <= regular_at && regular_at < length)) { + return "array[i] is out of range for at 
least one sublist"; + } + tocarry[i*lenarray + j] = fromstarts[startsoffset + i] + regular_at; + toadvanced[i*lenarray + j] = j; + } + tooffsets[i + 1] = (C)((i + 1)*lenarray); + } + return kNoError; +} +Error awkward_listarray32_getitem_next_array_64(int32_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + return awkward_listarray_getitem_next_array(tooffsets, tocarry, toadvanced, fromstarts, fromstops, fromarray, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); +} +Error awkward_listarray64_getitem_next_array_64(int64_t* tooffsets, int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + return awkward_listarray_getitem_next_array(tooffsets, tocarry, toadvanced, fromstarts, fromstops, fromarray, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); +} + +template +Error awkward_listarray_getitem_next_array_advanced(T* tocarry, T* toadvanced, const C* fromstarts, const C* fromstops, const T* fromarray, const T* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + for (int64_t i = 0; i < lenstarts; i++) { + if (fromstops[stopsoffset + i] < fromstarts[startsoffset + i]) { + return "stops[i] < starts[i]"; + } + if (fromstarts[startsoffset + i] != fromstops[stopsoffset + i] && fromstops[stopsoffset + i] > lencontent) { + return "stops[i] > len(content)"; + } + int64_t length = fromstops[stopsoffset + i] - fromstarts[startsoffset + i]; + if (fromadvanced[i] >= lenarray) { + return "lengths of advanced indexes must match"; + } + int64_t regular_at = fromarray[fromadvanced[i]]; + if (regular_at < 0) { + regular_at += length; + } + if (!(0 <= 
regular_at && regular_at < length)) { + return "array[i] is out of range for at least one sublist"; + } + tocarry[i] = fromstarts[startsoffset + i] + regular_at; + toadvanced[i] = i; + } + return kNoError; +} +Error awkward_listarray32_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + return awkward_listarray_getitem_next_array_advanced(tocarry, toadvanced, fromstarts, fromstops, fromarray, fromadvanced, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); +} +Error awkward_listarray64_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent) { + return awkward_listarray_getitem_next_array_advanced(tocarry, toadvanced, fromstarts, fromstops, fromarray, fromadvanced, startsoffset, stopsoffset, lenstarts, lenarray, lencontent); +} + +template +void awkward_listarray_getitem_carry(C* tostarts, C* tostops, const C* fromstarts, const C* fromstops, const T* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lencarry) { + for (int64_t i = 0; i < lencarry; i++) { + tostarts[i] = (C)(fromstarts[startsoffset + fromcarry[i]]); + tostops[i] = (C)(fromstops[stopsoffset + fromcarry[i]]); + } +} +void awkward_listarray32_getitem_carry_64(int32_t* tostarts, int32_t* tostops, const int32_t* fromstarts, const int32_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lencarry) { + awkward_listarray_getitem_carry(tostarts, tostops, fromstarts, fromstops, fromcarry, startsoffset, stopsoffset, lencarry); +} +void awkward_listarray64_getitem_carry_64(int64_t* tostarts, int64_t* tostops, 
const int64_t* fromstarts, const int64_t* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lencarry) { + awkward_listarray_getitem_carry(tostarts, tostops, fromstarts, fromstops, fromcarry, startsoffset, stopsoffset, lencarry); +} diff --git a/src/cpu-kernels/identity.cpp b/src/cpu-kernels/identity.cpp index e76377c39b..d33468eea4 100644 --- a/src/cpu-kernels/identity.cpp +++ b/src/cpu-kernels/identity.cpp @@ -3,40 +3,50 @@ #include "awkward/cpu-kernels/identity.h" template -void awkward_identity_new(int64_t length, T* to) { +void awkward_new_identity(T* toptr, int64_t length) { for (T i = 0; i < length; i++) { - to[i] = i; + toptr[i] = i; } } -void awkward_identity_new32(int64_t length, int32_t* to) { - awkward_identity_new(length, to); +void awkward_new_identity32(int32_t* toptr, int64_t length) { + awkward_new_identity(toptr, length); } -void awkward_identity_new64(int64_t length, int64_t* to) { - awkward_identity_new(length, to); +void awkward_new_identity64(int64_t* toptr, int64_t length) { + awkward_new_identity(toptr, length); } -void awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to) { +void awkward_identity32_to_identity64(int64_t* toptr, const int32_t* fromptr, int64_t length) { for (int64_t i = 0; i < length; i++) { - to[i]= (int64_t)from[i]; + toptr[i]= (int64_t)fromptr[i]; } } -template -void awkward_identity_from_listfoffsets(int64_t length, int64_t width, T* offsets, T* from, int64_t tolength, T* to) { - int64_t k = 0; - for (int64_t i = 0; i < length; i++) { - for (T subi = 0; subi < offsets[i + 1] - offsets[i]; subi++) { - for (int64_t j = 0; j < width; j++) { - to[(width + 1)*k + j] = from[(width)*i + j]; +template +Error awkward_identity_from_listarray(ID* toptr, const ID* fromptr, const T* fromstarts, const T* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { + for (int64_t k = 0; k < tolength*(fromwidth + 
1); k++) { + toptr[k] = -1; + } + for (int64_t i = 0; i < fromlength; i++) { + int64_t start = fromstarts[startsoffset + i]; + int64_t stop = fromstops[stopsoffset + i]; + if (start != stop && stop > tolength) { + return "max(stop) > len(content)"; + } + for (int64_t j = start; j < stop; j++) { + for (int64_t k = 0; k < fromwidth; k++) { + toptr[j*(fromwidth + 1) + k] = fromptr[fromptroffset + i*(fromwidth) + k]; } - to[(width + 1)*k + width] = subi; - k++; + toptr[j*(fromwidth + 1) + fromwidth] = ID(j - start); } } + return kNoError; +} +Error awkward_identity32_from_listarray32(int32_t* toptr, const int32_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { + return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } -void awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to) { - awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); +Error awkward_identity64_from_listarray32(int64_t* toptr, const int64_t* fromptr, const int32_t* fromstarts, const int32_t* fromstops, int64_t fromptroffset, int64_t startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { + return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } -void awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to) { - awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); +Error awkward_identity64_from_listarray64(int64_t* toptr, const int64_t* fromptr, const int64_t* fromstarts, const int64_t* fromstops, int64_t fromptroffset, int64_t 
startsoffset, int64_t stopsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { + return awkward_identity_from_listarray(toptr, fromptr, fromstarts, fromstops, fromptroffset, startsoffset, stopsoffset, tolength, fromlength, fromwidth); } diff --git a/src/libawkward/Content.cpp b/src/libawkward/Content.cpp index 3518f23bec..aca705dfca 100644 --- a/src/libawkward/Content.cpp +++ b/src/libawkward/Content.cpp @@ -1,9 +1,51 @@ // BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE +#include "awkward/ListOffsetArray.h" + #include "awkward/Content.h" -using namespace awkward; +namespace awkward { + const std::string Content::tostring() const { + return tostring_part("", "", ""); + } + + const std::shared_ptr Content::getitem(const Slice& where) const { + Index64 nextoffsets(2); + nextoffsets.ptr().get()[0] = 0; + nextoffsets.ptr().get()[1] = length(); + ListOffsetArrayOf next(std::shared_ptr(nullptr), nextoffsets, shallow_copy()); + + std::shared_ptr nexthead = where.head(); + Slice nexttail = where.tail(); + Index64 nextadvanced(0); + std::shared_ptr out = next.getitem_next(nexthead, nexttail, nextadvanced); + return out.get()->getitem_at(0); + } + + const std::shared_ptr Content::getitem_ellipsis(const Slice& tail, const Index64& advanced) const { + std::pair minmax = minmax_depth(); + int64_t mindepth = minmax.first; + int64_t maxdepth = minmax.second; + + if (tail.length() == 0 || (mindepth - 1 == tail.dimlength() && maxdepth - 1 == tail.dimlength())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_next(nexthead, nexttail, advanced); + } + else if (mindepth - 1 == tail.dimlength() || maxdepth - 1 == tail.dimlength()) { + throw std::invalid_argument("ellipsis (...) 
can't be used on a data structure of different depths"); + } + else { + std::vector> tailitems = tail.items(); + std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + items.insert(items.end(), tailitems.begin(), tailitems.end()); + std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + Slice nexttail(items, true); + return getitem_next(nexthead, nexttail, advanced); + } + } -const std::string Content::tostring() const { - return tostring_part("", "", ""); + const std::shared_ptr Content::getitem_newaxis(const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("FIXME: insert a RegularArray of 1 here"); + } } diff --git a/src/libawkward/Identity.cpp b/src/libawkward/Identity.cpp index f520714a45..25f46e4d4a 100644 --- a/src/libawkward/Identity.cpp +++ b/src/libawkward/Identity.cpp @@ -6,99 +6,113 @@ #include #include +#include "awkward/cpu-kernels/identity.h" #include "awkward/cpu-kernels/getitem.h" #include "awkward/Identity.h" -using namespace awkward; - -std::atomic numrefs{0}; - -Identity::Ref Identity::newref() { - return numrefs++; -} +namespace awkward { + std::atomic numrefs{0}; -template -const std::string IdentityOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { - std::stringstream out; - std::string name = "Unrecognized Identity"; - if (std::is_same::value) { - name = "Identity32"; + Identity::Ref Identity::newref() { + return numrefs++; } - else if (std::is_same::value) { - name = "Identity64"; + + template + const std::shared_ptr IdentityOf::to64() const { + if (std::is_same::value) { + return shallow_copy(); + } + else if (std::is_same::value) { + Identity64* raw = new Identity64(ref_, fieldloc_, width_, length_); + std::shared_ptr out(raw); + awkward_identity32_to_identity64(raw->ptr().get(), reinterpret_cast(ptr_.get()), length_); + return out; + } } - out << indent << pre << "<" << name << " ref=\"" << ref() << "\" fieldloc=\"["; - for (size_t i = 
0; i < fieldloc().size(); i++) { - if (i != 0) { - out << " "; + + template + const std::string IdentityOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + std::string name = "Unrecognized Identity"; + if (std::is_same::value) { + name = "Identity32"; + } + else if (std::is_same::value) { + name = "Identity64"; } - out << "(" << fieldloc()[i].first << ", '" << fieldloc()[i].second << "')"; + out << indent << pre << "<" << name << " ref=\"" << ref() << "\" fieldloc=\"["; + for (size_t i = 0; i < fieldloc().size(); i++) { + if (i != 0) { + out << " "; + } + out << "(" << fieldloc()[i].first << ", '" << fieldloc()[i].second << "')"; + } + out << "]\" width=\"" << width() << "\" length=\"" << length() << "\" at=\"0x"; + out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()) << "\"/>" << post; + return out.str(); } - out << "]\" width=\"" << width() << "\" length=\"" << length() << "\" at=\"0x"; - out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()) << "\"/>" << post; - return out.str(); -} -template -const std::string IdentityOf::tostring() const { - return tostring_part("", "", ""); -} + template + const std::string IdentityOf::tostring() const { + return tostring_part("", "", ""); + } -template -const std::shared_ptr IdentityOf::slice(int64_t start, int64_t stop) const { - return std::shared_ptr(new IdentityOf(ref(), fieldloc(), offset() + width()*start*(start != stop), width(), (stop - start), ptr_)); -} + template + const std::shared_ptr IdentityOf::getitem_range(int64_t start, int64_t stop) const { + assert(0 <= start && start < length_ && 0 <= stop && stop < length_); + return std::shared_ptr(new IdentityOf(ref_, fieldloc_, offset_ + width_*start*(start != stop), width_, (stop - start), ptr_)); + } -template -const std::shared_ptr IdentityOf::shallow_copy() const { - return std::shared_ptr(new IdentityOf(ref(), fieldloc(), 
offset(), width(), length(), ptr_)); -} + template + const std::shared_ptr IdentityOf::shallow_copy() const { + return std::shared_ptr(new IdentityOf(ref_, fieldloc_, offset_, width_, length_, ptr_)); + } -template -const std::shared_ptr IdentityOf::getitem_carry_64(Index64& carry) const { - IdentityOf* rawout = new IdentityOf(ref_, fieldloc_, width_, carry.length()); - std::shared_ptr out(rawout); + template + const std::shared_ptr IdentityOf::getitem_carry_64(const Index64& carry) const { + IdentityOf* rawout = new IdentityOf(ref_, fieldloc_, width_, carry.length()); + std::shared_ptr out(rawout); - Error assign_err = kNoError; - if (std::is_same::value) { - assign_err = awkward_identity32_getitem_carry_64( - reinterpret_cast(rawout->ptr().get()), - reinterpret_cast(ptr_.get()), - carry.ptr().get(), - carry.length(), - offset_, - width_, - length_); - } - else if (std::is_same::value) { - assign_err = awkward_identity64_getitem_carry_64( - reinterpret_cast(rawout->ptr().get()), - reinterpret_cast(ptr_.get()), - carry.ptr().get(), - carry.length(), - offset_, - width_, - length_); - } - else { - throw std::runtime_error("unrecognized identity"); - } - HANDLE_ERROR(assign_err) + Error assign_err = kNoError; + if (std::is_same::value) { + assign_err = awkward_identity32_getitem_carry_64( + reinterpret_cast(rawout->ptr().get()), + reinterpret_cast(ptr_.get()), + carry.ptr().get(), + carry.length(), + offset_, + width_, + length_); + } + else if (std::is_same::value) { + assign_err = awkward_identity64_getitem_carry_64( + reinterpret_cast(rawout->ptr().get()), + reinterpret_cast(ptr_.get()), + carry.ptr().get(), + carry.length(), + offset_, + width_, + length_); + } + else { + throw std::runtime_error("unrecognized Identity specialization"); + } + HANDLE_ERROR(assign_err) - return out; -} + return out; + } -template -const std::vector IdentityOf::get(int64_t at) const { - std::vector out; - for (size_t i = (size_t)(offset() + at); i < (size_t)(offset() + at + 
width()); i++) { - out.push_back(ptr_.get()[i]); + template + const std::vector IdentityOf::get(int64_t at) const { + assert(0 <= at < length_); + std::vector out; + for (size_t i = (size_t)(offset() + at); i < (size_t)(offset() + at + width()); i++) { + out.push_back(ptr_.get()[i]); + } + return out; } - return out; -} -namespace awkward { template class IdentityOf; template class IdentityOf; } diff --git a/src/libawkward/Index.cpp b/src/libawkward/Index.cpp index 7ea15a6984..ffd3a02687 100644 --- a/src/libawkward/Index.cpp +++ b/src/libawkward/Index.cpp @@ -4,73 +4,75 @@ #include #include -#include "awkward/Index.h" - -using namespace awkward; +#include "awkward/Slice.h" -template -const std::string IndexOf::tostring() const { - return tostring_part("", "", ""); -} +#include "awkward/Index.h" -template -const std::string IndexOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { - std::stringstream out; - std::string name = "Unrecognized Index"; - if (std::is_same::value) { - name = "Index8"; - } - else if (std::is_same::value) { - name = "Index32"; - } - else if (std::is_same::value) { - name = "Index64"; +namespace awkward { + template + const std::string IndexOf::tostring() const { + return tostring_part("", "", ""); } - out << indent << pre << "<" << name << " i=\"["; - if (length_ <= 10) { - for (int64_t i = 0; i < length_; i++) { - if (i != 0) { - out << " "; - } - out << (int64_t)get(i); + + template + const std::string IndexOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + std::string name = "Unrecognized Index"; + if (std::is_same::value) { + name = "Index8"; } - } - else { - for (int64_t i = 0; i < 5; i++) { - if (i != 0) { - out << " "; + else if (std::is_same::value) { + name = "Index32"; + } + else if (std::is_same::value) { + name = "Index64"; + } + out << indent << pre << "<" << name << " i=\"["; + if (length_ <= 10) { + for 
(int64_t i = 0; i < length_; i++) { + if (i != 0) { + out << " "; + } + out << (int64_t)getitem_at(i); } - out << (int64_t)get(i); } - out << " ... "; - for (int64_t i = length_ - 5; i < length_; i++) { - if (i != length_ - 5) { - out << " "; + else { + for (int64_t i = 0; i < 5; i++) { + if (i != 0) { + out << " "; + } + out << (int64_t)getitem_at(i); + } + out << " ... "; + for (int64_t i = length_ - 5; i < length_; i++) { + if (i != length_ - 5) { + out << " "; + } + out << (int64_t)getitem_at(i); } - out << (int64_t)get(i); } + out << "]\" at=\"0x"; + out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()) << "\"/>" << post; + return out.str(); } - out << "]\" at=\"0x"; - out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()) << "\"/>" << post; - return out.str(); -} -template -T IndexOf::get(int64_t at) const { - return ptr_.get()[(size_t)(offset_ + at)]; -} + template + T IndexOf::getitem_at(int64_t at) const { + assert(0 <= at && at < length_); + return ptr_.get()[(size_t)(offset_ + at)]; + } -template -IndexOf IndexOf::slice(int64_t start, int64_t stop) const { - return IndexOf(ptr_, offset_ + start*(start != stop), stop - start); -} + template + IndexOf IndexOf::getitem_range(int64_t start, int64_t stop) const { + assert(0 <= start && start < length_ && 0 <= stop && stop < length_); + return IndexOf(ptr_, offset_ + start*(start != stop), stop - start); + } -template -const std::shared_ptr IndexOf::shallow_copy() const { - return std::shared_ptr(new IndexOf(ptr_, offset_, length_)); -} + template + const std::shared_ptr IndexOf::shallow_copy() const { + return std::shared_ptr(new IndexOf(ptr_, offset_, length_)); + } -namespace awkward { template class IndexOf; template class IndexOf; template class IndexOf; diff --git a/src/libawkward/Iterator.cpp b/src/libawkward/Iterator.cpp index 7133ac433b..72e3f99665 100644 --- a/src/libawkward/Iterator.cpp +++ b/src/libawkward/Iterator.cpp @@ -4,16 +4,16 @@ 
#include "awkward/Iterator.h" -using namespace awkward; +namespace awkward { + const std::string Iterator::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + out << indent << pre << "\n"; + out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); + out << indent << "" << post; + return out.str(); + } -const std::string Iterator::tostring_part(const std::string indent, const std::string pre, const std::string post) const { - std::stringstream out; - out << indent << pre << "\n"; - out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); - out << indent << "" << post; - return out.str(); -} - -const std::string Iterator::tostring() const { - return tostring_part("", "", ""); + const std::string Iterator::tostring() const { + return tostring_part("", "", ""); + } } diff --git a/src/libawkward/ListArray.cpp b/src/libawkward/ListArray.cpp new file mode 100644 index 0000000000..5fbf5960cf --- /dev/null +++ b/src/libawkward/ListArray.cpp @@ -0,0 +1,524 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include +#include + +#include "awkward/cpu-kernels/identity.h" +#include "awkward/cpu-kernels/getitem.h" +#include "awkward/Slice.h" +#include "awkward/ListOffsetArray.h" + +#include "awkward/ListArray.h" + +namespace awkward { + template <> + void ListArrayOf::setid(const std::shared_ptr id) { + if (id.get() == nullptr) { + content_.get()->setid(id); + } + else { + if (length() != id.get()->length()) { + throw std::invalid_argument("content and its id must have the same length"); + } + std::shared_ptr bigid = id; + if (content_.get()->length() > kMaxInt32) { + bigid = id.get()->to64(); + } + if (Identity32* rawid = dynamic_cast(bigid.get())) { + Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = 
awkward_identity32_from_listarray32( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + rawid->offset(), + starts_.offset(), + stops_.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else if (Identity64* rawid = dynamic_cast(bigid.get())) { + Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = awkward_identity64_from_listarray32( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + rawid->offset(), + starts_.offset(), + stops_.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else { + throw std::runtime_error("unrecognized Identity specialization"); + } + } + id_ = id; + } + + template <> + void ListArrayOf::setid(const std::shared_ptr id) { + if (id.get() == nullptr) { + content_.get()->setid(id); + } + else { + if (length() != id.get()->length()) { + throw std::invalid_argument("content and its id must have the same length"); + } + std::shared_ptr bigid = id.get()->to64(); + if (Identity64* rawid = dynamic_cast(bigid.get())) { + Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = awkward_identity64_from_listarray64( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + rawid->offset(), + starts_.offset(), + stops_.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else { + throw std::runtime_error("unrecognized Identity specialization"); + } + } + id_ = id; + } + + template + void ListArrayOf::setid() { + if (length() <= kMaxInt32) { + Identity32* rawid = new 
Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity32(rawid->ptr().get(), length()); + setid(newid); + } + else { + Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity64(rawid->ptr().get(), length()); + setid(newid); + } + } + + template + const std::string ListArrayOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + std::string name = "Unrecognized ListArray"; + if (std::is_same::value) { + name = "ListArray32"; + } + else if (std::is_same::value) { + name = "ListArray64"; + } + out << indent << pre << "<" << name << ">\n"; + if (id_.get() != nullptr) { + out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); + } + out << starts_.tostring_part(indent + std::string(" "), "", "\n"); + out << stops_.tostring_part(indent + std::string(" "), "", "\n"); + out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); + out << indent << "" << post; + return out.str(); + } + + template + int64_t ListArrayOf::length() const { + return starts_.length(); + } + + template + const std::shared_ptr ListArrayOf::shallow_copy() const { + return std::shared_ptr(new ListArrayOf(id_, starts_, stops_, content_)); + } + + template + const std::shared_ptr ListArrayOf::getitem_at(int64_t at) const { + int64_t regular_at = at; + if (regular_at < 0) { + regular_at += starts_.length(); + } + if (regular_at < 0 || regular_at >= starts_.length()) { + throw std::invalid_argument("index out of range"); + } + if (regular_at >= stops_.length()) { + throw std::invalid_argument("len(stops) < len(starts) in ListArray"); + } + return content_.get()->getitem_range(starts_.getitem_at(regular_at), stops_.getitem_at(regular_at)); + } + + template + const std::shared_ptr ListArrayOf::getitem_range(int64_t start, int64_t stop) const { + int64_t 
regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), starts_.length()); + if (regular_stop > stops_.length()) { + throw std::invalid_argument("len(stops) < len(starts) in ListArray"); + } + + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + if (regular_stop > id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->getitem_range(regular_start, regular_stop); + } + + return std::shared_ptr(new ListArrayOf(id, starts_.getitem_range(regular_start, regular_stop), stops_.getitem_range(regular_start, regular_stop), content_)); + } + + template <> + const std::shared_ptr ListArrayOf::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + int64_t lenstarts = starts_.length(); + if (stops_.length() < lenstarts) { + throw std::invalid_argument("len(stops) < len(starts)"); + } + + if (head.get() == nullptr) { + return shallow_copy(); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + assert(advanced.length() == 0); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 nextcarry(lenstarts); + Error err = awkward_listarray32_getitem_next_at_64( + nextcarry.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + at->at()); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, advanced); + } + + else if (SliceRange* range = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + int64_t start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + int64_t carrylength; + awkward_listarray32_getitem_next_range_carrylength( + carrylength, + starts_.ptr().get(), + 
stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + start, + stop, + step); + + Index32 nextoffsets(lenstarts + 1); + Index64 nextcarry(carrylength); + + awkward_listarray32_getitem_next_range_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + start, + stop, + step); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + + if (advanced.length() == 0) { + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + } + else { + int64_t total; + awkward_listarray32_getitem_next_range_counts_64( + total, + nextoffsets.ptr().get(), + lenstarts); + Index64 nextadvanced(total); + awkward_listarray32_getitem_next_range_spreadadvanced_64( + nextadvanced.ptr().get(), + advanced.ptr().get(), + nextoffsets.ptr().get(), + lenstarts); + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + } + + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + return getitem_ellipsis(tail, advanced); + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + return getitem_newaxis(tail, advanced); + } + + else if (SliceArray64* array = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 flathead = array->ravel(); + if (advanced.length() == 0) { + Index64 nextcarry(lenstarts*flathead.length()); + Index64 nextadvanced(lenstarts*flathead.length()); + Index32 nextoffsets(lenstarts + 1); + Error err = awkward_listarray32_getitem_next_array_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + flathead.ptr().get(), + starts_.offset(), + stops_.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr 
nextcontent = content_.get()->carry(nextcarry); + // FIXME: if the head is not flat, you'll need to wrap the ListArray output in a RegularArray + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + else { + Index64 nextcarry(lenstarts); + Index64 nextadvanced(lenstarts); + Error err = awkward_listarray32_getitem_next_array_advanced_64( + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + flathead.ptr().get(), + advanced.ptr().get(), + starts_.offset(), + stops_.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced); + } + } + + else { + throw std::runtime_error("unrecognized slice item type"); + } + } + + template <> + const std::shared_ptr ListArrayOf::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + int64_t lenstarts = starts_.length(); + if (stops_.length() < lenstarts) { + throw std::invalid_argument("len(stops) < len(starts)"); + } + + if (head.get() == nullptr) { + return shallow_copy(); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + assert(advanced.length() == 0); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 nextcarry(lenstarts); + Error err = awkward_listarray64_getitem_next_at_64( + nextcarry.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + at->at()); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, advanced); + } + + else if (SliceRange* range = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + int64_t start = range->start(); + int64_t stop = range->stop(); + 
int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + int64_t carrylength; + awkward_listarray64_getitem_next_range_carrylength( + carrylength, + starts_.ptr().get(), + stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + start, + stop, + step); + + Index64 nextoffsets(lenstarts + 1); + Index64 nextcarry(carrylength); + + awkward_listarray64_getitem_next_range_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + lenstarts, + starts_.offset(), + stops_.offset(), + start, + stop, + step); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + + if (advanced.length() == 0) { + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + } + else { + int64_t total; + awkward_listarray64_getitem_next_range_counts_64( + total, + nextoffsets.ptr().get(), + lenstarts); + Index64 nextadvanced(total); + awkward_listarray64_getitem_next_range_spreadadvanced_64( + nextadvanced.ptr().get(), + advanced.ptr().get(), + nextoffsets.ptr().get(), + lenstarts); + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + } + + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + return getitem_ellipsis(tail, advanced); + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + return getitem_newaxis(tail, advanced); + } + + else if (SliceArray64* array = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 flathead = array->ravel(); + if (advanced.length() == 0) { + Index64 nextcarry(lenstarts*flathead.length()); + Index64 nextadvanced(lenstarts*flathead.length()); + Index64 nextoffsets(lenstarts + 1); + Error err = awkward_listarray64_getitem_next_array_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + 
starts_.ptr().get(), + stops_.ptr().get(), + flathead.ptr().get(), + starts_.offset(), + stops_.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + // FIXME: if the head is not flat, you'll need to wrap the ListArray output in a RegularArray + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + else { + Index64 nextcarry(lenstarts); + Index64 nextadvanced(lenstarts); + Error err = awkward_listarray64_getitem_next_array_advanced_64( + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + flathead.ptr().get(), + advanced.ptr().get(), + starts_.offset(), + stops_.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced); + } + } + + else { + throw std::runtime_error("unrecognized slice item type"); + } + } + + template <> + const std::shared_ptr ListArrayOf::carry(const Index64& carry) const { + if (stops_.length() < starts_.length()) { + throw std::invalid_argument("len(stops) < len(starts)"); + } + Index32 nextstarts(carry.length()); + Index32 nextstops(carry.length()); + awkward_listarray32_getitem_carry_64( + nextstarts.ptr().get(), + nextstops.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + carry.ptr().get(), + starts_.offset(), + stops_.offset(), + carry.length()); + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new ListArrayOf(id, nextstarts, nextstops, content_)); + } + + template <> + const std::shared_ptr ListArrayOf::carry(const Index64& carry) const { + if (stops_.length() < starts_.length()) { + throw std::invalid_argument("len(stops) < len(starts)"); + 
} + Index64 nextstarts(carry.length()); + Index64 nextstops(carry.length()); + awkward_listarray64_getitem_carry_64( + nextstarts.ptr().get(), + nextstops.ptr().get(), + starts_.ptr().get(), + stops_.ptr().get(), + carry.ptr().get(), + starts_.offset(), + stops_.offset(), + carry.length()); + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new ListArrayOf(id, nextstarts, nextstops, content_)); + } + + template + const std::pair ListArrayOf::minmax_depth() const { + std::pair content_depth = content_.get()->minmax_depth(); + return std::pair(content_depth.first + 1, content_depth.second + 1); + } + + template class ListArrayOf; + template class ListArrayOf; +} diff --git a/src/libawkward/ListOffsetArray.cpp b/src/libawkward/ListOffsetArray.cpp index e59a59cbe7..2255ed9f42 100644 --- a/src/libawkward/ListOffsetArray.cpp +++ b/src/libawkward/ListOffsetArray.cpp @@ -4,94 +4,527 @@ #include #include "awkward/cpu-kernels/identity.h" -// #include "awkward/util.h" +#include "awkward/ListArray.h" #include "awkward/ListOffsetArray.h" -using namespace awkward; +namespace awkward { + template + IndexOf make_starts(const IndexOf& offsets) { + return IndexOf(offsets.ptr(), offsets.offset(), offsets.length() - 1); + } -template -void ListOffsetArrayOf::setid() { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); - awkward_identity_new32(length(), rawid->ptr().get()); - setid(newid); -} + template + IndexOf make_stops(const IndexOf& offsets) { + return IndexOf(offsets.ptr(), offsets.offset() + 1, offsets.length() - 1); + } -template -void ListOffsetArrayOf::setid(const std::shared_ptr id) { - if (id.get() == nullptr) { - content_.get()->setid(id); + template <> + void ListOffsetArrayOf::setid(const std::shared_ptr id) { + if (id.get() == nullptr) { + content_.get()->setid(id); + } + else { + if (length() != id.get()->length()) 
{ + throw std::invalid_argument("content and its id must have the same length"); + } + Index32 starts = make_starts(offsets_); + Index32 stops = make_stops(offsets_); + std::shared_ptr bigid = id; + if (content_.get()->length() > kMaxInt32) { + bigid = id.get()->to64(); + } + if (Identity32* rawid = dynamic_cast(bigid.get())) { + Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = awkward_identity32_from_listarray32( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + rawid->offset(), + starts.offset(), + stops.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else if (Identity64* rawid = dynamic_cast(bigid.get())) { + Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = awkward_identity64_from_listarray32( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + rawid->offset(), + starts.offset(), + stops.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else { + throw std::runtime_error("unrecognized Identity specialization"); + } + } + id_ = id; } - else { - Identity32* rawid32 = dynamic_cast(id.get()); - Identity64* rawid64 = dynamic_cast(id.get()); - if (rawid32 && std::is_same::value) { - Identity32* rawsubid = new Identity32(Identity::newref(), rawid32->fieldloc(), rawid32->width() + 1, content_.get()->length()); - std::shared_ptr newsubid(rawsubid); - awkward_identity_from_listfoffsets32(length(), rawid32->width(), reinterpret_cast(offsets_.ptr().get()), rawid32->ptr().get(), content_.get()->length(), rawsubid->ptr().get()); - content_.get()->setid(newsubid); + + template <> + void 
ListOffsetArrayOf::setid(const std::shared_ptr id) { + if (id.get() == nullptr) { + content_.get()->setid(id); } else { - throw std::runtime_error("unhandled Identity specialization case"); + if (length() != id.get()->length()) { + throw std::invalid_argument("content and its id must have the same length"); + } + Index64 starts = make_starts(offsets_); + Index64 stops = make_stops(offsets_); + std::shared_ptr bigid = id.get()->to64(); + if (Identity64* rawid = dynamic_cast(bigid.get())) { + Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + std::shared_ptr subid(rawsubid); + Error err = awkward_identity64_from_listarray64( + rawsubid->ptr().get(), + rawid->ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + rawid->offset(), + starts.offset(), + stops.offset(), + content_.get()->length(), + length(), + rawid->width()); + HANDLE_ERROR(err) + content_.get()->setid(subid); + } + else { + throw std::runtime_error("unrecognized Identity specialization"); + } } + id_ = id; } - id_ = id; -} -template -const std::string ListOffsetArrayOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { - std::stringstream out; - std::string name = "Unrecognized ListOffsetArray"; - if (std::is_same::value) { - name = "ListOffsetArray32"; + template + void ListOffsetArrayOf::setid() { + if (length() <= kMaxInt32) { + Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity32(rawid->ptr().get(), length()); + setid(newid); + } + else { + Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity64(rawid->ptr().get(), length()); + setid(newid); + } } - else if (std::is_same::value) { - name = "ListOffsetArray64"; + + template + const std::string ListOffsetArrayOf::tostring_part(const std::string 
indent, const std::string pre, const std::string post) const { + std::stringstream out; + std::string name = "Unrecognized ListOffsetArray"; + if (std::is_same::value) { + name = "ListOffsetArray32"; + } + else if (std::is_same::value) { + name = "ListOffsetArray64"; + } + out << indent << pre << "<" << name << ">\n"; + if (id_.get() != nullptr) { + out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); + } + out << offsets_.tostring_part(indent + std::string(" "), "", "\n"); + out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); + out << indent << "" << post; + return out.str(); } - out << indent << pre << "<" << name << ">\n"; - if (id_.get() != nullptr) { - out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); + + template + int64_t ListOffsetArrayOf::length() const { + return offsets_.length() - 1; } - out << offsets_.tostring_part(indent + std::string(" "), "", "\n"); - out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); - out << indent << "" << post; - return out.str(); -} -template -int64_t ListOffsetArrayOf::length() const { - return offsets_.length() - 1; -} + template + const std::shared_ptr ListOffsetArrayOf::shallow_copy() const { + return std::shared_ptr(new ListOffsetArrayOf(id_, offsets_, content_)); + } -template -const std::shared_ptr ListOffsetArrayOf::shallow_copy() const { - return std::shared_ptr(new ListOffsetArrayOf(id_, offsets_, content_)); -} + template + const std::shared_ptr ListOffsetArrayOf::getitem_at(int64_t at) const { + int64_t start = (int64_t)offsets_.getitem_at(at); + int64_t stop = (int64_t)offsets_.getitem_at(at + 1); + return content_.get()->getitem_range(start, stop); + } -template -const std::shared_ptr ListOffsetArrayOf::get(int64_t at) const { - int64_t start = (int64_t)offsets_.get(at); - int64_t stop = (int64_t)offsets_.get(at + 1); - return content_.get()->slice(start, stop); -} + template + const std::shared_ptr 
ListOffsetArrayOf::getitem_range(int64_t start, int64_t stop) const { + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), offsets_.length() - 1); + + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + if (regular_stop > id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->getitem_range(regular_start, regular_stop); + } -template -const std::shared_ptr ListOffsetArrayOf::slice(int64_t start, int64_t stop) const { - std::shared_ptr id(nullptr); - if (id_.get() != nullptr) { - id = id_.get()->slice(start, stop); + return std::shared_ptr(new ListOffsetArrayOf(id, offsets_.getitem_range(regular_start, regular_stop + 1), content_)); } - return std::shared_ptr(new ListOffsetArrayOf(id, offsets_.slice(start, stop + 1), content_)); -} -template -const std::pair ListOffsetArrayOf::minmax_depth() const { - std::pair content_depth = content_.get()->minmax_depth(); - return std::pair(content_depth.first + 1, content_depth.second + 1); -} + template <> + const std::shared_ptr ListOffsetArrayOf::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + int64_t lenstarts = offsets_.length() - 1; + + if (head.get() == nullptr) { + return shallow_copy(); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + assert(advanced.length() == 0); + Index32 starts = make_starts(offsets_); + Index32 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 nextcarry(lenstarts); + Error err = awkward_listarray32_getitem_next_at_64( + nextcarry.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + at->at()); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, advanced); + } + + 
else if (SliceRange* range = dynamic_cast(head.get())) { + Index32 starts = make_starts(offsets_); + Index32 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + int64_t start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + int64_t carrylength; + awkward_listarray32_getitem_next_range_carrylength( + carrylength, + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + start, + stop, + step); + + Index32 nextoffsets(lenstarts + 1); + Index64 nextcarry(carrylength); + + awkward_listarray32_getitem_next_range_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + start, + stop, + step); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + + if (advanced.length() == 0) { + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + } + else { + int64_t total; + awkward_listarray32_getitem_next_range_counts_64( + total, + nextoffsets.ptr().get(), + lenstarts); + Index64 nextadvanced(total); + awkward_listarray32_getitem_next_range_spreadadvanced_64( + nextadvanced.ptr().get(), + advanced.ptr().get(), + nextoffsets.ptr().get(), + lenstarts); + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + } + + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + return getitem_ellipsis(tail, advanced); + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + return getitem_newaxis(tail, advanced); + } + + else if (SliceArray64* array = dynamic_cast(head.get())) { + Index32 starts = make_starts(offsets_); + Index32 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + 
Index64 flathead = array->ravel(); + if (advanced.length() == 0) { + Index64 nextcarry(lenstarts*flathead.length()); + Index64 nextadvanced(lenstarts*flathead.length()); + Index32 nextoffsets(lenstarts + 1); + Index32 nextstops(lenstarts); + Error err = awkward_listarray32_getitem_next_array_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + flathead.ptr().get(), + starts.offset(), + stops.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + // FIXME: if the head is not flat, you'll need to wrap the ListArray output in a RegularArray + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + else { + Index64 nextcarry(lenstarts); + Index64 nextadvanced(lenstarts); + Error err = awkward_listarray32_getitem_next_array_advanced_64( + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + flathead.ptr().get(), + advanced.ptr().get(), + starts.offset(), + stops.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced); + } + } + + else { + throw std::runtime_error("unrecognized slice item type"); + } + } + + template <> + const std::shared_ptr ListOffsetArrayOf::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + int64_t lenstarts = offsets_.length() - 1; + + if (head.get() == nullptr) { + return shallow_copy(); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + assert(advanced.length() == 0); + Index64 starts = make_starts(offsets_); + Index64 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = 
tail.tail(); + Index64 nextcarry(lenstarts); + Error err = awkward_listarray64_getitem_next_at_64( + nextcarry.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + at->at()); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, advanced); + } + + else if (SliceRange* range = dynamic_cast(head.get())) { + Index64 starts = make_starts(offsets_); + Index64 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + int64_t start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + int64_t carrylength; + awkward_listarray64_getitem_next_range_carrylength( + carrylength, + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + start, + stop, + step); + + Index64 nextoffsets(lenstarts + 1); + Index64 nextcarry(carrylength); + + awkward_listarray64_getitem_next_range_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + lenstarts, + starts.offset(), + stops.offset(), + start, + stop, + step); + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + + if (advanced.length() == 0) { + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + } + else { + int64_t total; + awkward_listarray64_getitem_next_range_counts_64( + total, + nextoffsets.ptr().get(), + lenstarts); + Index64 nextadvanced(total); + awkward_listarray64_getitem_next_range_spreadadvanced_64( + nextadvanced.ptr().get(), + advanced.ptr().get(), + nextoffsets.ptr().get(), + lenstarts); + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + } + + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + 
return getitem_ellipsis(tail, advanced); + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + return getitem_newaxis(tail, advanced); + } + + else if (SliceArray64* array = dynamic_cast(head.get())) { + Index64 starts = make_starts(offsets_); + Index64 stops = make_stops(offsets_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Index64 flathead = array->ravel(); + if (advanced.length() == 0) { + Index64 nextcarry(lenstarts*flathead.length()); + Index64 nextadvanced(lenstarts*flathead.length()); + Index64 nextoffsets(lenstarts + 1); + Index64 nextstops(lenstarts); + Error err = awkward_listarray64_getitem_next_array_64( + nextoffsets.ptr().get(), + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + flathead.ptr().get(), + starts.offset(), + stops.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + // FIXME: if the head is not flat, you'll need to wrap the ListArray output in a RegularArray + return std::shared_ptr(new ListOffsetArrayOf(id_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + } + else { + Index64 nextcarry(lenstarts); + Index64 nextadvanced(lenstarts); + Error err = awkward_listarray64_getitem_next_array_advanced_64( + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + flathead.ptr().get(), + advanced.ptr().get(), + starts.offset(), + stops.offset(), + lenstarts, + flathead.length(), + content_.get()->length()); + HANDLE_ERROR(err) + std::shared_ptr nextcontent = content_.get()->carry(nextcarry); + return nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced); + } + } + + else { + throw std::runtime_error("unrecognized slice item type"); + } + } + + template <> + const std::shared_ptr ListOffsetArrayOf::carry(const Index64& carry) const { + Index32 starts = 
make_starts(offsets_); + Index32 stops = make_stops(offsets_); + Index32 nextstarts(carry.length()); + Index32 nextstops(carry.length()); + awkward_listarray32_getitem_carry_64( + nextstarts.ptr().get(), + nextstops.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + carry.ptr().get(), + starts.offset(), + stops.offset(), + carry.length()); + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new ListArrayOf(id, nextstarts, nextstops, content_)); + } + + template <> + const std::shared_ptr ListOffsetArrayOf::carry(const Index64& carry) const { + Index64 starts = make_starts(offsets_); + Index64 stops = make_stops(offsets_); + Index64 nextstarts(carry.length()); + Index64 nextstops(carry.length()); + awkward_listarray64_getitem_carry_64( + nextstarts.ptr().get(), + nextstops.ptr().get(), + starts.ptr().get(), + stops.ptr().get(), + carry.ptr().get(), + starts.offset(), + stops.offset(), + carry.length()); + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new ListArrayOf(id, nextstarts, nextstops, content_)); + } + + template + const std::pair ListOffsetArrayOf::minmax_depth() const { + std::pair content_depth = content_.get()->minmax_depth(); + return std::pair(content_depth.first + 1, content_depth.second + 1); + } -namespace awkward { template class ListOffsetArrayOf; template class ListOffsetArrayOf; } diff --git a/src/libawkward/NumpyArray.cpp b/src/libawkward/NumpyArray.cpp index 502804c288..6be6c289fd 100644 --- a/src/libawkward/NumpyArray.cpp +++ b/src/libawkward/NumpyArray.cpp @@ -9,661 +9,704 @@ #include "awkward/NumpyArray.h" -using namespace awkward; - -ssize_t NumpyArray::ndim() const { - return shape_.size(); -} - -bool NumpyArray::isscalar() const { - return ndim() == 0; -} - -bool NumpyArray::isempty() const { - for (auto x : shape_) { - if (x == 0) return true; +namespace awkward { + 
ssize_t NumpyArray::ndim() const { + return shape_.size(); } - return false; // false for isscalar(), too -} -void* NumpyArray::byteptr() const { - return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_); -} + bool NumpyArray::isscalar() const { + return ndim() == 0; + } -ssize_t NumpyArray::bytelength() const { - if (isscalar()) { - return itemsize_; + bool NumpyArray::isempty() const { + for (auto x : shape_) { + if (x == 0) return true; + } + return false; // false for isscalar(), too } - else { - return shape_[0]*strides_[0]; + + void* NumpyArray::byteptr() const { + return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_); } -} -uint8_t NumpyArray::getbyte(ssize_t at) const { - return *reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_ + at); -} + ssize_t NumpyArray::bytelength() const { + if (isscalar()) { + return itemsize_; + } + else { + return shape_[0]*strides_[0]; + } + } -void NumpyArray::setid(const std::shared_ptr id) { - id_ = id; -} + uint8_t NumpyArray::getbyte(ssize_t at) const { + return *reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_ + at); + } -void NumpyArray::setid() { - assert(!isscalar()); - Identity32* id32 = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(id32); - awkward_identity_new32(length(), id32->ptr().get()); - setid(newid); -} + void NumpyArray::setid(const std::shared_ptr id) { + if (id.get() != nullptr && length() != id.get()->length()) { + throw std::invalid_argument("content and its id must have the same length"); + } + id_ = id; + } -template -void tostring_as(std::stringstream& out, T* ptr, int64_t length) { - if (length <= 10) { - for (int64_t i = 0; i < length; i++) { - if (i != 0) { - out << " "; - } - out << ptr[i]; + void NumpyArray::setid() { + assert(!isscalar()); + if (length() <= kMaxInt32) { + Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + 
awkward_new_identity32(rawid->ptr().get(), length()); + setid(newid); + } + else { + Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); + std::shared_ptr newid(rawid); + awkward_new_identity64(rawid->ptr().get(), length()); + setid(newid); } } - else { - for (int64_t i = 0; i < 5; i++) { - if (i != 0) { - out << " "; + + template + void tostring_as(std::stringstream& out, T* ptr, int64_t length) { + if (length <= 10) { + for (int64_t i = 0; i < length; i++) { + if (i != 0) { + out << " "; + } + out << ptr[i]; } - out << ptr[i]; } - out << " ... "; - for (int64_t i = length - 5; i < length; i++) { - if (i != length - 5) { - out << " "; + else { + for (int64_t i = 0; i < 5; i++) { + if (i != 0) { + out << " "; + } + out << ptr[i]; + } + out << " ... "; + for (int64_t i = length - 5; i < length; i++) { + if (i != length - 5) { + out << " "; + } + out << ptr[i]; } - out << ptr[i]; } } -} -const std::string NumpyArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { - assert(!isscalar()); - std::stringstream out; - out << indent << pre << "(out, reinterpret_cast(byteptr()), length()); - } -#ifdef _MSC_VER - else if (ndim() == 1 && format_.compare("q") == 0) { -#else - else if (ndim() == 1 && format_.compare("l") == 0) { -#endif - tostring_as(out, reinterpret_cast(byteptr()), length()); - } - else if (ndim() == 1 && format_.compare("f") == 0) { - tostring_as(out, reinterpret_cast(byteptr()), length()); - } - else if (ndim() == 1 && format_.compare("d") == 0) { - tostring_as(out, reinterpret_cast(byteptr()), length()); - } - else { - ssize_t len = bytelength(); - if (len <= 32) { - for (ssize_t i = 0; i < len; i++) { - if (i != 0 && i % 4 == 0) { - out << " "; + if (!iscontiguous()) { + out << "strides=\""; + for (ssize_t i = 0; i < ndim(); i++) { + if (i != 0) { + out << ", "; } - out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); + out << strides_[i]; } + out << 
"\" "; + } + out << "data=\""; + #ifdef _MSC_VER + if (ndim() == 1 && format_.compare("l") == 0) { + #else + if (ndim() == 1 && format_.compare("i") == 0) { + #endif + tostring_as(out, reinterpret_cast(byteptr()), length()); + } + #ifdef _MSC_VER + else if (ndim() == 1 && format_.compare("q") == 0) { + #else + else if (ndim() == 1 && format_.compare("l") == 0) { + #endif + tostring_as(out, reinterpret_cast(byteptr()), length()); + } + else if (ndim() == 1 && format_.compare("f") == 0) { + tostring_as(out, reinterpret_cast(byteptr()), length()); + } + else if (ndim() == 1 && format_.compare("d") == 0) { + tostring_as(out, reinterpret_cast(byteptr()), length()); } else { - for (ssize_t i = 0; i < 16; i++) { - if (i != 0 && i % 4 == 0) { - out << " "; + ssize_t len = bytelength(); + if (len <= 32) { + for (ssize_t i = 0; i < len; i++) { + if (i != 0 && i % 4 == 0) { + out << " "; + } + out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); } - out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); } - out << " ... "; - for (ssize_t i = len - 16; i < len; i++) { - if (i != len - 16 && i % 4 == 0) { - out << " "; + else { + for (ssize_t i = 0; i < 16; i++) { + if (i != 0 && i % 4 == 0) { + out << " "; + } + out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); + } + out << " ... 
"; + for (ssize_t i = len - 16; i < len; i++) { + if (i != len - 16 && i % 4 == 0) { + out << " "; + } + out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); } - out << std::hex << std::setw(2) << std::setfill('0') << int(getbyte(i)); } } + out << "\" at=\"0x"; + out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()); + if (id_.get() == nullptr) { + out << "\"/>" << post; + } + else { + out << "\">\n"; + out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); + out << indent << "" << post; + } + return out.str(); } - out << "\" at=\"0x"; - out << std::hex << std::setw(12) << std::setfill('0') << reinterpret_cast(ptr_.get()); - if (id_.get() == nullptr) { - out << "\"/>" << post; - } - else { - out << "\">\n"; - out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); - out << indent << "" << post; - } - return out.str(); -} -int64_t NumpyArray::length() const { - if (isscalar()) { - return -1; - } - else { - return (int64_t)shape_[0]; + int64_t NumpyArray::length() const { + if (isscalar()) { + return -1; + } + else { + return (int64_t)shape_[0]; + } } -} - -const std::shared_ptr NumpyArray::shallow_copy() const { - return std::shared_ptr(new NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_)); -} -const std::shared_ptr NumpyArray::get(int64_t at) const { - assert(!isscalar()); - // return getitem(Slice(std::vector>({ std::shared_ptr(new SliceAt(at)) }), true)); - int64_t regular_at = at; - if (regular_at < 0) { - regular_at += shape_[0]; - } - if (regular_at < 0 || regular_at >= shape_[0]) { - throw std::invalid_argument("index out of range"); + const std::shared_ptr NumpyArray::shallow_copy() const { + return std::shared_ptr(new NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_)); } - ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_at); - const std::vector shape(shape_.begin() + 1, shape_.end()); - const std::vector 
strides(strides_.begin() + 1, strides_.end()); - std::shared_ptr id; - if (id_.get() != nullptr) { - if (regular_at >= id_.get()->length()) { - throw std::invalid_argument("index out of range for identity"); - } - id = id_.get()->slice(regular_at, regular_at + 1); + + const std::shared_ptr NumpyArray::getitem_at(int64_t at) const { + assert(!isscalar()); + int64_t regular_at = at; + if (regular_at < 0) { + regular_at += shape_[0]; + } + if (regular_at < 0 || regular_at >= shape_[0]) { + throw std::invalid_argument("index out of range"); + } + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_at); + const std::vector shape(shape_.begin() + 1, shape_.end()); + const std::vector strides(strides_.begin() + 1, strides_.end()); + std::shared_ptr id; + if (id_.get() != nullptr) { + if (regular_at >= id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->getitem_range(regular_at, regular_at + 1); + } + return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides, byteoffset, itemsize_, format_)); } - return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides, byteoffset, itemsize_, format_)); -} -const std::shared_ptr NumpyArray::slice(int64_t start, int64_t stop) const { - assert(!isscalar()); - // return getitem(Slice(std::vector>({ std::shared_ptr(new SliceRange(start, stop, 1)) }), true)); - int64_t regular_start = start; - int64_t regular_stop = stop; - awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), shape_[0]); - ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_start); - std::vector shape; - shape.push_back((ssize_t)(regular_stop - regular_start)); - shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); - std::shared_ptr id; - if (id_.get() != nullptr) { - if (regular_stop > id_.get()->length()) { - throw std::invalid_argument("index out of range for identity"); - } - id = id_.get()->slice(regular_start, 
regular_stop); + const std::shared_ptr NumpyArray::getitem_range(int64_t start, int64_t stop) const { + assert(!isscalar()); + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), shape_[0]); + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_start); + std::vector shape; + shape.push_back((ssize_t)(regular_stop - regular_start)); + shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); + std::shared_ptr id; + if (id_.get() != nullptr) { + if (regular_stop > id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->getitem_range(regular_start, regular_stop); + } + return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides_, byteoffset, itemsize_, format_)); } - return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides_, byteoffset, itemsize_, format_)); -} -const std::pair NumpyArray::minmax_depth() const { - return std::pair((int64_t)shape_.size(), (int64_t)shape_.size()); -} + const std::shared_ptr NumpyArray::getitem(const Slice& where) const { + assert(!isscalar()); -const std::vector flatten_shape(const std::vector shape) { - if (shape.size() == 1) { - return std::vector(); - } - else { - std::vector out = { shape[0]*shape[1] }; - out.insert(out.end(), shape.begin() + 2, shape.end()); - return out; - } -} + if (!where.isadvanced() && id_.get() == nullptr) { + std::vector nextshape = { 1 }; + nextshape.insert(nextshape.end(), shape_.begin(), shape_.end()); + std::vector nextstrides = { shape_[0]*strides_[0] }; + nextstrides.insert(nextstrides.end(), strides_.begin(), strides_.end()); + NumpyArray next(id_, ptr_, nextshape, nextstrides, byteoffset_, itemsize_, format_); -const std::vector flatten_strides(const std::vector strides) { - if (strides.size() == 1) { - return std::vector(); - } - else { - return std::vector(strides.begin() + 1, strides.end()); - } 
-} + std::shared_ptr nexthead = where.head(); + Slice nexttail = where.tail(); + NumpyArray out = next.getitem_bystrides(nexthead, nexttail, 1); -bool NumpyArray::iscontiguous() const { - ssize_t x = itemsize_; - for (ssize_t i = ndim() - 1; i >= 0; i--) { - if (x != strides_[i]) return false; - x *= shape_[i]; - } - return true; // true for isscalar(), too -} + std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); + return std::shared_ptr(new NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + } -void NumpyArray::become_contiguous() { - if (!iscontiguous()) { - NumpyArray x = contiguous(); - id_ = x.id_; - ptr_ = x.ptr_; - shape_ = x.shape_; - strides_ = x.strides_; - byteoffset_ = x.byteoffset_; + else { + NumpyArray safe = contiguous(); // maybe become_contiguous() to change in-place? + + std::vector nextshape = { 1 }; + nextshape.insert(nextshape.end(), safe.shape_.begin(), safe.shape_.end()); + std::vector nextstrides = { safe.shape_[0]*safe.strides_[0] }; + nextstrides.insert(nextstrides.end(), safe.strides_.begin(), safe.strides_.end()); + NumpyArray next(safe.id_, safe.ptr_, nextshape, nextstrides, safe.byteoffset_, itemsize_, format_); + + std::shared_ptr nexthead = where.head(); + Slice nexttail = where.tail(); + Index64 nextcarry(1); + nextcarry.ptr().get()[0] = 0; + Index64 nextadvanced(0); + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, 1, next.strides_[0]); + + std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); + return std::shared_ptr(new NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + } } -} -const NumpyArray NumpyArray::contiguous() const { - if (iscontiguous()) { - return NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); - } - else { - 
Index64 bytepos(shape_[0]); - awkward_numpyarray_contiguous_init_64(bytepos.ptr().get(), shape_[0], strides_[0]); - return contiguous_next(bytepos); + const std::shared_ptr NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + assert(!isscalar()); + Index64 carry(shape_[0]); + awkward_carry_arange_64(carry.ptr().get(), shape_[0]); + return getitem_next(head, tail, carry, advanced, shape_[0], strides_[0]).shallow_copy(); } -} -const NumpyArray NumpyArray::contiguous_next(Index64 bytepos) const { - if (iscontiguous()) { - std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*strides_[0])], awkward::util::array_deleter()); - awkward_numpyarray_contiguous_copy_64( + const std::shared_ptr NumpyArray::carry(const Index64& carry) const { /* Allocates carry.length()*strides_[0] bytes and hands that buffer, ptr_ and the carry index to awkward_numpyarray_getitem_next_null_64; id_, when present, goes through getitem_carry_64. */ + assert(!isscalar()); /* strides_[0] is read below, so the array needs at least one dimension */ + + std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*strides_[0])], awkward::util::array_deleter()); + awkward_numpyarray_getitem_next_null_64( reinterpret_cast(ptr.get()), reinterpret_cast(ptr_.get()), - bytepos.length(), + carry.length(), strides_[0], byteoffset_, - bytepos.ptr().get()); - return NumpyArray(id_, ptr, shape_, strides_, 0, itemsize_, format_); - } + carry.ptr().get()); - else if (shape_.size() == 1) { - std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*itemsize_)], awkward::util::array_deleter()); - awkward_numpyarray_contiguous_copy_64( - reinterpret_cast(ptr.get()), - reinterpret_cast(ptr_.get()), - bytepos.length(), - itemsize_, - byteoffset_, - bytepos.ptr().get()); - std::vector strides = { itemsize_ }; - return NumpyArray(id_, ptr, shape_, strides, 0, itemsize_, format_); - } + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } - else { - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); - - Index64 nextbytepos(bytepos.length()*shape_[1]); - awkward_numpyarray_contiguous_next_64( - nextbytepos.ptr().get(), - 
bytepos.ptr().get(), - bytepos.length(), - (int64_t)shape_[1], - (int64_t)strides_[1]); - - NumpyArray out = next.contiguous_next(nextbytepos); - std::vector outstrides = { shape_[1]*out.strides_[0] }; - outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, shape_, outstrides, out.byteoffset_, itemsize_, format_); + std::vector shape = { (ssize_t)carry.length() }; + shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); + return std::shared_ptr(new NumpyArray(id, ptr, shape, strides_, 0, itemsize_, format_)); } -} - -const std::shared_ptr NumpyArray::getitem(const Slice& where) const { - assert(!isscalar()); - - if (!where.isadvanced() && id_.get() == nullptr) { - std::vector nextshape = { 1 }; - nextshape.insert(nextshape.end(), shape_.begin(), shape_.end()); - std::vector nextstrides = { shape_[0]*strides_[0] }; - nextstrides.insert(nextstrides.end(), strides_.begin(), strides_.end()); - NumpyArray next(id_, ptr_, nextshape, nextstrides, byteoffset_, itemsize_, format_); - - std::shared_ptr nexthead = where.head(); - Slice nexttail = where.tail(); - NumpyArray out = next.getitem_bystrides(nexthead, nexttail, 1); - std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); - return std::shared_ptr(new NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + const std::pair NumpyArray::minmax_depth() const { + return std::pair((int64_t)shape_.size(), (int64_t)shape_.size()); } - else { - NumpyArray safe = contiguous(); // maybe become_contiguous() to change in-place? 
- - std::vector nextshape = { 1 }; - nextshape.insert(nextshape.end(), safe.shape_.begin(), safe.shape_.end()); - std::vector nextstrides = { safe.shape_[0]*safe.strides_[0] }; - nextstrides.insert(nextstrides.end(), safe.strides_.begin(), safe.strides_.end()); - NumpyArray next(safe.id_, safe.ptr_, nextshape, nextstrides, safe.byteoffset_, itemsize_, format_); - - std::shared_ptr nexthead = where.head(); - Slice nexttail = where.tail(); - Index64 nextcarry(1); - nextcarry.ptr().get()[0] = 0; - Index64 nextadvanced(0); - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, 1, next.strides_[0]); - - std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); - return std::shared_ptr(new NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + const std::vector flatten_shape(const std::vector shape) { + if (shape.size() == 1) { + return std::vector(); + } + else { + std::vector out = { shape[0]*shape[1] }; + out.insert(out.end(), shape.begin() + 2, shape.end()); + return out; + } } -} -const NumpyArray NumpyArray::getitem_bystrides(const std::shared_ptr& head, const Slice& tail, int64_t length) const { - if (head.get() == nullptr) { - return NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); + const std::vector flatten_strides(const std::vector strides) { + if (strides.size() == 1) { + return std::vector(); + } + else { + return std::vector(strides.begin() + 1, strides.end()); + } } - else if (SliceAt* at = dynamic_cast(head.get())) { - if (ndim() < 2) { - throw std::invalid_argument("too many indexes for array"); + bool NumpyArray::iscontiguous() const { + ssize_t x = itemsize_; + for (ssize_t i = ndim() - 1; i >= 0; i--) { + if (x != strides_[i]) return false; + x *= shape_[i]; } + return true; // true for isscalar(), too + } - int64_t i = at->at(); - if (i < 0) i += shape_[1]; - if (i < 0 || i >= 
shape_[1]) { - throw std::invalid_argument("index out of range"); + void NumpyArray::become_contiguous() { + if (!iscontiguous()) { + NumpyArray x = contiguous(); + id_ = x.id_; + ptr_ = x.ptr_; + shape_ = x.shape_; + strides_ = x.strides_; + byteoffset_ = x.byteoffset_; } + } - ssize_t nextbyteoffset = byteoffset_ + ((ssize_t)i)*strides_[1]; - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), nextbyteoffset, itemsize_, format_); - - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); - NumpyArray out = next.getitem_bystrides(nexthead, nexttail, length); - - std::vector outshape = { (ssize_t)length }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); + const NumpyArray NumpyArray::contiguous() const { + if (iscontiguous()) { + return NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); + } + else { + Index64 bytepos(shape_[0]); + awkward_numpyarray_contiguous_init_64(bytepos.ptr().get(), shape_[0], strides_[0]); + return contiguous_next(bytepos); + } } - else if (SliceRange* range = dynamic_cast(head.get())) { - if (ndim() < 2) { - throw std::invalid_argument("too many indexes for array"); + const NumpyArray NumpyArray::contiguous_next(Index64 bytepos) const { + if (iscontiguous()) { + std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*strides_[0])], awkward::util::array_deleter()); + awkward_numpyarray_contiguous_copy_64( + reinterpret_cast(ptr.get()), + reinterpret_cast(ptr_.get()), + bytepos.length(), + strides_[0], + byteoffset_, + bytepos.ptr().get()); + return NumpyArray(id_, ptr, shape_, strides_, 0, itemsize_, format_); } - int64_t start = range->start(); - int64_t stop = range->stop(); - int64_t step = range->step(); - awkward_regularize_rangeslice(start, stop, step > 0, range->hasstart(), range->hasstop(), (int64_t)shape_[1]); + else if (shape_.size() == 
1) { + std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*itemsize_)], awkward::util::array_deleter()); + awkward_numpyarray_contiguous_copy_64( + reinterpret_cast(ptr.get()), + reinterpret_cast(ptr_.get()), + bytepos.length(), + itemsize_, + byteoffset_, + bytepos.ptr().get()); + std::vector strides = { itemsize_ }; + return NumpyArray(id_, ptr, shape_, strides, 0, itemsize_, format_); + } - int64_t numer = abs(start - stop); - int64_t denom = abs(step); - int64_t d = numer / denom; - int64_t m = numer % denom; - int64_t lenhead = d + (m != 0 ? 1 : 0); + else { + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); + + Index64 nextbytepos(bytepos.length()*shape_[1]); + awkward_numpyarray_contiguous_next_64( + nextbytepos.ptr().get(), + bytepos.ptr().get(), + bytepos.length(), + (int64_t)shape_[1], + (int64_t)strides_[1]); + + NumpyArray out = next.contiguous_next(nextbytepos); + std::vector outstrides = { shape_[1]*out.strides_[0] }; + outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, shape_, outstrides, out.byteoffset_, itemsize_, format_); + } + } - ssize_t nextbyteoffset = byteoffset_ + ((ssize_t)start)*strides_[1]; - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), nextbyteoffset, itemsize_, format_); + const NumpyArray NumpyArray::getitem_bystrides(const std::shared_ptr& head, const Slice& tail, int64_t length) const { + if (head.get() == nullptr) { + return NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); + } - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); - NumpyArray out = next.getitem_bystrides(nexthead, nexttail, length*lenhead); + else if (SliceAt* at = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too many indexes for array"); + } - std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; - 
outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides = { strides_[0], strides_[1]*((ssize_t)step) }; - outstrides.insert(outstrides.end(), out.strides_.begin() + 1, out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); - } + int64_t i = at->at(); + if (i < 0) i += shape_[1]; + if (i < 0 || i >= shape_[1]) { + throw std::invalid_argument("index out of range"); + } - else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { - std::pair minmax = minmax_depth(); - assert(minmax.first == minmax.second); - int64_t mindepth = minmax.first; + ssize_t nextbyteoffset = byteoffset_ + ((ssize_t)i)*strides_[1]; + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), nextbyteoffset, itemsize_, format_); - if (tail.length() == 0 || mindepth - 1 == tail.dimlength()) { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); - return getitem_bystrides(nexthead, nexttail, length); - } - else { - std::vector> tailitems = tail.items(); - std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; - items.insert(items.end(), tailitems.begin(), tailitems.end()); + NumpyArray out = next.getitem_bystrides(nexthead, nexttail, length); - std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); - Slice nexttail(items, true); - return getitem_bystrides(nexthead, nexttail, length); + std::vector outshape = { (ssize_t)length }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); } - } - else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); - NumpyArray out = getitem_bystrides(nexthead, nexttail, length); + else if (SliceRange* range = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too 
many indexes for array"); + } - std::vector outshape = { (ssize_t)length, 1 }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides = { out.strides_[0] }; - outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); - } + int64_t start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + awkward_regularize_rangeslice(start, stop, step > 0, range->hasstart(), range->hasstop(), (int64_t)shape_[1]); - else { - throw std::runtime_error("unrecognized slice item type"); - } -} + int64_t numer = abs(start - stop); + int64_t denom = abs(step); + int64_t d = numer / denom; + int64_t m = numer % denom; + int64_t lenhead = d + (m != 0 ? 1 : 0); -const NumpyArray NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, Index64& carry, Index64& advanced, int64_t length, int64_t stride) const { - if (head.get() == nullptr) { - std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*stride)], awkward::util::array_deleter()); - awkward_numpyarray_getitem_next_null_64( - reinterpret_cast(ptr.get()), - reinterpret_cast(ptr_.get()), - carry.length(), - stride, - byteoffset_, - carry.ptr().get()); + ssize_t nextbyteoffset = byteoffset_ + ((ssize_t)start)*strides_[1]; + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), nextbyteoffset, itemsize_, format_); - std::shared_ptr id(nullptr); - if (id_.get() != nullptr) { - id = id_.get()->getitem_carry_64(carry); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + NumpyArray out = next.getitem_bystrides(nexthead, nexttail, length*lenhead); + + std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides = { strides_[0], 
strides_[1]*((ssize_t)step) }; + outstrides.insert(outstrides.end(), out.strides_.begin() + 1, out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } - std::vector shape = { (ssize_t)carry.length() }; - shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); - std::vector strides = { (ssize_t)stride }; - strides.insert(strides.end(), strides_.begin() + 1, strides_.end()); - return NumpyArray(id, ptr, shape, strides, 0, itemsize_, format_); - } + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + std::pair minmax = minmax_depth(); + assert(minmax.first == minmax.second); + int64_t mindepth = minmax.first; - else if (SliceAt* at = dynamic_cast(head.get())) { - if (ndim() < 2) { - throw std::invalid_argument("too many indexes for array"); + if (tail.length() == 0 || mindepth - 1 == tail.dimlength()) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_bystrides(nexthead, nexttail, length); + } + else { + std::vector> tailitems = tail.items(); + std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + items.insert(items.end(), tailitems.begin(), tailitems.end()); + + std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + Slice nexttail(items, true); + return getitem_bystrides(nexthead, nexttail, length); + } } - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + NumpyArray out = getitem_bystrides(nexthead, nexttail, length); - // if we had any array slices, this int would become an array - assert(advanced.length() == 0); + std::vector outshape = { (ssize_t)length, 1 }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + 
std::vector outstrides = { out.strides_[0] }; + outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); + } - Index64 nextcarry(carry.length()); - awkward_numpyarray_getitem_next_at_64( - nextcarry.ptr().get(), - carry.ptr().get(), - carry.length(), - shape_[1], // because this is contiguous - at->at()); + else { + throw std::runtime_error("unrecognized slice item type"); + } + } - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length, next.strides_[0]); + const NumpyArray NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride) const { + if (head.get() == nullptr) { + std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*stride)], awkward::util::array_deleter()); + awkward_numpyarray_getitem_next_null_64( + reinterpret_cast(ptr.get()), + reinterpret_cast(ptr_.get()), + carry.length(), + stride, + byteoffset_, + carry.ptr().get()); - std::vector outshape = { (ssize_t)length }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); - } + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } - else if (SliceRange* range = dynamic_cast(head.get())) { - if (ndim() < 2) { - throw std::invalid_argument("too many indexes for array"); + std::vector shape = { (ssize_t)carry.length() }; + shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); + std::vector strides = { (ssize_t)stride }; + strides.insert(strides.end(), strides_.begin() + 1, strides_.end()); + return NumpyArray(id, ptr, shape, strides, 0, itemsize_, format_); } - int64_t start = range->start(); - int64_t stop = range->stop(); - int64_t step = range->step(); - 
awkward_regularize_rangeslice(start, stop, step > 0, range->hasstart(), range->hasstop(), (int64_t)shape_[1]); + else if (SliceAt* at = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too many indexes for array"); + } - int64_t numer = abs(start - stop); - int64_t denom = abs(step); - int64_t d = numer / denom; - int64_t m = numer % denom; - int64_t lenhead = d + (m != 0 ? 1 : 0); + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); + // if we had any array slices, this int would become an array + assert(advanced.length() == 0); - if (advanced.length() == 0) { - Index64 nextcarry(carry.length()*lenhead); - awkward_numpyarray_getitem_next_slice_64( + Index64 nextcarry(carry.length()); + awkward_numpyarray_getitem_next_at_64( nextcarry.ptr().get(), carry.ptr().get(), carry.length(), - lenhead, shape_[1], // because this is contiguous - start, - step); + at->at()); - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length*lenhead, next.strides_[0]); - std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length, next.strides_[0]); + + std::vector outshape = { (ssize_t)length }; outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides = { (ssize_t)lenhead*out.strides_[0] }; - outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); + return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); } - else { - Index64 
nextcarry(carry.length()*lenhead); - Index64 nextadvanced(carry.length()*lenhead); - awkward_numpyarray_getitem_next_slice_advanced_64( - nextcarry.ptr().get(), - nextadvanced.ptr().get(), - carry.ptr().get(), - advanced.ptr().get(), - carry.length(), - lenhead, - shape_[1], // because this is contiguous - start, - step); + else if (SliceRange* range = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too many indexes for array"); + } - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, length*lenhead, next.strides_[0]); - std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides = { (ssize_t)lenhead*out.strides_[0] }; - outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); - } - } + int64_t start = range->start(); + int64_t stop = range->stop(); + int64_t step = range->step(); + if (step == Slice::none()) { + step = 1; + } + awkward_regularize_rangeslice(start, stop, step > 0, range->hasstart(), range->hasstop(), (int64_t)shape_[1]); - else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { - std::pair minmax = minmax_depth(); - assert(minmax.first == minmax.second); - int64_t mindepth = minmax.first; + int64_t numer = abs(start - stop); + int64_t denom = abs(step); + int64_t d = numer / denom; + int64_t m = numer % denom; + int64_t lenhead = d + (m != 0 ? 
1 : 0); - if (tail.length() == 0 || mindepth - 1 == tail.dimlength()) { + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); - return getitem_next(nexthead, nexttail, carry, advanced, length, stride); - } - else { - std::vector> tailitems = tail.items(); - std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; - items.insert(items.end(), tailitems.begin(), tailitems.end()); - std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); - Slice nexttail(items, true); - return getitem_next(nexthead, nexttail, carry, advanced, length, stride); + if (advanced.length() == 0) { + Index64 nextcarry(carry.length()*lenhead); + awkward_numpyarray_getitem_next_range_64( + nextcarry.ptr().get(), + carry.ptr().get(), + carry.length(), + lenhead, + shape_[1], // because this is contiguous + start, + step); + + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length*lenhead, next.strides_[0]); + std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides = { (ssize_t)lenhead*out.strides_[0] }; + outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); + } + + else { + Index64 nextcarry(carry.length()*lenhead); + Index64 nextadvanced(carry.length()*lenhead); + awkward_numpyarray_getitem_next_range_advanced_64( + nextcarry.ptr().get(), + nextadvanced.ptr().get(), + carry.ptr().get(), + advanced.ptr().get(), + carry.length(), + lenhead, + shape_[1], // because this is contiguous + start, + step); + + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, length*lenhead, next.strides_[0]); + std::vector outshape = { (ssize_t)length, (ssize_t)lenhead }; + 
outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides = { (ssize_t)lenhead*out.strides_[0] }; + outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); + } } - } - else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); - NumpyArray out = getitem_next(nexthead, nexttail, carry, advanced, length, stride); + else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { + std::pair minmax = minmax_depth(); + assert(minmax.first == minmax.second); + int64_t mindepth = minmax.first; - std::vector outshape = { (ssize_t)length, 1 }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides = { out.strides_[0] }; - outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); - } + if (tail.length() == 0 || mindepth - 1 == tail.dimlength()) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_next(nexthead, nexttail, carry, advanced, length, stride); + } + else { + std::vector> tailitems = tail.items(); + std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + items.insert(items.end(), tailitems.begin(), tailitems.end()); + std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + Slice nexttail(items, true); + return getitem_next(nexthead, nexttail, carry, advanced, length, stride); + } + } + + else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + NumpyArray out = getitem_next(nexthead, nexttail, carry, advanced, length, stride); - else if (SliceArray64* array = dynamic_cast(head.get())) { - if (ndim() < 2) { - throw 
std::invalid_argument("too many indexes for array"); + std::vector outshape = { (ssize_t)length, 1 }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + std::vector outstrides = { out.strides_[0] }; + outstrides.insert(outstrides.end(), out.strides_.begin(), out.strides_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } - NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); - std::shared_ptr nexthead = tail.head(); - Slice nexttail = tail.tail(); + else if (SliceArray64* array = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too many indexes for array"); + } - Index64 flathead = array->ravel(); - Error regularize_error = awkward_regularize_arrayslice_64( - flathead.ptr().get(), - flathead.length(), - shape_[1]); - HANDLE_ERROR(regularize_error) + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); - if (advanced.length() == 0) { - Index64 nextcarry(carry.length()*flathead.length()); - Index64 nextadvanced(carry.length()*flathead.length()); - awkward_numpyarray_getitem_next_array_64( - nextcarry.ptr().get(), - nextadvanced.ptr().get(), - carry.ptr().get(), + Index64 flathead = array->ravel(); + Error regularize_error = awkward_regularize_arrayslice_64( flathead.ptr().get(), - carry.length(), flathead.length(), - shape_[1]); // because this is contiguous - - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, length*flathead.length(), next.strides_[0]); + shape_[1]); + HANDLE_ERROR(regularize_error) + + if (advanced.length() == 0) { + Index64 nextcarry(carry.length()*flathead.length()); + Index64 nextadvanced(carry.length()*flathead.length()); + awkward_numpyarray_getitem_next_array_64( + nextcarry.ptr().get(), + 
nextadvanced.ptr().get(), + carry.ptr().get(), + flathead.ptr().get(), + carry.length(), + flathead.length(), + shape_[1]); // because this is contiguous + + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, nextadvanced, length*flathead.length(), next.strides_[0]); + + std::vector outshape = { (ssize_t)length }; + std::vector arrayshape = array->shape(); + for (auto x = arrayshape.begin(); x != arrayshape.end(); ++x) { + outshape.push_back((ssize_t)(*x)); + } + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outshape = { (ssize_t)length }; - std::vector arrayshape = array->shape(); - for (auto x = arrayshape.begin(); x != arrayshape.end(); ++x) { - outshape.push_back((ssize_t)(*x)); + std::vector outstrides(out.strides_.begin(), out.strides_.end()); + for (auto x = arrayshape.rbegin(); x != arrayshape.rend(); ++x) { + outstrides.insert(outstrides.begin(), ((ssize_t)(*x))*outstrides[0]); + } + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - std::vector outstrides(out.strides_.begin(), out.strides_.end()); - for (auto x = arrayshape.rbegin(); x != arrayshape.rend(); ++x) { - outstrides.insert(outstrides.begin(), ((ssize_t)(*x))*outstrides[0]); + else { + Index64 nextcarry(carry.length()); + Index64 nextadvanced(carry.length()); + awkward_numpyarray_getitem_next_array_advanced_64( + nextcarry.ptr().get(), + carry.ptr().get(), + advanced.ptr().get(), + flathead.ptr().get(), + carry.length(), + shape_[1]); // because this is contiguous + + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length*array->length(), next.strides_[0]); + + std::vector outshape = { (ssize_t)length }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); } - 
return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } else { - Index64 nextcarry(carry.length()); - Index64 nextadvanced(carry.length()); - awkward_numpyarray_getitem_next_array_advanced_64( - nextcarry.ptr().get(), - carry.ptr().get(), - advanced.ptr().get(), - flathead.ptr().get(), - carry.length(), - shape_[1]); // because this is contiguous - - NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length*array->length(), next.strides_[0]); - - std::vector outshape = { (ssize_t)length }; - outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); + throw std::runtime_error("unrecognized slice item type"); } } - - else { - throw std::runtime_error("unrecognized slice item type"); - } } diff --git a/src/libawkward/Slice.cpp b/src/libawkward/Slice.cpp index cfff47add4..111624da9b 100644 --- a/src/libawkward/Slice.cpp +++ b/src/libawkward/Slice.cpp @@ -6,268 +6,266 @@ #include "awkward/Slice.h" -using namespace awkward; - -const std::string SliceAt::tostring() const { - return std::to_string(at_); -} +namespace awkward { + const std::string SliceAt::tostring() const { + return std::to_string(at_); + } -const std::string SliceRange::tostring() const { - return (hasstart() ? std::to_string(start_) : std::string("")) + std::string(":") + - (hasstop() ? std::to_string(stop_) : std::string("")) + std::string(":") + - (step_ != 1 ? std::to_string(step_) : std::string("")); -} + const std::string SliceRange::tostring() const { + return (hasstart() ? std::to_string(start_) : std::string("")) + std::string(":") + + (hasstop() ? std::to_string(stop_) : std::string("")) + std::string(":") + + (step_ != 1 ? 
std::to_string(step_) : std::string("")); + } -const std::string SliceEllipsis::tostring() const { - return std::string("..."); -} + const std::string SliceEllipsis::tostring() const { + return std::string("..."); + } -const std::string SliceNewAxis::tostring() const { - return std::string("newaxis"); -} + const std::string SliceNewAxis::tostring() const { + return std::string("newaxis"); + } -template -const std::string SliceArrayOf::tostring() const { - return std::string("array(") + tostring_part() + std::string(")"); -} + template + const std::string SliceArrayOf::tostring() const { + return std::string("array(") + tostring_part() + std::string(")"); + } -template -const std::string SliceArrayOf::tostring_part() const { - std::stringstream out; - out << "["; - if (shape_.size() == 1) { - if (shape_[0] < 6) { - for (int64_t i = 0; i < shape_[0]; i++) { - if (i != 0) { - out << ", "; - } - out << (T)index_.get(i*strides_[0]); - } - } - else { - for (int64_t i = 0; i < 3; i++) { - if (i != 0) { - out << ", "; + template + const std::string SliceArrayOf::tostring_part() const { + std::stringstream out; + out << "["; + if (shape_.size() == 1) { + if (shape_[0] < 6) { + for (int64_t i = 0; i < shape_[0]; i++) { + if (i != 0) { + out << ", "; + } + out << (T)index_.getitem_at(i*strides_[0]); } - out << (T)index_.get(i*strides_[0]); } - out << ", ..., "; - for (int64_t i = shape_[0] - 3; i < shape_[0]; i++) { - if (i != shape_[0] - 3) { - out << ", "; + else { + for (int64_t i = 0; i < 3; i++) { + if (i != 0) { + out << ", "; + } + out << (T)index_.getitem_at(i*strides_[0]); } - out << (T)index_.get(i*strides_[0]); - } - } - } - else { - std::vector shape(shape_.begin() + 1, shape_.end()); - std::vector strides(strides_.begin() + 1, strides_.end()); - if (shape_[0] < 6) { - for (int64_t i = 0; i < shape_[0]; i++) { - if (i != 0) { - out << ", "; + out << ", ..., "; + for (int64_t i = shape_[0] - 3; i < shape_[0]; i++) { + if (i != shape_[0] - 3) { + out << ", "; + } + 
out << (T)index_.getitem_at(i*strides_[0]); } - IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); - SliceArrayOf subarray(index, shape, strides); - out << subarray.tostring_part(); } } else { - for (int64_t i = 0; i < 3; i++) { - if (i != 0) { - out << ", "; + std::vector shape(shape_.begin() + 1, shape_.end()); + std::vector strides(strides_.begin() + 1, strides_.end()); + if (shape_[0] < 6) { + for (int64_t i = 0; i < shape_[0]; i++) { + if (i != 0) { + out << ", "; + } + IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); + SliceArrayOf subarray(index, shape, strides); + out << subarray.tostring_part(); } - IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); - SliceArrayOf subarray(index, shape, strides); - out << subarray.tostring_part(); } - out << ", ..., "; - for (int64_t i = shape_[0] - 3; i < shape_[0]; i++) { - if (i != shape_[0] - 3) { - out << ", "; + else { + for (int64_t i = 0; i < 3; i++) { + if (i != 0) { + out << ", "; + } + IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); + SliceArrayOf subarray(index, shape, strides); + out << subarray.tostring_part(); + } + out << ", ..., "; + for (int64_t i = shape_[0] - 3; i < shape_[0]; i++) { + if (i != shape_[0] - 3) { + out << ", "; + } + IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); + SliceArrayOf subarray(index, shape, strides); + out << subarray.tostring_part(); } - IndexOf index(index_.ptr(), index_.offset() + i*strides_[0], shape_[1]); - SliceArrayOf subarray(index, shape, strides); - out << subarray.tostring_part(); } } + out << "]"; + return out.str(); } - out << "]"; - return out.str(); -} -template -const IndexOf SliceArrayOf::ravel() const { - int64_t length = 1; - for (int64_t i = 0; i < ndim(); i++) { - length *= shape_[(size_t)i]; - } + template + const IndexOf SliceArrayOf::ravel() const { + int64_t length = 1; + for (int64_t i = 0; i < ndim(); i++) { + length *= 
shape_[(size_t)i]; + } - IndexOf index(length); - if (std::is_same::value) { - awkward_slicearray_ravel_64(index.ptr().get(), index_.ptr().get(), ndim(), shape_.data(), strides_.data()); - } - else { - throw std::runtime_error("unrecognized SliceArrayOf type"); - } + IndexOf index(length); + if (std::is_same::value) { + awkward_slicearray_ravel_64(index.ptr().get(), index_.ptr().get(), ndim(), shape_.data(), strides_.data()); + } + else { + throw std::runtime_error("unrecognized SliceArrayOf type"); + } - return index; -} + return index; + } -namespace awkward { template class SliceArrayOf; -} -int64_t Slice::length() const { - return (int64_t)items_.size(); -} + int64_t Slice::length() const { + return (int64_t)items_.size(); + } -int64_t Slice::dimlength() const { - int64_t out = 0; - for (auto x : items_) { - if (dynamic_cast(x.get()) != nullptr) { - out += 1; - } - else if (dynamic_cast(x.get()) != nullptr) { - out += 1; - } - else if (dynamic_cast(x.get()) != nullptr) { - out += 1; + int64_t Slice::dimlength() const { + int64_t out = 0; + for (auto x : items_) { + if (dynamic_cast(x.get()) != nullptr) { + out += 1; + } + else if (dynamic_cast(x.get()) != nullptr) { + out += 1; + } + else if (dynamic_cast(x.get()) != nullptr) { + out += 1; + } } + return out; } - return out; -} -const std::shared_ptr Slice::head() const { - if (items_.size() != 0) { - return items_[0]; - } - else { - return std::shared_ptr(nullptr); + const std::shared_ptr Slice::head() const { + if (items_.size() != 0) { + return items_[0]; + } + else { + return std::shared_ptr(nullptr); + } } -} -const Slice Slice::tail() const { - std::vector> items; - if (items_.size() != 0) { - items.insert(items.end(), items_.begin() + 1, items_.end()); + const Slice Slice::tail() const { + std::vector> items; + if (items_.size() != 0) { + items.insert(items.end(), items_.begin() + 1, items_.end()); + } + return Slice(items, true); } - return Slice(items, true); -} -const std::string Slice::tostring() 
const { - std::stringstream out; - out << "["; - for (size_t i = 0; i < items_.size(); i++) { - if (i != 0) { - out << ", "; + const std::string Slice::tostring() const { + std::stringstream out; + out << "["; + for (size_t i = 0; i < items_.size(); i++) { + if (i != 0) { + out << ", "; + } + out << items_[i].get()->tostring(); } - out << items_[i].get()->tostring(); + out << "]"; + return out.str(); } - out << "]"; - return out.str(); -} -void Slice::append(const std::shared_ptr& item) { - assert(!sealed_); - items_.push_back(item); -} + void Slice::append(const std::shared_ptr& item) { + assert(!sealed_); + items_.push_back(item); + } -void Slice::become_sealed() { - assert(!sealed_); + void Slice::become_sealed() { + assert(!sealed_); - std::vector shape; - for (size_t i = 0; i < items_.size(); i++) { - if (SliceArray64* array = dynamic_cast(items_[i].get())) { - if (shape.size() == 0) { - shape = array->shape(); - } - else if (shape.size() != array->ndim()) { - throw std::invalid_argument("cannot broadcast arrays in slice"); - } - else { - std::vector arrayshape = array->shape(); - for (size_t j = 0; j < shape.size(); j++) { - if (arrayshape[j] > shape[j]) { - shape[j] = arrayshape[j]; + std::vector shape; + for (size_t i = 0; i < items_.size(); i++) { + if (SliceArray64* array = dynamic_cast(items_[i].get())) { + if (shape.size() == 0) { + shape = array->shape(); + } + else if (shape.size() != array->ndim()) { + throw std::invalid_argument("cannot broadcast arrays in slice"); + } + else { + std::vector arrayshape = array->shape(); + for (size_t j = 0; j < shape.size(); j++) { + if (arrayshape[j] > shape[j]) { + shape[j] = arrayshape[j]; + } } } } } - } - if (shape.size() != 0) { - for (size_t i = 0; i < items_.size(); i++) { - if (SliceAt* at = dynamic_cast(items_[i].get())) { - Index64 index(1); - index.ptr().get()[0] = at->at(); - std::vector strides; - for (size_t j = 0; j < shape.size(); j++) { - strides.push_back(0); - } - items_[i] = std::shared_ptr(new 
SliceArray64(index, shape, strides)); - } - else if (SliceArray64* array = dynamic_cast(items_[i].get())) { - std::vector arrayshape = array->shape(); - std::vector arraystrides = array->strides(); - std::vector strides; - for (size_t j = 0; j < shape.size(); j++) { - if (arrayshape[j] == shape[j]) { - strides.push_back(arraystrides[j]); - } - else if (arrayshape[j] == 1) { + if (shape.size() != 0) { + for (size_t i = 0; i < items_.size(); i++) { + if (SliceAt* at = dynamic_cast(items_[i].get())) { + Index64 index(1); + index.ptr().get()[0] = at->at(); + std::vector strides; + for (size_t j = 0; j < shape.size(); j++) { strides.push_back(0); } - else { - throw std::invalid_argument("cannot broadcast arrays in slice"); + items_[i] = std::shared_ptr(new SliceArray64(index, shape, strides)); + } + else if (SliceArray64* array = dynamic_cast(items_[i].get())) { + std::vector arrayshape = array->shape(); + std::vector arraystrides = array->strides(); + std::vector strides; + for (size_t j = 0; j < shape.size(); j++) { + if (arrayshape[j] == shape[j]) { + strides.push_back(arraystrides[j]); + } + else if (arrayshape[j] == 1) { + strides.push_back(0); + } + else { + throw std::invalid_argument("cannot broadcast arrays in slice"); + } } + items_[i] = std::shared_ptr(new SliceArray64(array->index(), shape, strides)); } - items_[i] = std::shared_ptr(new SliceArray64(array->index(), shape, strides)); } - } - std::string types; - for (size_t i = 0; i < items_.size(); i++) { - if (dynamic_cast(items_[i].get()) != nullptr) { - types.push_back('@'); - } - else if (dynamic_cast(items_[i].get()) != nullptr) { - types.push_back(':'); - } - else if (dynamic_cast(items_[i].get()) != nullptr) { - types.push_back('.'); - } - else if (dynamic_cast(items_[i].get()) != nullptr) { - types.push_back('1'); - } - else if (dynamic_cast(items_[i].get()) != nullptr) { - types.push_back('A'); + std::string types; + for (size_t i = 0; i < items_.size(); i++) { + if (dynamic_cast(items_[i].get()) != 
nullptr) { + types.push_back('@'); + } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back(':'); + } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back('.'); + } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back('1'); + } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back('A'); + } } - } - if (std::count(types.begin(), types.end(), '.') > 1) { - throw std::invalid_argument("a slice can have no more than one ellipsis ('...')"); - } + if (std::count(types.begin(), types.end(), '.') > 1) { + throw std::invalid_argument("a slice can have no more than one ellipsis ('...')"); + } - size_t numadvanced = std::count(types.begin(), types.end(), 'A'); - if (numadvanced != 0) { - types = types.substr(0, types.find_last_of("A") + 1).substr(types.find_first_of("A")); - if (numadvanced != types.size()) { - throw std::invalid_argument("advanced indexes separated by basic indexes is not permitted (simple integers are advanced when any arrays are present)"); + size_t numadvanced = std::count(types.begin(), types.end(), 'A'); + if (numadvanced != 0) { + types = types.substr(0, types.find_last_of("A") + 1).substr(types.find_first_of("A")); + if (numadvanced != types.size()) { + throw std::invalid_argument("advanced indexes separated by basic indexes is not permitted (simple integers are advanced when any arrays are present)"); + } } } - } - sealed_ = true; -} + sealed_ = true; + } -bool Slice::isadvanced() const { - assert(sealed_); - for (size_t i = 0; i < items_.size(); i++) { - if (dynamic_cast(items_[i].get()) != nullptr) { - return true; + bool Slice::isadvanced() const { + assert(sealed_); + for (size_t i = 0; i < items_.size(); i++) { + if (dynamic_cast(items_[i].get()) != nullptr) { + return true; + } } + return false; } - return false; } diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index d15e0c9ec5..ac76080281 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -12,6 +12,7 @@ 
#include "awkward/Content.h" #include "awkward/Iterator.h" #include "awkward/NumpyArray.h" +#include "awkward/ListArray.h" #include "awkward/ListOffsetArray.h" namespace py = pybind11; @@ -30,7 +31,7 @@ class pyobject_deleter { PyObject* pyobj_; }; -py::object unwrap(std::shared_ptr content) { +py::object box(std::shared_ptr content) { if (ak::NumpyArray* raw = dynamic_cast(content.get())) { if (raw->isscalar()) { return py::array(py::buffer_info( @@ -46,6 +47,12 @@ py::object unwrap(std::shared_ptr content) { return py::cast(*raw); } } + else if (ak::ListArray32* raw = dynamic_cast(content.get())) { + return py::cast(*raw); + } + else if (ak::ListArray64* raw = dynamic_cast(content.get())) { + return py::cast(*raw); + } else if (ak::ListOffsetArray32* raw = dynamic_cast(content.get())) { return py::cast(*raw); } @@ -53,11 +60,11 @@ py::object unwrap(std::shared_ptr content) { return py::cast(*raw); } else { - throw std::runtime_error("missing unwrapper for Content subtype"); + throw std::runtime_error("missing boxer for Content subtype"); } } -py::object unwrap(std::shared_ptr id) { +py::object box(std::shared_ptr id) { if (id.get() == nullptr) { return py::none(); } @@ -68,45 +75,64 @@ py::object unwrap(std::shared_ptr id) { return py::cast(*raw); } else { - throw std::runtime_error("missing unwrapper for Identity subtype"); + throw std::runtime_error("missing boxer for Identity subtype"); } } -template -void setid(CONTENT& self, py::object obj) { - ak::Identity32* id32; - ak::Identity64* id64; +std::shared_ptr unbox_content(py::object obj) { try { - id32 = obj.cast(); + return obj.cast()->shallow_copy(); } - catch (py::cast_error err) { - try { - id64 = obj.cast(); - } - catch (py::cast_error err) { - throw std::invalid_argument("'id' member must be Identity32 or Identity64"); - } - if (id64->length() != self.length()) { - throw std::invalid_argument("Identity must have the same length as the Content to which it is assigned"); - } - if (id64 != nullptr) { - 
self.setid(std::shared_ptr(new ak::Identity64(id64->ref(), id64->fieldloc(), id64->offset(), id64->width(), id64->length(), id64->ptr()))); - return; - } + catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); } - if (id32 != nullptr) { - self.setid(std::shared_ptr(new ak::Identity32(id32->ref(), id32->fieldloc(), id32->offset(), id32->width(), id32->length(), id32->ptr()))); + catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); } - else { - self.setid(std::shared_ptr(nullptr)); + catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); } + catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); + } + catch (py::cast_error err) { } + throw std::invalid_argument("content argument must be a Content subtype"); +} + +std::shared_ptr unbox_id(py::object id) { + if (id.is(py::none())) { + return std::shared_ptr(nullptr); + } + try { + return id.cast()->shallow_copy(); + } + catch (py::cast_error err) { } + try { + return id.cast()->shallow_copy(); + } + catch (py::cast_error err) { } + throw std::invalid_argument("id argument must be an Identity subtype"); +} + +template +std::string repr(T& self) { + return self.tostring(); +} + +template +int64_t len(T& self) { + return self.length(); } /////////////////////////////////////////////////////////////// Index template py::class_> make_IndexOf(py::handle m, std::string name) { - return py::class_>(m, name.c_str(), py::buffer_protocol()) + return (py::class_>(m, name.c_str(), py::buffer_protocol()) .def_buffer([](ak::IndexOf& self) -> py::buffer_info { return py::buffer_info( reinterpret_cast(reinterpret_cast(self.ptr().get()) + self.offset()*sizeof(T)), @@ -131,28 +157,34 @@ py::class_> make_IndexOf(py::handle m, std::string name) { (T)info.shape[0]); })) - .def("__repr__", [](ak::IndexOf& self) -> const std::string { - return self.tostring(); - }) - + .def("__repr__", &ak::IndexOf::tostring) .def("__len__", &ak::IndexOf::length) - 
.def("__getitem__", &ak::IndexOf::get) - .def("__getitem__", [](ak::IndexOf& self, py::slice slice) -> ak::IndexOf { - size_t start, stop, step, length; - if (!slice.compute(self.length(), &start, &stop, &step, &length)) { - throw py::error_already_set(); - } - return self.slice((int64_t)start, (int64_t)stop); - }) + .def("__getitem__", &ak::IndexOf::getitem_at) + .def("__getitem__", &ak::IndexOf::getitem_range) - ; + ); } /////////////////////////////////////////////////////////////// Identity +template +py::object getid(T& self) { + return box(self.id()); +} + +template +void setid(T& self, py::object id) { + self.setid(unbox_id(id)); +} + +template +void setid_noarg(T& self) { + self.setid(); +} + template py::class_> make_IdentityOf(py::handle m, std::string name) { - return py::class_>(m, name.c_str(), py::buffer_protocol()) + return (py::class_>(m, name.c_str(), py::buffer_protocol()) .def_buffer([](ak::IdentityOf& self) -> py::buffer_info { return py::buffer_info( reinterpret_cast(reinterpret_cast(self.ptr().get()) + self.offset()*sizeof(T)), @@ -181,21 +213,10 @@ py::class_> make_IdentityOf(py::handle m, std::string name) { std::shared_ptr(reinterpret_cast(info.ptr), pyobject_deleter(array.ptr()))); })) - .def("__repr__", [](ak::IdentityOf& self) -> const std::string { - return self.tostring(); - }) - + .def("__repr__", &ak::IdentityOf::tostring) .def("__len__", &ak::IdentityOf::length) .def("__getitem__", &ak::IdentityOf::get) - .def("__getitem__", [](ak::IdentityOf& self, py::slice slice) -> ak::IdentityOf { - size_t start, stop, step, length; - if (!slice.compute(self.length(), &start, &stop, &step, &length)) { - throw py::error_already_set(); - } - std::shared_ptr out = self.slice((int64_t)start, (int64_t)stop); - ak::IdentityOf* raw = dynamic_cast*>(out.get()); - return ak::IdentityOf(raw->ref(), raw->fieldloc(), raw->offset(), raw->width(), raw->length(), raw->ptr()); - }) + .def("__getitem__", &ak::IdentityOf::getitem_range) 
.def_property_readonly("ref", &ak::IdentityOf::ref) .def_property_readonly("fieldloc", &ak::IdentityOf::fieldloc) @@ -205,7 +226,7 @@ py::class_> make_IdentityOf(py::handle m, std::string name) { return py::array(self); }) - ; + ); } /////////////////////////////////////////////////////////////// Slice @@ -323,7 +344,7 @@ ak::Slice toslice(py::object obj) { } py::class_ make_Slice(py::handle m, std::string name) { - return py::class_(m, name.c_str()) + return (py::class_(m, name.c_str()) .def(py::init([](py::object obj) { return toslice(obj); })) @@ -331,8 +352,7 @@ py::class_ make_Slice(py::handle m, std::string name) { .def("__repr__", [](ak::Slice& self) -> const std::string { return self.tostring(); }) - - ; + ); } /////////////////////////////////////////////////////////////// Iterator @@ -342,54 +362,70 @@ py::class_ make_Iterator(py::handle m, std::string name) { if (iterator.isdone()) { throw py::stop_iteration(); } - return unwrap(iterator.next()); + return box(iterator.next()); }; - return py::class_(m, name.c_str()) - .def(py::init([](ak::NumpyArray& content) -> ak::Iterator { - return ak::Iterator(std::shared_ptr(new ak::NumpyArray(content))); + return (py::class_(m, name.c_str()) + .def(py::init([](py::object content) -> ak::Iterator { + return ak::Iterator(unbox_content(content)); })) - .def(py::init([](ak::ListOffsetArrayOf& content) -> ak::Iterator { - return ak::Iterator(std::shared_ptr(new ak::ListOffsetArrayOf(content))); - })) - .def(py::init([](ak::ListOffsetArrayOf& content) -> ak::Iterator { - return ak::Iterator(std::shared_ptr(new ak::ListOffsetArrayOf(content))); - })) - + .def("__repr__", &ak::Iterator::tostring) .def("__next__", next) .def("next", next) - - .def("__repr__", [](ak::Iterator& self) -> const std::string { - return self.tostring(); - }) - - ; + ); } -/////////////////////////////////////////////////////////////// NumpyArray +/////////////////////////////////////////////////////////////// Content -template -ak::NumpyArray 
init_NumpyArray(py::array array, py::object id) { - py::buffer_info info = array.request(); - if (info.ndim == 0) { - throw std::invalid_argument("NumpyArray must not be scalar; try array.reshape(1)"); +template +py::object getitem(T& self, py::object obj) { + if (py::isinstance(obj)) { + return box(self.getitem_at(obj.cast())); } - if (info.shape.size() != info.ndim || info.strides.size() != info.ndim) { - throw std::invalid_argument("NumpyArray len(shape) != ndim or len(strides) != ndim"); + if (py::isinstance(obj)) { + py::object pystep = obj.attr("step"); + if ((py::isinstance(pystep) && pystep.cast() == 1) || pystep.is(py::none())) { + int64_t start = ak::Slice::none(); + int64_t stop = ak::Slice::none(); + py::object pystart = obj.attr("start"); + py::object pystop = obj.attr("stop"); + if (!pystart.is(py::none())) { + start = pystart.cast(); + } + if (!pystop.is(py::none())) { + stop = pystop.cast(); + } + return box(self.getitem_range(start, stop)); + } + // NOTE: control flow can pass through here; don't make the last line an 'else'! 
} - ak::NumpyArray out = ak::NumpyArray(std::shared_ptr(nullptr), std::shared_ptr( - reinterpret_cast(info.ptr), pyobject_deleter(array.ptr())), - info.shape, - info.strides, - 0, - info.itemsize, - info.format); - setid(out, id); - return out; + return box(self.getitem(toslice(obj))); } +template +ak::Iterator iter(T& self) { + return ak::Iterator(self.shallow_copy()); +} + +template +py::class_ content(py::class_& x) { + return x.def("__repr__", &repr) + .def_property("id", [](T& self) -> py::object { return box(self.id()); }, [](T& self, py::object id) -> void { self.setid(unbox_id(id)); }) + .def("setid", [](T& self, py::object id) -> void { + self.setid(unbox_id(id)); + }) + .def("setid", [](T& self) -> void { + self.setid(); + }) + .def("__len__", &len) + .def("__getitem__", &getitem) + .def("__iter__", &iter); +} + +/////////////////////////////////////////////////////////////// NumpyArray + py::class_ make_NumpyArray(py::handle m, std::string name) { - return py::class_(m, name.c_str(), py::buffer_protocol()) + return content(py::class_(m, name.c_str(), py::buffer_protocol()) .def_buffer([](ak::NumpyArray& self) -> py::buffer_info { return py::buffer_info( self.byteptr(), @@ -400,15 +436,22 @@ py::class_ make_NumpyArray(py::handle m, std::string name) { self.strides()); }) - .def(py::init(&init_NumpyArray), py::arg("array"), py::arg("id") = py::none()) - .def(py::init(&init_NumpyArray), py::arg("array"), py::arg("id") = py::none()) - - .def_property("id", [](ak::NumpyArray& self) -> py::object { return unwrap(self.id()); }, &setid) - .def("setid", &setid) - .def("setid", [](ak::NumpyArray& self) -> void { self.setid(); }) - .def("__repr__", [](ak::NumpyArray& self) -> const std::string { - return self.tostring(); - }) + .def(py::init([](py::array array, py::object id) -> ak::NumpyArray { + py::buffer_info info = array.request(); + if (info.ndim == 0) { + throw std::invalid_argument("NumpyArray must not be scalar; try array.reshape(1)"); + } + if 
(info.shape.size() != info.ndim || info.strides.size() != info.ndim) { + throw std::invalid_argument("NumpyArray len(shape) != ndim or len(strides) != ndim"); + } + return ak::NumpyArray(unbox_id(id), std::shared_ptr( + reinterpret_cast(info.ptr), pyobject_deleter(array.ptr())), + info.shape, + info.strides, + 0, + info.itemsize, + info.format); + }), py::arg("array"), py::arg("id") = py::none()) .def_property_readonly("shape", &ak::NumpyArray::shape) .def_property_readonly("strides", &ak::NumpyArray::strides) @@ -421,87 +464,44 @@ py::class_ make_NumpyArray(py::handle m, std::string name) { .def_property_readonly("iscontiguous", &ak::NumpyArray::iscontiguous) .def("contiguous", &ak::NumpyArray::contiguous) .def("become_contiguous", &ak::NumpyArray::become_contiguous) + ); +} - .def("__len__", &ak::NumpyArray::length) - .def("__getitem__", [](ak::NumpyArray& self, int64_t at) -> py::object { - return unwrap(self.get(at)); - }) - .def("__getitem__", [](ak::NumpyArray& self, py::object pyslice) -> py::object { - if (py::isinstance(pyslice)) { - py::object pystep = pyslice.attr("step"); - if ((py::isinstance(pystep) && pystep.cast() == 1) || pystep.is(py::none())) { - int64_t start = ak::Slice::none(); - int64_t stop = ak::Slice::none(); - py::object pystart = pyslice.attr("start"); - py::object pystop = pyslice.attr("stop"); - if (!pystart.is(py::none())) { - start = pystart.cast(); - } - if (!pystop.is(py::none())) { - stop = pystop.cast(); - } - return unwrap(self.slice(start, stop)); - } - } - return unwrap(self.getitem(toslice(pyslice))); - }) - .def("__iter__", [](ak::NumpyArray& self) -> ak::Iterator { - return ak::Iterator(std::shared_ptr(new ak::NumpyArray(self))); - }) +/////////////////////////////////////////////////////////////// ListArray - ; +template +py::class_> make_ListArrayOf(py::handle m, std::string name) { + return content(py::class_>(m, name.c_str()) + .def(py::init([](ak::IndexOf& starts, ak::IndexOf& stops, py::object content, py::object id) 
-> ak::ListArrayOf { + return ak::ListArrayOf(unbox_id(id), starts, stops, unbox_content(content)); + }), py::arg("starts"), py::arg("stops"), py::arg("content"), py::arg("id") = py::none()) + + .def_property_readonly("starts", &ak::ListArrayOf::starts) + .def_property_readonly("stops", &ak::ListArrayOf::stops) + .def_property_readonly("content", [](ak::ListArrayOf& self) -> py::object { + return box(self.content()); + }) + ); } /////////////////////////////////////////////////////////////// ListOffsetArray -template -ak::ListOffsetArrayOf init_ListOffsetArrayOf(ak::IndexOf& offsets, CONTENT& content, py::object id) { - ak::ListOffsetArrayOf out = ak::ListOffsetArrayOf(std::shared_ptr(nullptr), offsets, std::shared_ptr(content.shallow_copy())); - setid(out, id); - return out; -} - template py::class_> make_ListOffsetArrayOf(py::handle m, std::string name) { - return py::class_>(m, name.c_str()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) - .def(py::init(&init_ListOffsetArrayOf), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) + return content(py::class_>(m, name.c_str()) + .def(py::init([](ak::IndexOf& offsets, py::object content, py::object id) -> ak::ListOffsetArrayOf { + return ak::ListOffsetArrayOf(unbox_id(id), offsets, std::shared_ptr(unbox_content(content))); + }), py::arg("offsets"), py::arg("content"), py::arg("id") = py::none()) .def_property_readonly("offsets", &ak::ListOffsetArrayOf::offsets) .def_property_readonly("content", 
[](ak::ListOffsetArrayOf& self) -> py::object { - return unwrap(self.content()); + return box(self.content()); }) - - .def_property("id", [](ak::ListOffsetArrayOf& self) -> py::object { return unwrap(self.id()); }, &setid>) - .def("setid", &setid>) - .def("setid", [](ak::ListOffsetArrayOf& self) -> void { self.setid(); }) - .def("__repr__", [](ak::ListOffsetArrayOf& self) -> const std::string { - return self.tostring(); - }) - - .def("__len__", &ak::ListOffsetArrayOf::length) - .def("__getitem__", [](ak::ListOffsetArrayOf& self, int64_t at) -> py::object { - return unwrap(self.get(at)); - }) - .def("__getitem__", [](ak::ListOffsetArrayOf& self, py::slice slice) -> py::object { - size_t start, stop, step, length; - if (!slice.compute(self.length(), &start, &stop, &step, &length)) { - throw py::error_already_set(); - } - return unwrap(self.slice((int64_t)start, (int64_t)stop)); - }) - - .def("__iter__", [](ak::ListOffsetArrayOf& self) -> ak::Iterator { - return ak::Iterator(std::shared_ptr(new ak::ListOffsetArrayOf(self))); - }) - - ; + ); } +/////////////////////////////////////////////////////////////// module + PYBIND11_MODULE(layout, m) { #ifdef VERSION_INFO m.attr("__version__") = VERSION_INFO; @@ -522,6 +522,9 @@ PYBIND11_MODULE(layout, m) { make_NumpyArray(m, "NumpyArray"); + make_ListArrayOf(m, "ListArray32"); + make_ListArrayOf(m, "ListArray64"); + make_ListOffsetArrayOf(m, "ListOffsetArray32"); make_ListOffsetArrayOf(m, "ListOffsetArray64"); } diff --git a/tests/test_PR10_rawarray_getitem.cpp b/tests/test_PR10_rawarray_getitem.cpp new file mode 100644 index 0000000000..6154b112f8 --- /dev/null +++ b/tests/test_PR10_rawarray_getitem.cpp @@ -0,0 +1,58 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include +#include + +#include "awkward/Identity.h" +#include "awkward/RawArray.h" +#include "awkward/Slice.h" + +using namespace awkward; + +void rawarray() { + RawArrayOf data(Identity::none(), 4); + 
*data.borrow(0) = 0.0f; + *data.borrow(1) = 1.1f; + *data.borrow(2) = 2.2f; + *data.borrow(3) = 3.3f; + assert(*dynamic_cast*>(data.get(1).get())->borrow() == 1.1f); + assert(*dynamic_cast*>(data.slice(1, 3).get())->borrow(0) == 1.1f); + assert(*dynamic_cast*>(data.slice(1, 3).get())->borrow(1) == 2.2f); +} + +void slices() { + RawArrayOf data(Identity::none(), 9); + *data.borrow(0) = 0.0f; + *data.borrow(1) = 1.1f; + *data.borrow(2) = 2.2f; + *data.borrow(3) = 3.3f; + *data.borrow(4) = 4.4f; + *data.borrow(5) = 5.5f; + *data.borrow(6) = 6.6f; + *data.borrow(7) = 7.7f; + *data.borrow(8) = 8.8f; + *data.borrow(9) = 9.9f; + + // Slice none(std::vector>(), true); + // assert(*dynamic_cast*>(data.getitem(none).get())->borrow() == 0.0f); + + Slice at1(std::vector>({ std::shared_ptr(new SliceAt(1)) }), true); + assert(*dynamic_cast*>(data.getitem(at1).get())->borrow() == 1.1f); + Slice at2(std::vector>({ std::shared_ptr(new SliceAt(2)) }), true); + assert(*dynamic_cast*>(data.getitem(at2).get())->borrow() == 2.2f); + + Slice range1(std::vector>({ std::shared_ptr(new SliceRange(1, 3, Slice::none())) })); + assert(*dynamic_cast*>(data.getitem(range1).get())->borrow(0) == 1.1f); + + Slice range2(std::vector>({ std::shared_ptr(new SliceRange(Slice::none(), 4, 1)) })); + assert(*dynamic_cast*>(data.getitem(range2).get())->borrow(0) == 0.0f); + + // Slice range3(std::vector>({ std::shared_ptr(new SliceRange(1, Slice::none(), 2)) })); + // assert(*dynamic_cast*>(data.getitem(range3).get())->borrow(1) == 3.3f); + +} + +int main(int, char**) { + rawarray(); + slices(); +} diff --git a/tests/test_PR11_listarray.py b/tests/test_PR11_listarray.py new file mode 100644 index 0000000000..17fc93f023 --- /dev/null +++ b/tests/test_PR11_listarray.py @@ -0,0 +1,149 @@ +# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +import sys + +import pytest +import numpy + +import awkward1 + +py27 = (sys.version_info[0] < 3) + +content = 
awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) +starts1 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6])) +stops1 = awkward1.layout.Index64(numpy.array([3, 3, 5, 6, 9])) +offsets1 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) +starts2 = awkward1.layout.Index64(numpy.array([0, 2, 3, 3])) +stops2 = awkward1.layout.Index64(numpy.array([2, 3, 3, 5])) +offsets2 = awkward1.layout.Index64(numpy.array([0, 2, 3, 3, 5])) + +def test_listarray_basic(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array1) == [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[2]) == [4.4, 5.5] + assert awkward1.tolist(array1[1:-1]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array2) == [[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]] + assert awkward1.tolist(array2[1]) == [[4.4, 5.5]] + assert awkward1.tolist(array2[1:-1]) == [[[4.4, 5.5]], []] + +def test_listoffsetarray_basic(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array1) == [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[2]) == [4.4, 5.5] + assert awkward1.tolist(array1[1:-1]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array2) == [[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]] + assert awkward1.tolist(array2[1]) == [[4.4, 5.5]] + assert awkward1.tolist(array2[1:-1]) == [[[4.4, 5.5]], []] + +def test_listarray_at(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array1[2]) == [4.4, 5.5] + assert awkward1.tolist(array1[2,]) == [4.4, 5.5] + assert awkward1.tolist(array1[2, 1:]) == [5.5] + assert awkward1.tolist(array1[2:, 0]) 
== [4.4, 6.6, 7.7] + assert awkward1.tolist(array1[2:, -1]) == [5.5, 6.6, 9.9] + +def test_listoffsetarray_at(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array1[2,]) == [4.4, 5.5] + assert awkward1.tolist(array1[2, 1:]) == [5.5] + assert awkward1.tolist(array1[2:, 0]) == [4.4, 6.6, 7.7] + assert awkward1.tolist(array1[2:, -1]) == [5.5, 6.6, 9.9] + +def test_listarray_slice(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array1[1:-1]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array1[1:-1,]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array2[1:-1]) == [[[4.4, 5.5]], []] + assert awkward1.tolist(array2[1:-1,]) == [[[4.4, 5.5]], []] + +def test_listoffsetarray_slice(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array1[1:-1]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array1[1:-1,]) == [[], [4.4, 5.5], [6.6]] + assert awkward1.tolist(array2[1:-1]) == [[[4.4, 5.5]], []] + assert awkward1.tolist(array2[1:-1,]) == [[[4.4, 5.5]], []] + +def test_listarray_slice_slice(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array1[2:]) == [[4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[2:, 1:]) == [[5.5], [], [8.8, 9.9]] + assert awkward1.tolist(array1[2:,:-1]) == [[4.4], [], [7.7, 8.8]] + +def test_listoffsetarray_slice_slice(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array1[2:]) == [[4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[2:, 1:]) == [[5.5], [], [8.8, 9.9]] + assert 
awkward1.tolist(array1[2:,:-1]) == [[4.4], [], [7.7, 8.8]] + +def test_listarray_ellipsis(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + if not py27: + assert awkward1.tolist(array1[Ellipsis, 1:]) == [[2.2, 3.3], [], [5.5], [], [8.8, 9.9]] + assert awkward1.tolist(array2[Ellipsis, 1:]) == [[[2.2, 3.3], []], [[5.5]], [], [[], [8.8, 9.9]]] + +def test_listoffsetarray_ellipsis(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + if not py27: + assert awkward1.tolist(array1[Ellipsis, 1:]) == [[2.2, 3.3], [], [5.5], [], [8.8, 9.9]] + assert awkward1.tolist(array2[Ellipsis, 1:]) == [[[2.2, 3.3], []], [[5.5]], [], [[], [8.8, 9.9]]] + +def test_listarray_array_slice(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array2[[0, 0, 1, 1, 1, 0]]) == [[[1.1, 2.2, 3.3], []], [[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [[4.4, 5.5]], [[4.4, 5.5]], [[1.1, 2.2, 3.3], []]] + assert awkward1.tolist(array2[[0, 0, 1, 1, 1, 0], :]) == [[[1.1, 2.2, 3.3], []], [[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [[4.4, 5.5]], [[4.4, 5.5]], [[1.1, 2.2, 3.3], []]] + assert awkward1.tolist(array2[[0, 0, 1, 1, 1, 0], :, 1:]) == [[[2.2, 3.3], []], [[2.2, 3.3], []], [[5.5]], [[5.5]], [[5.5]], [[2.2, 3.3], []]] + +def test_listoffsetarray_array_slice(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array2[[0, 0, 1, 1, 1, 0]]) == [[[1.1, 2.2, 3.3], []], [[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [[4.4, 5.5]], [[4.4, 5.5]], [[1.1, 2.2, 3.3], []]] + assert awkward1.tolist(array2[[0, 0, 1, 1, 1, 0], :]) == [[[1.1, 2.2, 3.3], []], [[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [[4.4, 5.5]], [[4.4, 5.5]], [[1.1, 2.2, 3.3], []]] + assert awkward1.tolist(array2[[0, 0, 1, 
1, 1, 0], :, 1:]) == [[[2.2, 3.3], []], [[2.2, 3.3], []], [[5.5]], [[5.5]], [[5.5]], [[2.2, 3.3], []]] + +def test_listarray_array(): + array1 = awkward1.layout.ListArray64(starts1, stops1, content) + array2 = awkward1.layout.ListArray64(starts2, stops2, array1) + assert awkward1.tolist(array1[numpy.array([2, 0, 0, 1, -1])]) == [[4.4, 5.5], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0])]) == [5.5, 2.2, 1.1, 7.7] + + content_deep = awkward1.layout.NumpyArray(numpy.array([[0, 0], [1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60], [7, 70], [8, 80]])) + starts1_deep = awkward1.layout.Index64(numpy.array([0, 3, 6])) + stops1_deep = awkward1.layout.Index64(numpy.array([3, 6, 9])) + array1_deep = awkward1.layout.ListArray64(starts1_deep, stops1_deep, content_deep) + + assert awkward1.tolist(array1_deep) == [[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]] + s = (numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0]), numpy.array([0, 1, 0, 1])) + assert numpy.array([[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]])[s].tolist() == awkward1.tolist(array1_deep[s]) + + s = (numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0]), slice(1, None)) + assert numpy.array([[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]])[s].tolist() == awkward1.tolist(array1_deep[s]) + +def test_listoffsetarray_array(): + array1 = awkward1.layout.ListOffsetArray64(offsets1, content) + array2 = awkward1.layout.ListOffsetArray64(offsets2, array1) + assert awkward1.tolist(array1[numpy.array([2, 0, 0, 1, -1])]) == [[4.4, 5.5], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]] + assert awkward1.tolist(array1[numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0])]) == [5.5, 2.2, 1.1, 7.7] + + content_deep = awkward1.layout.NumpyArray(numpy.array([[0, 0], [1, 10], [2, 20], [3, 30], [4, 40], [5, 50], 
[6, 60], [7, 70], [8, 80]])) + starts1_deep = awkward1.layout.Index64(numpy.array([0, 3, 6])) + stops1_deep = awkward1.layout.Index64(numpy.array([3, 6, 9])) + array1_deep = awkward1.layout.ListArray64(starts1_deep, stops1_deep, content_deep) + + assert awkward1.tolist(array1_deep) == [[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]] + s = (numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0]), numpy.array([0, 1, 0, 1])) + assert numpy.array([[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]])[s].tolist() == awkward1.tolist(array1_deep[s]) + + s = (numpy.array([2, 0, 0, -1]), numpy.array([1, 1, 0, 0]), slice(1, None)) + assert numpy.array([[[0, 0], [1, 10], [2, 20]], [[3, 30], [4, 40], [5, 50]], [[6, 60], [7, 70], [8, 80]]])[s].tolist() == awkward1.tolist(array1_deep[s]) diff --git a/tests/test_PR8_rawarray_and_slices.cpp b/tests/test_PR8_rawarray_and_slices.cpp deleted file mode 100644 index 68ef507280..0000000000 --- a/tests/test_PR8_rawarray_and_slices.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE - -#include -#include - -#include "awkward/Identity.h" -#include "awkward/RawArray.h" -#include "awkward/Slice.h" - -using namespace awkward; - -// void rawarray() { -// RawArrayOf data(Identity::none(), 4); -// *data.borrow(0) = 0.0f; -// *data.borrow(1) = 1.1f; -// *data.borrow(2) = 2.2f; -// *data.borrow(3) = 3.3f; -// assert(*dynamic_cast*>(data.get(1).get())->borrow(0) == 1.1f); -// assert(*dynamic_cast*>(data.slice(1, 3).get())->borrow(0) == 1.1f); -// } -// -// void slices() { -// Slice slice = Slice().with(SliceAt(1)) -// .with(SliceStartStop(1, 3)) -// .with(SliceStartStop(Slice::none(), Slice::none())) -// .with(SliceStartStopStep(Slice::none(), Slice::none(), 2)) -// .with(SliceByteMask(Index8(10))) -// .with(SliceIndex32(Index32(15))) -// .with(SliceIndex64(Index64(20))) -// .with(SliceEllipsis()) -// 
.with(SliceNewAxis()); -// assert(slice.length() == 9); -// } - -int main(int, char**) { - // rawarray(); - // slices(); -}