Skip to content

Commit

Permalink
Finish up getitem: handle all slice types but newaxis. (#14)
Browse files Browse the repository at this point in the history
Brought `NumpyArray`, `ListArray`, and `ListOffsetArray` up-to-date on `getitem` for all cases (except `newaxis`) and tested them all as well. (Not including Numba.)

* Fix warnings on Windows 32-bit.

* Contents all have getitem_at_unsafe and getitem_range_unsafe.

* Iterators check for safety before running; normal getitem checks on demand.

* Finally rename those test_PRs to properly sort them.

* Set up tests.

* listoffsetarray1 tests work.

* Also ListArray and 32 and 64-bit.

* Doubly nested tests work, too, but Identities don't yet.

* NumpyArray, ListArray, and ListOffsetArray getitems are fully tested.

* Adopt the getitem_*_unsafe naming convention in Identity and Index, too.

* Fixed passing of Identity in both PR009 and PR014.

* Cleaned up everything; ready to finish PR.
  • Loading branch information
jpivarski authored Oct 4, 2019
1 parent af21ca9 commit 069fea6
Show file tree
Hide file tree
Showing 31 changed files with 516 additions and 88 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ The following features of awkward 0.x will be features of awkward 1.x.
* 2019-09-26 (PR [#11](../../pull/11)): fully implemented `ListArray` and `ListOffsetArray`'s `__getitem__`.
* 2019-10-02 (PR [#12](../../pull/12)): implemented `ListArray.__getitem__(array)` in Numba, setting the pattern for all the other cases.
* 2019-10-04 (PR [#13](../../pull/13)): turned the `Error` type into a struct and `handle_error` into a function that provides `Identity` (if available) and attempted index information. Numba also handles errors, but with messages that must be known at compile-time.
* 2019-10-04 (PR [#14](../../pull/14)): brought `NumpyArray`, `ListArray`, and `ListOffsetArray` up-to-date on `getitem` for all cases (except `newaxis`) and tested them all as well. (Not including Numba.)

## Roadmap

Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.10
0.1.11
2 changes: 2 additions & 0 deletions awkward1/_numba/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def lower_getitem_slice(context, builder, sig, args):
proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val)

proxyslicein = numba.cgutils.create_struct_proxy(wheretpe)(context, builder, value=whereval)
numba.targets.slicing.fix_slice(builder, proxyslicein, tpe.lower_len(context, builder, numba.intp(tpe), (val,)))

proxysliceout = numba.cgutils.create_struct_proxy(numba.types.slice2_type)(context, builder)
proxysliceout.start = proxyslicein.start
proxysliceout.stop = builder.add(proxyslicein.stop, context.get_constant(numba.intp, 1))
Expand Down
3 changes: 3 additions & 0 deletions include/awkward/Content.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ namespace awkward {
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0;
virtual int64_t length() const = 0;
virtual const std::shared_ptr<Content> shallow_copy() const = 0;
virtual void checksafe() const = 0;
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const = 0;
virtual const std::shared_ptr<Content> getitem_at_unsafe(int64_t at) const = 0;
virtual const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const = 0;
virtual const std::shared_ptr<Content> getitem_range_unsafe(int64_t start, int64_t stop) const = 0;
virtual const std::shared_ptr<Content> getitem(const Slice& where) const;
virtual const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& advanced) const = 0;
virtual const std::shared_ptr<Content> carry(const Index64& carry) const = 0;
Expand Down
11 changes: 7 additions & 4 deletions include/awkward/Identity.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ namespace awkward {
virtual const std::string location(int64_t where) const = 0;
virtual const std::shared_ptr<Identity> to64() const = 0;
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0;
virtual const std::shared_ptr<Identity> getitem_range(int64_t start, int64_t stop) const = 0;
virtual const std::shared_ptr<Identity> getitem_range_unsafe(int64_t start, int64_t stop) const = 0;
virtual const std::shared_ptr<Identity> shallow_copy() const = 0;
virtual const std::shared_ptr<Identity> getitem_carry_64(const Index64& carry) const = 0;

const std::string tostring() const;

protected:
const Ref ref_;
const FieldLoc fieldloc_;
Expand All @@ -64,12 +66,13 @@ namespace awkward {
virtual const std::string location(int64_t where) const;
virtual const std::shared_ptr<Identity> to64() const;
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
virtual const std::shared_ptr<Identity> getitem_range(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Identity> getitem_range_unsafe(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Identity> shallow_copy() const;
virtual const std::shared_ptr<Identity> getitem_carry_64(const Index64& carry) const;

const std::string tostring() const;
const std::vector<T> get(int64_t at) const;
const std::vector<T> getitem_at(int64_t at) const;
const std::vector<T> getitem_at_unsafe(int64_t at) const;
const std::shared_ptr<Identity> getitem_range(int64_t start, int64_t stop) const;

private:
const std::shared_ptr<T> ptr_;
Expand Down
3 changes: 3 additions & 0 deletions include/awkward/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ namespace awkward {
int64_t offset() const { return offset_; }
int64_t length() const { return length_; }

const std::string classname() const;
const std::string tostring() const;
const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
T getitem_at(int64_t at) const;
T getitem_at_unsafe(int64_t at) const;
IndexOf<T> getitem_range(int64_t start, int64_t stop) const;
IndexOf<T> getitem_range_unsafe(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Index> shallow_copy() const;

private:
Expand Down
8 changes: 5 additions & 3 deletions include/awkward/Iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ namespace awkward {
public:
Iterator(const std::shared_ptr<Content> content)
: content_(content)
, where_(0) { }
, where_(0) {
content.get()->checksafe();
}

const std::shared_ptr<Content> content() const { return content_; }
const int64_t where() const { return where_; }

const bool isdone() const { return where_ >= content_.get()->length(); }
const std::shared_ptr<Content> next() { return content_.get()->getitem_at(where_++); }
const bool isdone() const;
const std::shared_ptr<Content> next();

const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
const std::string tostring() const;
Expand Down
3 changes: 3 additions & 0 deletions include/awkward/ListArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ namespace awkward {
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
virtual int64_t length() const;
virtual const std::shared_ptr<Content> shallow_copy() const;
virtual void checksafe() const;
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_at_unsafe(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem_range_unsafe(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& advanced) const;
virtual const std::shared_ptr<Content> carry(const Index64& carry) const;
virtual const std::pair<int64_t, int64_t> minmax_depth() const;
Expand Down
3 changes: 3 additions & 0 deletions include/awkward/ListOffsetArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ namespace awkward {
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
virtual int64_t length() const;
virtual const std::shared_ptr<Content> shallow_copy() const;
virtual void checksafe() const;
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_at_unsafe(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem_range_unsafe(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& advanced) const;
virtual const std::shared_ptr<Content> carry(const Index64& carry) const;
virtual const std::pair<int64_t, int64_t> minmax_depth() const;
Expand Down
5 changes: 4 additions & 1 deletion include/awkward/NumpyArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@ namespace awkward {
virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const;
virtual int64_t length() const;
virtual const std::shared_ptr<Content> shallow_copy() const;
virtual void checksafe() const;
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_at_unsafe(int64_t at) const;
virtual const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem_range_unsafe(int64_t start, int64_t stop) const;
virtual const std::shared_ptr<Content> getitem(const Slice& where) const;
virtual const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& advanced) const;
virtual const std::shared_ptr<Content> carry(const Index64& carry) const;
Expand All @@ -59,7 +62,7 @@ namespace awkward {
const NumpyArray contiguous() const;
const NumpyArray contiguous_next(Index64 bytepos) const;
const NumpyArray getitem_bystrides(const std::shared_ptr<SliceItem>& head, const Slice& tail, int64_t length) const;
const NumpyArray getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride) const;
const NumpyArray getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const;

private:
std::shared_ptr<Identity> id_;
Expand Down
47 changes: 45 additions & 2 deletions include/awkward/RawArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,29 @@ namespace awkward {
, itemsize_(sizeof(T)) { }

// Returns the shared pointer held in ptr_ (not adjusted by offset_).
const std::shared_ptr<T> ptr() const { return ptr_; }

// Returns offset_. borrow() multiplies it by itemsize_, so it counts items, not bytes.
const int64_t offset() const { return offset_; }

// Returns itemsize_, the per-item stride used by the byte-level helpers below.
const int64_t itemsize() const { return itemsize_; }

// True when length_ is zero.
bool isempty() const { return length_ == 0; }

// offset_ expressed in bytes: itemsize_ * offset_.
ssize_t byteoffset() const { return (ssize_t)itemsize_*(ssize_t)offset_; }

// Address of ptr_.get() advanced by byteoffset(), viewed as a byte pointer.
uint8_t* byteptr() const { return reinterpret_cast<uint8_t*>(reinterpret_cast<ssize_t>(ptr_.get()) + byteoffset()); }

// length_ expressed in bytes: itemsize_ * length_.
ssize_t bytelength() const { return (ssize_t)itemsize_*(ssize_t)length_; }

// Reads the single byte located `at` bytes past byteoffset() from ptr_.get().
// No bounds check is performed here.
uint8_t getbyte(ssize_t at) const { return *reinterpret_cast<uint8_t*>(reinterpret_cast<ssize_t>(ptr_.get()) + (ssize_t)(byteoffset() + at)); }

// Shorthand for borrow(0).
T* borrow() const { return borrow(0); }

// Raw, non-owning pointer to the item `at` positions after offset_
// (address = ptr_.get() + itemsize_ * (offset_ + at)). No bounds check is performed here.
T* borrow(int64_t at) const { return reinterpret_cast<T*>(reinterpret_cast<ssize_t>(ptr_.get()) + (ssize_t)itemsize_*(ssize_t)(offset_ + at)); }

// "RawArrayOf<" + typeid(T).name() + ">"; the typeid name is implementation-defined,
// so the exact string differs between compilers.
virtual const std::string classname() const { return std::string("RawArrayOf<") + std::string(typeid(T).name()) + std::string(">"); }

// Returns id_, which may be a null shared pointer (see the nullptr checks elsewhere in this class).
virtual const std::shared_ptr<Identity> id() const { return id_; }

virtual void setid() {
if (length() <= kMaxInt32) {
Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length());
Expand All @@ -74,12 +84,14 @@ namespace awkward {
setid(newid);
}
}

// Replaces id_ with `id`.
//
// A null `id` is accepted as-is. A non-null `id` must report the same
// length() as this array, otherwise std::invalid_argument is thrown and
// id_ is left untouched.
virtual void setid(const std::shared_ptr<Identity> id) {
  if (id.get() != nullptr) {
    if (length() != id.get()->length()) {
      throw std::invalid_argument("content and its id must have the same length");
    }
  }
  id_ = id;
}

virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const {
std::stringstream out;
out << indent << pre << "<RawArray of=\"" << typeid(T).name() << "\" length=\"" << length_ << "\" itemsize=\"" << itemsize_ << "\" data=\"";
Expand Down Expand Up @@ -119,28 +131,57 @@ namespace awkward {
}
return out.str();
}

virtual int64_t length() const { return length_; }

virtual const std::shared_ptr<Content> shallow_copy() const { return std::shared_ptr<Content>(new RawArrayOf<T>(id_, ptr_, offset_, length_, itemsize_)); }
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const { return getitem_range(at, at + 1); }

// Verifies that the attached identity (if any) is at least as long as
// the array; reports "len(id) < len(array)" through util::handle_error
// otherwise. An array without an identity is always considered safe.
virtual void checksafe() const {
  if (id_.get() == nullptr) {
    return;
  }
  if (id_.get()->length() < length_) {
    util::handle_error(failure("len(id) < len(array)", kSliceNone, kSliceNone), id_.get()->classname(), nullptr);
  }
}

// Checked single-element access.
//
// A negative `at` is shifted by length_ (so it counts from the end). If the
// shifted index is still outside [0, length_), "index out of range" is
// reported through util::handle_error with the ORIGINAL `at` and this
// array's identity. The result is the one-element range produced by
// getitem_range_unsafe.
virtual const std::shared_ptr<Content> getitem_at(int64_t at) const {
  const int64_t wrapped = (at < 0) ? at + length_ : at;
  const bool in_bounds = (wrapped >= 0) && (wrapped < length_);
  if (!in_bounds) {
    util::handle_error(failure("index out of range", kSliceNone, at), classname(), id_.get());
  }
  return getitem_range_unsafe(wrapped, wrapped + 1);
}

// Unchecked single-element access: forwards to getitem_range_unsafe(at, at + 1)
// without wrapping negative indexes or testing `at` against length_.
virtual const std::shared_ptr<Content> getitem_at_unsafe(int64_t at) const {
return getitem_range_unsafe(at, at + 1);
}

// Checked range access.
//
// `start` / `stop` are regularized against length_ by
// awkward_regularize_rangeslice (a value equal to Slice::none() is flagged
// as "not given"), then the regularized bounds are handed to
// getitem_range_unsafe.
virtual const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const {
  int64_t regular_start = start;
  int64_t regular_stop = stop;
  // The kernel rewrites both bounds in place, so it must receive their
  // addresses (same call shape as IdentityOf<T>::getitem_range); passing the
  // values would leave regular_start / regular_stop un-regularized.
  awkward_regularize_rangeslice(&regular_start, &regular_stop, true, start != Slice::none(), stop != Slice::none(), length_);
  return getitem_range_unsafe(regular_start, regular_stop);
}

// Range access on bounds that are already regularized.
//
// `start` / `stop` are used verbatim for the array itself: the result shares
// ptr_ and covers items [offset_ + start, offset_ + stop). If an identity is
// attached it is sliced to the same range; the only check made here is that
// `stop` does not run past the identity, reported as "index out of range"
// through util::handle_error (same reporting path as getitem_at/checksafe).
virtual const std::shared_ptr<Content> getitem_range_unsafe(int64_t start, int64_t stop) const {
  std::shared_ptr<Identity> id(nullptr);
  if (id_.get() != nullptr) {
    if (stop > id_.get()->length()) {
      util::handle_error(failure("index out of range", kSliceNone, stop), id_.get()->classname(), nullptr);
    }
    // Bounds are already regular here, so use the identity's unchecked
    // range accessor (the one declared on the Identity base class).
    id = id_.get()->getitem_range_unsafe(start, stop);
  }
  return std::shared_ptr<Content>(new RawArrayOf<T>(id, ptr_, offset_ + start, stop - start, itemsize_));
}

// Entry point for slicing with a full Slice: splits `where` into its head
// item and remaining tail and starts the getitem_next recursion with an
// empty (length-0) advanced index.
virtual const std::shared_ptr<Content> getitem(const Slice& where) const {
  std::shared_ptr<SliceItem> nexthead = where.head();
  Slice nexttail = where.tail();
  Index64 nextadvanced(0);
  // getitem_next on this class (and on Content) takes exactly
  // (head, tail, advanced); there is no overload with a trailing bool.
  return getitem_next(nexthead, nexttail, nextadvanced);
}

const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem> head, const Slice& tail, const Index64& advanced) const {
if (tail.length() != 0) {
throw std::invalid_argument("too many indexes for array");
Expand Down Expand Up @@ -204,9 +245,11 @@ namespace awkward {
throw std::runtime_error("unrecognized slice item type");
}
}

// Not implemented for RawArray: every call throws std::runtime_error
// with the message "RawArray<T>::carry"; the argument is ignored.
virtual const std::shared_ptr<Content> carry(const Index64& carry) const {
throw std::runtime_error("RawArray<T>::carry");
}

// Always returns the pair (1, 1).
// NOTE(review): presumably (minimum depth, maximum depth) of a flat array — confirm against Content's contract.
virtual const std::pair<int64_t, int64_t> minmax_depth() const { return std::pair<int64_t, int64_t>(1, 1); }

private:
Expand Down
42 changes: 31 additions & 11 deletions src/libawkward/Identity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "awkward/cpu-kernels/identity.h"
#include "awkward/cpu-kernels/getitem.h"
#include "awkward/Slice.h"

#include "awkward/Identity.h"

Expand Down Expand Up @@ -40,8 +41,8 @@ namespace awkward {
if (bothi != 0) {
out << ", ";
}
if (fieldi < (int64_t)fieldloc_.size() && fieldloc_[fieldi].first == bothi) {
out << "\"" << fieldloc_[fieldi].second << "\"";
if (fieldi < (int64_t)fieldloc_.size() && fieldloc_[(size_t)fieldi].first == bothi) {
out << "\"" << fieldloc_[(size_t)fieldi].second << "\"";
fieldi++;
}
else {
Expand Down Expand Up @@ -88,12 +89,7 @@ namespace awkward {
}

template <typename T>
const std::string IdentityOf<T>::tostring() const {
return tostring_part("", "", "");
}

template <typename T>
const std::shared_ptr<Identity> IdentityOf<T>::getitem_range(int64_t start, int64_t stop) const {
// Range access on bounds that are already regularized.
//
// Returns a new IdentityOf<T> sharing ptr_, with the same ref_, fieldloc_
// and width_, a length of (stop - start), and an offset advanced by
// width_ * start items. For an empty range (start == stop) the offset is
// left unchanged, via the (start != stop) factor.
const std::shared_ptr<Identity> IdentityOf<T>::getitem_range_unsafe(int64_t start, int64_t stop) const {
  // Both bounds may equal length_: stop == length_ is an ordinary
  // "up to the end" slice and start == length_ is the empty slice at the end.
  assert(0 <= start && start <= length_ && 0 <= stop && stop <= length_);
  return std::shared_ptr<Identity>(new IdentityOf<T>(ref_, fieldloc_, offset_ + width_*start*(start != stop), width_, (stop - start), ptr_));
}
Expand Down Expand Up @@ -137,16 +133,40 @@ namespace awkward {
return out;
}

// String form of the identity: tostring_part with empty indent, prefix and suffix.
const std::string Identity::tostring() const {
return tostring_part("", "", "");
}

template <typename T>
const std::vector<T> IdentityOf<T>::get(int64_t at) const {
assert(0 <= at < length_);
// Checked single-entry access.
//
// A negative `at` is shifted by length_. If the shifted index is still
// outside [0, length_), "index out of range" is reported through
// util::handle_error with the ORIGINAL `at` and no identity. Otherwise the
// entry is read with getitem_at_unsafe.
const std::vector<T> IdentityOf<T>::getitem_at(int64_t at) const {
  const int64_t wrapped = (at < 0) ? at + length_ : at;
  const bool in_bounds = (wrapped >= 0) && (wrapped < length_);
  if (!in_bounds) {
    util::handle_error(failure("index out of range", kSliceNone, at), classname(), nullptr);
  }
  return getitem_at_unsafe(wrapped);
}

// Unchecked single-entry access: copies the width_ values that make up
// entry `at` into a new vector.
//
// Entries are laid out back to back, width_ items each, starting at offset_
// (the same layout getitem_range_unsafe relies on when it advances the
// offset by width_ * start), so entry `at` begins at offset_ + width_ * at.
// Only a debug-time assert guards the index.
template <typename T>
const std::vector<T> IdentityOf<T>::getitem_at_unsafe(int64_t at) const {
  assert(0 <= at && at < length_);
  const size_t first = (size_t)(offset_ + width_*at);
  const size_t last = first + (size_t)width_;
  std::vector<T> out;
  out.reserve((size_t)width_);
  for (size_t i = first; i < last; i++) {
    out.push_back(ptr_.get()[i]);
  }
  return out;
}

// Checked range access.
//
// `start` / `stop` are regularized in place against length_ by
// awkward_regularize_rangeslice (a bound equal to Slice::none() is flagged
// as "not given"); the regularized bounds are then passed to
// getitem_range_unsafe.
template <typename T>
const std::shared_ptr<Identity> IdentityOf<T>::getitem_range(int64_t start, int64_t stop) const {
  const bool has_start = (start != Slice::none());
  const bool has_stop = (stop != Slice::none());
  int64_t lo = start;
  int64_t hi = stop;
  awkward_regularize_rangeslice(&lo, &hi, true, has_start, has_stop, length_);
  return getitem_range_unsafe(lo, hi);
}

template class IdentityOf<int32_t>;
template class IdentityOf<int64_t>;
}
Loading

0 comments on commit 069fea6

Please sign in to comment.