diff --git a/include/hashinator/hashinator.h b/include/hashinator/hashinator.h
index 85f1044..435e906 100644
--- a/include/hashinator/hashinator.h
+++ b/include/hashinator/hashinator.h
@@ -63,6 +63,7 @@ class Hashmap {
 private:
    // CUDA device handle
    Hashmap* device_map;
+   split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>* device_buckets;
    //~CUDA device handle

    // Host members
@@ -83,6 +84,7 @@ class Hashmap {
    void preallocate_device_handles() {
 #ifndef HASHINATOR_CPU_ONLY_MODE
       SPLIT_CHECK_ERR(split_gpuMalloc((void**)&device_map, sizeof(Hashmap)));
+      device_buckets = (split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>*)((char*)device_map + offsetof(Hashmap, buckets));
 #endif
    }
@@ -94,6 +96,7 @@ class Hashmap {
 #ifndef HASHINATOR_CPU_ONLY_MODE
       SPLIT_CHECK_ERR(split_gpuFree(device_map));
       device_map = nullptr;
+      device_buckets = nullptr;
 #endif
    }
@@ -107,6 +110,7 @@ class Hashmap {
       *_mapInfo = MapInfo(5);
       buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
          1 << _mapInfo->sizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()));
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
    };

    Hashmap(int sizepower) {
@@ -115,6 +119,7 @@ class Hashmap {
       *_mapInfo = MapInfo(sizepower);
       buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
          1 << _mapInfo->sizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()));
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
    };

    Hashmap(const Hashmap& other) {
@@ -122,6 +127,7 @@ class Hashmap {
       _mapInfo = _metaAllocator.allocate(1);
       *_mapInfo = *(other._mapInfo);
       buckets = other.buckets;
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
    };

    Hashmap(Hashmap&& other) {
@@ -129,6 +135,7 @@ class Hashmap {
       _mapInfo = other._mapInfo;
       other._mapInfo=nullptr;
       buckets = std::move(other.buckets);
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
    };

    Hashmap& operator=(const Hashmap& other) {
@@ -137,6 +144,7 @@ class Hashmap {
       }
       *_mapInfo = *(other._mapInfo);
       buckets = other.buckets;
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
       return *this;
    }
@@ -146,7 +154,8 @@ class Hashmap {
          return;
       }
       SPLIT_CHECK_ERR(split_gpuMemcpyAsync(_mapInfo,other._mapInfo, sizeof(MapInfo), split_gpuMemcpyDeviceToDevice, stream));
-      buckets.overwrite(other.buckets);
+      buckets.overwrite(other.buckets, stream);
+      SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice, stream));
       return;
    }
@@ -157,7 +166,8 @@ class Hashmap {
       _metaAllocator.deallocate(_mapInfo, 1);
       _mapInfo = other._mapInfo;
       other._mapInfo=nullptr;
-      buckets =std::move(other.buckets);
+      buckets = std::move(other.buckets);
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
       return *this;
    }
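Note on the hunks above: the Hashmap now keeps a full copy of itself in device memory (device_map) and re-uploads it whenever the host-side object changes, while device_buckets is not a separate allocation at all — it merely aliases the buckets member inside that device copy via offsetof. A minimal standalone sketch of the same idea follows; it uses plain CUDA runtime calls instead of the split_gpu* wrappers, and the names (Table, loadFactor, device_copy) are illustrative only, not part of Hashinator.

    #include <cstddef>
    #include <cuda_runtime.h>

    struct Table {
       int sizePower;
       float loadFactor;
       Table* device_copy = nullptr;       // device-resident mirror of *this
       float* device_loadFactor = nullptr; // alias into the mirror, not a second buffer

       Table(int sp, float lf) : sizePower(sp), loadFactor(lf) {
          cudaMalloc((void**)&device_copy, sizeof(Table));
          // Derived once; the alias stays valid for the lifetime of device_copy.
          device_loadFactor = (float*)((char*)device_copy + offsetof(Table, loadFactor));
          sync();
       }
       ~Table() { cudaFree(device_copy); }

       // Push host-side changes so kernels reading the mirror see current state,
       // mirroring the split_gpuMemcpy(device_map, this, ...) calls added above.
       void sync() { cudaMemcpy(device_copy, this, sizeof(Table), cudaMemcpyHostToDevice); }
    };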
@@ -246,11 +256,15 @@ class Hashmap {
       buckets = newBuckets;
       _mapInfo->currentMaxBucketOverflow = Hashinator::defaults::BUCKET_OVERFLOW;
       _mapInfo->tombstoneCounter = 0;
+      #ifndef HASHINATOR_CPU_ONLY_MODE
+      SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
+      #endif
    }

 #ifndef HASHINATOR_CPU_ONLY_MODE
    // Resize the table to fit more things. This is automatically invoked once
    // maxBucketOverflow has triggered. This can only be done on host (so far)
+   template <bool prefetches = true>
    void device_rehash(int newSizePower, split_gpuStream_t s = 0) {
       if (newSizePower > 32) {
          throw std::out_of_range("Hashmap ran into rehashing catastrophe and exceeded 32bit buckets.");
       }
@@ -261,8 +275,10 @@ class Hashmap {
       hash_pair<KEY_TYPE, VAL_TYPE>* validElements;
       SPLIT_CHECK_ERR(split_gpuMallocAsync((void**)&validElements, (_mapInfo->fill + 1) * sizeof(hash_pair<KEY_TYPE, VAL_TYPE>), s));
-      optimizeGPU(s);
-      SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
+      if (prefetches) {
+         optimizeGPU(s);
+         SPLIT_CHECK_ERR(split_gpuStreamSynchronize(s));
+      }

       auto isValidKey = [] __host__ __device__(hash_pair<KEY_TYPE, VAL_TYPE>& element) {
          if (element.first != TOMBSTONE && element.first != EMPTYBUCKET) {
          ...
@@ -283,10 +299,16 @@ class Hashmap {
          split_gpuFreeAsync(validElements, s);
          return;
       }
-      optimizeCPU(s);
-      buckets = std::move(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
-         1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE())));
-      optimizeGPU(s);
+      if (newSizePower == _mapInfo->sizePower) {
+         // Just clear the current contents
+         DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s);
+      } else {
+         // Need new buckets
+         buckets = std::move(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
+            1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE())));
+         SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice, s));
+         optimizeGPU(s);
+      }
       *_mapInfo = Info(newSizePower);
       // Insert valid elements to now larger buckets
       insert(validElements, nValidElements, 1, s);
@@ -430,7 +452,8 @@ class Hashmap {
       return;
    }
 #else
-   void clear(targets t = targets::host, split_gpuStream_t s = 0, bool prefetches = true) {
+   template <bool prefetches = true>
+   void clear(targets t = targets::host, split_gpuStream_t s = 0, size_t len = 0) {
       switch (t) {
       case targets::host:
          buckets =
          ...
@@ -440,9 +463,12 @@ class Hashmap {
       case targets::device:
          if (prefetches) {
-            buckets.optimizeGPU(s);
+            optimizeGPU(s);
          }
-         DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s);
+         if (len==0) { // If size is provided, no need to page fault size information.
+            len = buckets.size();
+         }
+         DeviceHasher::reset_all(buckets.data(),_mapInfo, len, s);
 #ifdef HASHINATOR_DEBUG
         set_status((_mapInfo->fill == 0) ? success : fail);
 #endif
         ...
@@ -561,6 +587,7 @@ class Hashmap {
       buckets.swap(other.buckets);
       std::swap(_mapInfo, other._mapInfo);
       std::swap(device_map, other.device_map);
+      std::swap(device_buckets, other.device_buckets);
    }

 #ifdef HASHINATOR_CPU_ONLY_MODE
    ...
@@ -576,12 +603,13 @@ class Hashmap {
    }
 #else
    // Try to get the overflow back to the original one
+   template <bool prefetches = true>
    void performCleanupTasks(split_gpuStream_t s = 0) {
-      while (_mapInfo->currentMaxBucketOverflow > Hashinator::defaults::BUCKET_OVERFLOW) {
-         device_rehash(_mapInfo->sizePower + 1, s);
-      }
       if (tombstone_ratio() > 0.025) {
-         clean_tombstones(s);
+         clean_tombstones(s);
+      }
+      while (_mapInfo->currentMaxBucketOverflow > Hashinator::defaults::BUCKET_OVERFLOW) {
+         device_rehash(_mapInfo->sizePower + 1, s);
       }
    }
@@ -1080,9 +1108,9 @@ class Hashmap {
    * Then call this:
    * hmap.extractPattern(elements,Rule());
    * */
-   template <typename Rule>
+   template <typename Rule, bool prefetches = true>
    size_t extractPattern(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, Rule rule,
-                         split_gpuStream_t s = 0, bool prefetches = true) {
+                         split_gpuStream_t s = 0) {
       elements.resize(_mapInfo->fill + 1, true);
       if (prefetches) {
          elements.optimizeGPU(s);
       }
       ...
@@ -1116,7 +1144,7 @@ class Hashmap {
    void extractPatternLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, Rule rule, split_gpuStream_t s = 0) {
       // Extract elements matching the Pattern Rule(element)==true;
       split::tools::copy_if_loop<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, defaults::MAX_BLOCKSIZE,
-                                 defaults::WARPSIZE>(buckets, elements, rule, s);
+                                 defaults::WARPSIZE>(*device_buckets, elements, rule, s);
    }
    void extractLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, split_gpuStream_t s = 0) {
       // Extract all valid elements
       ...
       extractPatternLoop(elements, rule, s);
    }
@@ -1126,9 +1154,8 @@ class Hashmap {
-   template <typename Rule>
-   size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, split_gpuStream_t s = 0,
-                               bool prefetches = true) {
+   template <typename Rule, bool prefetches = true>
+   size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, split_gpuStream_t s = 0) {
       elements.resize(_mapInfo->fill + 1, true);
       if (prefetches) {
          elements.optimizeGPU(s);
       }
       ...
                                 defaults::WARPSIZE>(buckets, elements, rule, s);
       //FIXME: there is an issue where paging to host occurs and following calls to hashmap operations take a hit.
       //temp fix: call optimizeGPU() here
-      optimizeGPU(s);
+      if (prefetches) {
+         optimizeGPU(s);
+      }
       return elements.size();
    }

-   template <typename Rule>
-   size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void *stack, size_t max_size, split_gpuStream_t s = 0,
-                               bool prefetches = true) {
+   template <typename Rule, bool prefetches = true>
+   size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void *stack, size_t max_size, split_gpuStream_t s = 0) {
       elements.resize(_mapInfo->fill + 1, true);
       if (prefetches) {
          elements.optimizeGPU(s);
       }
       ...
@@ -1157,17 +1185,19 @@ class Hashmap {
    void extractKeysByPatternLoop(split::SplitVector<KEY_TYPE>& elements, Rule rule, split_gpuStream_t s = 0) {
       // Extract element **keys** matching the Pattern Rule(element)==true;
       split::tools::copy_if_keys_loop<hash_pair<KEY_TYPE, VAL_TYPE>, KEY_TYPE, Rule, defaults::MAX_BLOCKSIZE,
-                                      defaults::WARPSIZE>(buckets, elements, rule, s);
+                                      defaults::WARPSIZE>(*device_buckets, elements, rule, s);
    }

-   size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, split_gpuStream_t s = 0, bool prefetches = true) {
+   template <bool prefetches = true>
+   size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, split_gpuStream_t s = 0) {
       // Extract all keys
       auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
          return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
       };
       return extractKeysByPattern(elements, rule, s, prefetches);
    }
-   size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void *stack, size_t max_size, split_gpuStream_t s = 0, bool prefetches = true) {
+   template <bool prefetches = true>
+   size_t extractAllKeys(split::SplitVector<KEY_TYPE>& elements, void *stack, size_t max_size, split_gpuStream_t s = 0) {
       // Extract all keys
       auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
          return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
       ...
@@ -1182,7 +1212,8 @@ class Hashmap {
       extractKeysByPatternLoop(elements, rule, s);
    }

-   void clean_tombstones(split_gpuStream_t s = 0, bool prefetches = false) {
+   template <bool prefetches = false>
+   void clean_tombstones(split_gpuStream_t s = 0) {
       if (_mapInfo->tombstoneCounter == 0) {
          return;
       }
       ...
@@ -1240,8 +1271,8 @@ class Hashmap {
    }

    // Uses Hasher's insert_kernel to insert all elements
-   void insert(KEY_TYPE* keys, VAL_TYPE* vals, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0,
-               bool prefetches = true) {
+   template <bool prefetches = true>
+   void insert(KEY_TYPE* keys, VAL_TYPE* vals, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0) {
       // Here we do some calculations to estimate how much if any we need to grow our buckets
       // TODO fix these if paths or at least annotate them .
       if (len == 0) {
       ...
@@ -1262,7 +1293,8 @@ class Hashmap {
    }

    // Uses Hasher's insert_index_kernel to insert all elements, with the index as the value
-   void insertIndex(KEY_TYPE* keys, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0, bool prefetches = true) {
+   template <bool prefetches = true>
+   void insertIndex(KEY_TYPE* keys, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0) {
       // Here we do some calculations to estimate how much if any we need to grow our buckets
       // TODO fix these if paths or at least annotate them .
       if (len == 0) {
       ...
@@ -1283,8 +1315,8 @@ class Hashmap {
    }

    // Uses Hasher's insert_kernel to insert all elements
-   void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0,
-               bool prefetches = true) {
+   template <bool prefetches = true>
+   void insert(hash_pair<KEY_TYPE, VAL_TYPE>* src, size_t len, float targetLF = 0.5, split_gpuStream_t s = 0) {
       if (len == 0) {
          set_status(status::success);
          return;
       }
       ...
@@ -1303,7 +1335,8 @@ class Hashmap {
    }

    // Uses Hasher's retrieve_kernel to read all elements
-   void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, size_t len, split_gpuStream_t s = 0,bool prefetches=true) {
+   template <bool prefetches = true>
+   void retrieve(KEY_TYPE* keys, VAL_TYPE* vals, size_t len, split_gpuStream_t s = 0) {
       if (prefetches){
          buckets.optimizeGPU(s);
       }
       ...
@@ -1313,7 +1346,8 @@ class Hashmap {
    }

    // Uses Hasher's retrieve_kernel to read all elements
-   void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, size_t len, split_gpuStream_t s = 0, bool prefetches=true) {
+   template <bool prefetches = true>
+   void retrieve(hash_pair<KEY_TYPE, VAL_TYPE>* src, size_t len, split_gpuStream_t s = 0) {
       if (prefetches){
          buckets.optimizeGPU(s);
       }
       ...
@@ -1322,7 +1356,8 @@ class Hashmap {
    }

    // Uses Hasher's erase_kernel to delete all elements
-   void erase(KEY_TYPE* keys, size_t len, split_gpuStream_t s = 0,bool prefetches=true) {
+   template <bool prefetches = true>
+   void erase(KEY_TYPE* keys, size_t len, split_gpuStream_t s = 0) {
       if (prefetches){
          buckets.optimizeGPU(s);
       }
       ...
@@ -1341,8 +1376,11 @@ class Hashmap {
    * The pointer is internally cleaned up by the destructors, however the user **must**
    * call download() after usage on device.
    */
+   template <bool prefetches = true>
    Hashmap* upload(split_gpuStream_t stream = 0) {
-      optimizeGPU(stream);
+      if (prefetches) {
+         optimizeGPU(stream);
+      }
       SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice, stream));
       return device_map;
    }
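Throughout the hashinator.h hunks above, the runtime `bool prefetches` argument is promoted to a template parameter; the stripped template headers are reconstructed here as `template <bool prefetches = true>` (or `= false` for clean_tombstones), which is what the bodies' continued use of `prefetches` implies. A minimal standalone sketch of that pattern, using plain CUDA calls rather than the split_gpu* wrappers, with `Map`/`touch` as illustrative names that are not part of Hashinator:

    #include <cuda_runtime.h>

    struct Map {
       int* data = nullptr; // assumed to be a managed (unified-memory) allocation
       size_t n = 0;

       template <bool prefetches = true>
       void touch(cudaStream_t s = 0) {
          if constexpr (prefetches) {
             // Compiled only into the <true> instantiation; the <false> one
             // carries neither the branch nor an unused default argument.
             cudaMemPrefetchAsync(data, n * sizeof(int), 0 /*device id*/, s);
          }
          // ... launch the kernel that actually uses data here ...
       }
    };

    // Usage: the default keeps the old prefetching behaviour, while callers whose
    // data is already resident on the device opt out at compile time:
    //    m.touch(stream);         // prefetches first
    //    m.touch<false>(stream);  // skips the prefetch entirely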
diff --git a/include/splitvector/split_tools.h b/include/splitvector/split_tools.h
index d2f8c78..1779444 100644
--- a/include/splitvector/split_tools.h
+++ b/include/splitvector/split_tools.h
@@ -1005,7 +1005,7 @@ void copy_if_loop(
     split::SplitVector<T, split::split_unified_allocator<T>>& input,
     split::SplitVector<T, split::split_unified_allocator<T>>& output, Rule rule,
     split_gpuStream_t s = 0) {
-   #ifndef NDEBUG
+   #ifdef HASHINATOR_DEBUG
    bool input_ok = isDeviceAccessible( reinterpret_cast<void*>(&input));
    bool output_ok= isDeviceAccessible( reinterpret_cast<void*>(&output));
    assert( (input_ok && output_ok) && "This method supports splitvectors dynamically allocated on device or unified memory!");
@@ -1018,7 +1018,7 @@ void copy_if_keys_loop(
     split::SplitVector<T, split::split_unified_allocator<T>>& input,
     split::SplitVector<U, split::split_unified_allocator<U>>& output, Rule rule,
     split_gpuStream_t s = 0) {
-   #ifndef NDEBUG
+   #ifdef HASHINATOR_DEBUG
    bool input_ok = isDeviceAccessible( reinterpret_cast<void*>(&input));
    bool output_ok= isDeviceAccessible( reinterpret_cast<void*>(&output));
    assert( (input_ok && output_ok) && "This method supports splitvectors dynamically allocated on device or unified memory!");
diff --git a/include/splitvector/splitvec.h b/include/splitvector/splitvec.h
index 84bb2a3..fa59582 100644
--- a/include/splitvector/splitvec.h
+++ b/include/splitvector/splitvec.h
@@ -89,6 +89,7 @@ class SplitVector {
    size_t _alloc_multiplier = 2; // host variable; multiplier for when reserving more space
    Allocator _allocator;         // Allocator used to allocate and deallocate memory;
    Residency _location;          // Flags that describes the current residency of our data
+   SplitVector* d_vec;           // device copy pointer

    /**
     * @brief Checks if a pointer is valid and throws an exception if it's null.
@@ -213,6 +214,7 @@ class SplitVector {
     */
    HOSTONLY explicit SplitVector() : _location(Residency::host) {
       this->_allocate(0); // seems counter-intuitive based on stl but it is not!
+      d_vec = NULL;
    }

    /**
@@ -220,7 +222,10 @@ class SplitVector {
     *
     * @param size The size of the SplitVector to be created.
     */
-   HOSTONLY explicit SplitVector(size_t size) : _location(Residency::host) { this->_allocate(size); }
+   HOSTONLY explicit SplitVector(size_t size) : _location(Residency::host) {
+      this->_allocate(size);
+      d_vec = NULL;
+   }

    /**
     * @brief Constructor to create a SplitVector of a specified size with initial values.
@@ -233,6 +238,7 @@ class SplitVector {
       for (size_t i = 0; i < size; i++) {
          _data[i] = val;
       }
+      d_vec = NULL;
    }

    /**
@@ -269,6 +275,7 @@ class SplitVector {
       }
       copySafe();
       _location = Residency::host;
+      d_vec = NULL;
    }
 #endif
    /**
@@ -284,6 +291,7 @@ class SplitVector {
       *(other._size) = 0;
       other._data = nullptr;
       _location = other._location;
+      d_vec = NULL;
    }

    /**
@@ -296,6 +304,7 @@ class SplitVector {
       for (size_t i = 0; i < size(); i++) {
          _data[i] = init_list.begin()[i];
       }
+      d_vec = NULL;
    }

    /**
@@ -308,12 +317,18 @@ class SplitVector {
       for (size_t i = 0; i < size(); i++) {
          _data[i] = other[i];
       }
+      d_vec = NULL;
    }

    /**
     * @brief Destructor for the SplitVector. Deallocates memory.
     */
-   HOSTONLY ~SplitVector() { _deallocate(); }
+   HOSTONLY ~SplitVector() {
+      _deallocate();
+      if (d_vec != NULL) {
+         SPLIT_CHECK_ERR(split_gpuFree(d_vec));
+      }
+   }

    /**
     * @brief Custom assignment operator to assign the content of another SplitVector.
@@ -357,6 +372,7 @@ class SplitVector {
       }
       copySafe();
       _location = Residency::host;
+      d_vec = NULL;
       return *this;
    }
@@ -376,13 +392,17 @@ class SplitVector {
       if constexpr (std::is_trivially_copyable<T>::value) {
          if (other._location == Residency::device) {
             _location = Residency::device;
-            optimizeGPU(stream);
             SPLIT_CHECK_ERR(split_gpuMemcpyAsync(_data, other._data, size() * sizeof(T), split_gpuMemcpyDeviceToDevice,stream));
+            int device;
+            SPLIT_CHECK_ERR(split_gpuGetDevice(&device));
+            SPLIT_CHECK_ERR(split_gpuMemPrefetchAsync(_size, sizeof(size_t), device, stream));
+            SPLIT_CHECK_ERR(split_gpuMemPrefetchAsync(_capacity, sizeof(size_t), device, stream));
             return;
          }
       }
       copySafe();
       _location = Residency::host;
+      d_vec = NULL;
       return;
    }
@@ -407,6 +427,7 @@ class SplitVector {
       *(other._size) = 0;
       other._data = nullptr;
       _location = other._location;
+      d_vec = NULL;
       return *this;
    }
@@ -456,16 +477,24 @@ class SplitVector {
     *
     * @param stream The GPU stream to perform the upload on.
     * @return Pointer to the uploaded SplitVector on the GPU.
-    *         Has to be split_gpuFree'd after use otherwise memleak (small one but still)!
     */
    HOSTONLY
    SplitVector* upload(split_gpuStream_t stream = 0) {
-      SplitVector* d_vec;
-      optimizeGPU(stream);
-      SPLIT_CHECK_ERR(split_gpuMallocAsync((void**)&d_vec, sizeof(SplitVector), stream));
+      if (d_vec == NULL) {
+         SPLIT_CHECK_ERR(split_gpuMallocAsync((void**)&d_vec, sizeof(SplitVector), stream));
+      }
       SPLIT_CHECK_ERR(split_gpuMemcpyAsync(d_vec, this, sizeof(SplitVector), split_gpuMemcpyHostToDevice, stream));
       return d_vec;
    }

+   /**
+    * @brief Returns pre-uploaded pointer to the SplitVector on the GPU.
+    *
+    * @return Pointer to the uploaded SplitVector on the GPU.
+    */
+   HOSTONLY
+   SplitVector* device_pointer() {
+      return d_vec;
+   }

    /**
     * @brief Manually prefetches data to the GPU.
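The upload()/device_pointer() hunk above changes the ownership model: the device-side copy of the SplitVector header is allocated once, cached in d_vec, refreshed on every upload(), and released by the destructor, so callers no longer split_gpuFree the returned pointer themselves. A condensed sketch of that lifecycle, with plain CUDA calls standing in for the split_gpu* wrappers and Vec as an illustrative stand-in for SplitVector:

    #include <cuda_runtime.h>

    struct Vec {
       int* _data = nullptr;
       size_t _size = 0;
       Vec* d_vec = nullptr; // cached device copy of this header object

       Vec* upload(cudaStream_t s = 0) {
          if (d_vec == nullptr) {
             cudaMallocAsync((void**)&d_vec, sizeof(Vec), s); // first call only
          }
          // Later calls just refresh the existing device copy.
          cudaMemcpyAsync(d_vec, this, sizeof(Vec), cudaMemcpyHostToDevice, s);
          return d_vec;
       }
       Vec* device_pointer() const { return d_vec; } // valid after the first upload()
       ~Vec() {
          if (d_vec) { cudaFree(d_vec); } // owner frees; callers must not
       }
    };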
@@ -731,6 +760,7 @@ class SplitVector {
       }
       reserve(newSize, eco);
       *_size = newSize;
+      // TODO: should it set entries to zero?
    }

    /**
@@ -756,15 +786,26 @@ class SplitVector {
 #else
    /**
-    * @brief Reallocates data to a bigger chunk of memory.
+    * @brief Reallocates data to a bigger (or smaller) chunk of memory.
     *
     * @param requested_space The size of the requested space.
     */
    HOSTONLY
    void reallocate(size_t requested_space, split_gpuStream_t stream = 0) {
+      // Store addresses
+      const size_t __size = *_size;
+      const size_t __old_capacity = *_capacity;
+      T* __old_data = _data;
+      // Verify allocation sufficiency
+      if (__size > requested_space) {
+         printf("Tried reallocating to capacity %d with size %d\n", (int)requested_space, (int)__size);
+         this->_deallocate();
+         throw std::bad_alloc();
+      }
+      // Check for complete deallocation
       if (requested_space == 0) {
          if (_data != nullptr) {
-            _deallocate_and_destroy(capacity(), _data);
+            _deallocate_and_destroy(__old_capacity, __old_data);
          }
          _data = nullptr;
          *_capacity = 0;
@@ -778,28 +819,18 @@ class SplitVector {
         ...
         this->_deallocate();
         throw std::bad_alloc();
      }
-      // Store addresses
-      const size_t __size = *_size;
-      const size_t __old_capacity = *_capacity;
       T* __new_data = _new_data;
-      T* __data = _data;
       // Swap pointers & update capacity
-      // Size remains the same ofc
       _data = _new_data;
       *_capacity = requested_space;
       // Perform copy on device
       if (__size>0) {
-         int device;
-         SPLIT_CHECK_ERR(split_gpuGetDevice(&device));
-         SPLIT_CHECK_ERR(split_gpuMemPrefetchAsync(__data, __size * sizeof(T), device, stream));
-         SPLIT_CHECK_ERR(split_gpuMemPrefetchAsync(__new_data, requested_space * sizeof(T), device, stream));
-         SPLIT_CHECK_ERR(split_gpuStreamSynchronize(stream));
-         SPLIT_CHECK_ERR(split_gpuMemcpy(__new_data, __data, __size * sizeof(T), split_gpuMemcpyDeviceToDevice));
+         SPLIT_CHECK_ERR(split_gpuMemcpyAsync(__new_data, __old_data, __size * sizeof(T), split_gpuMemcpyDeviceToDevice,stream));
          SPLIT_CHECK_ERR(split_gpuStreamSynchronize(stream));
       }
       // Deallocate old space
-      _deallocate_and_destroy(__old_capacity, __data);
+      _deallocate_and_destroy(__old_capacity, __old_data);
       return;
    }
@@ -815,7 +846,11 @@ class SplitVector {
     */
    HOSTONLY
    void reserve(size_t requested_space, bool eco = false, split_gpuStream_t stream = 0) {
-      const size_t current_space = *_capacity;
+      // If the users passes eco=true we allocate
+      // exactly what was requested
+      if (!eco) {
+         requested_space *= _alloc_multiplier;
+      }
       // Vector was default initialized
       if (_data == nullptr) {
          _deallocate();
          ...
          *_size = 0;
          return;
       }
@@ -823,23 +858,12 @@ class SplitVector {
-      // Nope.
-      const size_t currentSize = size();
+      // Already has sufficient capacity?
+      const size_t current_space = *_capacity;
       if (requested_space <= current_space) {
-         if (std::is_trivially_constructible<T>::value && _location == Residency::device) {
-            SPLIT_CHECK_ERR( split_gpuMemsetAsync(&_data[currentSize],0,(requested_space-currentSize)*sizeof(T), stream) );
-         } else {
-            for (size_t i = currentSize; i < requested_space; ++i) {
-               _allocator.construct(&_data[i], T());
-            }
-         }
          return;
       }
-      // If the users passes eco=true we allocate
-      // exactly what was requested
-      if (!eco) {
-         requested_space *= _alloc_multiplier;
-      }
+      // Reallocate.
       reallocate(requested_space,stream);
       return;
    }
@@ -864,6 +888,7 @@ class SplitVector {
       }
       reserve(newSize, eco, stream);
       *_size = newSize;
+      // TODO: should it set entries to zero?
    }

    /**
     * @brief ...
@@ -872,12 +897,14 @@ class SplitVector {
     * @param newSize The new size of the SplitVector.
     */
    DEVICEONLY
-   void device_resize(size_t newSize) {
+   void device_resize(size_t newSize, bool construct=true) {
       if (newSize > capacity()) {
          assert(0 && "Splitvector has a catastrophic failure trying to resize on device.");
       }
-      for (size_t i = size(); i < newSize; ++i) {
-         _allocator.construct(&_data[i], T());
+      if (construct) {
+         for (size_t i = size(); i < newSize; ++i) {
+            _allocator.construct(&_data[i], T());
+         }
       }
       *_size = newSize;
    }
    ...
@@ -901,7 +928,6 @@ class SplitVector {
       if (curr_cap == curr_size) {
          return;
       }
-      reallocate(curr_size,stream);
       return;
    }
    ...
@@ -1350,7 +1376,7 @@ class SplitVector {
       }
       // Increase size;
-      device_resize(size() + count);
+      device_resize(size() + count, false); // false means don't construct base objects
       for (size_t i = 0; i < count; ++i) {
          _data[index + i] = *(p0.data() + i);
       }
@@ -1386,11 +1412,11 @@ class SplitVector {
       }
       // Increase size;
+      device_resize(size() + 1, false); // false means don't construct base objects
       for (int64_t i = size() - 1; i >= index; i--) {
          _data[i + 1] = _data[i];
       }
       _data[index] = val;
-      device_resize(size() + 1);
       return iterator(_data + index);
    }
@@ -1414,7 +1440,7 @@ class SplitVector {
          "space available.");
       }

-      device_resize(newSize);
+      device_resize(newSize, false); // false means don't construct base objects
       it = begin().data() + index;
       iterator last = it.data() + oldsize;
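The last three hunks pass construct=false because every slot exposed by the resize is written immediately afterwards, so default-constructing those elements inside a __device__ function is redundant work. A toy sketch of that reasoning; DeviceBuffer and device_push_back are illustrative names, not SplitVector's API, and capacity/concurrency handling is left to the caller:

    #include <cassert>
    #include <cstddef>

    template <typename T>
    struct DeviceBuffer {
       T* _data;
       size_t* _size;
       size_t* _capacity;

       __device__ void device_resize(size_t newSize, bool construct = true) {
          assert(newSize <= *_capacity && "resize beyond capacity is not allowed on device");
          if (construct) {
             for (size_t i = *_size; i < newSize; ++i) {
                _data[i] = T(); // only needed when the new slots are *not* written right away
             }
          }
          *_size = newSize;
       }

       __device__ void device_push_back(const T& val) {
          const size_t idx = *_size;
          device_resize(idx + 1, false); // skip construction: the slot is assigned next
          _data[idx] = val;
       }
    };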