diff --git a/Makefile b/Makefile index ab410bd..648ba70 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,33 @@ build: ripser -all: ripser ripser-coeff ripser-debug +all: ripser ripser-coeff ripser-debug ripser-serial ripser-serial-debug +FLAGS=-Iinclude -pthread +FLAGS+=-MMD -MP + +-include *.d ripser: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser -Ofast -D NDEBUG + c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} + +ripser-serial: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_SERIAL + +ripser-serial-debug: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o $@ -g ${FLAGS} -DUSE_SERIAL ripser-coeff: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D USE_COEFFICIENTS + c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG -D USE_COEFFICIENTS ${FLAGS} ripser-debug: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser-debug -g + c++ -std=c++11 -Wall ripser.cpp -o $@ -g ${FLAGS} # -fsanitize=thread -fsanitize=undefined + +ripser-tbb: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_TBB -DUSE_TBB_HASHMAP -ltbb +ripser-pstl: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_PARALLEL_STL -DUSE_TBB_HASHMAP -ltbb -std=c++17 clean: - rm -f ripser ripser-coeff ripser-debug + rm -f ripser ripser-* diff --git a/README.md b/README.md index 51029bf..b0725ed 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Copyright © 2015–2019 [Ulrich Bauer]. +This branch implements a version of the algorithm described in +[Towards Lockfree Persistent Homology](https://mrzv.org/publications/lockfree-persistence/). ### Description @@ -128,4 +130,4 @@ Ripser is licensed under the [MIT] license (`COPYING.txt`), with an extra clause [Perseus]: [GUDHI]: [sparsehash]: -[MIT]: \ No newline at end of file +[MIT]: diff --git a/include/atomic_ref.hpp b/include/atomic_ref.hpp new file mode 100644 index 0000000..1326489 --- /dev/null +++ b/include/atomic_ref.hpp @@ -0,0 +1,172 @@ +#pragma once + +// Atuhor: Dmitriy Morozov +// Version 2020-03-15 + +#if defined(USE_SERIAL_ATOMIC_REF) +#include "atomic_ref_serial.hpp" +#else + +#include +#include +#include + +namespace mrzv +{ + +template +class atomic_ref; + +template +class atomic_ref_base +{ + public: + using value_type = T; + using difference_type = value_type; + + public: + + explicit atomic_ref_base(T& obj): obj_(&obj) {} + atomic_ref_base(const atomic_ref_base& ref) noexcept =default; + + T operator=(T desired) const noexcept { store(desired); return desired; } + atomic_ref_base& + operator=(const atomic_ref_base&) =delete; + + bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(T), obj_); } + + void store(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept + { __atomic_store_n(obj_, desired, atomic_memory_order(order)); } + T load(std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_load_n(obj_, atomic_memory_order(order)); } + + operator T() const noexcept { return load(); } + + T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_exchange_n(obj_, desired, atomic_memory_order(order)); } + + bool compare_exchange_weak(T& expected, T desired, + std::memory_order success, + std::memory_order failure) const noexcept + { return __atomic_compare_exchange_n(obj_, &expected, desired, true, atomic_memory_order(success), atomic_memory_order(failure)); } + + bool compare_exchange_weak(T& expected, T desired, + std::memory_order order = std::memory_order_seq_cst ) const noexcept + { return compare_exchange_weak(expected, desired, order, order); } // TODO: not quite this simple + + bool compare_exchange_strong(T& expected, T desired, + std::memory_order success, + std::memory_order failure) const noexcept + { return __atomic_compare_exchange_n(obj_, &expected, desired, false, atomic_memory_order(success), atomic_memory_order(failure)); } + + + bool compare_exchange_strong(T& expected, T desired, + std::memory_order order = std::memory_order_seq_cst) const noexcept + { return compare_exchange_strong(expected, desired, order, order); } // TODO: not quite this simple + + // would be great to have wait and notify, but unclear how to implement them efficiently with __atomic + //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const noexcept; + //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const volatile noexcept; + //void notify_one() const noexcept; + //void notify_one() const volatile noexcept; + //void notify_all() const noexcept; + //void notify_all() const volatile noexcept; + + protected: + int atomic_memory_order(std::memory_order order) const + { + if (order == std::memory_order_relaxed) + return __ATOMIC_RELAXED; + else if (order == std::memory_order_consume) + return __ATOMIC_CONSUME; + else if (order == std::memory_order_acquire) + return __ATOMIC_ACQUIRE; + else if (order == std::memory_order_release) + return __ATOMIC_RELEASE; + else if (order == std::memory_order_acq_rel) + return __ATOMIC_ACQ_REL; + else if (order == std::memory_order_seq_cst) + return __ATOMIC_SEQ_CST; + assert(0); + return __ATOMIC_RELAXED; + } + + protected: + T* obj_; +}; + +template +class atomic_ref::value>::type>: public atomic_ref_base +{ + public: + using Parent = atomic_ref_base; + using value_type = typename Parent::value_type; + using difference_type = typename Parent::difference_type; + + public: + using Parent::Parent; + + value_type operator++() const noexcept { return fetch_add(1) + 1; } + value_type operator++(int) const noexcept { return fetch_add(1); } + value_type operator--() const noexcept { return fetch_sub(1) - 1; } + value_type operator--(int) const noexcept { return fetch_sub(1); } + + T operator+=( T arg ) const noexcept { return fetch_add(arg) + arg; } + T operator-=( T arg ) const noexcept { return fetch_sub(arg) - arg; } + T operator&=( T arg ) const noexcept { return fetch_and(arg) & arg; } + T operator|=( T arg ) const noexcept { return fetch_or(arg) | arg; } + T operator^=( T arg ) const noexcept { return fetch_xor(arg) ^ arg; } + + T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_add(obj_, arg, atomic_memory_order(order)); } + + T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_sub(obj_, arg, atomic_memory_order(order)); } + + T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_and(obj_, arg, atomic_memory_order(order)); } + + T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_or(obj_, arg, atomic_memory_order(order)); } + + T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_xor(obj_, arg, atomic_memory_order(order)); } + + protected: + using Parent::obj_; + using Parent::atomic_memory_order; +}; + +template +class atomic_ref: public atomic_ref_base +{ + public: + using Parent = atomic_ref_base; + using value_type = typename Parent::value_type; + using difference_type = typename Parent::difference_type; + + public: + using Parent::Parent; + + value_type operator++() const noexcept { return fetch_add(1) + 1; } + value_type operator++(int) const noexcept { return fetch_add(1); } + value_type operator--() const noexcept { return fetch_sub(1) - 1; } + value_type operator--(int) const noexcept { return fetch_sub(1); } + + T* operator+=(std::ptrdiff_t arg) const noexcept { return fetch_add(arg) + arg; } + T* operator-=(std::ptrdiff_t arg) const noexcept { return fetch_sub(arg) - arg; } + + T* fetch_add(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_add(obj_, arg * sizeof(T), atomic_memory_order(order)); } + + T* fetch_sub(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { return __atomic_fetch_sub(obj_, arg * sizeof(T), atomic_memory_order(order)); } + + protected: + using Parent::obj_; + using Parent::atomic_memory_order; +}; + +} + +#endif // USE_SERIAL_ATOMIC_REF diff --git a/include/atomic_ref_serial.hpp b/include/atomic_ref_serial.hpp new file mode 100644 index 0000000..beff4cf --- /dev/null +++ b/include/atomic_ref_serial.hpp @@ -0,0 +1,148 @@ +#pragma once + +#pragma message "Using serial atomic_ref" + +// Atuhor: Dmitriy Morozov +// Version 2020-03-15 + +#include +#include +#include + +namespace mrzv +{ + +template +class atomic_ref; + +template +class atomic_ref_base +{ + public: + using value_type = T; + using difference_type = value_type; + + public: + + explicit atomic_ref_base(T& obj): obj_(&obj) {} + atomic_ref_base(const atomic_ref_base& ref) noexcept =default; + + T operator=(T desired) const noexcept { store(desired); return desired; } + atomic_ref_base& + operator=(const atomic_ref_base&) =delete; + + bool is_lock_free() const noexcept { return true; } + + void store(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept + { *obj_ = desired; } + T load(std::memory_order order = std::memory_order_seq_cst) const noexcept + { return *obj_; } + + operator T() const noexcept { return load(); } + + T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept + { std::swap(*obj_, desired); return desired; } + + // Technically, not quite CAS, but simplified for the only meaningful scenario I can think of + bool compare_exchange_weak(T& expected, T desired, + std::memory_order, + std::memory_order) const noexcept + { assert(*obj_ == expected); *obj_ = desired; return true; } + + bool compare_exchange_weak(T& expected, T desired, + std::memory_order order = std::memory_order_seq_cst ) const noexcept + { assert(*obj_ == expected); *obj_ = desired; return true; } + + bool compare_exchange_strong(T& expected, T desired, + std::memory_order, + std::memory_order) const noexcept + { assert(*obj_ == expected); *obj_ = desired; return true; } + + + bool compare_exchange_strong(T& expected, T desired, + std::memory_order order = std::memory_order_seq_cst) const noexcept + { assert(*obj_ == expected); *obj_ = desired; return true; } + + // would be great to have wait and notify, but unclear how to implement them efficiently with __atomic + //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const noexcept; + //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const volatile noexcept; + //void notify_one() const noexcept; + //void notify_one() const volatile noexcept; + //void notify_all() const noexcept; + //void notify_all() const volatile noexcept; + + protected: + T* obj_; +}; + +template +class atomic_ref::value>::type>: public atomic_ref_base +{ + public: + using Parent = atomic_ref_base; + using value_type = typename Parent::value_type; + using difference_type = typename Parent::difference_type; + + public: + using Parent::Parent; + + value_type operator++() const noexcept { return fetch_add(1) + 1; } + value_type operator++(int) const noexcept { return fetch_add(1); } + value_type operator--() const noexcept { return fetch_sub(1) - 1; } + value_type operator--(int) const noexcept { return fetch_sub(1); } + + T operator+=( T arg ) const noexcept { return fetch_add(arg) + arg; } + T operator-=( T arg ) const noexcept { return fetch_sub(arg) - arg; } + T operator&=( T arg ) const noexcept { return fetch_and(arg) & arg; } + T operator|=( T arg ) const noexcept { return fetch_or(arg) | arg; } + T operator^=( T arg ) const noexcept { return fetch_xor(arg) ^ arg; } + + T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T result = *obj_; *obj_ += arg; return result; } + + T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T result = *obj_; *obj_ -= arg; return result; } + + T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T result = *obj_; *obj_ &= arg; return result; } + + T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T result = *obj_; *obj_ |= arg; return result; } + + T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T result = *obj_; *obj_ ^= arg; return result; } + + protected: + using Parent::obj_; +}; + +template +class atomic_ref: public atomic_ref_base +{ + public: + using Parent = atomic_ref_base; + using value_type = typename Parent::value_type; + using difference_type = typename Parent::difference_type; + + public: + using Parent::Parent; + + value_type operator++() const noexcept { return fetch_add(1) + 1; } + value_type operator++(int) const noexcept { return fetch_add(1); } + value_type operator--() const noexcept { return fetch_sub(1) - 1; } + value_type operator--(int) const noexcept { return fetch_sub(1); } + + T* operator+=(std::ptrdiff_t arg) const noexcept { return fetch_add(arg) + arg; } + T* operator-=(std::ptrdiff_t arg) const noexcept { return fetch_sub(arg) - arg; } + + T* fetch_add(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T* result = *obj_; *obj_ += arg; return result; } + + T* fetch_sub(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept + { T* result = *obj_; *obj_ -= arg; return result; } + + protected: + using Parent::obj_; +}; + +} diff --git a/include/boost/sort/parallel/detail/bis/backbone.hpp b/include/boost/sort/parallel/detail/bis/backbone.hpp new file mode 100755 index 0000000..b56c3e8 --- /dev/null +++ b/include/boost/sort/parallel/detail/bis/backbone.hpp @@ -0,0 +1,223 @@ +//---------------------------------------------------------------------------- +/// @file backbone.hpp +/// @brief This file constains the class backbone, which is part of the +/// block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_BACKBONE_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BIS_BACKBONE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace bis +{ + +//--------------------------------------------------------------------------- +// USING SENTENCES +//--------------------------------------------------------------------------- +using boost::sort::parallel::detail::util::stack_cnc; + +///--------------------------------------------------------------------------- +/// @struct backbone +/// @brief This contains all the information shared betwen the classes of the +/// block indirect sort algorithm + +//---------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t, class Compare > +struct backbone +{ + //------------------------------------------------------------------------- + // D E F I N I T I O N S + //------------------------------------------------------------------------- + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + typedef std::atomic< uint32_t > atomic_t; + typedef range< size_t > range_pos; + typedef range< Iter_t > range_it; + typedef range< value_t * > range_buf; + typedef std::function< void(void) > function_t; + typedef block< Block_size, Iter_t > block_t; + + //------------------------------------------------------------------------ + // V A R I A B L E S + //------------------------------------------------------------------------ + // range with all the element to sort + util::range< Iter_t > global_range; + + // index vector of block_pos elements + std::vector< block_pos > index; + + // Number of elements to sort + size_t nelem; + + // Number of blocks to sort + size_t nblock; + + // Number of elements in the last block (tail) + size_t ntail; + + // object for to compare two elements + Compare cmp; + + // range of elements of the last block (tail) + range_it range_tail; + + // thread local varible. It is a pointer to the buffer + static thread_local value_t *buf; + + // concurrent stack where store the function_t elements + stack_cnc< function_t > works; + + // global indicator of error + bool error; + // + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + backbone (Iter_t first, Iter_t last, Compare comp); + + //------------------------------------------------------------------------ + // function : get_block + /// @brief obtain the block in the position pos + /// @param pos : position of the range + /// @return block required + //------------------------------------------------------------------------ + block_t get_block (size_t pos) const + { + return block_t (global_range.first + (pos * Block_size)); + }; + //------------------------------------------------------------------------- + // function : get_range + /// @brief obtain the range in the position pos + /// @param pos : position of the range + /// @return range required + //------------------------------------------------------------------------- + range_it get_range (size_t pos) const + { + Iter_t it1 = global_range.first + (pos * Block_size); + Iter_t it2 = + (pos == (nblock - 1)) ? global_range.last : it1 + Block_size; + return range_it (it1, it2); + }; + //------------------------------------------------------------------------- + // function : get_range_buf + /// @brief obtain the auxiliary buffer of the thread + //------------------------------------------------------------------------- + range_buf get_range_buf ( ) const + { + return range_buf (buf, buf + Block_size); + }; + + //------------------------------------------------------------------------- + // function : exec + /// @brief Initialize the thread local buffer with the ptr_buf pointer, + /// and begin with the execution of the functions stored in works + // + /// @param ptr_buf : Pointer to the memory assigned to the thread_local + /// buffer + /// @param counter : atomic counter for to invoke to the exec function + /// with only 1 parameter + //------------------------------------------------------------------------- + void exec (value_t *ptr_buf, atomic_t &counter) + { + buf = ptr_buf; + exec (counter); + }; + + void exec (atomic_t &counter); + +//--------------------------------------------------------------------------- +}; // end struct backbone +//--------------------------------------------------------------------------- +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +// initialization of the thread_local pointer to the auxiliary buffer +template < uint32_t Block_size, class Iter_t, class Compare > +thread_local typename std::iterator_traits< Iter_t >::value_type + *backbone< Block_size, Iter_t, Compare >::buf = nullptr; + +//------------------------------------------------------------------------ +// function : backbone +/// @brief constructor of the class +// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//------------------------------------------------------------------------ +template < uint32_t Block_size, class Iter_t, class Compare > +backbone< Block_size, Iter_t, Compare > + ::backbone (Iter_t first, Iter_t last, Compare comp) + : global_range (first, last), cmp (comp), error (false) +{ + assert ((last - first) >= 0); + if (first == last) return; // nothing to do + + nelem = size_t (last - first); + nblock = (nelem + Block_size - 1) / Block_size; + ntail = (nelem % Block_size); + index.reserve (nblock + 1); + + for (size_t i = 0; i < nblock; ++i) index.emplace_back (block_pos (i)); + + range_tail.first = + (ntail == 0) ? last : (first + ((nblock - 1) * Block_size)); + range_tail.last = last; +}; +// +//------------------------------------------------------------------------- +// function : exec +/// @brief execute the function_t stored in works, until counter is zero +// +/// @param counter : atomic counter. When 0 exits the function +//------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t, class Compare > +void backbone< Block_size, Iter_t, Compare >::exec (atomic_t &counter) +{ + function_t func_exec; + while (util::atomic_read (counter) != 0) { + if (works.pop_move_back (func_exec)) + func_exec ( ); + else + std::this_thread::yield ( ); + }; +}; +// +//**************************************************************************** +}; // End namespace bis +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +#endif diff --git a/include/boost/sort/parallel/detail/bis/block.hpp b/include/boost/sort/parallel/detail/bis/block.hpp new file mode 100755 index 0000000..adc68b1 --- /dev/null +++ b/include/boost/sort/parallel/detail/bis/block.hpp @@ -0,0 +1,186 @@ +//---------------------------------------------------------------------------- +/// @file block.hpp +/// @brief This file contains the internal data structures used in the +/// block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_BLOCK_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BIS_BLOCK_HPP + +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace bis +{ +//--------------------------------------------------------------------------- +// USING SENTENCES +//--------------------------------------------------------------------------- +using boost::sort::parallel::detail::util::range; +// +//--------------------------------------------------------------------------- +/// @struct block_pos +/// @brief represent a pair of values, a position represented as an unsigned +/// variable ( position ), and a bool variable ( side ). They are packed +/// in a size_t variable. The Least Significant Bit is the bool variable, +/// and the others bits are the position +//---------------------------------------------------------------------------- +class block_pos +{ + //------------------------------------------------------------------------ + // VARIABLES + //----------------------------------------------------------------------- + size_t num; // number which store a position and a bool side + + public: + //----------------------------- FUNCTIONS ------------------------------ + block_pos (void) : num (0){}; + // + //------------------------------------------------------------------------- + // function : block_pos + /// @brief constructor from a position and a side + /// @param position : position to sotre + /// @param side : side to store + //------------------------------------------------------------------------- + block_pos (size_t position, bool side = false) + { + num = (position << 1) + ((side) ? 1 : 0); + }; + // + //------------------------------------------------------------------------- + // function : pos + /// @brief obtain the position stored inside the block_pos + /// @return position + //------------------------------------------------------------------------- + size_t pos (void) const { return (num >> 1); }; + // + //------------------------------------------------------------------------- + // function : pos + /// @brief store a position inside the block_pos + /// @param position : value to store + //------------------------------------------------------------------------- + void set_pos (size_t position) { num = (position << 1) + (num & 1); }; + // + //------------------------------------------------------------------------- + // function : side + /// @brief obtain the side stored inside the block_pos + /// @return bool value + //------------------------------------------------------------------------- + bool side (void) const { return ((num & 1) != 0); }; + // + //------------------------------------------------------------------------- + // function : side + /// @brief store a bool value the block_pos + /// @param sd : bool value to store + //------------------------------------------------------------------------- + void set_side (bool sd) { num = (num & ~1) + ((sd) ? 1 : 0); }; +}; // end struct block_pos + +// +//--------------------------------------------------------------------------- +/// @struct block +/// @brief represent a group of Block_size contiguous elements, beginning +/// with the pointed by first +//---------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t > +struct block +{ + //---------------------------------------------------------------------- + // VARIABLES + //---------------------------------------------------------------------- + Iter_t first; // iterator to the first element of the block + + //------------------------------------------------------------------------- + // function : block + /// @brief constructor from an iterator to the first element of the block + /// @param it : iterator to the first element of the block + //------------------------------------------------------------------------- + block (Iter_t it) : first (it){}; + + //------------------------------------------------------------------------- + // function : get_range + /// @brief convert a block in a range + /// @return range + //------------------------------------------------------------------------- + range< Iter_t > get_range (void) + { + return range_it (first, first + Block_size); + }; + +}; // end struct block + +// +//------------------------------------------------------------------------- +// function : compare_block +/// @brief compare two blocks using the content of the pointed by first +/// @param block1 : first block to compare +/// @param block2 : second block to compare +/// @param cmp : comparison operator +//------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t, class Compare > +bool compare_block (block< Block_size, Iter_t > block1, + block< Block_size, Iter_t > block2, + Compare cmp = Compare ( )) +{ + return cmp (*block1.first, *block2.first); +}; +// +///--------------------------------------------------------------------------- +/// @struct compare_block_pos +/// @brief This is a object for to compare two block_pos objects +//---------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t, class Compare > +struct compare_block_pos +{ + //----------------------------------------------------------------------- + // VARIABLES + //----------------------------------------------------------------------- + Iter_t global_first; // iterator to the first element to sort + Compare comp; // comparison object for to compare two elements + + //------------------------------------------------------------------------- + // function : compare_block_pos + /// @brief constructor + /// @param g_first : itertor to the first element to sort + /// @param cmp : comparison operator + //------------------------------------------------------------------------- + compare_block_pos (Iter_t g_first, Compare cmp) + : global_first (g_first), comp (cmp){}; + // + //------------------------------------------------------------------------- + // function : operator () + /// @brief compare two blocks using the content of the pointed by + /// global_first + /// @param block_pos1 : first block to compare + /// @param block_pos2 : second block to compare + //------------------------------------------------------------------------- + bool operator( ) (block_pos block_pos1, block_pos block_pos2) const + { + return comp (*(global_first + (block_pos1.pos ( ) * Block_size)), + *(global_first + (block_pos2.pos ( ) * Block_size))); + }; + +}; // end struct compare_block_pos + +//**************************************************************************** +}; // End namespace bis +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/bis/merge_blocks.hpp b/include/boost/sort/parallel/detail/bis/merge_blocks.hpp new file mode 100755 index 0000000..7210cfe --- /dev/null +++ b/include/boost/sort/parallel/detail/bis/merge_blocks.hpp @@ -0,0 +1,407 @@ +//---------------------------------------------------------------------------- +/// @file merge_blocks.hpp +/// @brief contains the class merge_blocks, which is part of the +/// block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_MERGE_BLOCKS_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BIS_MERGE_BLOCKS_HPP + +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace bis +{ +// +///--------------------------------------------------------------------------- +/// @struct merge_blocks +/// @brief This class merge the blocks. The blocks to merge are defined by two +/// ranges of positions in the index of the backbone +//---------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +struct merge_blocks +{ + //----------------------------------------------------------------------- + // D E F I N I T I O N S + //----------------------------------------------------------------------- + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + typedef std::atomic< uint32_t > atomic_t; + typedef util::range< size_t > range_pos; + typedef util::range< Iter_t > range_it; + typedef util::range< value_t * > range_buf; + typedef std::function< void(void) > function_t; + typedef backbone< Block_size, Iter_t, Compare > backbone_t; + typedef compare_block_pos< Block_size, Iter_t, Compare > + compare_block_pos_t; + + //------------------------------------------------------------------------ + // V A R I A B L E S + //------------------------------------------------------------------------ + // Object with the elements to sort and all internal data structures of the + // algorithm + backbone_t &bk; + // + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + merge_blocks (backbone_t &bkb, size_t pos_index1, size_t pos_index2, + size_t pos_index3); + + void tail_process (std::vector< block_pos > &vblkpos1, + std::vector< block_pos > &vblkpos2); + + void cut_range (range_pos rng); + + void merge_range_pos (range_pos rng); + + void extract_ranges (range_pos range_input); + // + //------------------------------------------------------------------------ + // function : function_merge_range_pos + /// @brief create a function_t with a call to merge_range_pos, and insert + /// in the stack of the backbone + // + /// @param rng_input : range of positions of blocks in the index to merge + /// @param son_counter : atomic variable which is decremented when finish + /// the function. This variable is used for to know + /// when are finished all the function_t created + /// inside an object + /// @param error : global indicator of error. + /// + //------------------------------------------------------------------------ + void function_merge_range_pos (const range_pos &rng_input, + atomic_t &counter, bool &error) + { + util::atomic_add (counter, 1); + function_t f1 = [this, rng_input, &counter, &error]( ) -> void { + if (not error) { + try + { + this->merge_range_pos (rng_input); + } + catch (std::bad_alloc &ba) + { + error = true; + }; + } + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + }; + // + //------------------------------------------------------------------------ + // function : function_cut_range + /// @brief create a function_t with a call to cut_range, and inser in + /// the stack of the backbone + // + /// @param rng_input : range of positions in the index to cut + /// @param counter : atomic variable which is decremented when finish + /// the function. This variable is used for to know + /// when are finished all the function_t created + /// inside an object + /// @param error : global indicator of error. + //------------------------------------------------------------------------ + void function_cut_range (const range_pos &rng_input, atomic_t &counter, + bool &error) + { + util::atomic_add (counter, 1); + function_t f1 = [this, rng_input, &counter, &error]( ) -> void { + if (not error) { + try + { + this->cut_range (rng_input); + } + catch (std::bad_alloc &) + { + error = true; + }; + } + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + }; + +//---------------------------------------------------------------------------- +}; // end struct merge_blocks +//---------------------------------------------------------------------------- +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//------------------------------------------------------------------------- +// function : merge_blocks +/// @brief make the indirect merge of the two range_pos defined by their index +/// position [pos_index1, pos_index2 ) and [ pos_index2, pos_index3 ) +// +/// @param bkb : backbone with all the data to sort , and the internal data +/// structures of the algorithm +/// @param pos_index1 : first position of the first range in the index +/// @param pos_index2 : last position of the first range and first position +/// of the second range in the index +/// @param pos_index3 : last position of the second range in the index +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +merge_blocks< Block_size, Group_size, Iter_t, Compare > + ::merge_blocks (backbone_t &bkb, size_t pos_index1, + size_t pos_index2, size_t pos_index3) : bk (bkb) +{ + size_t nblock1 = pos_index2 - pos_index1; + size_t nblock2 = pos_index3 - pos_index2; + if (nblock1 == 0 or nblock2 == 0) return; + + //----------------------------------------------------------------------- + // Merging of the two intervals + //----------------------------------------------------------------------- + std::vector< block_pos > vpos1, vpos2; + vpos1.reserve (nblock1 + 1); + vpos2.reserve (nblock2 + 1); + + for (size_t i = pos_index1; i < pos_index2; ++i) { + vpos1.emplace_back (bk.index[i].pos ( ), true); + }; + + for (size_t i = pos_index2; i < pos_index3; ++i) { + vpos2.emplace_back (bk.index[i].pos ( ), false); + }; + //------------------------------------------------------------------- + // tail process + //------------------------------------------------------------------- + if (vpos2.back ( ).pos ( ) == (bk.nblock - 1) and + bk.range_tail.first != bk.range_tail.last) + { + tail_process (vpos1, vpos2); + nblock1 = vpos1.size ( ); + nblock2 = vpos2.size ( ); + }; + + compare_block_pos_t cmp_blk (bk.global_range.first, bk.cmp); + if (bk.error) return; + util::full_merge (vpos1.begin ( ), vpos1.end ( ), vpos2.begin ( ), + vpos2.end ( ), bk.index.begin ( ) + pos_index1, cmp_blk); + if (bk.error) return; + // Extracting the ranges for to merge the elements + extract_ranges (range_pos (pos_index1, pos_index1 + nblock1 + nblock2)); +}; + +// +//------------------------------------------------------------------------- +// function : tail_process +/// @brief make the process when the second vector of block_pos to merge is +/// the last, and have an incomplete block ( tail) +// +/// @param vblkpos1 : first vector of block_pos elements to merge +/// @param vblkpos2 : second vector of block_pos elements to merge +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void merge_blocks< Block_size, Group_size, Iter_t, Compare > + ::tail_process (std::vector< block_pos > &vblkpos1, + std::vector< block_pos > &vblkpos2) +{ + if (vblkpos1.size ( ) == 0 or vblkpos2.size ( ) == 0) return; + + vblkpos2.pop_back ( ); + + size_t posback1 = vblkpos1.back ( ).pos ( ); + range_it range_back1 = bk.get_range (posback1); + + if (util::is_mergeable (range_back1, bk.range_tail, bk.cmp)) { + util::in_place_merge_uncontiguous (range_back1, bk.range_tail, + bk.get_range_buf ( ), bk.cmp); + if (vblkpos1.size ( ) > 1) { + size_t pos_aux = vblkpos1[vblkpos1.size ( ) - 2].pos ( ); + range_it range_aux = bk.get_range (pos_aux); + + if (util::is_mergeable (range_aux, range_back1, bk.cmp)) { + vblkpos2.emplace_back (posback1, false); + vblkpos1.pop_back ( ); + }; + }; + }; +}; +// +//------------------------------------------------------------------------- +// function : cut_range +/// @brief when the rng_input is greather than Group_size, this function divide +/// it in several parts creating function_t elements, which are inserted +/// in the concurrent stack of the backbone +// +/// @param rng_input : range to divide +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void merge_blocks< Block_size, Group_size, Iter_t, Compare > + ::cut_range (range_pos rng_input) +{ + if (rng_input.size ( ) < Group_size) { + merge_range_pos (rng_input); + return; + }; + + atomic_t counter (0); + size_t npart = (rng_input.size ( ) + Group_size - 1) / Group_size; + size_t size_part = rng_input.size ( ) / npart; + + size_t pos_ini = rng_input.first; + size_t pos_last = rng_input.last; + + while (pos_ini < pos_last) { + size_t pos = pos_ini + size_part; + while (pos < pos_last and + bk.index[pos - 1].side ( ) == bk.index[pos].side ( )) + { + ++pos; + }; + if (pos < pos_last) { + in_place_merge_uncontiguous ( + bk.get_range (bk.index[pos - 1].pos ( )), + bk.get_range (bk.index[pos].pos ( )), bk.get_range_buf ( ), + bk.cmp); + } + else + pos = pos_last; + if ((pos - pos_ini) > 1) { + range_pos rng_aux (pos_ini, pos); + function_merge_range_pos (rng_aux, counter, bk.error); + }; + pos_ini = pos; + }; + bk.exec (counter); // wait until finish all the ranges +}; + +// +//------------------------------------------------------------------------- +// function : merge_range_pos +/// @brief make the indirect merge of the blocks inside the rng_input +// +/// @param rng_input : range of positions of the blocks to merge +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void merge_blocks< Block_size, Group_size, Iter_t, Compare > + ::merge_range_pos (range_pos rng_input) +{ + if (rng_input.size ( ) < 2) return; + range_buf rbuf = bk.get_range_buf ( ); + + range_it rng_prev = bk.get_range (bk.index[rng_input.first].pos ( )); + init_move (rbuf, rng_prev); + range_it rng_posx (rng_prev); + + for (size_t posx = rng_input.first + 1; posx != rng_input.last; ++posx) { + rng_posx = bk.get_range (bk.index[posx].pos ( )); + util::merge_flow (rng_prev, rbuf, rng_posx, bk.cmp); + rng_prev = rng_posx; + + }; + init_move (rng_posx, rbuf); +}; + +// +//------------------------------------------------------------------------- +// function : extract_ranges +/// @brief from a big range of positions of blocks in the index. Examine which +/// are mergeable, and generate a couple of ranges for to be merged. +/// With the ranges obtained generate function_t elements and are +/// inserted in the concurrent stack. +/// When the range obtained is smaller than Group_size, generate a +/// function_t calling to merge_range_pos, when is greater, generate a +/// function_t calling to cut_range +// +/// @param rpos range_input : range of the position in the index, where must +/// extract the ranges to merge +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void merge_blocks< Block_size, Group_size, Iter_t, Compare > + ::extract_ranges (range_pos range_input) +{ + if (range_input.size ( ) < 2) return; + atomic_t counter (0); + + // The names with x are positions of the index + size_t posx_ini = range_input.first; + block_pos bp_posx_ini = bk.index[posx_ini]; + + range_it rng_max = bk.get_range (bp_posx_ini.pos ( )); + bool side_max = bp_posx_ini.side ( ); + + block_pos bp_posx; + range_it rng_posx = rng_max; + bool side_posx = side_max; + + for (size_t posx = posx_ini + 1; posx <= range_input.last; ++posx) { + bool final = (posx == range_input.last); + bool mergeable = false; + + if (not final) { + bp_posx = bk.index[posx]; + rng_posx = bk.get_range (bp_posx.pos ( )); + side_posx = bp_posx.side ( ); + mergeable = (side_max != side_posx and + is_mergeable (rng_max, rng_posx, bk.cmp)); + }; + if (bk.error) return; + if (final or not mergeable) { + range_pos rp_final (posx_ini, posx); + if (rp_final.size ( ) > 1) { + if (rp_final.size ( ) > Group_size) { + function_cut_range (rp_final, counter, bk.error); + } + else + { + function_merge_range_pos (rp_final, counter, bk.error); + }; + }; + posx_ini = posx; + if (not final) { + rng_max = rng_posx; + side_max = side_posx; + }; + } + else + { + if (bk.cmp (*(rng_max.back ( )), *(rng_posx.back ( )))) { + rng_max = rng_posx; + side_max = side_posx; + }; + }; + }; + bk.exec (counter); +}; + +// +//**************************************************************************** +}; // End namespace bis +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/bis/move_blocks.hpp b/include/boost/sort/parallel/detail/bis/move_blocks.hpp new file mode 100755 index 0000000..4220921 --- /dev/null +++ b/include/boost/sort/parallel/detail/bis/move_blocks.hpp @@ -0,0 +1,278 @@ +//---------------------------------------------------------------------------- +/// @file move_blocks.hpp +/// @brief contains the class move_blocks, which is part of the +/// block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_MOVE_BLOCKS_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BIS_MOVE_BLOCKS_HPP + +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace bis +{ +// +///--------------------------------------------------------------------------- +/// @struct move_blocks +/// @brief This class move the blocks, trnasforming a logical sort by an index, +/// in physical sort +//---------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +struct move_blocks +{ + //------------------------------------------------------------------------- + // D E F I N I T I O N S + //------------------------------------------------------------------------- + typedef move_blocks< Block_size, Group_size, Iter_t, Compare > this_type; + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + typedef std::atomic< uint32_t > atomic_t; + typedef util::range< size_t > range_pos; + typedef util::range< Iter_t > range_it; + typedef util::range< value_t * > range_buf; + typedef std::function< void(void) > function_t; + typedef backbone< Block_size, Iter_t, Compare > backbone_t; + + //------------------------------------------------------------------------ + // V A R I A B L E S + //------------------------------------------------------------------------ + // Object with the elements to sort and all internal data structures of the + // algorithm + backbone_t &bk; + + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + move_blocks (backbone_t &bkb); + + void move_sequence (const std::vector< size_t > &init_sequence); + + void move_long_sequence (const std::vector< size_t > &init_sequence); + // + //------------------------------------------------------------------------ + // function : function_move_sequence + /// @brief create a function_t with a call to move_sequence, and insert + /// in the stack of the backbone + /// + /// @param sequence :sequence of positions for to move the blocks + /// @param counter : atomic variable which is decremented when finish + /// the function. This variable is used for to know + /// when are finished all the function_t created + /// inside an object + /// @param error : global indicator of error. + //------------------------------------------------------------------------ + void function_move_sequence (std::vector< size_t > &sequence, + atomic_t &counter, bool &error) + { + util::atomic_add (counter, 1); + function_t f1 = [this, sequence, &counter, &error]( ) -> void { + if (not error) { + try + { + this->move_sequence (sequence); + } + catch (std::bad_alloc &) + { + error = true; + }; + } + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + }; + // + //------------------------------------------------------------------------ + // function : function_move_long_sequence + /// @brief create a function_t with a call to move_long_sequence, and + /// insert in the stack of the backbone + // + /// @param sequence :sequence of positions for to move the blocks + /// @param counter : atomic variable which is decremented when finish + /// the function. This variable is used for to know + /// when are finished all the function_t created + /// inside an object + /// @param error : global indicator of error. + //------------------------------------------------------------------------ + void function_move_long_sequence (std::vector< size_t > &sequence, + atomic_t &counter, bool &error) + { + util::atomic_add (counter, 1); + function_t f1 = [this, sequence, &counter, &error]( ) -> void { + if (not error) { + try + { + this->move_long_sequence (sequence); + } + catch (std::bad_alloc &) + { + error = true; + }; + } + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + }; +//--------------------------------------------------------------------------- +}; // end of struct move_blocks +//--------------------------------------------------------------------------- +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//------------------------------------------------------------------------- +// function : move_blocks +/// @brief constructor of the class for to move the blocks to their true +/// position obtained from the index +// +/// @param bkb : backbone with the index and the blocks +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +move_blocks< Block_size, Group_size, Iter_t, Compare > + ::move_blocks (backbone_t &bkb): bk (bkb) +{ + std::vector< std::vector< size_t > > vsequence; + vsequence.reserve (bk.index.size ( ) >> 1); + std::vector< size_t > sequence; + atomic_t counter (0); + + size_t pos_index_ini = 0, pos_index_src = 0, pos_index_dest = 0; + while (pos_index_ini < bk.index.size ( )) { + while (pos_index_ini < bk.index.size ( ) and + bk.index[pos_index_ini].pos ( ) == pos_index_ini) + { + ++pos_index_ini; + }; + + if (pos_index_ini == bk.index.size ( )) break; + + sequence.clear ( ); + pos_index_src = pos_index_dest = pos_index_ini; + sequence.push_back (pos_index_ini); + + while (bk.index[pos_index_dest].pos ( ) != pos_index_ini) { + pos_index_src = bk.index[pos_index_dest].pos ( ); + sequence.push_back (pos_index_src); + + bk.index[pos_index_dest].set_pos (pos_index_dest); + pos_index_dest = pos_index_src; + }; + + bk.index[pos_index_dest].set_pos (pos_index_dest); + vsequence.push_back (sequence); + + if (sequence.size ( ) < Group_size) { + function_move_sequence (vsequence.back ( ), counter, bk.error); + } + else + { + function_move_long_sequence (vsequence.back ( ), counter, bk.error); + }; + }; + bk.exec (counter); +}; +// +//------------------------------------------------------------------------- +// function : move_sequence +/// @brief move the blocks, following the positions of the init_sequence +// +/// @param init_sequence : vector with the positions from and where move the +/// blocks +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void move_blocks< Block_size, Group_size, Iter_t, Compare > + ::move_sequence (const std::vector< size_t > &init_sequence) +{ + range_buf rbuf = bk.get_range_buf ( ); + size_t pos_range2 = init_sequence[0]; + + range_it range2 = bk.get_range (pos_range2); + init_move (rbuf, range2); + + for (size_t i = 1; i < init_sequence.size ( ); ++i) { + pos_range2 = init_sequence[i]; + range_it range1 (range2); + range2 = bk.get_range (pos_range2); + init_move (range1, range2); + }; + init_move (range2, rbuf); +}; +// +//------------------------------------------------------------------------- +// function : move_long_sequence +/// @brief move the blocks, following the positions of the init_sequence. +/// if the sequence is greater than Group_size, it is divided in small +/// sequences, creating function_t elements, for to be inserted in the +/// concurrent stack +// +/// @param init_sequence : vector with the positions from and where move the +/// blocks +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void move_blocks< Block_size, Group_size, Iter_t, Compare > + ::move_long_sequence (const std::vector< size_t > &init_sequence) +{ + if (init_sequence.size ( ) < Group_size) + return move_sequence (init_sequence); + + size_t npart = (init_sequence.size ( ) + Group_size - 1) / Group_size; + size_t size_part = init_sequence.size ( ) / npart; + atomic_t son_counter (0); + + std::vector< size_t > sequence; + sequence.reserve (size_part); + + std::vector< size_t > index_seq; + index_seq.reserve (npart); + + auto it_pos = init_sequence.begin ( ); + for (size_t i = 0; i < (npart - 1); ++i, it_pos += size_part) { + sequence.assign (it_pos, it_pos + size_part); + index_seq.emplace_back (*(it_pos + size_part - 1)); + function_move_sequence (sequence, son_counter, bk.error); + }; + + sequence.assign (it_pos, init_sequence.end ( )); + index_seq.emplace_back (init_sequence.back ( )); + function_move_sequence (sequence, son_counter, bk.error); + + bk.exec (son_counter); + if (bk.error) return; + move_long_sequence (index_seq); +}; +// +//**************************************************************************** +}; // End namespace bis +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/bis/parallel_sort.hpp b/include/boost/sort/parallel/detail/bis/parallel_sort.hpp new file mode 100755 index 0000000..e6509aa --- /dev/null +++ b/include/boost/sort/parallel/detail/bis/parallel_sort.hpp @@ -0,0 +1,216 @@ +//---------------------------------------------------------------------------- +/// @file parallel_sort.hpp +/// @brief Contains the parallel_sort class, which is part of the +/// block_indirect_sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_PARALLEL_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BIS_PARALLEL_SORT_HPP + +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace bis +{ +//---------------------------------------------------------------------------- +// USING SENTENCES +//---------------------------------------------------------------------------- +using boost::sort::parallel::detail::util::nbits64; +// +///--------------------------------------------------------------------------- +/// @struct parallel_sort +/// @brief This class do a parallel sort, using the quicksort filtering, +/// splitting the data until the number of elements is smaller than a +/// predefined value (max_per_thread) +//---------------------------------------------------------------------------- +template < uint32_t Block_size, class Iter_t, class Compare > +struct parallel_sort +{ + //------------------------------------------------------------------------- + // D E F I N I T I O N S + //------------------------------------------------------------------------- + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + typedef std::atomic< uint32_t > atomic_t; + typedef std::function< void(void) > function_t; + typedef backbone< Block_size, Iter_t, Compare > backbone_t; + + //------------------------------------------------------------------------ + // V A R I A B L E S + //------------------------------------------------------------------------ + // reference to a object with all the data to sort + backbone_t &bk; + + // maximun number of element to sort woth 1 thread + size_t max_per_thread; + + // atomic counter for to detect the end of the works created inside + // the object + atomic_t counter; + + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + parallel_sort (backbone_t &bkbn, Iter_t first, Iter_t last); + + void divide_sort (Iter_t first, Iter_t last, uint32_t level); + // + //------------------------------------------------------------------------ + // function : function_divide_sort + /// @brief create a function_t with a call to divide_sort, and inser in + /// the stack of the backbone + // + /// @param first : iterator to the first element of the range to divide + /// @param last : iterator to the next element after the last element of + /// the range to divide + /// @param level : level of depth in the division.When zero call to + /// introsort + /// @param counter : atomic variable which is decremented when finish + /// the function. This variable is used for to know + /// when are finished all the function_t created + /// inside an object + /// @param error : global indicator of error. + //------------------------------------------------------------------------ + void function_divide_sort (Iter_t first, Iter_t last, uint32_t level, + atomic_t &counter, bool &error) + { + util::atomic_add (counter, 1); + function_t f1 = [this, first, last, level, &counter, &error]( ) { + if (not error) { + try + { + this->divide_sort (first, last, level); + } + catch (std::bad_alloc &) + { + error = true; + }; + }; + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + }; + +//-------------------------------------------------------------------------- +}; // end struct parallel_sort +//-------------------------------------------------------------------------- +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//------------------------------------------------------------------------ +// function : parallel_sort +/// @brief constructor of the class +/// @param [in] bkbn : backbone struct with all the information to sort +/// @param [in] first : iterator to the first element to sort +/// @param [in] last : iterator to the next element after the last +//------------------------------------------------------------------------ +template < uint32_t Block_size, class Iter_t, class Compare > +parallel_sort< Block_size, Iter_t, Compare > + ::parallel_sort (backbone_t &bkbn, Iter_t first, Iter_t last) + : bk (bkbn), counter (0) +{ + assert ((last - first) >= 0); + size_t nelem = size_t (last - first); + + //------------------- check if sort -------------------------------------- + bool sorted = true; + for (Iter_t it1 = first, it2 = first + 1; + it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++) + ; + if (sorted) return; + + //-------------------max_per_thread --------------------------- + uint32_t nbits_size = (nbits64 (sizeof (value_t))) >> 1; + if (nbits_size > 5) nbits_size = 5; + max_per_thread = 1 << (18 - nbits_size); + + uint32_t level = ((nbits64 (nelem / max_per_thread)) * 3) / 2; + + //---------------- check if only single thread ----------------------- + if (nelem < (max_per_thread)) { + intro_sort (first, last, bk.cmp); + return; + }; + if (not bk.error) divide_sort (first, last, level); + + // wait until all the parts are finished + bk.exec (counter); +}; + +//------------------------------------------------------------------------ +// function : divide_sort +/// @brief this function divide the data in two part, for to be sorted in +/// a parallel mode +/// @param first : iterator to the first element to sort +/// @param last : iterator to the next element after the last +/// @param level : level of depth before call to introsort +//------------------------------------------------------------------------ +template < uint32_t Block_size, class Iter_t, class Compare > +void parallel_sort< Block_size, Iter_t, Compare > + ::divide_sort (Iter_t first, Iter_t last, uint32_t level) +{ + //------------------- check if sort ----------------------------------- + bool sorted = true; + for (Iter_t it1 = first, it2 = first + 1; + it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++) + ; + if (sorted) return; + + //---------------- check if finish the subdivision ------------------- + size_t nelem = last - first; + if (level == 0 or nelem < (max_per_thread)) { + return intro_sort (first, last, bk.cmp); + }; + + //-------------------- pivoting ---------------------------------- + pivot9 (first, last, bk.cmp); + const value_t &val = const_cast< value_t & > (*first); + Iter_t c_first = first + 1, c_last = last - 1; + + while (bk.cmp (*c_first, val)) ++c_first; + while (bk.cmp (val, *c_last)) --c_last; + + while (not(c_first > c_last)) { + std::swap (*(c_first++), *(c_last--)); + while (bk.cmp (*c_first, val)) ++c_first; + while (bk.cmp (val, *c_last)) --c_last; + }; + + std::swap (*first, *c_last); + + // insert the work of the second half in the stack of works + function_divide_sort (c_first, last, level - 1, counter, bk.error); + if (bk.error) return; + + // The first half is done by the same thread + function_divide_sort (first, c_last, level - 1, counter, bk.error); +}; +// +//**************************************************************************** +}; // End namespace bis +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/block_indirect_sort.hpp b/include/boost/sort/parallel/detail/block_indirect_sort.hpp new file mode 100755 index 0000000..2421da2 --- /dev/null +++ b/include/boost/sort/parallel/detail/block_indirect_sort.hpp @@ -0,0 +1,339 @@ +//---------------------------------------------------------------------------- +/// @file block_indirect_sort.hpp +/// @brief block indirect sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP + +#include +#include +#include +#include + +#include +#include +#include + +// This value is the minimal number of threads for to use the +// block_indirect_sort algorithm +#define BOOST_NTHREAD_BORDER 6 + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +///--------------------------------------------------------------------------- +/// @struct block_indirect_sort +/// @brief This class is the entry point of the block indirect sort. The code +/// of this algorithm is divided in several classes: +/// bis/block.hpp : basic structures used in the algorithm +/// bis/backbone.hpp : data used by all the classes +/// bis/merge_blocks.hpp : merge the internal blocks +/// bis/move_blocks.hpp : move the blocks, and obtain all the elements +/// phisicaly sorted +/// bis/parallel_sort.hpp : make the parallel sort of each part in the +/// initial division of the data +/// +//---------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare = util::compare_iter< Iter_t > > +struct block_indirect_sort +{ + //------------------------------------------------------------------------ + // D E F I N I T I O N S + //------------------------------------------------------------------------ + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + typedef std::atomic< uint32_t > atomic_t; + typedef util::range< size_t > range_pos; + typedef util::range< Iter_t > range_it; + typedef util::range< value_t * > range_buf; + typedef std::function< void(void) > function_t; + + // classes used in the internal operations of the algorithm + typedef bis::block_pos block_pos_t; + typedef bis::block< Block_size, Iter_t > block_t; + typedef bis::backbone< Block_size, Iter_t, Compare > backbone_t; + typedef bis::parallel_sort< Block_size, Iter_t, Compare > parallel_sort_t; + + typedef bis::merge_blocks< Block_size, Group_size, Iter_t, Compare > + merge_blocks_t; + typedef bis::move_blocks< Block_size, Group_size, Iter_t, Compare > + move_blocks_t; + typedef bis::compare_block_pos< Block_size, Iter_t, Compare > + compare_block_pos_t; + // + //------------------------------------------------------------------------ + // V A R I A B L E S A N D C O N S T A N T S + //------------------------------------------------------------------------ + // contains the data and the internal data structures of the algorithm for + // to be shared between the classes which are part of the algorithm + backbone_t bk; + // atomic counter for to detect the end of the works created inside + // the object + atomic_t counter; + // pointer to the uninitialized memory used for the thread buffers + value_t *ptr; + // indicate if the memory pointed by ptr is initialized + bool construct; + // range from extract the buffers for the threads + range_buf rglobal_buf; + // number of threads to use + uint32_t nthread; + // + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + + block_indirect_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t nthr); + + block_indirect_sort (Iter_t first, Iter_t last) + : block_indirect_sort (first, last, Compare ( ), + std::thread::hardware_concurrency ( )){}; + + block_indirect_sort (Iter_t first, Iter_t last, Compare cmp) + : block_indirect_sort (first, last, cmp, + std::thread::hardware_concurrency ( )){}; + + block_indirect_sort (Iter_t first, Iter_t last, uint32_t nthread) + : block_indirect_sort (first, last, Compare ( ), nthread){}; + + // + //------------------------------------------------------------------------ + // function :destroy_all + /// @brief destructor all the data structures of the class (if the memory + /// is constructed, is destroyed) and return the uninitialized + /// memory + //------------------------------------------------------------------------ + void destroy_all (void) + { + if (ptr != nullptr) { + if (construct) { + util::destroy (rglobal_buf); + construct = false; + }; + std::return_temporary_buffer (ptr); + ptr = nullptr; + }; + }; + + // + //------------------------------------------------------------------------ + // function :~block_indirect_sort + /// @brief destructor of the class (if the memory is constructed, is + /// destroyed) and return the uninitialized memory + //------------------------------------------------------------------------ + ~block_indirect_sort (void) { destroy_all ( ); }; + void split_range (size_t pos_index1, size_t pos_index2, + uint32_t level_thread); + + void start_function (void); + + //------------------------------------------------------------------------- +}; +// End class block_indirect_sort +//---------------------------------------------------------------------------- +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//------------------------------------------------------------------------- +// function : block_indirect_sort +/// @brief begin with the execution of the functions stored in works +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param nthr : Number of threads to use in the process.When this value +/// is lower than 2, the sorting is done with 1 thread +//------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +block_indirect_sort< Block_size, Group_size, Iter_t, + Compare >::block_indirect_sort (Iter_t first, Iter_t last, + Compare cmp, uint32_t nthr) + : bk (first, last, cmp), counter (0), ptr (nullptr), construct (false), + nthread (nthr) +{ + try + { + assert ((last - first) >= 0); + size_t nelem = size_t (last - first); + if (nelem == 0) return; + + //------------------- check if sort ----------------------------------- + bool sorted = true; + for (Iter_t it1 = first, it2 = first + 1; + it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++) + ; + if (sorted) return; + + //---------------- check if only single thread ----------------------- + size_t nthreadmax = nelem / (Block_size * Group_size) + 1; + if (nthread > nthreadmax) nthread = (uint32_t)nthreadmax; + uint32_t nbits_size = (util::nbits64 (sizeof (value_t))) >> 1; + if (nbits_size > 5) nbits_size = 5; + size_t max_per_thread = 1 << (18 - nbits_size); + + if (nelem < (max_per_thread) or nthread < 2) { + intro_sort (first, last, bk.cmp); + return; + }; + + //----------- creation of the temporary buffer -------------------- + ptr = std::get_temporary_buffer< value_t > (Block_size * nthread).first; + if (ptr == nullptr) { + bk.error = true; + throw std::bad_alloc ( ); + }; + + rglobal_buf = range_buf (ptr, ptr + (Block_size * nthread)); + util::init (rglobal_buf, *first); + construct = true; + + // creation of the buffers for the threads + std::vector< value_t * > vbuf (nthread); + for (uint32_t i = 0; i < nthread; ++i) { + vbuf[i] = ptr + (i * Block_size); + }; + + // Insert the first work in the stack + util::atomic_write (counter, 1); + function_t f1 = [&]( ) { + start_function ( ); + util::atomic_sub (counter, 1); + }; + bk.works.emplace_back (f1); + + //--------------------------------------------------------------------- + // PROCESS + //--------------------------------------------------------------------- + std::vector< std::future< void > > vfuture (nthread); + + // The function launched with the futures is "execute the functions of + // the stack until this->counter is zero + // vbuf[i] is the memory from the main thread for to configure the + // thread local buffer + for (uint32_t i = 0; i < nthread; ++i) { + auto f1 = [=, &vbuf]( ) { bk.exec (vbuf[i], this->counter); }; + vfuture[i] = std::async (std::launch::async, f1); + }; + for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( ); + if (bk.error) throw std::bad_alloc ( ); + } + catch (std::bad_alloc &) + { + destroy_all ( ); + throw; + } +}; + +// +//----------------------------------------------------------------------------- +// function : split_rage +/// @brief this function splits a range of positions in the index, and +/// depending of the size, sort directly or make to a recursive call +/// to split_range +/// @param pos_index1 : first position in the index +/// @param pos_index2 : position after the last in the index +/// @param level_thread : depth of the call. When 0 sort the blocks +//----------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void block_indirect_sort< Block_size, Group_size, Iter_t, + Compare >::split_range (size_t pos_index1, + size_t pos_index2, + uint32_t level_thread) +{ + size_t nblock = pos_index2 - pos_index1; + + //------------------------------------------------------------------------- + // In the blocks not sorted, the physical position is the logical position + //------------------------------------------------------------------------- + Iter_t first = bk.get_block (pos_index1).first; + Iter_t last = bk.get_range (pos_index2 - 1).last; + + if (nblock < Group_size) { + intro_sort (first, last, bk.cmp); + return; + }; + + size_t pos_index_mid = pos_index1 + (nblock >> 1); + atomic_t son_counter (1); + + //------------------------------------------------------------------------- + // Insert in the stack the work for the second part, and the actual thread, + // execute the first part + //------------------------------------------------------------------------- + if (level_thread != 0) { + auto f1 = [=, &son_counter]( ) { + split_range (pos_index_mid, pos_index2, level_thread - 1); + util::atomic_sub (son_counter, 1); + }; + bk.works.emplace_back (f1); + if (bk.error) return; + split_range (pos_index1, pos_index_mid, level_thread - 1); + } + else + { + Iter_t mid = first + ((nblock >> 1) * Block_size); + auto f1 = [=, &son_counter]( ) { + parallel_sort_t (bk, mid, last); + util::atomic_sub (son_counter, 1); + }; + bk.works.emplace_back (f1); + if (bk.error) return; + parallel_sort_t (bk, first, mid); + }; + bk.exec (son_counter); + if (bk.error) return; + merge_blocks_t (bk, pos_index1, pos_index_mid, pos_index2); +}; +// +//----------------------------------------------------------------------------- +// function : start_function +/// @brief this function init the process. When the number of threads is lower +/// than a predefined value, sort the elements with a parallel introsort. +//----------------------------------------------------------------------------- +template < uint32_t Block_size, uint32_t Group_size, class Iter_t, + class Compare > +void block_indirect_sort< Block_size, Group_size, Iter_t, + Compare >::start_function (void) +{ + if (nthread < BOOST_NTHREAD_BORDER) { + parallel_sort_t (bk, bk.global_range.first, bk.global_range.last); + } + else + { + size_t level_thread = nbits64 (nthread - 1); + split_range (0, bk.nblock, level_thread - 1); + if (bk.error) return; + move_blocks_t k (bk); + }; +}; +// +//**************************************************************************** +};// End namespace detail +};// End namespace parallel +};// End namespace sort +};// End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/heap_sort.hpp b/include/boost/sort/parallel/detail/heap_sort.hpp new file mode 100755 index 0000000..d48312b --- /dev/null +++ b/include/boost/sort/parallel/detail/heap_sort.hpp @@ -0,0 +1,205 @@ +//---------------------------------------------------------------------------- +/// @file heap_sort.hpp +/// @brief Insertion Sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_HEAP_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_HEAP_SORT_HPP + +#include +#include +#include +#include +#include // for std::swap + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +//--------------------------------------------------------------------------- +// struct : heap_sort +/// @brief : Heap sort algorithm +/// @remarks This algorithm is O(NLogN) +//--------------------------------------------------------------------------- +template < class Iter_t, class Compare > +struct heap_sort +{ + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + + // + //------------------------------------------------------------------------ + // function : sort3 + /// @brief Sort and signal the changes of three values + /// @param val_0 : first value to compare + /// @param val_1 : second value to compare + /// @param val_2 : third value to compare + /// @param [out] bool_0 : if true indicates val_0 had been changed + /// @param [out] bool_1 : if true indicates val_1 had been changed + /// @param [out] bool_2 : if true indicates val_2 had been changed + /// @return if true , some value had changed + /// @remarks + //------------------------------------------------------------------------ + bool sort3 (value_t &val_0, value_t &val_1, value_t &val_2, bool &bool_0, + bool &bool_1, bool &bool_2) + { + bool_0 = bool_1 = bool_2 = false; + int value = 0; + if (val_0 < val_1) value += 4; + if (val_1 < val_2) value += 2; + if (val_0 < val_2) value += 1; + + switch (value) + { + case 0: break; + + case 2: + std::swap (val_1, val_2); + bool_1 = bool_2 = true; + break; + + case 3: + if (not(val_0 > val_1)) { + std::swap (val_0, val_2); + bool_0 = bool_2 = true; + } + else + { + auto aux = std::move (val_2); + val_2 = std::move (val_1); + val_1 = std::move (val_0); + val_0 = std::move (aux); + bool_0 = bool_1 = bool_2 = true; + }; + break; + + case 4: + std::swap (val_0, val_1); + bool_0 = bool_1 = true; + break; + + case 5: + if (val_1 > val_2) { + auto aux = std::move (val_0); + val_0 = std::move (val_1); + val_1 = std::move (val_2); + val_2 = std::move (aux); + bool_0 = bool_1 = bool_2 = true; + } + else + { + std::swap (val_0, val_2); + bool_0 = bool_2 = true; + }; + break; + + case 7: + std::swap (val_0, val_2); + bool_0 = bool_2 = true; + break; + + default: abort ( ); + }; + return (bool_0 or bool_1 or bool_2); + }; + // + //----------------------------------------------------------------------- + // function : make_heap + /// @brief Make the heap for to extract the sorted elements + /// @param first : iterator to the first element of the range + /// @param nelem : number of lements of the range + /// @param comp : object for to compare two elements + /// @remarks This algorithm is O(NLogN) + //------------------------------------------------------------------------ + void make_heap (Iter_t first, size_t nelem, Compare comp) + { + size_t pos_father, pos_son; + Iter_t iter_father = first, iter_son = first; + bool sw = false; + + for (size_t i = 1; i < nelem; ++i) { + pos_father = i; + iter_father = first + i; + sw = false; + do + { + iter_son = iter_father; + pos_son = pos_father; + pos_father = (pos_son - 1) >> 1; + iter_father = first + pos_father; + if ((sw = comp (*iter_father, *iter_son))) + std::swap (*iter_father, *iter_son); + } while (sw and pos_father != 0); + }; + }; + // + //------------------------------------------------------------------------ + // function : heap_sort + /// @brief : Heap sort algorithm + /// @param first: iterator to the first element of the range + /// @param last : iterator to the next element of the last in the range + /// @param comp : object for to do the comparison between the elements + /// @remarks This algorithm is O(NLogN) + //------------------------------------------------------------------------ + heap_sort (Iter_t first, Iter_t last, Compare comp) + { + assert ((last - first) >= 0); + size_t nelem = last - first; + if (nelem < 2) return; + + //-------------------------------------------------------------------- + // Creating the initial heap + //-------------------------------------------------------------------- + make_heap (first, nelem, comp); + + //-------------------------------------------------------------------- + // Sort the heap + //-------------------------------------------------------------------- + size_t pos_father, pos_son; + Iter_t iter_father = first, iter_son = first; + + bool sw = false; + for (size_t i = 1; i < nelem; ++i) { + std::swap (*first, *(first + (nelem - i))); + pos_father = 0; + pos_son = 1; + iter_father = first; + sw = true; + while (sw and pos_son < (nelem - i)) { + // if the father have two sons must select the bigger + iter_son = first + pos_son; + if ((pos_son + 1) < (nelem - i) and + comp (*iter_son, *(iter_son + 1))) + { + ++pos_son; + ++iter_son; + }; + if ((sw = comp (*iter_father, *iter_son))) + std::swap (*iter_father, *iter_son); + pos_father = pos_son; + iter_father = iter_son; + pos_son = (pos_father << 1) + 1; + }; + }; + }; +}; // End class heap_sort +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/indirect.hpp b/include/boost/sort/parallel/detail/indirect.hpp new file mode 100755 index 0000000..58b6911 --- /dev/null +++ b/include/boost/sort/parallel/detail/indirect.hpp @@ -0,0 +1,139 @@ +//---------------------------------------------------------------------------- +/// @file indirect.hpp +/// @brief Indirect algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_INDIRECT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_INDIRECT_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +//--------------------------------------------------------------------------- +/// @struct less_ptr_no_null +/// +/// @remarks this is the comparison object for pointers. Compare the objects +/// pointed by the iterators +//--------------------------------------------------------------------------- +template < class Iter_t, class Compare = util::compare_iter< Iter_t > > +struct less_ptr_no_null +{ + //----------------------------- Variables ----------------------- + Compare comp; // comparison object of the elements pointed by Iter_t + + //------------------------------------------------------------------------ + // function : less_ptr_no_null + /// @brief constructor from a Compare object + /// @param C1 : comparison object + //----------------------------------------------------------------------- + inline less_ptr_no_null (Compare C1 = Compare ( )) : comp (C1){}; + + //------------------------------------------------------------------------ + // function : operator ( ) + /// @brief Make the comparison of the objects pointed by T1 and T2, using + // the internal comp + // + /// @param T1 : first iterator + /// @param T2 : second iterator + /// @return bool result of the comparison + //----------------------------------------------------------------------- + inline bool operator( ) (Iter_t T1, Iter_t T2) const + { + return comp (*T1, *T2); + }; +}; +// +//----------------------------------------------------------------------------- +// function : create_index +/// @brief From a vector of objects, create a vector of iterators to +/// the objects +/// +/// @param first : iterator to the first element of the range +/// @param last : iterator to the element after the last of the range +/// @param index : vector where store the iterators +//----------------------------------------------------------------------------- +template < class Iter_t > +void create_index (Iter_t first, Iter_t last, std::vector< Iter_t > &index) +{ + auto nelem = last - first; + assert (nelem >= 0); + index.clear ( ); + index.reserve (nelem); + for (; first != last; ++first) index.push_back (first); +}; +// +//----------------------------------------------------------------------------- +// function : sort_index +/// @brief This function transform a logical sort of the elements in the index +/// in a physical sort +// +/// @param global_first : iterator to the first element of the data +/// @param [in] index : vector of the iterators +//----------------------------------------------------------------------------- +template < class Iter_t > +void sort_index (Iter_t global_first, std::vector< Iter_t > &index) +{ + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + + size_t pos_dest = 0; + size_t pos_src = 0; + size_t pos_in_vector = 0; + size_t nelem = index.size ( ); + Iter_t it_dest, it_src; + + while (pos_in_vector < nelem) { + while (pos_in_vector < nelem and + (size_t (index[pos_in_vector] - global_first)) == pos_in_vector) + { + ++pos_in_vector; + }; + + if (pos_in_vector == nelem) return; + pos_dest = pos_src = pos_in_vector; + it_dest = global_first + pos_dest; + value_t Aux = std::move (*it_dest); + + while ((pos_src = (size_t (index[pos_dest] - global_first))) != + pos_in_vector) + { + index[pos_dest] = it_dest; + it_src = global_first + pos_src; + *it_dest = std::move (*it_src); + it_dest = it_src; + pos_dest = pos_src; + }; + + *it_dest = std::move (Aux); + index[pos_dest] = it_dest; + ++pos_in_vector; + }; +}; +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/insertion_sort.hpp b/include/boost/sort/parallel/detail/insertion_sort.hpp new file mode 100755 index 0000000..1fb2d83 --- /dev/null +++ b/include/boost/sort/parallel/detail/insertion_sort.hpp @@ -0,0 +1,66 @@ +//---------------------------------------------------------------------------- +/// @file insertion_sort.hpp +/// @brief Insertion Sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_INSERTION_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_INSERTION_SORT_HPP + +#include +#include +#include // std::swap + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +//----------------------------------------------------------------------------- +// function : insertion_sort +/// @brief : Insertion sort algorithm +/// @param first: iterator to the first element of the range +/// @param last : iterator to the next element of the last in the range +/// @param comp : object for to do the comparison between the elements +/// @remarks This algorithm is O(N²) +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void insertion_sort (Iter_t first, Iter_t last, Compare comp) +{ + //-------------------------------------------------------------------- + // DEFINITIONS + //-------------------------------------------------------------------- + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + + if ((last - first) < 2) return; + + for (Iter_t it_examine = first + 1; it_examine != last; ++it_examine) { + value_t Aux = std::move (*it_examine); + Iter_t it_insertion = it_examine; + + while (it_insertion != first and comp (Aux, *(it_insertion - 1))) { + *it_insertion = std::move (*(it_insertion - 1)); + --it_insertion; + }; + *it_insertion = std::move (Aux); + }; +}; +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/intro_sort.hpp b/include/boost/sort/parallel/detail/intro_sort.hpp new file mode 100755 index 0000000..0bf6bd1 --- /dev/null +++ b/include/boost/sort/parallel/detail/intro_sort.hpp @@ -0,0 +1,203 @@ +//---------------------------------------------------------------------------- +/// @file intro_sort.hpp +/// @brief Intro Sort algorithm +/// +/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_ALGORITHM_INTRO_SORT_HPP +#define __BOOST_SORT_PARALLEL_ALGORITHM_INTRO_SORT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +//--------------------------------------------------------------------------- +// USING SENTENCES +//--------------------------------------------------------------------------- +using util::compare_iter; +using util::nbits64; +// +//----------------------------------------------------------------------------- +// function : mid3 +/// @brief : return the iterator to the mid value of the three values passsed +/// as parameters +// +/// @param iter_1 : iterator to the first value +/// @param iter_2 : iterator to the second value +/// @param iter_3 : iterator to the third value +/// @param comp : object for to compare two values +/// @return iterator to mid value +//----------------------------------------------------------------------------- +template < typename Iter_t, typename Compare > +inline Iter_t mid3 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Compare comp) +{ + return comp (*iter_1, *iter_2) + ? (comp (*iter_2, *iter_3) + ? iter_2 + : (comp (*iter_1, *iter_3) ? iter_3 : iter_1)) + : (comp (*iter_3, *iter_2) + ? iter_2 + : (comp (*iter_3, *iter_1) ? iter_3 : iter_1)); +}; +// +//----------------------------------------------------------------------------- +// function : pivot3 +/// @brief : receive a range between first and last, calcule the mid iterator +/// with the first, the previous to the last, and the central +/// position. With this mid iterator swap with the first position +// +/// @param first : iterator to the first element +/// @param last : iterator to the last element +/// @param comp : object for to compare two elements +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +inline void pivot3 (Iter_t first, Iter_t last, Compare comp) +{ + auto N2 = (last - first) >> 1; + Iter_t it_val = mid3 (first + 1, first + N2, last - 1, comp); + std::swap (*first, *it_val); +}; +// +//----------------------------------------------------------------------------- +// function : mid9 +/// @brief : return the iterator to the mid value of the nine values passsed +/// as parameters +// +/// @param iter_1 : iterator to the first value +/// @param iter_2 : iterator to the second value +/// @param iter_3 : iterator to the third value +/// @param iter_4 : iterator to the fourth value +/// @param iter_5 : iterator to the fifth value +/// @param iter_6 : iterator to the sixth value +/// @param iter_7 : iterator to the seventh value +/// @param iter_8 : iterator to the eighth value +/// @param iter_9 : iterator to the ninth value +/// @return iterator to the mid value +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +inline Iter_t mid9 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Iter_t iter_4, + Iter_t iter_5, Iter_t iter_6, Iter_t iter_7, Iter_t iter_8, + Iter_t iter_9, Compare comp) +{ + return mid3 (mid3 (iter_1, iter_2, iter_3, comp), + mid3 (iter_4, iter_5, iter_6, comp), + mid3 (iter_7, iter_8, iter_9, comp), comp); +}; +// +//----------------------------------------------------------------------------- +// function : pivot9 +/// @brief : receive a range between first and last, obtain 9 values between +/// the elements including the first and the previous to the last. +/// Obtain the iterator to the mid value and swap with the first +/// position +// +/// @param first : iterator to the first element +/// @param last : iterator to the last element +/// @param comp : object for to compare two elements +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +inline void pivot9 (Iter_t first, Iter_t last, Compare comp) +{ + size_t cupo = (last - first) >> 3; + Iter_t itaux = mid9 (first + 1, first + cupo, first + 2 * cupo, + first + 3 * cupo, first + 4 * cupo, first + 5 * cupo, + first + 6 * cupo, first + 7 * cupo, last - 1, comp); + std::swap (*first, *itaux); +}; +// +//----------------------------------------------------------------------------- +// function : intro_sort_internal +/// @brief : internal function for to divide and sort the ranges +// +/// @param first : iterator to the first element +/// @param last : iterator to the element after the last in the range +/// @param level : level of depth from the top level call +/// @param comp : object for to Compare elements +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void intro_sort_internal (Iter_t first, Iter_t last, uint32_t level, + Compare comp) +{ + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + + const uint32_t nmin = 32; + size_t nelem = last - first; + if (nelem < nmin) return insertion_sort (first, last, comp); + + if (level == 0) { + heap_sort< Iter_t, Compare > (first, last, comp); + return; + }; + + pivot3 (first, last, comp); + + const value_t &val = const_cast< value_t & > (*first); + Iter_t c_first = first + 1, c_last = last - 1; + + while (comp (*c_first, val)) ++c_first; + while (comp (val, *c_last)) --c_last; + + while (not(c_first > c_last)) { + std::swap (*(c_first++), *(c_last--)); + while (comp (*c_first, val)) ++c_first; + while (comp (val, *c_last)) --c_last; + }; // End while + + std::swap (*first, *c_last); + intro_sort_internal (first, c_last, level - 1, comp); + intro_sort_internal (c_first, last, level - 1, comp); +}; + +// +//----------------------------------------------------------------------------- +// function : intro_sort +/// @brief : function for to sort range [first, last ) +/// @param first : iterator to the first element +/// @param last : iterator to the element after the last in the range +/// @param comp : object for to compare elements +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void intro_sort (Iter_t first, Iter_t last, Compare comp) +{ + auto nelem = last - first; + assert (nelem >= 0); + + //------------------- check if sort -------------------------------------- + bool sw = true; + for (Iter_t it1 = first, it2 = first + 1; + it2 != last and (sw = not comp (*it2, *it1)); it1 = it2++) + ; + if (sw) return; + + uint32_t level = ((nbits64 (nelem) - 4) * 3) / 2; + intro_sort_internal (first, last, level, comp); +}; +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/parallel_stable_sort.hpp b/include/boost/sort/parallel/detail/parallel_stable_sort.hpp new file mode 100755 index 0000000..80bd509 --- /dev/null +++ b/include/boost/sort/parallel/detail/parallel_stable_sort.hpp @@ -0,0 +1,188 @@ +//---------------------------------------------------------------------------- +/// @file parallel_stable_sort.hpp +/// @brief This file contains the class parallel_stable_sort +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +///--------------------------------------------------------------------------- +/// @struct parallel_stable_sort +/// @brief This a structure for to implement a parallel stable sort, exception +/// safe +//---------------------------------------------------------------------------- +template < class Iter_t, class Compare = util::compare_iter< Iter_t > > +struct parallel_stable_sort +{ + //------------------------------------------------------------------------- + // DEFINITIONS + //------------------------------------------------------------------------- + typedef typename std::iterator_traits< Iter_t >::value_type value_t; + + //------------------------------------------------------------------------- + // VARIABLES + //------------------------------------------------------------------------- + // Number of elements to sort + size_t nelem; + // Pointer to the auxiliary memory needed for the algorithm + value_t *ptr; + // Minimal number of elements for to be sorted in parallel mode + const size_t nelem_min = 1 << 16; + + //------------------------------------------------------------------------ + // F U N C T I O N S + //------------------------------------------------------------------------ + parallel_stable_sort (Iter_t first, Iter_t last) + : parallel_stable_sort (first, last, Compare ( ), + std::thread::hardware_concurrency ( )){}; + + parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp) + : parallel_stable_sort (first, last, cmp, + std::thread::hardware_concurrency ( )){}; + + parallel_stable_sort (Iter_t first, Iter_t last, uint32_t num_thread) + : parallel_stable_sort (first, last, Compare ( ), num_thread){}; + + parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp, + uint32_t num_thread); + + // + //----------------------------------------------------------------------------- + // function : destroy_all + /// @brief The utility is to destroy the temporary buffer used in the + /// sorting process + //----------------------------------------------------------------------------- + void destroy_all ( ) + { + if (ptr != nullptr) std::return_temporary_buffer (ptr); + }; + // + //----------------------------------------------------------------------------- + // function :~parallel_stable_sort + /// @brief destructor of the class. The utility is to destroy the temporary + /// buffer used in the sorting process + //----------------------------------------------------------------------------- + ~parallel_stable_sort ( ) { destroy_all ( ); }; +}; // end struct parallel_stable_sort + +// +//############################################################################ +// ## +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : parallel_stable_sort +/// @brief constructor of the class +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +parallel_stable_sort< Iter_t, Compare > + ::parallel_stable_sort (Iter_t first, Iter_t last, Compare comp, + uint32_t nthread): nelem (0), ptr (nullptr) +{ + util::range< Iter_t > range_initial (first, last); + assert (range_initial.valid ( )); + + nelem = range_initial.size ( ); + size_t nptr = (nelem + 1) >> 1; + + if (nelem < nelem_min or nthread < 2) { + spin_sort< Iter_t, Compare > (range_initial.first, range_initial.last, + comp); + return; + }; + + //------------------- check if sort -------------------------------------- + bool sw = true; + for (Iter_t it1 = range_initial.first, it2 = range_initial.first + 1; + it2 != range_initial.last and (sw = not comp (*it2, *it1)); + it1 = it2++) + ; + if (sw) return; + + ptr = std::get_temporary_buffer< value_t > (nptr).first; + if (ptr == nullptr) throw std::bad_alloc ( ); + + //--------------------------------------------------------------------- + // Parallel Process + //--------------------------------------------------------------------- + range< Iter_t > range_first (range_initial.first, + range_initial.first + nptr); + + range< Iter_t > range_second (range_initial.first + nptr, + range_initial.last); + + range< value_t * > range_buffer (ptr, ptr + nptr); + + try + { + sample_sort< Iter_t, Compare > (range_initial.first, + range_initial.first + nptr, comp, + nthread, range_buffer); + } + catch (std::bad_alloc &) + { + destroy_all ( ); + throw std::bad_alloc ( ); + }; + + try + { + sample_sort< Iter_t, Compare > (range_initial.first + nptr, + range_initial.last, comp, nthread, + range_buffer); + } + catch (std::bad_alloc &) + { + destroy_all ( ); + throw std::bad_alloc ( ); + }; + + range_buffer = init_move (range_buffer, range_first); + range_initial = + half_merge (range_initial, range_buffer, range_second, comp); +}; // end of constructor + +// +//**************************************************************************** +}; // End namespace algorithm +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/sample_sort.hpp b/include/boost/sort/parallel/detail/sample_sort.hpp new file mode 100755 index 0000000..81a34e4 --- /dev/null +++ b/include/boost/sort/parallel/detail/sample_sort.hpp @@ -0,0 +1,438 @@ +//---------------------------------------------------------------------------- +/// @file sample_sort.hpp +/// @brief contains the class sample_sort +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +///--------------------------------------------------------------------------- +/// @struct sample_sort +/// @brief This a structure for to implement a sample sort, exception +/// safe +/// @tparam +/// @remarks +//---------------------------------------------------------------------------- +template < class Iter_t, class Compare > +struct sample_sort +{ + //------------------------------------------------------------------------ + // DEFINITIONS + //------------------------------------------------------------------------ + typedef typename iterator_traits< Iter_t >::value_type value_t; + typedef range< Iter_t > range_it; + typedef range< value_t * > range_buf; + typedef sample_sort< Iter_t, Compare > this_t; + typedef spin_sort< Iter_t, Compare > spin_sort_t; + + //------------------------------------------------------------------------ + // VARIABLES AND CONSTANTS + //------------------------------------------------------------------------ + // minimun numbers of elements for to be sortd in parallel mode + static const uint32_t thread_min = (1 << 16); + + // Number of threads to use in the algorithm + // Number of intervals for to do the internal division of the data + uint32_t nthread, ninterval; + + // Bool variables indicating if the auxiliary memory is constructed + // and indicating in the auxiliary memory had been obtained inside the + /// algorithm or had been received as a parameter + bool construct = false, owner = false; + + // Comparison object for to compare two elements + Compare comp; + + // Range with all the elements to sort + range_it global_range; + + // range with the auxiliary memory + range_buf global_buf; + + // vector of futures + std::vector< std::future< void > > vfuture; + + // vector of vectors which contains the ranges to merge obtained in the + // subdivision + std::vector< std::vector< range_it > > vv_range_it; + + // each vector of ranges of the vv_range_it, need their corresponding buffer + // for to do the merge + std::vector< std::vector< range_buf > > vv_range_buf; + + // Initial vector of ranges + std::vector< range_it > vrange_it_ini; + + // Initial vector of buffers + std::vector< range_buf > vrange_buf_ini; + + // atomic counter for to know when are finished the function_t created + // inside a function + std::atomic< uint32_t > njob; + + // Indicate if an error in the algorithm for to undo all + bool error; + + //------------------------------------------------------------------------ + // FUNCTIONS OF THE STRUCT + //------------------------------------------------------------------------ + void initial_configuration (void); + + sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, + value_t *paux, size_t naux); + + sample_sort (Iter_t first, Iter_t last) + : sample_sort (first, last, Compare ( ), + std::thread::hardware_concurrency ( ), nullptr, 0){}; + + sample_sort (Iter_t first, Iter_t last, Compare cmp) + : sample_sort (first, last, cmp, std::thread::hardware_concurrency ( ), + nullptr, 0){}; + + sample_sort (Iter_t first, Iter_t last, uint32_t num_thread) + : sample_sort (first, last, Compare ( ), num_thread, nullptr, 0){}; + + sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread) + : sample_sort (first, last, cmp, num_thread, nullptr, 0){}; + + sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, + range_buf range_buf_initial) + : sample_sort (first, last, cmp, num_thread, range_buf_initial.first, + range_buf_initial.size ( )){}; + + void destroy_all (void); + // + //----------------------------------------------------------------------------- + // function :~sample_sort + /// @brief destructor of the class. The utility is to destroy the temporary + /// buffer used in the sorting process + //----------------------------------------------------------------------------- + ~sample_sort (void) { destroy_all ( ); }; + // + //----------------------------------------------------------------------- + // function : execute first + /// @brief this a function to assign to each thread in the first merge + //----------------------------------------------------------------------- + void execute_first (void) + { + uint32_t job = 0; + while ((job = util::atomic_add (njob, 1)) < ninterval) { + uninit_merge_level4 (vrange_buf_ini[job], vv_range_it[job], + vv_range_buf[job], comp); + }; + }; + // + //----------------------------------------------------------------------- + // function : execute + /// @brief this is a function to assignt each thread the final merge + //----------------------------------------------------------------------- + void execute (void) + { + uint32_t job = 0; + while ((job = util::atomic_add (njob, 1)) < ninterval) { + merge_vector4 (vrange_buf_ini[job], vrange_it_ini[job], + vv_range_buf[job], vv_range_it[job], comp); + }; + }; + // + //----------------------------------------------------------------------- + // function : first merge + /// @brief Implement the merge of the initially sparse ranges + //----------------------------------------------------------------------- + void first_merge (void) + { //---------------------------------- begin -------------------------- + njob = 0; + + for (uint32_t i = 0; i < nthread; ++i) { + vfuture[i] = + std::async (std::launch::async, &this_t::execute_first, this); + }; + for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( ); + }; + // + //----------------------------------------------------------------------- + // function : final merge + /// @brief Implement the final merge of the ranges + //----------------------------------------------------------------------- + void final_merge (void) + { //---------------------------------- begin -------------------------- + njob = 0; + + for (uint32_t i = 0; i < nthread; ++i) { + vfuture[i] = + std::async (std::launch::async, &this_t::execute, this); + }; + for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( ); + }; + + //---------------------------------------------------------------------------- +}; // End class sample_sort +//---------------------------------------------------------------------------- +// +//############################################################################ +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : sample_sort +/// @brief constructor of the class +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param cmp : object for to compare two elements pointed by Iter_t iterators +/// @param num_thread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +/// @param paux : pointer to the auxiliary memory. If nullptr, the memory is +/// created inside the class +/// @param naux : number of elements of the memory pointed by paux +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +sample_sort< Iter_t, Compare > + ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread, + value_t *paux, size_t naux) + : nthread (num_thread), owner (false), comp (cmp), + global_range (first, last), global_buf (nullptr, nullptr), error (false) +{ + assert ((last - first) >= 0); + size_t nelem = size_t (last - first); + construct = false; + njob = 0; + vfuture.resize (nthread); + + // Adjust when have many threads and only a few elements + while (nelem > thread_min and (nthread * nthread) > (nelem >> 3)) { + nthread /= 2; + }; + ninterval = (nthread << 3); + + if (nthread < 2 or nelem <= (thread_min)) { + spin_sort< Iter_t, Compare > (first, last, comp); + return; + }; + + //------------------- check if sort -------------------------------------- + bool sw = true; + for (Iter_t it1 = first, it2 = first + 1; + it2 != last and (sw = not comp (*it2, *it1)); it1 = it2++) + ; + if (sw) return; + + if (paux != nullptr) { + assert (naux != 0); + global_buf.first = paux; + global_buf.last = paux + naux; + owner = false; + } + else + { + value_t *ptr = std::get_temporary_buffer< value_t > (nelem).first; + if (ptr == nullptr) throw std::bad_alloc ( ); + owner = true; + global_buf = range_buf (ptr, ptr + nelem); + }; + //------------------------------------------------------------------------ + // PROCESS + //------------------------------------------------------------------------ + try + { + initial_configuration ( ); + } + catch (std::bad_alloc &) + { + error = true; + }; + if (not error) { + first_merge ( ); + construct = true; + final_merge ( ); + }; + if (error) { + destroy_all ( ); + throw std::bad_alloc ( ); + }; +}; +// +//----------------------------------------------------------------------------- +// function : destroy_all +/// @brief destructor of the class. The utility is to destroy the temporary +/// buffer used in the sorting process +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void sample_sort< Iter_t, Compare >::destroy_all (void) +{ + if (construct) { + destroy (global_buf); + construct = false; + } + if (global_buf.first != nullptr and owner) + std::return_temporary_buffer (global_buf.first); +}; + +// +//----------------------------------------------------------------------------- +// function : initial_configuration +/// @brief Create the internal data structures, and obtain the inital set of +/// ranges to merge +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void sample_sort< Iter_t, Compare >::initial_configuration (void) +{ + std::vector< range_it > vmem_thread; + std::vector< range_buf > vbuf_thread; + size_t nelem = global_range.size ( ); + + //------------------------------------------------------------------------ + size_t cupo = nelem / nthread; + Iter_t it_first = global_range.first; + value_t *buf_first = global_buf.first; + vmem_thread.reserve (nthread + 1); + vbuf_thread.reserve (nthread + 1); + + for (uint32_t i = 0; i < (nthread - 1); + ++i, it_first += cupo, buf_first += cupo) + { + vmem_thread.emplace_back (it_first, it_first + cupo); + vbuf_thread.emplace_back (buf_first, buf_first + cupo); + }; + + vmem_thread.emplace_back (it_first, global_range.last); + vbuf_thread.emplace_back (buf_first, global_buf.last); + + //------------------------------------------------------------------------ + // Sorting of the ranges + //------------------------------------------------------------------------ + std::vector< std::future< void > > vfuture (nthread); + + for (uint32_t i = 0; i < nthread; ++i) { + auto func = [=]( ) { + spin_sort_t (vmem_thread[i].first, vmem_thread[i].last, comp, + vbuf_thread[i]); + }; + vfuture[i] = std::async (std::launch::async, func); + }; + + for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( ); + + //------------------------------------------------------------------------ + // Obtain the vector of milestones + //------------------------------------------------------------------------ + std::vector< Iter_t > vsample; + vsample.reserve (nthread * (ninterval - 1)); + + for (uint32_t i = 0; i < nthread; ++i) { + size_t distance = vmem_thread[i].size ( ) / ninterval; + for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance) + { + vsample.push_back (vmem_thread[i].first + pos); + }; + }; + typedef less_ptr_no_null< Iter_t, Compare > compare_ptr; + typedef typename std::vector< Iter_t >::iterator it_to_it; + + spin_sort< it_to_it, compare_ptr > (vsample.begin ( ), vsample.end ( ), + compare_ptr (comp)); + + //------------------------------------------------------------------------ + // Create the final milestone vector + //------------------------------------------------------------------------ + std::vector< Iter_t > vmilestone; + vmilestone.reserve (ninterval); + + for (uint32_t pos = nthread >> 1; pos < vsample.size ( ); pos += nthread) { + vmilestone.push_back (vsample[pos]); + }; + + //------------------------------------------------------------------------ + // Creation of the first vector of ranges + //------------------------------------------------------------------------ + std::vector< std::vector< range< Iter_t > > > vv_range_first (nthread); + + for (uint32_t i = 0; i < nthread; ++i) { + Iter_t itaux = vmem_thread[i].first; + + for (uint32_t k = 0; k < (ninterval - 1); ++k) { + Iter_t it2 = std::upper_bound (itaux, vmem_thread[i].last, + *vmilestone[k], comp); + + vv_range_first[i].emplace_back (itaux, it2); + itaux = it2; + }; + vv_range_first[i].emplace_back (itaux, vmem_thread[i].last); + }; + + //------------------------------------------------------------------------ + // Copy in buffer and creation of the final matrix of ranges + //------------------------------------------------------------------------ + vv_range_it.resize (ninterval); + vv_range_buf.resize (ninterval); + vrange_it_ini.reserve (ninterval); + vrange_buf_ini.reserve (ninterval); + + for (uint32_t i = 0; i < ninterval; ++i) { + vv_range_it[i].reserve (nthread); + vv_range_buf[i].reserve (nthread); + }; + + Iter_t it = global_range.first; + value_t *it_buf = global_buf.first; + + for (uint32_t k = 0; k < ninterval; ++k) { + size_t nelem_interval = 0; + + for (uint32_t i = 0; i < nthread; ++i) { + size_t nelem_range = vv_range_first[i][k].size ( ); + if (nelem_range != 0) { + vv_range_it[k].push_back (vv_range_first[i][k]); + }; + nelem_interval += nelem_range; + }; + + vrange_it_ini.emplace_back (it, it + nelem_interval); + vrange_buf_ini.emplace_back (it_buf, it_buf + nelem_interval); + + it += nelem_interval; + it_buf += nelem_interval; + }; +}; +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/select_block_indirect.hpp b/include/boost/sort/parallel/detail/select_block_indirect.hpp new file mode 100755 index 0000000..229eea4 --- /dev/null +++ b/include/boost/sort/parallel/detail/select_block_indirect.hpp @@ -0,0 +1,78 @@ +//---------------------------------------------------------------------------- +/// @file select_block_indirect.hpp +/// @brief block indirect sort algorithm +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_SELECT_BLOCK_INDIRECT_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_SELECT_BLOCK_INDIRECT_HPP + +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +using boost::sort::parallel::detail::util::tmsb; +// +///--------------------------------------------------------------------------- +// function select_block_indirect +/// @brief This class is select the block size in the block_indirect_sort +/// algorithm depending of the type and size of the data to sort +/// +//---------------------------------------------------------------------------- +template < class Iter_t, class Compare, + util::enable_if_string< util::value_iter< Iter_t > > * = nullptr > +void select_block_indirect (Iter_t first, Iter_t last, Compare cmp, + uint32_t nthr = std::thread::hardware_concurrency ()) +{ + block_indirect_sort< 128, 128, Iter_t, Compare > (first, last, cmp, nthr); +}; + +template < size_t Size > +struct block_size +{ + static constexpr const uint32_t BitsSize = + (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1]; + static constexpr const uint32_t sz[10] = {4096, 4096, 4096, 4096, 2048, + 1024, 768, 512, 256, 128}; + static constexpr const uint32_t data = sz[BitsSize]; +}; +// +///--------------------------------------------------------------------------- +/// @struct select_block_indirect_sort +/// @brief This class is select the block size in the block_indirect_sort +/// algorithm depending of the type and size of the data to sort +/// +//---------------------------------------------------------------------------- +template < class Iter_t, class Compare, + util::enable_if_not_string< util::value_iter< Iter_t > >* = nullptr > + +void select_block_indirect (Iter_t first, Iter_t last, Compare cmp, + uint32_t nthr = std::thread::hardware_concurrency ()) +{ + block_indirect_sort) >::data, + 28, Iter_t, Compare > (first, last, cmp, nthr); +}; + +// +//**************************************************************************** +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/spin_sort.hpp b/include/boost/sort/parallel/detail/spin_sort.hpp new file mode 100755 index 0000000..63736af --- /dev/null +++ b/include/boost/sort/parallel/detail/spin_sort.hpp @@ -0,0 +1,249 @@ +//---------------------------------------------------------------------------- +/// @file spin_sort.hpp +/// @brief Spin Sort algorithm +/// +/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP +#define __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +// +//---------------------------------------------------------------------------- +// USING SENTENCES +//---------------------------------------------------------------------------- +using util::range; +using util::nbits64; +using std::iterator_traits; + +//----------------------------------------------------------------------------- +// function : range_sort +/// @brief this function divide r_input in two parts, sort it,and merge moving +/// the elements to range_buf +/// @param range_input : range with the elements to sort +/// @param range_buffer : range with the elements sorted +/// @param comp : object for to compare two elements +/// @param level : when is 1, sort with the insertion_sort algorithm +/// if not make a recursive call splitting the ranges +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +void range_sort (const range< Iter1_t > &range_input, + const range< Iter2_t > &range_buffer, Compare comp, + uint32_t level) +{ + typedef range< Iter1_t > range_it1; + typedef range< Iter2_t > range_it2; + assert (range_input.size ( ) == range_buffer.size ( ) and level != 0); + + size_t nelem1 = (range_input.size ( ) + 1) >> 1; + range_it1 range_input1 (range_input.first, range_input.first + nelem1), + range_input2 (range_input.first + nelem1, range_input.last); + + if (level < 2) { + insertion_sort (range_input1.first, range_input1.last, comp); + insertion_sort (range_input2.first, range_input2.last, comp); + } + else + { + range_sort (range_it2 (range_buffer.first, range_buffer.first + nelem1), + range_input1, comp, level - 1); + + range_sort (range_it2 (range_buffer.first + nelem1, range_buffer.last), + range_input2, comp, level - 1); + }; + + full_merge (range_buffer, range_input1, range_input2, comp); +}; + +//--------------------------------------------------------------------------- +/// @struct spin_sort +/// @brief This class implement s stable sort algorithm with 1 thread, with +/// an auxiliary memory of N/2 elements +//---------------------------------------------------------------------------- +template < class Iter_t, typename Compare = util::compare_iter< Iter_t > > +class spin_sort +{ + //------------------------------------------------------------------------ + // DEFINITIONS AND CONSTANTS + //------------------------------------------------------------------------ + typedef typename iterator_traits< Iter_t >::value_type value_t; + typedef range< Iter_t > range_it; + typedef range< value_t * > range_buf; + // When the number of elements to sort is smaller than Sort_min, are sorted + // by the insertion sort algorithm + static const uint32_t Sort_min = 36; + + //------------------------------------------------------------------------ + // VARIABLES + //------------------------------------------------------------------------ + // Pointer to the auxiliary memory + value_t *ptr; + + // Number of elements in the auxiliary memory + size_t nptr; + + // construct indicate if the auxiliary memory in initialized, and owner + // indicate if the auxiliary memory had been created inside the object or + // had + // been received as a parameter + bool construct = false, owner = false; + + //------------------------------------------------------------------------ + // PRIVATE FUNCTIONS + //------------------------------------------------------------------------- + spin_sort (Iter_t first, Iter_t last, Compare comp, value_t *paux, + size_t naux); + +public: + //------------------------------------------------------------------------ + // PUBLIC FUNCTIONS + //------------------------------------------------------------------------- + spin_sort (Iter_t first, Iter_t last, Compare comp = Compare ( )) + : spin_sort (first, last, comp, nullptr, 0){}; + + spin_sort (Iter_t first, Iter_t last, Compare comp, range_buf range_aux) + : spin_sort (first, last, comp, range_aux.first, range_aux.size ( )){}; + // + //----------------------------------------------------------------------- + // function :~spin_sort + /// @brief destructor of the struct. Destroy the elements if construct is + /// true, + /// and return the memory if owner is true + //----------------------------------------------------------------------- + ~spin_sort (void) + { + if (construct) { + destroy (range< value_t * > (ptr, ptr + nptr)); + construct = false; + }; + if (owner and ptr != nullptr) std::return_temporary_buffer (ptr); + }; +}; // End of class spin_sort +// +//############################################################################ +// ## +// N O N I N L I N E F U N C T I O N S ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : spin_sort +/// @brief constructor of the struct +// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param paux : pointer to the auxiliary memory provided. If nullptr, the +/// memory is created inside the class +/// @param naux : number of elements pointed by paux +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +spin_sort< Iter_t, Compare > + ::spin_sort (Iter_t first, Iter_t last, Compare comp, value_t *paux, + size_t naux) + : ptr (paux), nptr (naux), construct (false), owner (false) +{ + range< Iter_t > range_input (first, last); + assert (range_input.valid ( )); + + size_t nelem = range_input.size ( ); + owner = construct = false; + + nptr = (nelem + 1) >> 1; + size_t nelem_1 = nptr; + size_t nelem_2 = nelem - nelem_1; + + if (nelem <= (Sort_min << 1)) { + insertion_sort (range_input.first, range_input.last, comp); + return; + }; + //------------------- check if sort -------------------------------------- + bool sw = true; + for (Iter_t it1 = range_input.first, it2 = range_input.first + 1; + it2 != range_input.last and (sw = not comp (*it2, *it1)); it1 = it2++) + ; + if (sw) return; + + if (ptr == nullptr) { + ptr = std::get_temporary_buffer< value_t > (nptr).first; + if (ptr == nullptr) throw std::bad_alloc ( ); + owner = true; + }; + range_buf range_aux (ptr, (ptr + nptr)); + + //------------------------------------------------------------------------ + // Process + //------------------------------------------------------------------------ + uint32_t nlevel = nbits64 (((nelem + Sort_min - 1) / Sort_min) - 1) - 1; + assert (nlevel != 0); + + if ((nlevel & 1) == 1) { + //--------------------------------------------------------------------- + // if the number of levels is odd, the data are in the first parameter + // of range_sort, and the results appear in the second parameter + //--------------------------------------------------------------------- + range_it range_1 (first, first + nelem_2), + range_2 (first + nelem_2, last); + range_aux = uninit_move (range_aux, range_2); + construct = true; + + range_sort (range_aux, range_2, comp, nlevel); + range_buf rng_bx (range_aux.first, range_aux.first + nelem_2); + + range_sort (range_1, rng_bx, comp, nlevel); + half_merge (range_input, rng_bx, range_2, comp); + } + else + { + //-------------------------------------------------------------------- + // If the number of levels is even, the data are in the second + // parameter of range_sort, and the results are in the same parameter + //--------------------------------------------------------------------- + range_it range_1 (first, first + nelem_1), + range_2 (first + nelem_1, last); + range_aux = uninit_move (range_aux, range_1); + construct = true; + + range_sort (range_1, range_aux, comp, nlevel); + + range_1.last = range_1.first + range_2.size ( ); + range_sort (range_1, range_2, comp, nlevel); + half_merge (range_input, range_aux, range_2, comp); + }; +}; + +//**************************************************************************** +}; // End namespace algorithm +}; // End namespace parallel +}; // End namespace sort +}; // End namepspace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/util/atomic.hpp b/include/boost/sort/parallel/detail/util/atomic.hpp new file mode 100755 index 0000000..9807d32 --- /dev/null +++ b/include/boost/sort/parallel/detail/util/atomic.hpp @@ -0,0 +1,93 @@ +//---------------------------------------------------------------------------- +/// @file atomic.hpp +/// @brief Basic layer for to simplify the use of atomic functions +/// @author Copyright(c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP + +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +// +//----------------------------------------------------------------------------- +// function : atomic_read +/// @brief make the atomic read of an atomic variable, using a memory model +/// @param at_var : atomic variable to read +/// @return value obtained +//----------------------------------------------------------------------------- +template < typename T > +inline T atomic_read (std::atomic< T > &at_var) +{ + return std::atomic_load_explicit< T > (&at_var, std::memory_order_acquire); +}; +// +//----------------------------------------------------------------------------- +// function : atomic_add +/// @brief Add a number to an atomic variable, using a memory model +/// @param at_var : variable to add +/// @param num : value to add to at_var +/// @return result of the operation +//----------------------------------------------------------------------------- +template < typename T, typename T2 > +inline T atomic_add (std::atomic< T > &at_var, T2 num) +{ + static_assert (std::is_integral< T2 >::value, "Bad parameter"); + return std::atomic_fetch_add_explicit< T > (&at_var, (T)num, + std::memory_order_acq_rel); +}; +// +//----------------------------------------------------------------------------- +// function : atomic_sub +/// @brief Atomic subtract of an atomic variable using memory model +/// @param at_var : Varibale to subtract +/// @param num : value to sub to at_var +/// @return result of the operation +//----------------------------------------------------------------------------- +template < typename T, typename T2 > +inline T atomic_sub (std::atomic< T > &at_var, T2 num) +{ + static_assert (std::is_integral< T2 >::value, "Bad parameter"); + return std::atomic_fetch_sub_explicit< T > (&at_var, (T)num, + std::memory_order_acq_rel); +}; +// +//----------------------------------------------------------------------------- +// function : atomic_write +/// @brief Write a value in an atomic variable using memory model +/// @param at_var : varible to write +/// @param num : value to write in at_var +//----------------------------------------------------------------------------- +template < typename T, typename T2 > +inline void atomic_write (std::atomic< T > &at_var, T2 num) +{ + static_assert (std::is_integral< T2 >::value, "Bad parameter"); + std::atomic_store_explicit< T > (&at_var, (T)num, + std::memory_order_release); +}; + +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +#endif diff --git a/include/boost/sort/parallel/detail/util/compare_traits.hpp b/include/boost/sort/parallel/detail/util/compare_traits.hpp new file mode 100755 index 0000000..68fd55f --- /dev/null +++ b/include/boost/sort/parallel/detail/util/compare_traits.hpp @@ -0,0 +1,99 @@ +//---------------------------------------------------------------------------- +/// @file compare_traits.hpp +/// @brief this file contains the metaprogramming classes compare_iter and +/// enable_if_not_integral +/// @author Copyright(c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_COMPARE_TRAITS_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_COMPARE_TRAITS_HPP + +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +//---------------------------------------------------------------------------- +// USING SENTENCES +//---------------------------------------------------------------------------- +using std::iterator_traits; + +//--------------------------------------------------------------------------- +/// @class compare_iter +/// @brief From the iterator, received as template parameter, obtain the type +/// of the object pointed by the iterator, and with this define the +/// std::less with this type obtained +/// @remarks The main utility of this, is simplify the default template +/// parameter of comparison +//--------------------------------------------------------------------------- +template < class iter_t > +using compare_iter = + std::less< typename iterator_traits< iter_t >::value_type >; +// +//--------------------------------------------------------------------------- +/// @class value_iter +/// @brief From the iterator, obtain the type pointed by it +/// @remarks The main utility of this, is simplify the default template +/// parameter of comparison +//--------------------------------------------------------------------------- +template < class iter_t > +using value_iter = typename iterator_traits< iter_t >::value_type ; + + +// +//--------------------------------------------------------------------------- +/// @class enable_if_not_integral +/// @brief This is a SFINAE class for to detect if the third parameter in the +/// invocation of the parallel sorting algorithms is an integer +/// representing the number of threads to use or is a comparison object +/// @remarks +//--------------------------------------------------------------------------- +template < class T > +using enable_if_not_integral = + typename std::enable_if< !std::is_integral< T >::value >::type; + +// +//--------------------------------------------------------------------------- +/// @class enable_if_string +/// @brief This is a SFINAE class for to detect if the parameter is a +/// std::string for to apply specialized parameters in the invocation +/// of the block_indirect_sort algorithm +/// @remarks +//--------------------------------------------------------------------------- +template < class T > +using enable_if_string = + typename std::enable_if< std::is_same< T, std::string >::value >::type; + +// +//--------------------------------------------------------------------------- +/// @class enable_if_not_string +/// @brief This is a SFINAE class for to detect if the parameter is a +/// std::string for to apply specialized parameters in the invocation +/// of the block_indirect_sort algorithm +/// @remarks +//--------------------------------------------------------------------------- +template < class T > +using enable_if_not_string = + typename std::enable_if::value >::type; +// +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +#endif diff --git a/include/boost/sort/parallel/detail/util/low_level.hpp b/include/boost/sort/parallel/detail/util/low_level.hpp new file mode 100755 index 0000000..c678396 --- /dev/null +++ b/include/boost/sort/parallel/detail/util/low_level.hpp @@ -0,0 +1,326 @@ +//---------------------------------------------------------------------------- +/// @file low_level.hpp +/// @brief low level functions of create, destroy, move and merge functions +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_LOW_LEVEL_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_LOW_LEVEL_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +//--------------------------------------------------------------------------- +// USING SENTENCES +//--------------------------------------------------------------------------- +using std::iterator_traits; + +// +//----------------------------------------------------------------------------- +// function : construct +/// @brief create an object in the memory specified by ptr +/// +/// @param ptr : pointer to the memory where to create the object +/// @param args : arguments to the constructor +//----------------------------------------------------------------------------- +template < class Value_t, class... Args > +inline void construct (Value_t *ptr, Args &&... args) +{ + (::new (static_cast< void * > (ptr)) + Value_t (std::forward< Args > (args)...)); +}; +// +//----------------------------------------------------------------------------- +// function : destroy_object +/// @brief destroy an object in the memory specified by ptr +/// @param ptr : pointer to the object to destroy +//----------------------------------------------------------------------------- +template < class Value_t > +inline void destroy_object (Value_t *ptr) +{ + ptr->~Value_t ( ); +}; + +// +//----------------------------------------------------------------------------- +// function : init +/// @brief initialize a range of objects with the object val moving across them +/// +/// @param first : itertor to the first element to initialize +/// @param last : iterator to the last element to initialize +/// @param val : object used for the initialization +//----------------------------------------------------------------------------- +template < class Iter_t > +void init (Iter_t first, Iter_t last, + typename iterator_traits< Iter_t >::value_type &val) +{ + if (first == last) return; + construct (&(*first), std::move (val)); + + Iter_t it1 = first, it2 = first + 1; + while (it2 != last) { + construct (&(*(it2++)), std::move (*(it1++))); + }; + val = std::move (*(last - 1)); +}; +// +//----------------------------------------------------------------------------- +// function : init_move +/// @brief Move initialized objets +/// @param it_dest : iterator to the final place of the objects +/// @param first : iterator to the first element to move +/// @param last : iterator to the last element to move +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t > +Iter2_t init_move (Iter2_t it_dest, Iter1_t first, Iter1_t last) +{ + while (first != last) *(it_dest++) = std::move (*(first++)); + return it_dest; +}; +// +//----------------------------------------------------------------------------- +// function : uninit_move +/// @brief Move objets to uninitialized memory +/// +/// @param ptr : pointer to the memory where to create the objects +/// @param first : iterator to the first element to move +/// @param last : iterator to the last element to move +//----------------------------------------------------------------------------- +template < class Iter_t, + class Value_t = typename iterator_traits< Iter_t >::value_type > +Value_t *uninit_move (Value_t *ptr, Iter_t first, Iter_t last) +{ + typedef typename iterator_traits< Iter_t >::value_type value2_t; + + static_assert (std::is_same< Value_t, value2_t >::value, + "Incompatible iterators\n"); + + while (first != last) { + ::new (static_cast< void * > (ptr++)) Value_t (std::move (*(first++))); + }; + return ptr; +}; +// +//----------------------------------------------------------------------------- +// function : destroy +/// @brief destroy the elements between first and last +/// @param first : iterator to the first element to destroy +/// @param last : iterator to the last element to destroy +//----------------------------------------------------------------------------- +template < class Iter_t > +void destroy (Iter_t first, const Iter_t last) +{ + typedef typename iterator_traits< Iter_t >::value_type value_t; + while (first != last) { + (&(*(first++)))->~value_t ( ); + }; +}; +// +//----------------------------------------------------------------------------- +// function : full_merge +/// @brief Merge two contiguous buffers pointed by buf1 and buf2, and put +/// in the buffer pointed by buf_out +/// +/// @param buf1 : iterator to the first element in the first buffer +/// @param end_buf1 : final iterator of first buffer +/// @param buf2 : iterator to the first iterator to the second buffer +/// @param end_buf2 : final iterator of the second buffer +/// @param buf_out : buffer where move the elements merged +/// @param comp : comparison object +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +Iter2_t full_merge (Iter1_t buf1, const Iter1_t end_buf1, Iter1_t buf2, + const Iter1_t end_buf2, Iter2_t buf_out, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type value1_t; + typedef typename iterator_traits< Iter2_t >::value_type value2_t; + static_assert (std::is_same< value1_t, value2_t >::value, + "Incompatible iterators\n"); + + + while ((buf1 != end_buf1) and (buf2 != end_buf2)) { + *(buf_out++) = (not comp (*buf2, *buf1)) ? std::move (*(buf1++)) + : std::move (*(buf2++)); + }; + return (buf1 == end_buf1) ? init_move (buf_out, buf2, end_buf2) + : init_move (buf_out, buf1, end_buf1); +}; +// +//----------------------------------------------------------------------------- +// function : uninit_full_merge +/// @brief Merge two contiguous buffers pointed by first1 and first2, and put +/// in the uninitialized buffer pointed by it_out +/// +/// @param first1 : iterator to the first element in the first buffer +/// @param last1 : last iterator of the first buffer +/// @param first2 : iterator to the first element to the second buffer +/// @param last2 : final iterator of the second buffer +/// @param it_out : uninitialized buffer where move the elements merged +/// @param comp : comparison object +//----------------------------------------------------------------------------- +template < class Iter_t, class Value_t, class Compare > +Value_t *uninit_full_merge (Iter_t first1, const Iter_t last1, Iter_t first2, + const Iter_t last2, Value_t *it_out, Compare comp) +{ + typedef typename iterator_traits< Iter_t >::value_type type1; + static_assert (std::is_same< Value_t, type1 >::value, + "Incompatible iterators\n"); + + while (first1 != last1 and first2 != last2) { + construct ((it_out++), (not comp (*first2, *first1)) + ? std::move (*(first1++)) + : std::move (*(first2++))); + }; + return (first1 == last1) ? uninit_move (it_out, first2, last2) + : uninit_move (it_out, first1, last1); +}; +// +//--------------------------------------------------------------------------- +// function : half_merge +/// @brief : Merge two buffers. The first buffer is in a separate memory. +/// The second buffer have a empty space before buf2 of the same size +/// than the (end_buf1 - buf1) +/// +/// @param buf1 : iterator to the first element of the first buffer +/// @param end_buf1 : iterator to the last element of the first buffer +/// @param buf2 : iterator to the first element of the second buffer +/// @param end_buf2 : iterator to the last element of the second buffer +/// @param buf_out : iterator to the first element to the buffer where put +/// the result +/// @param comp : object for Compare two elements of the type pointed +/// by the Iter1_t and Iter2_t +//--------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +Iter2_t half_merge (Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2, + const Iter2_t end_buf2, Iter2_t buf_out, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type value1_t; + typedef typename iterator_traits< Iter2_t >::value_type value2_t; + static_assert (std::is_same< value1_t, value2_t >::value, + "Incompatible iterators\n"); + + while ((buf1 != end_buf1) and (buf2 != end_buf2)) { + *(buf_out++) = (not comp (*buf2, *buf1)) ? std::move (*(buf1++)) + : std::move (*(buf2++)); + }; + return (buf2 == end_buf2) ? init_move (buf_out, buf1, end_buf1) : end_buf2; +}; +// +//----------------------------------------------------------------------------- +// function : in_place_merge_uncontiguous +/// @brief : merge two uncontiguous buffers, placing the results in the buffers +/// Use an auxiliary buffer pointed by aux +/// +/// @param src1 : iterator to the first element of the first buffer +/// @param end_src1 : last iterator of the first buffer +/// @param src2 : iterator to the first element of the second buffer +/// @param end_src2 : last iterator of the second buffer +/// @param aux : iterator to the first element of the auxiliary buffer +/// @param comp : object for to Compare elements +/// @return true : not changes done, false : changes in the buffers +/// @remarks +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare > +bool in_place_merge_uncontiguous (Iter1_t src1, const Iter1_t end_src1, + Iter2_t src2, const Iter2_t end_src2, + Iter3_t aux, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + typedef typename iterator_traits< Iter3_t >::value_type type3; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + static_assert (std::is_same< type3, type2 >::value, + "Incompatible iterators\n"); + + if (src1 == end_src1 or src2 == end_src2 or + not comp (*src2, *(end_src1 - 1))) + return true; + + while (src1 != end_src1 and not comp (*src2, *src1)) ++src1; + + Iter3_t const end_aux = aux + (end_src1 - src1); + Iter2_t src2_first = src2; + init_move (aux, src1, end_src1); + + while ((src1 != end_src1) and (src2 != end_src2)) { + *(src1++) = std::move ((not comp (*src2, *aux)) ? *(aux++) : *(src2++)); + } + + if (src2 == end_src2) { + while (src1 != end_src1) *(src1++) = std::move (*(aux++)); + init_move (src2_first, aux, end_aux); + } + else + { + half_merge (aux, end_aux, src2, end_src2, src2_first, comp); + }; + return false; +}; + +// +//----------------------------------------------------------------------------- +// function : in_place_merge +/// @brief : merge two contiguous buffers,using an auxiliary buffer pointed +/// by buf. The results are in src1 and src2 +/// +/// @param src1: iterator to the first position of the first buffer +/// @param src2: final iterator of the first buffer and first iterator +/// of the second buffer +/// @param end_src2 : final iterator of the second buffer +/// @param buf : iterator to buffer used as auxiliary memory +/// @param comp : object for to Compare elements +/// @return true : not changes done, false : changes in the buffers +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +bool in_place_merge (Iter1_t src1, Iter1_t src2, Iter1_t end_src2, Iter2_t buf, + Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + if (src1 == src2 or src2 == end_src2 or not comp (*src2, *(src2 - 1))) + return true; + + Iter1_t end_src1 = src2; + while (src1 != end_src1 and not comp (*src2, *src1)) ++src1; + + if (src1 == end_src1) return false; + + size_t nx = end_src1 - src1; + init_move (buf, src1, end_src1); + half_merge (buf, buf + nx, src2, end_src2, src1, comp); + return false; +}; +// +//**************************************************************************** +}; // End namespace util +}; // End namespave detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/util/merge_four.hpp b/include/boost/sort/parallel/detail/util/merge_four.hpp new file mode 100755 index 0000000..e932a2c --- /dev/null +++ b/include/boost/sort/parallel/detail/util/merge_four.hpp @@ -0,0 +1,316 @@ +//---------------------------------------------------------------------------- +/// @file merge_four.hpp +/// @brief This file have the functions for to merge 4 buffers +/// +/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +// +//############################################################################ +// ## +// F U S I O N O F ## +// ## +// F O U R E L E M E N T S R A N G E ## +// ## +//############################################################################ +// +using std::iterator_traits; + +//----------------------------------------------------------------------------- +// function : less_range +/// @brief Compare the elements pointed by it1 and it2, and if they +/// are equals, compare their position, doing a stable comparison +/// +/// @param it1 : iterator to the first element +/// @param pos1 : position of the object pointed by it1 +/// @param it2 : iterator to the second element +/// @param pos2 : position of the element pointed by it2 +/// @param comp : comparison object +/// @return result of the comparison +//----------------------------------------------------------------------------- +template < class Iter_t, + class Compare = typename compare_iter< Iter_t >::value_type > +inline bool less_range (Iter_t it1, uint32_t pos1, Iter_t it2, uint32_t pos2, + Compare comp = Compare ( )) +{ + return (comp (*it1, *it2)) ? true : (pos2 < pos1) ? false + : not(comp (*it2, *it1)); +}; + +//----------------------------------------------------------------------------- +// function : full_merge4 +/// @brief Merge four ranges +/// +/// @param dest: range where move the elements merged. Their size must be +/// greater or equal than the sum of the sizes of the ranges +/// in vrange_input +/// @param vrange_input : array of ranges to merge +/// @param nrange_input : number of ranges in vrange_input +/// @param comp : comparison object +/// @return range with all the elements moved with the size adjusted +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +range< Iter1_t > full_merge4 (const range< Iter1_t > &rdest, + range< Iter2_t > vrange_input[4], + uint32_t nrange_input, Compare comp) +{ + typedef range< Iter1_t > range1_t; + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + size_t ndest = 0; + uint32_t i = 0; + while (i < nrange_input) { + if (vrange_input[i].size ( ) != 0) { + ndest += vrange_input[i++].size ( ); + } + else + { + for (uint32_t k = i + 1; k < nrange_input; ++k) { + vrange_input[k - 1] = vrange_input[k]; + }; + --nrange_input; + }; + }; + + if (nrange_input == 0) return range1_t (rdest.first, rdest.first); + if (nrange_input == 1) return init_move (rdest, vrange_input[0]); + if (nrange_input == 2) { + return full_merge (rdest, vrange_input[0], vrange_input[1], comp); + }; + + //------------------------------------------------------------------------ + // Initial sort + //------------------------------------------------------------------------ + uint32_t pos[4] = {0, 1, 2, 3}, npos = nrange_input; + + //----------------------------------------------------------------------- + // thanks to Steven Ross by their suggestion about the optimal + // sorting networks + //----------------------------------------------------------------------- + if (less_range (vrange_input[pos[1]].first, pos[1], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[1]); + }; + if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[2]].first, pos[2], comp)) + { + std::swap (pos[3], pos[2]); + }; + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[2]); + }; + if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[3]); + }; + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[2]); + }; + + Iter1_t it_dest = rdest.first; + while (npos > 2) { + *(it_dest++) = std::move (*(vrange_input[pos[0]].first++)); + if (vrange_input[pos[0]].size ( ) == 0) { + pos[0] = pos[1]; + pos[1] = pos[2]; + pos[2] = pos[3]; + --npos; + } + else + { + if (less_range (vrange_input[pos[1]].first, pos[1], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[1]); + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[2]); + if (npos == 4 and + less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[2]].first, pos[2], comp)) + { + std::swap (pos[2], pos[3]); + }; + }; + }; + }; + }; + + range1_t raux1 (rdest.first, it_dest), raux2 (it_dest, rdest.last); + if (pos[0] < pos[1]) { + return concat (raux1, full_merge (raux2, vrange_input[pos[0]], + vrange_input[pos[1]], comp)); + } + else + { + return concat (raux1, full_merge (raux2, vrange_input[pos[1]], + vrange_input[pos[0]], comp)); + }; +}; + +//----------------------------------------------------------------------------- +// function : uninit_full_merge4 +/// @brief Merge four ranges and put the result in uninitialized memory +/// +/// @param dest: range where create and move the elements merged. Their +/// size must be greater or equal than the sum of the sizes +/// of the ranges in the array R +/// @param vrange_input : array of ranges to merge +/// @param nrange_input : number of ranges in vrange_input +/// @param comp : comparison object +/// @return range with all the elements move with the size adjusted +//----------------------------------------------------------------------------- +template < class Value_t, class Iter_t, class Compare > +range< Value_t * > uninit_full_merge4 (const range< Value_t * > &dest, + range< Iter_t > vrange_input[4], + uint32_t nrange_input, Compare comp) +{ + typedef typename iterator_traits< Iter_t >::value_type type1; + static_assert (std::is_same< type1, Value_t >::value, + "Incompatible iterators\n"); + + size_t ndest = 0; + uint32_t i = 0; + while (i < nrange_input) { + if (vrange_input[i].size ( ) != 0) { + ndest += vrange_input[i++].size ( ); + } + else + { + for (uint32_t k = i + 1; k < nrange_input; ++k) { + vrange_input[k - 1] = vrange_input[k]; + }; + --nrange_input; + }; + }; + if (nrange_input == 0) return range< Value_t * > (dest.first, dest.first); + if (nrange_input == 1) return uninit_move (dest, vrange_input[0]); + if (nrange_input == 2) { + return uninit_full_merge (dest, vrange_input[0], vrange_input[1], comp); + }; + + //------------------------------------------------------------------------ + // Initial sort + //------------------------------------------------------------------------ + uint32_t pos[4] = {0, 1, 2, 3}, npos = nrange_input; + + //----------------------------------------------------------------------- + // thanks to Steven Ross by their suggestion about the optimal + // sorting networks + //----------------------------------------------------------------------- + if (less_range (vrange_input[pos[1]].first, pos[1], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[1]); + }; + if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[2]].first, pos[2], comp)) + { + std::swap (pos[3], pos[2]); + }; + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[2]); + }; + if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[3]); + }; + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[2]); + }; + + Value_t *it_dest = dest.first; + while (npos > 2) { + construct (&(*(it_dest++)), + std::move (*(vrange_input[pos[0]].first++))); + if (vrange_input[pos[0]].size ( ) == 0) { + pos[0] = pos[1]; + pos[1] = pos[2]; + pos[2] = pos[3]; + --npos; + } + else + { + if (less_range (vrange_input[pos[1]].first, pos[1], + vrange_input[pos[0]].first, pos[0], comp)) + { + std::swap (pos[0], pos[1]); + if (less_range (vrange_input[pos[2]].first, pos[2], + vrange_input[pos[1]].first, pos[1], comp)) + { + std::swap (pos[1], pos[2]); + if (npos == 4 and + less_range (vrange_input[pos[3]].first, pos[3], + vrange_input[pos[2]].first, pos[2], comp)) + { + std::swap (pos[2], pos[3]); + }; + }; + }; + }; + }; // end while (npos > 2) + + range< Value_t * > raux1 (dest.first, it_dest), raux2 (it_dest, dest.last); + if (pos[0] < pos[1]) { + return concat (raux1, + uninit_full_merge (raux2, vrange_input[pos[0]], + vrange_input[pos[1]], comp)); + } + else + { + return concat (raux1, + uninit_full_merge (raux2, vrange_input[pos[1]], + vrange_input[pos[0]], comp)); + }; +}; + +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/util/merge_vector.hpp b/include/boost/sort/parallel/detail/util/merge_vector.hpp new file mode 100755 index 0000000..a3dab9f --- /dev/null +++ b/include/boost/sort/parallel/detail/util/merge_vector.hpp @@ -0,0 +1,194 @@ +//---------------------------------------------------------------------------- +/// @file merge_vector.hpp +/// @brief In this file have the functions for to do a stable merge of +// ranges, in a vector +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +//############################################################################ +// ## +// F U S I O N O F ## +// ## +// A V E C T O R O F R A N G E S ## +// ## +//############################################################################ +using std::iterator_traits; +// +//----------------------------------------------------------------------------- +// function : merge_level4 +/// @brief merge the ranges in the vector v_input with the full_merge4 function. +/// The v_output vector is used as auxiliary memory in the internal +/// process. The final results is in the dest range. +/// All the ranges of v_output are inside the range dest +/// @param dest : range where move the elements merged +/// @param v_input : vector of ranges to merge +/// @param v_output : vector of ranges obtained +/// @param comp : comparison object +/// @return range with all the elements moved +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +void merge_level4 (range< Iter1_t > dest, + std::vector< range< Iter2_t > > &v_input, + std::vector< range< Iter1_t > > &v_output, Compare comp) +{ + typedef range< Iter1_t > range1_t; + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + v_output.clear ( ); + if (v_input.size ( ) == 0) return; + if (v_input.size ( ) == 1) { + v_output.emplace_back (init_move (dest, v_input[0])); + return; + }; + + uint32_t nrange = v_input.size ( ); + uint32_t pos_ini = 0; + while (pos_ini < v_input.size ( )) { + uint32_t nmerge = (nrange + 3) >> 2; + uint32_t nelem = (nrange + nmerge - 1) / nmerge; + range1_t rz = full_merge4 (dest, &v_input[pos_ini], nelem, comp); + v_output.emplace_back (rz); + dest.first = rz.last; + pos_ini += nelem; + nrange -= nelem; + }; + return; +}; +// +//----------------------------------------------------------------------------- +// function : uninit_merge_level4 +/// @brief merge the ranges moving the objects and constructing them in +/// uninitialized memory, in the vector v_input +/// using full_merge4. The v_output vector is used as auxiliary memory +/// in the internal process. The final results is in the dest range. +/// All the ranges of v_output are inside the range dest +/// +/// @param dest : range where move the elements merged +/// @param v_input : vector of ranges to merge +/// @param v_output : vector of ranges obtained +/// @param comp : comparison object +/// @return range with all the elements moved and constructed +//----------------------------------------------------------------------------- +template < class Value_t, class Iter_t, class Compare > +void uninit_merge_level4 (range< Value_t * > dest, + std::vector< range< Iter_t > > &v_input, + std::vector< range< Value_t * > > &v_output, + Compare comp) +{ + typedef range< Value_t * > range1_t; + typedef typename iterator_traits< Iter_t >::value_type type1; + static_assert (std::is_same< type1, Value_t >::value, + "Incompatible iterators\n"); + + v_output.clear ( ); + if (v_input.size ( ) == 0) return; + if (v_input.size ( ) == 1) { + v_output.emplace_back (uninit_move (dest, v_input[0])); + return; + }; + + uint32_t nrange = v_input.size ( ); + uint32_t pos_ini = 0; + while (pos_ini < v_input.size ( )) { + uint32_t nmerge = (nrange + 3) >> 2; + uint32_t nelem = (nrange + nmerge - 1) / nmerge; + range1_t rz = uninit_full_merge4 (dest, &v_input[pos_ini], nelem, comp); + v_output.emplace_back (rz); + dest.first = rz.last; + pos_ini += nelem; + nrange -= nelem; + }; + return; +}; +// +//----------------------------------------------------------------------------- +// function : merge_vector4 +/// @brief merge the ranges in the vector v_input using the merge_level4 +/// function. The v_output vector is used as auxiliary memory in the +/// internal process +/// The final results is in the range_output range. +/// All the ranges of v_output are inside the range range_output +/// All the ranges of v_input are inside the range range_input +/// @param range_input : range including all the ranges of v_input +/// @param ange_output : range including all the elements of v_output +/// @param v_input : vector of ranges to merge +/// @param v_output : vector of ranges obtained +/// @param comp : comparison object +/// @return range with all the elements moved +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +range< Iter2_t > + merge_vector4 (range< Iter1_t > range_input, range< Iter2_t > range_output, + std::vector< range< Iter1_t > > &v_input, + std::vector< range< Iter2_t > > &v_output, Compare comp) +{ + typedef range< Iter2_t > range2_t; + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + v_output.clear ( ); + if (v_input.size ( ) == 0) { + return range2_t (range_output.first, range_output.first); + }; + if (v_input.size ( ) == 1) { + return init_move (range_output, v_input[0]); + }; + bool sw = false; + uint32_t nrange = v_input.size ( ); + + while (nrange > 1) { + if (sw) { + merge_level4 (range_input, v_output, v_input, comp); + sw = false; + nrange = v_input.size ( ); + } + else + { + merge_level4 (range_output, v_input, v_output, comp); + sw = true; + nrange = v_output.size ( ); + }; + }; + return (sw) ? v_output[0] : init_move (range_output, v_input[0]); +}; + +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/util/nbits.hpp b/include/boost/sort/parallel/detail/util/nbits.hpp new file mode 100755 index 0000000..e1c50ea --- /dev/null +++ b/include/boost/sort/parallel/detail/util/nbits.hpp @@ -0,0 +1,91 @@ +//---------------------------------------------------------------------------- +/// @file algorithm.hpp +/// @brief This file contains the description of several low level algorithms +/// +/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_NBITS_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_NBITS_HPP + +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +// +//########################################################################## +// ## +// G L O B A L V A R I B L E S ## +// ## +//########################################################################## +// +// this array represent the number of bits needed for to represent the +// first 256 numbers +static constexpr const uint32_t tmsb[256] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; +// +//########################################################################## +// ## +// I N L I N E F U N C T I O N S ## +// ## +//########################################################################## +// +//--------------------------------------------------------------------------- +// function : nbits32 +/// @brief Obtain the number of bits of a number equal or greater than num +/// @param num : Number to examine +/// @return Number of bits +//--------------------------------------------------------------------------- +static inline uint32_t nbits32 (uint32_t num) noexcept +{ + int Pos = (num & 0xffff0000U) ? 16 : 0; + if ((num >> Pos) & 0xff00U) Pos += 8; + return (tmsb[num >> Pos] + Pos); +} +// +//--------------------------------------------------------------------------- +// function : nbits64 +/// @brief Obtain the number of bits of a number equal or greater than num +/// @param num : Number to examine +/// @exception none +/// @return Number of bits +//--------------------------------------------------------------------------- +static inline uint32_t nbits64 (uint64_t num) +{ + uint32_t Pos = (num & 0xffffffff00000000ULL) ? 32 : 0; + if ((num >> Pos) & 0xffff0000ULL) Pos += 16; + if ((num >> Pos) & 0xff00ULL) Pos += 8; + return (tmsb[num >> Pos] + Pos); +} + +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +#endif diff --git a/include/boost/sort/parallel/detail/util/range.hpp b/include/boost/sort/parallel/detail/util/range.hpp new file mode 100755 index 0000000..ccd950a --- /dev/null +++ b/include/boost/sort/parallel/detail/util/range.hpp @@ -0,0 +1,407 @@ +//---------------------------------------------------------------------------- +/// @file range.hpp +/// @brief Define a range [first, last), and the associated operations +/// +/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanyingfile LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +//---------------------------------------------------------------------------- +// USING SENTENCES +//---------------------------------------------------------------------------- +using std::iterator_traits; + +///--------------------------------------------------------------------------- +/// @struct range +/// @brief this represent a range between two iterators +/// @remarks +//---------------------------------------------------------------------------- +template < class Iter_t > +struct range +{ + Iter_t first, last; + // + //------------------------------------------------------------------------ + // function : range + /// @brief empty constructor + //------------------------------------------------------------------------ + range (void){}; + // + //------------------------------------------------------------------------ + // function : range + /// @brief constructor with two parameters + /// @param frs : iterator to the first element + /// @param lst : iterator to the last element + //----------------------------------------------------------------------- + range (Iter_t frs, Iter_t lst) : first (frs), last (lst){}; + // + //----------------------------------------------------------------------- + // function : empty + /// @brief indicate if the range is empty + /// @return true : empty false : not empty + //----------------------------------------------------------------------- + bool empty (void) const { return (first == last); }; + // + //----------------------------------------------------------------------- + // function : not_empty + /// @brief indicate if the range is not empty + /// @return true : not empty false : empty + //----------------------------------------------------------------------- + bool not_empty (void) const { return (first != last); }; + // + //----------------------------------------------------------------------- + // function : valid + /// @brief Indicate if the range is well constructed, and valid + /// @return true : valid, false : not valid + //----------------------------------------------------------------------- + bool valid (void) const { return ((last - first) >= 0); }; + // + //----------------------------------------------------------------------- + // function : size + /// @brief return the size of the range + /// @return size + //----------------------------------------------------------------------- + size_t size (void) const { return (last - first); }; + // + //------------------------------------------------------------------------ + // function : front + /// @brief return an iterator to the first element of the range + /// @return iterator + //----------------------------------------------------------------------- + Iter_t front (void) const { return first; }; + // + //------------------------------------------------------------------------- + // function : back + /// @brief return an iterator to the last element of the range + /// @return iterator + //------------------------------------------------------------------------- + Iter_t back (void) const { return (last - 1); }; +}; + +// +//----------------------------------------------------------------------------- +// function : concat +/// @brief concatenate two contiguous ranges +/// @param it1 : first range +/// @param it2 : second range +/// @return range resulting of the concatenation +//----------------------------------------------------------------------------- +template < class Iter_t > +range< Iter_t > concat (const range< Iter_t > &it1, const range< Iter_t > &it2) +{ + return range< Iter_t > (it1.first, it2.last); +}; +// +//----------------------------------------------------------------------------- +// function : init_move +/// @brief Move initialized objets from the range src to dest +/// @param dest : range where move the objects +/// @param src : range from where move the objects +/// @return range with the objects moved and the size adjusted +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t > +inline range< Iter2_t > init_move (const range< Iter2_t > &dest, + const range< Iter1_t > &src) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + if (src.size ( ) == 0) { + return range< Iter2_t > (dest.first, dest.first); + }; + init_move (dest.first, src.first, src.last); + return range< Iter2_t > (dest.first, dest.first + src.size ( )); +}; +//----------------------------------------------------------------------------- +// function : uninit_move +/// @brief Move uninitialized objets from the range src creating them in dest +/// +/// @param dest : range where move and create the objects +/// @param src : range from where move the objects +/// @return range with the objects moved and the size adjusted +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t > +inline range< Iter2_t > uninit_move (const range< Iter2_t > &dest, + const range< Iter1_t > &src) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + if (src.size ( ) == 0) { + return range< Iter2_t > (dest.first, dest.first); + }; + uninit_move (dest.first, src.first, src.last); + return range< Iter2_t > (dest.first, dest.first + src.size ( )); +}; +// +//----------------------------------------------------------------------------- +// function : destroy +/// @brief destroy a range of objects +/// @param rng : range to destroy +//----------------------------------------------------------------------------- +template < class Iter_t > +inline void destroy (range< Iter_t > rng) +{ + detail::util::destroy (rng.first, rng.last); +}; +// +//----------------------------------------------------------------------------- +// function : init +/// @brief initialize a range of objects with the object val moving across them +/// @param rng : range of elements not initialized +/// @param val : object used for the initialization +/// @return range initialized +//----------------------------------------------------------------------------- +template < class Iter_t > +inline range< Iter_t > + init (const range< Iter_t > &rng, + typename iterator_traits< Iter_t >::value_type &val) +{ + init (rng.first, rng.last, val); + return rng; +}; +// +//----------------------------------------------------------------------------- +// function : is_mergeable +/// @brief : indicate if two ranges have a possible merge +/// @param src1 : first range +/// @param src2 : second range +/// @param comp : object for to compare elements +/// @return true : they can be merged +/// false : they can't be merged +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +inline bool is_mergeable (const range< Iter1_t > &src1, + const range< Iter2_t > &src2, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + return comp (*(src2.front ( )), *(src1.back ( ))); +}; +// +//----------------------------------------------------------------------------- +// function : full_merge +/// @brief Merge two contiguous ranges src1 and src2, and put the result in +/// the range dest, returning the range merged +/// +/// @param dest : range where locate the lements merged. the size of dest +/// must be greater or equal than the sum of the sizes of +/// src1 and src2 +/// @param src1 : first range to merge +/// @param src2 : second range to merge +/// @param comp : comparison object +/// @return range with the elements merged and the size adjusted +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare > +inline range< Iter3_t > full_merge (const range< Iter3_t > &dest, + const range< Iter1_t > &src1, + const range< Iter2_t > &src2, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + typedef typename iterator_traits< Iter3_t >::value_type type3; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + static_assert (std::is_same< type3, type2 >::value, + "Incompatible iterators\n"); + + return range< Iter3_t > (dest.first, + full_merge (src1.first, src1.last, src2.first, + src2.last, dest.first, comp)); +}; + +//----------------------------------------------------------------------------- +// function : uninit_full_merge +/// @brief Merge two contiguous uninitialized ranges src1 and src2, and create +/// and move the result in the uninitialized range dest, returning the +/// range merged +// +/// @param dest : range where locate the elements merged. the size of dest +/// must be greater or equal than the sum of the sizes of +/// src1 and src2. Initially is uninitialize memory +/// @param src1 : first range to merge +/// @param src2 : second range to merge +/// @param comp : comparison object +/// @return range with the elements merged and the size adjusted +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Value_t, class Compare > +inline range< Value_t * > uninit_full_merge (const range< Value_t * > &dest, + const range< Iter1_t > &src1, + const range< Iter2_t > &src2, + Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + static_assert (std::is_same< Value_t, type2 >::value, + "Incompatible iterators\n"); + + return range< Value_t * > ( + dest.first, uninit_full_merge (src1.first, src1.last, src2.first, + src2.last, dest.first, comp)); +}; +// +//--------------------------------------------------------------------------- +// function : half_merge +/// @brief : Merge two initialized buffers. The first buffer is in a separate +/// memory +// +/// @param dest : range where finish the two buffers merged +/// @param src1 : first range to merge in a separate memory +/// @param src2 : second range to merge, in the final part of the +/// range where deposit the final results +/// @param comp : object for compare two elements of the type pointed +/// by the Iter1_t and Iter2_t +/// @return : range with the two buffers merged +//--------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +inline range< Iter2_t > half_merge (const range< Iter2_t > &dest, + const range< Iter1_t > &src1, + const range< Iter2_t > &src2, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + return range< Iter2_t > (dest.first, + half_merge (src1.first, src1.last, src2.first, + src2.last, dest.first, comp)); +}; +// +//----------------------------------------------------------------------------- +// function : in_place_merge_uncontiguous +/// @brief : merge two non contiguous ranges src1, src2, using the range +/// aux as auxiliary memory. The results are in the original ranges +// +/// @param src1 : first range to merge +/// @param src2 : second range to merge +/// @param aux : auxiliary range used in the merge +/// @param comp : object for to compare elements +/// @return true : not changes done, false : changes in the buffers +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare > +inline bool in_place_merge_uncontiguous (const range< Iter1_t > &src1, + const range< Iter2_t > &src2, + const range< Iter3_t > &aux, + Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + typedef typename iterator_traits< Iter3_t >::value_type type3; + + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + static_assert (std::is_same< type3, type2 >::value, + "Incompatible iterators\n"); + + return in_place_merge_uncontiguous (src1.first, src1.last, src2.first, + src2.last, aux.first, comp); +}; +// +//----------------------------------------------------------------------------- +// function : in_place_merge +/// @brief : merge two contiguous ranges ( src1, src2) using buf as +/// auxiliary memory. The results are in the same ranges +/// @param src1 : first range to merge +/// @param src1 : second range to merge +/// @param buf : auxiliary memory used in the merge +/// @param comp : object for to compare elements +/// @return true : not changes done, false : changes in the buffers +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +inline range< Iter1_t > + in_place_merge (const range< Iter1_t > &src1, const range< Iter1_t > &src2, + const range< Iter2_t > &buf, Compare comp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + in_place_merge (src1.first, src1.last, src2.last, buf.first, comp); + return concat (src1, src2); +}; + +// +//----------------------------------------------------------------------------- +// function : merge_flow +/// @brief : merge two ranges, as part of a merge the ranges in a list. This +/// function reduce the number of movements compared with inplace_merge +/// when you need to merge a sequence of ranges. +/// This function merge the ranges rbuf and rng2, and the results +/// are in rng1 and rbuf +// +/// @param rng1 : range where locate the first elements of the merge +/// @param rbuf : range which provide the first elements, and where store +/// the last results of the merge +/// @param rng2 : range which provide the last elements to merge +/// @param comp : object for to compare elements +/// @return true : not changes done, false : changes in the buffers +//----------------------------------------------------------------------------- +template < class Iter1_t, class Iter2_t, class Compare > +void merge_flow (range< Iter1_t > rng1, range< Iter2_t > rbuf, + range< Iter1_t > rng2, Compare cmp) +{ + typedef typename iterator_traits< Iter1_t >::value_type type1; + typedef typename iterator_traits< Iter2_t >::value_type type2; + static_assert (std::is_same< type1, type2 >::value, + "Incompatible iterators\n"); + + range< Iter2_t > rbx (rbuf); + range< Iter1_t > rx1 (rng1), rx2 (rng2); + assert (rbx.size ( ) == rx1.size ( ) and rx1.size ( ) == rx2.size ( )); + while (rx1.first != rx1.last) { + *(rx1.first++) = (cmp (*rbx.first, *rx2.first)) + ? std::move (*(rbx.first++)) + : std::move (*(rx2.first++)); + }; + if (rx2.first == rx2.last) return; + if (rbx.first == rbx.last) + util::init_move (rbuf, rng2); + else + util::half_merge (rbuf, rx2, rbx, cmp); +}; + +//**************************************************************************** +}; // End namespace util +}; // End namespace detail +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/boost/sort/parallel/detail/util/spinlock.hpp b/include/boost/sort/parallel/detail/util/spinlock.hpp new file mode 100755 index 0000000..ac8ca7f --- /dev/null +++ b/include/boost/sort/parallel/detail/util/spinlock.hpp @@ -0,0 +1,96 @@ +//---------------------------------------------------------------------------- +/// @file spinlock_t.hpp +/// @brief +/// +/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanyingfile LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +// +//--------------------------------------------------------------------------- +/// @class spinlock_t +/// @brief This class implement, from atomic variables, a spinlock +/// @remarks This class meet the BasicLockable requirements ( lock, unlock ) +//--------------------------------------------------------------------------- +class spinlock_t +{ + private: + //------------------------------------------------------------------------ + // P R I V A T E V A R I A B L E S + //------------------------------------------------------------------------ + std::atomic_flag af; + + public: + // + //------------------------------------------------------------------------- + // function : spinlock_t + /// @brief class constructor + /// @param [in] + //------------------------------------------------------------------------- + explicit spinlock_t ( ) noexcept { af.clear ( ); }; + // + //------------------------------------------------------------------------- + // function : lock + /// @brief Lock the spinlock_t + //------------------------------------------------------------------------- + void lock ( ) noexcept + { + if (af.test_and_set (std::memory_order_acquire)) { + std::this_thread::yield ( ); + while (af.test_and_set (std::memory_order_relaxed)) { + std::this_thread::yield ( ); + }; + }; + }; + // + //------------------------------------------------------------------------- + // function : try_lock + /// @brief Try to lock the spinlock_t, if not, return false + /// @return true : locked + /// false: not previous locked + //------------------------------------------------------------------------- + bool try_lock ( ) noexcept + { + return not af.test_and_set (std::memory_order_acquire); + }; + // + //------------------------------------------------------------------------- + // function : unlock + /// @brief unlock the spinlock_t + //------------------------------------------------------------------------- + void unlock ( ) noexcept { af.clear (std::memory_order_release); }; + +}; // E N D C L A S S S P I N L O C K +// +//*************************************************************************** +}; // end namespace util +}; // end namespace detail +}; // end namespace parallel +}; // end namespace sort +}; // end namespace boost +//*************************************************************************** +#endif diff --git a/include/boost/sort/parallel/detail/util/stack_cnc.hpp b/include/boost/sort/parallel/detail/util/stack_cnc.hpp new file mode 100755 index 0000000..e262a9c --- /dev/null +++ b/include/boost/sort/parallel/detail/util/stack_cnc.hpp @@ -0,0 +1,131 @@ +//---------------------------------------------------------------------------- +/// @file stack_cnc.hpp +/// @brief This file contains the implementation concurrent stack +/// +/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanyingfile LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_stack_cnc_t_HPP +#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_stack_cnc_t_HPP + +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +namespace detail +{ +namespace util +{ +// +//########################################################################### +// ## +// ################################################################ ## +// # # ## +// # C L A S S # ## +// # S T A C K _ C N C # ## +// # # ## +// ################################################################ ## +// ## +//########################################################################### +// +//--------------------------------------------------------------------------- +/// @class stack_cnc +/// @brief This class is a concurrent stack controled by a spin_lock +/// @remarks +//--------------------------------------------------------------------------- +template < typename T, typename Allocator = std::allocator< T > > +class stack_cnc +{ + public: + //------------------------------------------------------------------------ + // D E F I N I T I O N S + //------------------------------------------------------------------------ + typedef std::vector< T, Allocator > vector_t; + typedef typename vector_t::size_type size_type; + typedef typename vector_t::difference_type difference_type; + typedef typename vector_t::value_type value_type; + typedef typename vector_t::pointer pointer; + typedef typename vector_t::const_pointer const_pointer; + typedef typename vector_t::reference reference; + typedef typename vector_t::const_reference const_reference; + typedef typename vector_t::allocator_type allocator_type; + typedef Allocator alloc_t; + + protected: + //------------------------------------------------------------------------- + // INTERNAL VARIABLES + //------------------------------------------------------------------------- + vector_t v_t; + mutable spinlock_t spl; + + public: + // + //------------------------------------------------------------------------- + // function : stack_cnc + /// @brief constructor + //------------------------------------------------------------------------- + explicit stack_cnc (void) : v_t ( ){}; + + // + //------------------------------------------------------------------------- + // function : stack_cnc + /// @brief Move constructor + //------------------------------------------------------------------------- + stack_cnc (stack_cnc &&) = delete; + // + //------------------------------------------------------------------------- + // function : ~stack_cnc + /// @brief Destructor + //------------------------------------------------------------------------- + virtual ~stack_cnc (void) { v_t.clear ( ); }; + + //------------------------------------------------------------------------- + // function : emplace_back + /// @brief Insert one element in the back of the container + /// @param args : group of arguments for to build the object to insert. Can + /// be values, references or rvalues + //------------------------------------------------------------------------- + template < class... Args > + void emplace_back (Args &&... args) + { + std::lock_guard< spinlock_t > guard (spl); + v_t.emplace_back (std::forward< Args > (args)...); + }; + + // + //------------------------------------------------------------------------- + // function :pop_move_back + /// @brief if exist, move the last element to P, and delete it + /// @param P : reference to a variable where move the element + /// @return true - Element moved and deleted + /// false - Empty stack_cnc + //------------------------------------------------------------------------- + bool pop_move_back (value_type &P) + { + std::lock_guard< spinlock_t > S (spl); + if (v_t.size ( ) == 0) return false; + P = std::move (v_t.back ( )); + v_t.pop_back ( ); + return true; + }; + +}; // end class stack_cnc + +//*************************************************************************** +}; // end namespace util +}; // end namespace detail +}; // end namespace parallel +}; // end namespace sort +}; // end namespace boost +//*************************************************************************** +#endif diff --git a/include/boost/sort/parallel/sort.hpp b/include/boost/sort/parallel/sort.hpp new file mode 100755 index 0000000..332e782 --- /dev/null +++ b/include/boost/sort/parallel/sort.hpp @@ -0,0 +1,349 @@ +//---------------------------------------------------------------------------- +/// @file sort.hpp +/// @brief This file contains the sort functions of the sort library +/// +/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n +/// Distributed under the Boost Software License, Version 1.0.\n +/// ( See accompanying file LICENSE_1_0.txt or copy at +/// http://www.boost.org/LICENSE_1_0.txt ) +/// @version 0.1 +/// +/// @remarks +//----------------------------------------------------------------------------- +#ifndef __BOOST_SORT_PARALLEL_SORT_HPP +#define __BOOST_SORT_PARALLEL_SORT_HPP + +#include +#include +#include +#include + +namespace boost +{ +namespace sort +{ +namespace parallel +{ +// +//**************************************************************************** +// USING AND DEFINITIONS +//**************************************************************************** +using std::iterator_traits; +using detail::less_ptr_no_null; +using detail::util::compare_iter; +using detail::util::enable_if_not_integral; +// +//############################################################################ +// ## +// S O R T , I N D I R E C T _ S O R T ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : sort +/// @brief non stable sort, based internally in the intro_sort algorithm +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare = compare_iter< Iter_t > > +void sort (Iter_t first, Iter_t last, Compare comp = Compare ( )) +{ + detail::intro_sort (first, last, comp); +}; +// +//----------------------------------------------------------------------------- +// function : indirect_sort +/// @brief : indirect sort. This function sort a vector of iterators to the +/// elements. And after, move the elements, for to be phisically +/// sorted. This is useful with big objects +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare = compare_iter< Iter_t > > +void indirect_sort (Iter_t first, Iter_t last, Compare comp = Compare ( )) +{ + typedef less_ptr_no_null< Iter_t, Compare > compare_ptr; + + std::vector< Iter_t > v_iter; + detail::create_index (first, last, v_iter); + detail::intro_sort (v_iter.begin ( ), v_iter.end ( ), compare_ptr (comp)); + detail::sort_index (first, v_iter); +}; +// +//############################################################################ +// ## +// ## +// P A R A L L E L _ S O R T ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : parallel_sort +/// @brief this function implement a non stable parallel sort. The number of +/// threads to use is the HW threads of the machine +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +//----------------------------------------------------------------------------- +template < class Iter_t > +void parallel_sort (Iter_t first, Iter_t last) +{ + typedef compare_iter< Iter_t > Compare; + detail::select_block_indirect (first, last, Compare()); +}; +// +//----------------------------------------------------------------------------- +// function : parallel_sort +/// @brief this function implement a non stable parallel sort. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t > +void parallel_sort (Iter_t first, Iter_t last, uint32_t nthread) +{ + typedef compare_iter< Iter_t > Compare; + detail::select_block_indirect (first, last, Compare(), nthread); +}; +//----------------------------------------------------------------------------- +// function : parallel_sort +/// @brief this function implement a non stable parallel sort. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +void parallel_sort (Iter_t first, Iter_t last, Compare comp, uint32_t nthread) +{ + detail::select_block_indirect (first, last, comp, nthread); +}; +// +//----------------------------------------------------------------------------- +// function : parallel_sort +/// @brief : non stable parallel sort. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare, + enable_if_not_integral< Compare > * = nullptr > +void parallel_sort (Iter_t first, Iter_t last, Compare comp) +{ + detail::select_block_indirect (first, last, comp); +}; + +// +//############################################################################ +// ## +// ## +// S T A B L E _ S O R T , I N D I R E C T _ S T A B L E _ S O R T ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : stable_sort +/// @brief this function implement a single thread stable sort +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare = compare_iter< Iter_t > > +void stable_sort (Iter_t first, Iter_t last, Compare comp = Compare ( )) +{ + detail::spin_sort< Iter_t, Compare > (first, last, comp); +}; +// +//----------------------------------------------------------------------------- +// function : indirect_stable_sort +/// @brief : indirect stable sort. This function make a stable sort of a vector +/// of iterators to the elements. And after, move the elements, for +/// to be phisically sorted. This is useful with big objects +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare = compare_iter< Iter_t > > +void indirect_stable_sort (Iter_t first, Iter_t last, + Compare comp = Compare ( )) +{ + typedef less_ptr_no_null< Iter_t, Compare > compare_ptr; + typedef typename std::vector< Iter_t >::iterator iter_ptr; + + std::vector< Iter_t > v_iter; + detail::create_index (first, last, v_iter); + detail::spin_sort< iter_ptr, compare_ptr > + (v_iter.begin ( ), v_iter.end ( ), compare_ptr (comp)); + detail::sort_index (first, v_iter); +}; +// +//############################################################################ +// ## +// ## +// P A R A L L E L _ S T A B L E _ S O R T ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : parallel_stable_sort +/// @brief : parallel stable sort algorithm. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +//----------------------------------------------------------------------------- +template < class Iter_t > +void parallel_stable_sort (Iter_t first, Iter_t last) +{ + typedef compare_iter< Iter_t > Compare; + detail::parallel_stable_sort< Iter_t, Compare > (first, last); +}; +// +//----------------------------------------------------------------------------- +// function : parallel_stable_sort +/// @brief parallel stable sort. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t > +void parallel_stable_sort (Iter_t first, Iter_t last, uint32_t nthread) +{ + typedef compare_iter< Iter_t > Compare; + detail::parallel_stable_sort< Iter_t, Compare > (first, last, nthread); +}; +// +//----------------------------------------------------------------------------- +// function : parallel_stable_sort +/// @brief : parallel stable sort. +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare, + enable_if_not_integral< Compare > * = nullptr > +void parallel_stable_sort (Iter_t first, Iter_t last, Compare comp) +{ + detail::parallel_stable_sort< Iter_t, Compare > (first, last, comp); +}; +// +//----------------------------------------------------------------------------- +// function : parallel_stable_sort +/// @brief parallel stable sort +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t, typename Compare > +void parallel_stable_sort (Iter_t first, Iter_t last, Compare comp, + uint32_t nthread) +{ + detail::parallel_stable_sort< Iter_t, Compare > (first, last, comp, + nthread); +}; +// +//############################################################################ +// ## +// ## +// S A M P L E _ S O R T ## +// ## +// ## +//############################################################################ +// +//----------------------------------------------------------------------------- +// function : sample_sort +/// @brief parallel sample sort algorithm (stable sort) +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +//----------------------------------------------------------------------------- +template < class Iter_t > +void sample_sort (Iter_t first, Iter_t last) +{ + typedef compare_iter< Iter_t > Compare; + detail::sample_sort< Iter_t, Compare > (first, last); +}; +// +//----------------------------------------------------------------------------- +// function : sample_sort +/// @brief parallel sample sort algorithm (stable sort) +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t > +void sample_sort (Iter_t first, Iter_t last, uint32_t nthread) +{ + typedef compare_iter< Iter_t > Compare; + detail::sample_sort< Iter_t, Compare > (first, last, nthread); +}; +// +//----------------------------------------------------------------------------- +// function : sample_sort +/// @brief parallel sample sort algorithm (stable sort) +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare, + enable_if_not_integral< Compare > * = nullptr > +void sample_sort (Iter_t first, Iter_t last, Compare comp) +{ + detail::sample_sort< Iter_t, Compare > (first, last, comp); +}; +// +//----------------------------------------------------------------------------- +// function : sample_sort +/// @brief parallel sample sort algorithm (stable sort) +/// +/// @param first : iterator to the first element of the range to sort +/// @param last : iterator after the last element to the range to sort +/// @param comp : object for to compare two elements pointed by Iter_t +/// iterators +/// @param nthread : Number of threads to use in the process. When this value +/// is lower than 2, the sorting is done with 1 thread +//----------------------------------------------------------------------------- +template < class Iter_t, class Compare > +void sample_sort (Iter_t first, Iter_t last, Compare comp, uint32_t nthread) +{ + detail::sample_sort< Iter_t, Compare > (first, last, comp, nthread); +}; +// +//**************************************************************************** +}; // End namespace parallel +}; // End namespace sort +}; // End namespace boost +//**************************************************************************** +// +#endif diff --git a/include/counting_iterator.hpp b/include/counting_iterator.hpp new file mode 100644 index 0000000..3562d11 --- /dev/null +++ b/include/counting_iterator.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include + +namespace mrzv +{ + +class counting_iterator +{ + public: + typedef size_t value_type; + typedef const size_t& reference; + typedef const size_t* pointer; + typedef ptrdiff_t difference_type; + typedef std::random_access_iterator_tag iterator_category; + + explicit counting_iterator(size_t x): m_inc(x) {} + size_t const& base() const { return m_inc; } + reference operator*() const { return m_inc; } + counting_iterator& operator++() { ++m_inc; return *this; } + counting_iterator& operator--() { --m_inc; return *this; } + + counting_iterator& operator+=(difference_type i) { m_inc += i; return *this; } + counting_iterator& operator-=(difference_type i) { m_inc -= i; return *this; } + + bool operator==(const counting_iterator& other) const { return m_inc == other.m_inc; } + bool operator!=(const counting_iterator& other) const { return m_inc != other.m_inc; } + + counting_iterator operator+(difference_type x) const { counting_iterator result = *this; result += x; return result; } + + bool operator<(const counting_iterator& other) const { return m_inc < other.m_inc; } + + friend + difference_type operator-(const counting_iterator& x, const counting_iterator& y) { return x.m_inc - y.m_inc; } + + // and loads of others + private: + size_t m_inc; +}; + +} diff --git a/include/reclamation.hpp b/include/reclamation.hpp new file mode 100644 index 0000000..27f3c04 --- /dev/null +++ b/include/reclamation.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include +#include + +namespace mrzv +{ + +template +struct MemoryManager +{ + std::vector retiring_, retired_, to_delete_; + atomic_ref counter_; + unsigned n_threads_; + bool even_epoch_ = false; + + MemoryManager(int& epoch_counter, unsigned n_threads): + counter_(epoch_counter), n_threads_(n_threads) {} + + ~MemoryManager() + { + for(T* p : to_delete_) + delete p; + for(T* p : retired_) + delete p; + for(T* p : retiring_) + delete p; + } + bool is_even_epoch(int counter) const { return (counter / n_threads_) % 2 == 0; } + void retire(T* ptr) { if(ptr) retiring_.push_back(ptr); } + void quiescent() + { + if (even_epoch_ != is_even_epoch(counter_)) + { + ++counter_; + even_epoch_ = !even_epoch_; + for(T* p : to_delete_) + delete p; + to_delete_.clear(); + retired_.swap(to_delete_); + retiring_.swap(retired_); + } + } +}; + +} diff --git a/include/trivial_concurrent_hash_map.hpp b/include/trivial_concurrent_hash_map.hpp new file mode 100644 index 0000000..b3c05c6 --- /dev/null +++ b/include/trivial_concurrent_hash_map.hpp @@ -0,0 +1,128 @@ +#pragma once + +// Atuhor: Dmitriy Morozov +// Version 2020-03-12 + +#include +#include + +#include + +namespace mrzv +{ + +template +class trivial_concurrent_hash_map +{ + public: + using key_type = Key; + using mapped_type = T; + using value_type = std::pair; + using Storage = std::vector; + using iterator = typename Storage::iterator; + + static constexpr size_t storage_multiplier = 8; + + public: + void reserve(size_t hint) + { + size_t sz = 1; + while (sz < hint*storage_multiplier) + sz *= 2; + storage_.clear(); + storage_.resize(sz, dummy()); + } + + inline iterator find(Key k); + inline std::pair + insert(value_type x); + + // CAS update value + inline bool update(iterator it, T& expected, T desired) + { return atomic_ref(it->second).compare_exchange_weak(expected, desired); } + + iterator end() { return storage_.end(); } + + Key key(iterator it) { return atomic_ref(it->first).load(); } + T value(iterator it) { return atomic_ref(it->second).load(); } + + template + void foreach(const F& f) const { for(auto& x : storage_) if (x.first != dummy_key()) f(x); } + + public: + static Key dummy_key() { return static_cast(-1); } + static T dummy_value() { return static_cast(-1); } + static value_type dummy() { return value_type { dummy_key(), dummy_value() }; } + + private: + static size_t hash(Key k) { return H()(k); } + static bool equal(Key k1, Key k2) { return E()(k1,k2); } + + private: + Storage storage_; +}; + +} + + +template +typename mrzv::trivial_concurrent_hash_map::iterator +mrzv::trivial_concurrent_hash_map:: +find(Key k) +{ + auto size = storage_.size(); + auto idx = hash(k) % size; + + while(true) + { + iterator it = storage_.begin() + idx; + + atomic_ref ak(it->first); + Key kk = ak; + + if (equal(kk, k)) { + while(value(it) == dummy_value()); // make sure a value has been written (possibly should add exponential backoff) + return it; + } + else if (equal(kk, dummy_key())) + return end(); + + idx = (idx + 1) % size; + } +} + +template +std::pair::iterator, bool> +mrzv::trivial_concurrent_hash_map:: +insert(value_type x) +{ + Key k = x.first; + + auto size = storage_.size(); + auto idx = hash(k) % size; + + while(true) + { + iterator it = storage_.begin() + idx; + + atomic_ref ak(it->first); + Key kk = ak; + + if (equal(kk, k)) + return { it, false }; + else if (equal(kk, dummy_key())) + { + Key dk = dummy_key(); + if (ak.compare_exchange_weak(dk, k)) + { + atomic_ref av(it->second); + av.store(x.second); + return { it, true }; + } else if (dk == k) // somebody overwrote our own key, we lose + return { it, false }; + // else something is written, but not our key, continue scanning + } + + idx = (idx + 1) % size; + } +} diff --git a/ripser.cpp b/ripser.cpp index cd546e8..0c82aa4 100644 --- a/ripser.cpp +++ b/ripser.cpp @@ -38,11 +38,25 @@ //#define USE_COEFFICIENTS -//#define INDICATE_PROGRESS -#define PRINT_PERSISTENCE_PAIRS +#define INDICATE_PROGRESS +//#define PRINT_PERSISTENCE_PAIRS + +//#define USE_SERIAL +//#define USE_SHUFFLED_SERIAL + +#if defined(USE_SERIAL) || defined(USE_SHUFFLED_SERIAL) +#define USING_SERIAL +#define USE_SERIAL_ATOMIC_REF +#endif + +#if !(defined USING_SERIAL) +#define USE_TRIVIAL_CONCURRENT_HASHMAP +#endif //#define USE_GOOGLE_HASHMAP +//#define USE_TBB_HASHMAP + #include #include #include @@ -54,6 +68,30 @@ #include #include +#include +#include +#include +#include + +#include + +#ifndef USING_SERIAL +#include +#endif + +#ifdef USE_SHUFFLED_SERIAL +#include +#endif + +#ifdef USE_PARALLEL_STL +#include +#endif + +#ifdef USE_TBB +#include +#include +#endif + #ifdef USE_GOOGLE_HASHMAP #include template @@ -62,9 +100,45 @@ class hash_map : public google::dense_hash_map { explicit hash_map() : google::dense_hash_map() { this->set_empty_key(-1); } inline void reserve(size_t hint) { this->resize(hint); } }; +#elif defined(USE_TBB_HASHMAP) +#include +template +class hash_map : public tbb::concurrent_unordered_map +{ + public: + using Parent = tbb::concurrent_unordered_map; + using iterator = typename Parent::iterator; + + Key key(iterator it) const { return it->first; } + T value(iterator it) const { return it->second; } + + bool update(iterator it, T& expected, T desired) { it->second = desired; return true; } + + template + void foreach(const F& f) const { for(auto& x : (*this)) f(x); } + + void reserve(size_t hint) {} +}; +#elif defined(USE_TRIVIAL_CONCURRENT_HASHMAP) +#include +template +class hash_map : public mrzv::trivial_concurrent_hash_map {}; #else template -class hash_map : public std::unordered_map {}; +class hash_map : public std::unordered_map +{ + public: + using Parent = std::unordered_map; + using iterator = typename Parent::iterator; + + Key key(iterator it) const { return it->first; } + T value(iterator it) const { return it->second; } + + bool update(iterator it, T& expected, T desired) { it->second = desired; return true; } + + template + void foreach(const F& f) const { for(auto& x : (*this)) f(x); } +}; #endif typedef float value_t; @@ -78,6 +152,7 @@ static const std::chrono::milliseconds time_step(40); static const std::string clear_line("\r\033[K"); static const size_t num_coefficient_bits = 8; +static const index_t coefficient_mask = (static_cast(1) << num_coefficient_bits) - 1; static const index_t max_simplex_index = (1l << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1; @@ -134,26 +209,17 @@ std::vector multiplicative_inverse_vector(const coefficient_t m) #ifdef USE_COEFFICIENTS -struct __attribute__((packed)) entry_t { - index_t index : 8 * sizeof(index_t) - num_coefficient_bits; - coefficient_t coefficient : num_coefficient_bits; - entry_t(index_t _index, coefficient_t _coefficient) - : index(_index), coefficient(_coefficient) {} - entry_t(index_t _index) : index(_index), coefficient(0) {} - entry_t() : index(0), coefficient(0) {} -}; - -static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t"); +typedef index_t entry_t; -entry_t make_entry(index_t i, coefficient_t c) { return entry_t(i, c); } -index_t get_index(const entry_t& e) { return e.index; } -index_t get_coefficient(const entry_t& e) { return e.coefficient; } -void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; } +entry_t make_entry(index_t i, coefficient_t c) { return (i << num_coefficient_bits) | c; } +index_t get_index(const entry_t& e) { return (e >> num_coefficient_bits); } +index_t get_coefficient(const entry_t& e) { return (e & coefficient_mask); } +void set_coefficient(entry_t& e, const coefficient_t c) { e = (e & ~coefficient_mask) | c; } -std::ostream& operator<<(std::ostream& stream, const entry_t& e) { - stream << get_index(e) << ":" << get_coefficient(e); - return stream; -} +//std::ostream& operator<<(std::ostream& stream, const entry_t& e) { +// stream << get_index(e) << ":" << get_coefficient(e); +// return stream; +//} #else @@ -197,7 +263,7 @@ void set_coefficient(diameter_entry_t& p, const coefficient_t c) { } template struct greater_diameter_or_smaller_index { - bool operator()(const Entry& a, const Entry& b) { + bool operator()(const Entry& a, const Entry& b) const { return (get_diameter(a) > get_diameter(b)) || ((get_diameter(a) == get_diameter(b)) && (get_index(a) < get_index(b))); } @@ -263,9 +329,6 @@ struct sparse_distance_matrix { index_t num_edges; - mutable std::vector::const_reverse_iterator> neighbor_it; - mutable std::vector::const_reverse_iterator> neighbor_end; - sparse_distance_matrix(std::vector>&& _neighbors, index_t _num_edges) : neighbors(std::move(_neighbors)), num_edges(_num_edges) {} @@ -338,29 +401,31 @@ template T begin(std::pair& p) { return p.first; } template T end(std::pair& p) { return p.second; } template class compressed_sparse_matrix { - std::vector bounds; - std::vector entries; - - typedef typename std::vector::iterator iterator; - typedef std::pair iterator_pair; + std::vector*> columns; public: - size_t size() const { return bounds.size(); } + using Column = std::vector; - iterator_pair subrange(const index_t index) { - return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]), - entries.begin() + bounds[index]}; - } + compressed_sparse_matrix(size_t n): + columns(n, nullptr) {} + ~compressed_sparse_matrix() { for(Column* x : columns) delete x; } - void append_column() { bounds.push_back(entries.size()); } + size_t size() const { return columns.size(); } - void push_back(const ValueType e) { - assert(0 < size()); - entries.push_back(e); - ++bounds.back(); - } + Column* column(const index_t index) + { return mrzv::atomic_ref(columns[index]).load(); } + + Column* exchange(const index_t index, Column* desired) + { return mrzv::atomic_ref(columns[index]).exchange(desired); } + + void store(const index_t index, Column* desired) + { return mrzv::atomic_ref(columns[index]).store(desired); } + + bool update(const index_t index, Column*& expected, Column* desired) + { return mrzv::atomic_ref(columns[index]).compare_exchange_weak(expected, desired); } }; + template index_t get_max(index_t top, const index_t bottom, const Predicate pred) { if (!pred(top)) { @@ -383,9 +448,9 @@ template class ripser { const value_t threshold; const float ratio; const coefficient_t modulus; + const unsigned num_threads; const binomial_coeff_table binomial_coeff; const std::vector multiplicative_inverse; - mutable std::vector cofacet_entries; struct entry_hash { std::size_t operator()(const entry_t& e) const { @@ -399,14 +464,26 @@ template class ripser { } }; - typedef hash_map entry_hash_map; + typedef hash_map entry_hash_map; + + typedef compressed_sparse_matrix Matrix; + typedef typename Matrix::Column MatrixColumn; + + typedef std::priority_queue, + greater_diameter_or_smaller_index> WorkingColumn; public: ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio, - coefficient_t _modulus) + coefficient_t _modulus, unsigned _num_threads) : dist(std::move(_dist)), n(dist.size()), dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold), - ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2), + ratio(_ratio), modulus(_modulus), +#ifndef USING_SERIAL + num_threads((_num_threads == 0 ? std::thread::hardware_concurrency() : _num_threads)), +#else + num_threads(1), +#endif + binomial_coeff(n, dim_max + 2), multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {} index_t get_max_vertex(const index_t idx, const index_t k, const index_t n) const { @@ -444,28 +521,93 @@ template class ripser { columns_to_reduce.clear(); std::vector next_simplices; - for (diameter_index_t& simplex : simplices) { - simplex_coboundary_enumerator cofacets(diameter_entry_t(simplex, 1), dim, *this); + const size_t chunk_size = 1024; + std::atomic achunk {0}; +#ifdef INDICATE_PROGRESS + std::atomic progress {0}; +#endif - while (cofacets.has_next(false)) { + std::vector> next_simplices_vec(num_threads), columns_to_reduce_vec(num_threads); +#if defined(USE_TBB) + tbb::parallel_for(0, num_threads, + [&](unsigned i) { +#else + std::vector> handles; + for (unsigned i = 0; i < num_threads; ++i) + handles.emplace_back(std::async(std::launch::async, [&,i]() { +#endif + std::vector& next_simplices = next_simplices_vec[i]; + std::vector& columns_to_reduce = columns_to_reduce_vec[i]; #ifdef INDICATE_PROGRESS - if (std::chrono::steady_clock::now() > next) { - std::cerr << clear_line << "assembling " << next_simplices.size() - << " columns (processing " << std::distance(&simplices[0], &simplex) - << "/" << simplices.size() << " simplices)" << std::flush; - next = std::chrono::steady_clock::now() + time_step; - } + int indicate_progress = progress++; #endif - auto cofacet = cofacets.next(); - if (get_diameter(cofacet) <= threshold) { + size_t cur_chunk = achunk++; + while(cur_chunk * chunk_size < simplices.size()) { + size_t from = cur_chunk * chunk_size; + size_t to = std::min((cur_chunk + 1) * chunk_size, simplices.size()); + + for (size_t idx = from; idx < to; ++idx) { + auto& simplex = simplices[idx]; + simplex_coboundary_enumerator cofacets(diameter_entry_t(simplex, 1), dim, *this); + while (cofacets.has_next(false)) { +#ifdef INDICATE_PROGRESS + if (indicate_progress == 0) { + if (std::chrono::steady_clock::now() > next) { + std::cerr << clear_line << "assembling columns (processing " + << idx << "/" << simplices.size() << " simplices)" << std::flush; + next = std::chrono::steady_clock::now() + time_step; + } + } +#endif + auto cofacet = cofacets.next(); + if (get_diameter(cofacet) <= threshold) { - next_simplices.push_back({get_diameter(cofacet), get_index(cofacet)}); + next_simplices.push_back({get_diameter(cofacet), get_index(cofacet)}); - if (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) - columns_to_reduce.push_back({get_diameter(cofacet), get_index(cofacet)}); + if (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) + columns_to_reduce.push_back({get_diameter(cofacet), get_index(cofacet)}); + } + } + } + cur_chunk = achunk++; } - } +#if defined(USE_TBB) + }); +#else + })); + handles.clear(); +#endif + + // figure out offsets to put everything together and resize + std::vector simplices_prefix { 0 }, columns_to_reduce_prefix { 0 }; + for (unsigned i = 0; i < num_threads; ++i) { + simplices_prefix.push_back(simplices_prefix.back() + next_simplices_vec[i].size()); + columns_to_reduce_prefix.push_back(columns_to_reduce_prefix.back() + columns_to_reduce_vec[i].size()); } + next_simplices.resize(simplices_prefix.back()); + columns_to_reduce.resize(columns_to_reduce_prefix.back()); + + // copy into the arrays +#if defined(USE_TBB) + tbb::parallel_for(0, num_threads, + [&](unsigned i) { +#else + for (unsigned i = 0; i < num_threads; ++i) + handles.emplace_back(std::async(std::launch::async, [&,i]() { +#endif + size_t k = 0; + for (size_t j = simplices_prefix[i]; j < simplices_prefix[i+1]; ++j) + next_simplices[j] = next_simplices_vec[i][k++]; + + k = 0; + for (size_t j = columns_to_reduce_prefix[i]; j < columns_to_reduce_prefix[i+1]; ++j) + columns_to_reduce[j] = columns_to_reduce_vec[i][k++]; +#if defined(USE_TBB) + }); +#else + })); + handles.clear(); // force execution +#endif simplices.swap(next_simplices); @@ -474,8 +616,16 @@ template class ripser { << std::flush; #endif +#ifdef USING_SERIAL std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index()); +#elif defined(USE_TBB) + tbb::parallel_sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index()); +#else + boost::sort::parallel::parallel_sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index(), num_threads); +#endif #ifdef INDICATE_PROGRESS std::cerr << clear_line << std::flush; #endif @@ -514,7 +664,7 @@ template class ripser { #endif } - template diameter_entry_t pop_pivot(Column& column) { + diameter_entry_t pop_pivot(WorkingColumn& column) { diameter_entry_t pivot(-1); #ifdef USE_COEFFICIENTS while (!column.empty()) { @@ -539,16 +689,17 @@ template class ripser { #endif } - template diameter_entry_t get_pivot(Column& column) { + diameter_entry_t get_pivot(WorkingColumn& column) { diameter_entry_t result = pop_pivot(column); if (get_index(result) != -1) column.push(result); return result; } - template - diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, - Column& working_coboundary, const index_t& dim, - entry_hash_map& pivot_column_index) { + std::pair init_coboundary_and_get_pivot(const diameter_entry_t simplex, + WorkingColumn& working_coboundary, const index_t& dim, + entry_hash_map& pivot_column_index, Matrix& reduction_matrix, + const size_t index_column_to_reduce) { + thread_local static std::vector cofacet_entries; bool check_for_emergent_pair = true; cofacet_entries.clear(); simplex_coboundary_enumerator cofacets(simplex, dim, *this); @@ -557,20 +708,21 @@ template class ripser { if (get_diameter(cofacet) <= threshold) { cofacet_entries.push_back(cofacet); if (check_for_emergent_pair && (get_diameter(simplex) == get_diameter(cofacet))) { - if (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) - return cofacet; + if (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) { + if (pivot_column_index.insert({get_entry(cofacet), make_entry(index_column_to_reduce, get_coefficient(cofacet))}).second) + return { cofacet, true }; + } check_for_emergent_pair = false; } } } for (auto cofacet : cofacet_entries) working_coboundary.push(cofacet); - return get_pivot(working_coboundary); + return { get_pivot(working_coboundary), false }; } - template void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim, - Column& working_reduction_column, Column& working_coboundary) { - working_reduction_column.push(simplex); + WorkingColumn& working_reduction_column, WorkingColumn& working_coboundary, bool add_diagonal = true) { + if (add_diagonal) working_reduction_column.push(simplex); simplex_coboundary_enumerator cofacets(simplex, dim, *this); while (cofacets.has_next()) { diameter_entry_t cofacet = cofacets.next(); @@ -578,49 +730,220 @@ template class ripser { } } - template - void add_coboundary(compressed_sparse_matrix& reduction_matrix, + void add_coboundary(MatrixColumn* reduction_column_to_add, const std::vector& columns_to_reduce, const size_t index_column_to_add, const coefficient_t factor, const size_t& dim, - Column& working_reduction_column, Column& working_coboundary) { + WorkingColumn& working_reduction_column, WorkingColumn& working_coboundary, bool add_diagonal = true) { diameter_entry_t column_to_add(columns_to_reduce[index_column_to_add], factor); - add_simplex_coboundary(column_to_add, dim, working_reduction_column, working_coboundary); + add_simplex_coboundary(column_to_add, dim, working_reduction_column, working_coboundary, add_diagonal); - for (diameter_entry_t simplex : reduction_matrix.subrange(index_column_to_add)) { + if (!reduction_column_to_add) return; + for (diameter_entry_t simplex : *reduction_column_to_add) { set_coefficient(simplex, get_coefficient(simplex) * factor % modulus); add_simplex_coboundary(simplex, dim, working_reduction_column, working_coboundary); } } + MatrixColumn* generate_column(WorkingColumn&& working_reduction_column) { + if (working_reduction_column.empty()) + return nullptr; + + MatrixColumn column; + while (true) { + diameter_entry_t e = pop_pivot(working_reduction_column); + if (get_index(e) == -1) break; + assert(get_coefficient(e) > 0); + column.push_back(e); + } + + if (column.empty()) return nullptr; + return new MatrixColumn(std::move(column)); + } + + template + void foreach(const std::vector& columns_to_reduce, const F& f) { +#if defined(INDICATE_PROGRESS) && !defined(USE_SERIAL) + std::atomic progress(0); + std::cerr << clear_line << "Starting reduction of " << columns_to_reduce.size() << " columns" << std::endl; +#endif +#ifdef USE_SERIAL + int epoch_counter = 0; + mrzv::MemoryManager memory_manager(epoch_counter, 1); + + for (size_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size(); + ++index_column_to_reduce) { + size_t next = f(index_column_to_reduce, true, memory_manager); + assert(next == index_column_to_reduce); + } +#elif defined(USE_SHUFFLED_SERIAL) // meant for debugging + std::vector indices(mrzv::counting_iterator(0), mrzv::counting_iterator(columns_to_reduce.size())); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(indices.begin(), indices.end(), g); + + int epoch_counter = 0; + mrzv::MemoryManager memory_manager(epoch_counter, 1); + + size_t count = 0; + for (size_t index_column_to_reduce : indices) { + bool first = true; + size_t next; + do { + next = index_column_to_reduce; + index_column_to_reduce = f(next, first, memory_manager); + first = false; + } while (next != index_column_to_reduce); + + if (++count == 1024) { + memory_manager.quiescent(); + count = 0; + } + } +#elif defined(USE_PARALLEL_STL) + int epoch_counter = 0; + std::for_each(std::execution::par, mrzv::counting_iterator(0), mrzv::counting_iterator(columns_to_reduce.size()), + [&](size_t index_column_to_reduce) { + thread_local int count = 0; + thread_local mrzv::MemoryManager memory_manager(epoch_counter, std::thread::hardware_concurrency()); + + bool first = true; + size_t next; + do { + next = index_column_to_reduce; + index_column_to_reduce = f(next, first, memory_manager); + first = false; + } while (next != index_column_to_reduce); + + if (++count == 1024) { + memory_manager.quiescent(); + count = 0; + } + }); +#elif defined(USE_TBB) + int epoch_counter = 0; + tbb::parallel_for(0, columns_to_reduce.size(), + [&](size_t index_column_to_reduce) { + thread_local int count = 0; + thread_local mrzv::MemoryManager memory_manager(epoch_counter, num_threads); + + bool first = true; + size_t next; + do { + next = index_column_to_reduce; + index_column_to_reduce = f(next, first, memory_manager); + first = false; + } while (next != index_column_to_reduce); + + if (++count == 1024) { + memory_manager.quiescent(); + count = 0; + } + }); +#else // default: hand-rolled chunking + const size_t chunk_size = 1024; + size_t chunk = 0; + unsigned n_threads = num_threads; + + int epoch_counter = 0; + std::vector threads; + for (unsigned t = 0; t < n_threads; ++t) + threads.emplace_back([&]() { + mrzv::atomic_ref achunk(chunk); + + mrzv::MemoryManager memory_manager(epoch_counter, n_threads); + +#ifdef INDICATE_PROGRESS + int indicate_progress = progress++; + std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; +#endif + + size_t cur_chunk = achunk++; + while(cur_chunk * chunk_size < columns_to_reduce.size()) { + size_t from = cur_chunk * chunk_size; + size_t to = std::min((cur_chunk + 1) * chunk_size, columns_to_reduce.size()); +#ifdef INDICATE_PROGRESS + if (indicate_progress == 0) { + if (std::chrono::steady_clock::now() > next) { + std::cerr << clear_line << "reducing columns " << from << " - " << to + << "/" << columns_to_reduce.size() + << std::flush; + next = std::chrono::steady_clock::now() + time_step; + } + } +#endif + for (size_t idx = from; idx < to; ++idx) { + size_t index_column_to_reduce = idx; + bool first = true; + size_t next; + do { + next = index_column_to_reduce; + index_column_to_reduce = f(next, first, memory_manager); + first = false; + } while (next != index_column_to_reduce); + } + cur_chunk = achunk++; + memory_manager.quiescent(); + } + }); + + for (auto& thread : threads) + thread.join(); +#endif + } + + // debug only + diameter_entry_t get_column_pivot(MatrixColumn* column, + const std::vector& columns_to_reduce, + const size_t index, const coefficient_t factor, const size_t& dim) { + WorkingColumn tmp_working_reduction_column, tmp_working_coboundary; + add_coboundary(column, columns_to_reduce, index, + 1, dim, tmp_working_reduction_column, tmp_working_coboundary); + return get_pivot(tmp_working_coboundary); + } + void compute_pairs(const std::vector& columns_to_reduce, entry_hash_map& pivot_column_index, const index_t dim) { -#ifdef PRINT_PERSISTENCE_PAIRS +#if defined(PRINT_PERSISTENCE_PAIRS) std::cout << "persistence intervals in dim " << dim << ":" << std::endl; #endif - compressed_sparse_matrix reduction_matrix; + Matrix reduction_matrix(columns_to_reduce.size()); -#ifdef INDICATE_PROGRESS - std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; +#if defined(PRINT_PERSISTENCE_PAIRS) && !defined(USING_SERIAL) + // extra vector is a work-around inability to store floats in the hash_map + typedef hash_map entry_diameter_index_map; + std::atomic last_diameter_index { 0 }; + std::vector diameters(columns_to_reduce.size()); + entry_diameter_index_map deaths; + deaths.reserve(columns_to_reduce.size()); #endif - for (size_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size(); - ++index_column_to_reduce) { +#if defined(INDICATE_PROGRESS) && defined(USING_SERIAL) + std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; +#endif + foreach(columns_to_reduce, [&](index_t index_column_to_reduce, bool first, mrzv::MemoryManager& memory_manager) { diameter_entry_t column_to_reduce(columns_to_reduce[index_column_to_reduce], 1); value_t diameter = get_diameter(column_to_reduce); - reduction_matrix.append_column(); - - std::priority_queue, - greater_diameter_or_smaller_index> - working_reduction_column, working_coboundary; - - diameter_entry_t pivot = init_coboundary_and_get_pivot( - column_to_reduce, working_coboundary, dim, pivot_column_index); + WorkingColumn working_reduction_column, working_coboundary; + + diameter_entry_t pivot; + if (first) { + bool emergent; + std::tie(pivot,emergent) = init_coboundary_and_get_pivot( + column_to_reduce, working_coboundary, dim, pivot_column_index, reduction_matrix, index_column_to_reduce); + if (emergent) + return index_column_to_reduce; + } else { + MatrixColumn* reduction_column_to_reduce = reduction_matrix.column(index_column_to_reduce); + add_coboundary(reduction_column_to_reduce, columns_to_reduce, index_column_to_reduce, + 1, dim, working_reduction_column, working_coboundary, false); + pivot = get_pivot(working_coboundary); + } while (true) { -#ifdef INDICATE_PROGRESS +#if defined(INDICATE_PROGRESS) && defined(USING_SERIAL) if (std::chrono::steady_clock::now() > next) { std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")" @@ -631,50 +954,107 @@ template class ripser { if (get_index(pivot) != -1) { auto pair = pivot_column_index.find(get_entry(pivot)); if (pair != pivot_column_index.end()) { - entry_t other_pivot = pair->first; - index_t index_column_to_add = pair->second; - coefficient_t factor = - modulus - get_coefficient(pivot) * - multiplicative_inverse[get_coefficient(other_pivot)] % - modulus; - - add_coboundary(reduction_matrix, columns_to_reduce, index_column_to_add, - factor, dim, working_reduction_column, working_coboundary); - - pivot = get_pivot(working_coboundary); + entry_t old_entry_column_to_add; + index_t index_column_to_add; + MatrixColumn* reduction_column_to_add; + entry_t entry_column_to_add = pivot_column_index.value(pair); + do + { + old_entry_column_to_add = entry_column_to_add; + + index_column_to_add = get_index(entry_column_to_add); + + reduction_column_to_add = reduction_matrix.column(index_column_to_add); + + // this is a weaker check than in the original lockfree + // persistence paper (it would suffice that the pivot + // in reduction_column_to_add) hasn't changed, but + // given that matrix V is stored, rather than matrix R, + // it's easier to check that pivot_column_index entry + // we read hasn't changed + // TODO: think through memory orders, and whether we need to adjust anything + entry_column_to_add = pivot_column_index.value(pair); + } while (old_entry_column_to_add != entry_column_to_add); + + if (index_column_to_add < index_column_to_reduce) + { + // pivot to the left; usual reduction + coefficient_t factor = + modulus - get_coefficient(pivot) * + multiplicative_inverse[get_coefficient(entry_column_to_add)] % + modulus; + + add_coboundary(reduction_column_to_add, columns_to_reduce, index_column_to_add, + factor, dim, working_reduction_column, working_coboundary); + + pivot = get_pivot(working_coboundary); + } else { + // pivot to the right + MatrixColumn* new_column = generate_column(std::move(working_reduction_column)); + MatrixColumn* previous = reduction_matrix.exchange(index_column_to_reduce, new_column); + assert(get_index(get_column_pivot(new_column, columns_to_reduce, index_column_to_reduce, 1, dim)) == get_index(pivot)); + memory_manager.retire(previous); + + if (pivot_column_index.update(pair, entry_column_to_add, make_entry(index_column_to_reduce, get_coefficient(pivot)))) { + return index_column_to_add; + } else { + continue; // re-read the pair + } + } } else { -#ifdef PRINT_PERSISTENCE_PAIRS +#if defined(PRINT_PERSISTENCE_PAIRS) && defined(USING_SERIAL) value_t death = get_diameter(pivot); if (death > diameter * ratio) { #ifdef INDICATE_PROGRESS std::cerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << "," << death << ")" << std::endl; + std::cout << " [" << diameter << "," << death << ")" << (first ? "" : " <- correction") << std::endl; } #endif - pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); +#if defined(PRINT_PERSISTENCE_PAIRS) && !defined(USING_SERIAL) + size_t location = last_diameter_index++; + diameters[location] = get_diameter(pivot); + deaths.insert({get_entry(pivot), location}); +#endif + MatrixColumn* new_column = generate_column(std::move(working_reduction_column)); + MatrixColumn* previous = reduction_matrix.exchange(index_column_to_reduce, new_column); + memory_manager.retire(previous); + + assert(get_index(get_column_pivot(new_column, columns_to_reduce, index_column_to_reduce, 1, dim)) == get_index(pivot)); + + // equivalent to CAS in the original algorithm + auto insertion_result = pivot_column_index.insert({get_entry(pivot), make_entry(index_column_to_reduce, get_coefficient(pivot))}); + if (!insertion_result.second) // failed to insert, somebody got there before us, continue reduction + continue; // TODO: insertion_result.first is the new pair; could elide and extra atomic load - while (true) { - diameter_entry_t e = pop_pivot(working_reduction_column); - if (get_index(e) == -1) break; - assert(get_coefficient(e) > 0); - reduction_matrix.push_back(e); - } break; } } else { -#ifdef PRINT_PERSISTENCE_PAIRS + // TODO: these will need special attention, if output happens after the reduction, not during +#if defined(PRINT_PERSISTENCE_PAIRS) && defined(USING_SERIAL) #ifdef INDICATE_PROGRESS std::cerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << ", )" << std::endl; + std::cout << " [" << diameter << ", )" << (first ? "" : " <- correction") << std::endl; #endif break; } } - } -#ifdef INDICATE_PROGRESS + return index_column_to_reduce; + }); +#if defined(INDICATE_PROGRESS) std::cerr << clear_line << std::flush; +#endif +#if defined(PRINT_PERSISTENCE_PAIRS) && !defined(USING_SERIAL) + pivot_column_index.foreach([&](const typename entry_hash_map::value_type& x) { + auto it = deaths.find(x.first); + if (it == deaths.end()) return; + value_t death = diameters[it->second]; + value_t birth = get_diameter(columns_to_reduce[get_index(x.second)]); + if (death > birth * ratio) + std::cout << " [" << birth << "," << death << ")" << std::endl; + }); + // TODO: this doesn't print unpaired values #endif } @@ -744,8 +1124,8 @@ template <> class ripser::simplex_coboundary_enumerator const coefficient_t modulus; const sparse_distance_matrix& dist; const binomial_coeff_table& binomial_coeff; - std::vector::const_reverse_iterator>& neighbor_it; - std::vector::const_reverse_iterator>& neighbor_end; + static thread_local std::vector::const_reverse_iterator> neighbor_it; + static thread_local std::vector::const_reverse_iterator> neighbor_end; index_diameter_t neighbor; public: @@ -753,8 +1133,7 @@ template <> class ripser::simplex_coboundary_enumerator const ripser& _parent) : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), k(_dim + 1), vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist), - binomial_coeff(parent.binomial_coeff), neighbor_it(dist.neighbor_it), - neighbor_end(dist.neighbor_end) { + binomial_coeff(parent.binomial_coeff) { neighbor_it.clear(); neighbor_end.clear(); @@ -799,6 +1178,9 @@ template <> class ripser::simplex_coboundary_enumerator } }; +thread_local std::vector::const_reverse_iterator> ripser::simplex_coboundary_enumerator::neighbor_it; +thread_local std::vector::const_reverse_iterator> ripser::simplex_coboundary_enumerator::neighbor_end; + template <> std::vector ripser::get_edges() { std::vector edges; std::vector vertices(2); @@ -1007,6 +1389,10 @@ void print_usage_and_exit(int exit_code) { #ifdef USE_COEFFICIENTS << " --modulus

compute homology with coefficients in the prime field Z/pZ" << std::endl +#endif +#ifndef USING_SERIAL + << " --threads number of threads to use" + << std::endl #endif << " --ratio only show persistence pairs with death/birth ratio > r" << std::endl << std::endl; @@ -1022,6 +1408,7 @@ int main(int argc, char** argv) { value_t threshold = std::numeric_limits::max(); float ratio = 1; coefficient_t modulus = 2; + unsigned num_threads = 0; for (index_t i = 1; i < argc; ++i) { const std::string arg(argv[i]); @@ -1066,6 +1453,13 @@ int main(int argc, char** argv) { size_t next_pos; modulus = std::stol(parameter, &next_pos); if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); +#endif +#ifndef USING_SERIAL + } else if (arg == "--threads") { + std::string parameter = std::string(argv[++i]); + size_t next_pos; + num_threads = std::stol(parameter, &next_pos); + if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); #endif } else { if (filename) { print_usage_and_exit(-1); } @@ -1073,6 +1467,10 @@ int main(int argc, char** argv) { } } +#ifdef USE_TBB + tbb::task_scheduler_init init(num_threads); +#endif + std::ifstream file_stream(filename); if (filename && file_stream.fail()) { std::cerr << "couldn't open file " << filename << std::endl; @@ -1086,7 +1484,7 @@ int main(int argc, char** argv) { << dist.num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" << std::endl; - ripser(std::move(dist), dim_max, threshold, ratio, modulus) + ripser(std::move(dist), dim_max, threshold, ratio, modulus, num_threads) .compute_barcodes(); } else { compressed_lower_distance_matrix dist = @@ -1118,7 +1516,7 @@ int main(int argc, char** argv) { if (threshold >= max) { std::cout << "distance matrix with " << dist.size() << " points" << std::endl; ripser(std::move(dist), dim_max, threshold, ratio, - modulus) + modulus, num_threads) .compute_barcodes(); } else { std::cout << "sparse distance matrix with " << dist.size() << " points and " @@ -1126,7 +1524,7 @@ int main(int argc, char** argv) { << std::endl; ripser(sparse_distance_matrix(std::move(dist), threshold), - dim_max, threshold, ratio, modulus) + dim_max, threshold, ratio, modulus, num_threads) .compute_barcodes(); } exit(0);