diff --git a/Makefile b/Makefile
index ab410bd..648ba70 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,33 @@
build: ripser
-all: ripser ripser-coeff ripser-debug
+all: ripser ripser-coeff ripser-debug ripser-serial ripser-serial-debug
+FLAGS=-Iinclude -pthread
+FLAGS+=-MMD -MP
+
+-include *.d
ripser: ripser.cpp
- c++ -std=c++11 -Wall ripser.cpp -o ripser -Ofast -D NDEBUG
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS}
+
+ripser-serial: ripser.cpp
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_SERIAL
+
+ripser-serial-debug: ripser.cpp
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -g ${FLAGS} -DUSE_SERIAL
ripser-coeff: ripser.cpp
- c++ -std=c++11 -Wall ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D USE_COEFFICIENTS
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG -D USE_COEFFICIENTS ${FLAGS}
ripser-debug: ripser.cpp
- c++ -std=c++11 -Wall ripser.cpp -o ripser-debug -g
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -g ${FLAGS} # -fsanitize=thread -fsanitize=undefined
+
+ripser-tbb: ripser.cpp
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_TBB -DUSE_TBB_HASHMAP -ltbb
+ripser-pstl: ripser.cpp
+ c++ -std=c++11 -Wall ripser.cpp -o $@ -Ofast -D NDEBUG ${FLAGS} -DUSE_PARALLEL_STL -DUSE_TBB_HASHMAP -ltbb -std=c++17
clean:
- rm -f ripser ripser-coeff ripser-debug
+ rm -f ripser ripser-*
diff --git a/README.md b/README.md
index 51029bf..b0725ed 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
Copyright © 2015–2019 [Ulrich Bauer].
+This branch implements a version of the algorithm described in
+[Towards Lockfree Persistent Homology](https://mrzv.org/publications/lockfree-persistence/).
### Description
@@ -128,4 +130,4 @@ Ripser is licensed under the [MIT] license (`COPYING.txt`), with an extra clause
[Perseus]:
[GUDHI]:
[sparsehash]:
-[MIT]:
\ No newline at end of file
+[MIT]:
diff --git a/include/atomic_ref.hpp b/include/atomic_ref.hpp
new file mode 100644
index 0000000..1326489
--- /dev/null
+++ b/include/atomic_ref.hpp
@@ -0,0 +1,172 @@
+#pragma once
+
+// Atuhor: Dmitriy Morozov
+// Version 2020-03-15
+
+#if defined(USE_SERIAL_ATOMIC_REF)
+#include "atomic_ref_serial.hpp"
+#else
+
+#include
+#include
+#include
+
+namespace mrzv
+{
+
+template
+class atomic_ref;
+
+template
+class atomic_ref_base
+{
+ public:
+ using value_type = T;
+ using difference_type = value_type;
+
+ public:
+
+ explicit atomic_ref_base(T& obj): obj_(&obj) {}
+ atomic_ref_base(const atomic_ref_base& ref) noexcept =default;
+
+ T operator=(T desired) const noexcept { store(desired); return desired; }
+ atomic_ref_base&
+ operator=(const atomic_ref_base&) =delete;
+
+ bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(T), obj_); }
+
+ void store(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { __atomic_store_n(obj_, desired, atomic_memory_order(order)); }
+ T load(std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_load_n(obj_, atomic_memory_order(order)); }
+
+ operator T() const noexcept { return load(); }
+
+ T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_exchange_n(obj_, desired, atomic_memory_order(order)); }
+
+ bool compare_exchange_weak(T& expected, T desired,
+ std::memory_order success,
+ std::memory_order failure) const noexcept
+ { return __atomic_compare_exchange_n(obj_, &expected, desired, true, atomic_memory_order(success), atomic_memory_order(failure)); }
+
+ bool compare_exchange_weak(T& expected, T desired,
+ std::memory_order order = std::memory_order_seq_cst ) const noexcept
+ { return compare_exchange_weak(expected, desired, order, order); } // TODO: not quite this simple
+
+ bool compare_exchange_strong(T& expected, T desired,
+ std::memory_order success,
+ std::memory_order failure) const noexcept
+ { return __atomic_compare_exchange_n(obj_, &expected, desired, false, atomic_memory_order(success), atomic_memory_order(failure)); }
+
+
+ bool compare_exchange_strong(T& expected, T desired,
+ std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return compare_exchange_strong(expected, desired, order, order); } // TODO: not quite this simple
+
+ // would be great to have wait and notify, but unclear how to implement them efficiently with __atomic
+ //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const noexcept;
+ //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const volatile noexcept;
+ //void notify_one() const noexcept;
+ //void notify_one() const volatile noexcept;
+ //void notify_all() const noexcept;
+ //void notify_all() const volatile noexcept;
+
+ protected:
+ int atomic_memory_order(std::memory_order order) const
+ {
+ if (order == std::memory_order_relaxed)
+ return __ATOMIC_RELAXED;
+ else if (order == std::memory_order_consume)
+ return __ATOMIC_CONSUME;
+ else if (order == std::memory_order_acquire)
+ return __ATOMIC_ACQUIRE;
+ else if (order == std::memory_order_release)
+ return __ATOMIC_RELEASE;
+ else if (order == std::memory_order_acq_rel)
+ return __ATOMIC_ACQ_REL;
+ else if (order == std::memory_order_seq_cst)
+ return __ATOMIC_SEQ_CST;
+ assert(0);
+ return __ATOMIC_RELAXED;
+ }
+
+ protected:
+ T* obj_;
+};
+
+template
+class atomic_ref::value>::type>: public atomic_ref_base
+{
+ public:
+ using Parent = atomic_ref_base;
+ using value_type = typename Parent::value_type;
+ using difference_type = typename Parent::difference_type;
+
+ public:
+ using Parent::Parent;
+
+ value_type operator++() const noexcept { return fetch_add(1) + 1; }
+ value_type operator++(int) const noexcept { return fetch_add(1); }
+ value_type operator--() const noexcept { return fetch_sub(1) - 1; }
+ value_type operator--(int) const noexcept { return fetch_sub(1); }
+
+ T operator+=( T arg ) const noexcept { return fetch_add(arg) + arg; }
+ T operator-=( T arg ) const noexcept { return fetch_sub(arg) - arg; }
+ T operator&=( T arg ) const noexcept { return fetch_and(arg) & arg; }
+ T operator|=( T arg ) const noexcept { return fetch_or(arg) | arg; }
+ T operator^=( T arg ) const noexcept { return fetch_xor(arg) ^ arg; }
+
+ T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_add(obj_, arg, atomic_memory_order(order)); }
+
+ T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_sub(obj_, arg, atomic_memory_order(order)); }
+
+ T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_and(obj_, arg, atomic_memory_order(order)); }
+
+ T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_or(obj_, arg, atomic_memory_order(order)); }
+
+ T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_xor(obj_, arg, atomic_memory_order(order)); }
+
+ protected:
+ using Parent::obj_;
+ using Parent::atomic_memory_order;
+};
+
+template
+class atomic_ref: public atomic_ref_base
+{
+ public:
+ using Parent = atomic_ref_base;
+ using value_type = typename Parent::value_type;
+ using difference_type = typename Parent::difference_type;
+
+ public:
+ using Parent::Parent;
+
+ value_type operator++() const noexcept { return fetch_add(1) + 1; }
+ value_type operator++(int) const noexcept { return fetch_add(1); }
+ value_type operator--() const noexcept { return fetch_sub(1) - 1; }
+ value_type operator--(int) const noexcept { return fetch_sub(1); }
+
+ T* operator+=(std::ptrdiff_t arg) const noexcept { return fetch_add(arg) + arg; }
+ T* operator-=(std::ptrdiff_t arg) const noexcept { return fetch_sub(arg) - arg; }
+
+ T* fetch_add(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_add(obj_, arg * sizeof(T), atomic_memory_order(order)); }
+
+ T* fetch_sub(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return __atomic_fetch_sub(obj_, arg * sizeof(T), atomic_memory_order(order)); }
+
+ protected:
+ using Parent::obj_;
+ using Parent::atomic_memory_order;
+};
+
+}
+
+#endif // USE_SERIAL_ATOMIC_REF
diff --git a/include/atomic_ref_serial.hpp b/include/atomic_ref_serial.hpp
new file mode 100644
index 0000000..beff4cf
--- /dev/null
+++ b/include/atomic_ref_serial.hpp
@@ -0,0 +1,148 @@
+#pragma once
+
+#pragma message "Using serial atomic_ref"
+
+// Atuhor: Dmitriy Morozov
+// Version 2020-03-15
+
+#include
+#include
+#include
+
+namespace mrzv
+{
+
+template
+class atomic_ref;
+
+template
+class atomic_ref_base
+{
+ public:
+ using value_type = T;
+ using difference_type = value_type;
+
+ public:
+
+ explicit atomic_ref_base(T& obj): obj_(&obj) {}
+ atomic_ref_base(const atomic_ref_base& ref) noexcept =default;
+
+ T operator=(T desired) const noexcept { store(desired); return desired; }
+ atomic_ref_base&
+ operator=(const atomic_ref_base&) =delete;
+
+ bool is_lock_free() const noexcept { return true; }
+
+ void store(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { *obj_ = desired; }
+ T load(std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { return *obj_; }
+
+ operator T() const noexcept { return load(); }
+
+ T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { std::swap(*obj_, desired); return desired; }
+
+ // Technically, not quite CAS, but simplified for the only meaningful scenario I can think of
+ bool compare_exchange_weak(T& expected, T desired,
+ std::memory_order,
+ std::memory_order) const noexcept
+ { assert(*obj_ == expected); *obj_ = desired; return true; }
+
+ bool compare_exchange_weak(T& expected, T desired,
+ std::memory_order order = std::memory_order_seq_cst ) const noexcept
+ { assert(*obj_ == expected); *obj_ = desired; return true; }
+
+ bool compare_exchange_strong(T& expected, T desired,
+ std::memory_order,
+ std::memory_order) const noexcept
+ { assert(*obj_ == expected); *obj_ = desired; return true; }
+
+
+ bool compare_exchange_strong(T& expected, T desired,
+ std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { assert(*obj_ == expected); *obj_ = desired; return true; }
+
+ // would be great to have wait and notify, but unclear how to implement them efficiently with __atomic
+ //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const noexcept;
+ //void wait(T old, std::memory_order order = std::memory_order::seq_cst) const volatile noexcept;
+ //void notify_one() const noexcept;
+ //void notify_one() const volatile noexcept;
+ //void notify_all() const noexcept;
+ //void notify_all() const volatile noexcept;
+
+ protected:
+ T* obj_;
+};
+
+template
+class atomic_ref::value>::type>: public atomic_ref_base
+{
+ public:
+ using Parent = atomic_ref_base;
+ using value_type = typename Parent::value_type;
+ using difference_type = typename Parent::difference_type;
+
+ public:
+ using Parent::Parent;
+
+ value_type operator++() const noexcept { return fetch_add(1) + 1; }
+ value_type operator++(int) const noexcept { return fetch_add(1); }
+ value_type operator--() const noexcept { return fetch_sub(1) - 1; }
+ value_type operator--(int) const noexcept { return fetch_sub(1); }
+
+ T operator+=( T arg ) const noexcept { return fetch_add(arg) + arg; }
+ T operator-=( T arg ) const noexcept { return fetch_sub(arg) - arg; }
+ T operator&=( T arg ) const noexcept { return fetch_and(arg) & arg; }
+ T operator|=( T arg ) const noexcept { return fetch_or(arg) | arg; }
+ T operator^=( T arg ) const noexcept { return fetch_xor(arg) ^ arg; }
+
+ T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T result = *obj_; *obj_ += arg; return result; }
+
+ T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T result = *obj_; *obj_ -= arg; return result; }
+
+ T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T result = *obj_; *obj_ &= arg; return result; }
+
+ T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T result = *obj_; *obj_ |= arg; return result; }
+
+ T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T result = *obj_; *obj_ ^= arg; return result; }
+
+ protected:
+ using Parent::obj_;
+};
+
+template
+class atomic_ref: public atomic_ref_base
+{
+ public:
+ using Parent = atomic_ref_base;
+ using value_type = typename Parent::value_type;
+ using difference_type = typename Parent::difference_type;
+
+ public:
+ using Parent::Parent;
+
+ value_type operator++() const noexcept { return fetch_add(1) + 1; }
+ value_type operator++(int) const noexcept { return fetch_add(1); }
+ value_type operator--() const noexcept { return fetch_sub(1) - 1; }
+ value_type operator--(int) const noexcept { return fetch_sub(1); }
+
+ T* operator+=(std::ptrdiff_t arg) const noexcept { return fetch_add(arg) + arg; }
+ T* operator-=(std::ptrdiff_t arg) const noexcept { return fetch_sub(arg) - arg; }
+
+ T* fetch_add(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T* result = *obj_; *obj_ += arg; return result; }
+
+ T* fetch_sub(std::ptrdiff_t arg, std::memory_order order = std::memory_order_seq_cst) const noexcept
+ { T* result = *obj_; *obj_ -= arg; return result; }
+
+ protected:
+ using Parent::obj_;
+};
+
+}
diff --git a/include/boost/sort/parallel/detail/bis/backbone.hpp b/include/boost/sort/parallel/detail/bis/backbone.hpp
new file mode 100755
index 0000000..b56c3e8
--- /dev/null
+++ b/include/boost/sort/parallel/detail/bis/backbone.hpp
@@ -0,0 +1,223 @@
+//----------------------------------------------------------------------------
+/// @file backbone.hpp
+/// @brief This file constains the class backbone, which is part of the
+/// block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_BACKBONE_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BIS_BACKBONE_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace bis
+{
+
+//---------------------------------------------------------------------------
+// USING SENTENCES
+//---------------------------------------------------------------------------
+using boost::sort::parallel::detail::util::stack_cnc;
+
+///---------------------------------------------------------------------------
+/// @struct backbone
+/// @brief This contains all the information shared betwen the classes of the
+/// block indirect sort algorithm
+
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+struct backbone
+{
+ //-------------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //-------------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+ typedef std::atomic< uint32_t > atomic_t;
+ typedef range< size_t > range_pos;
+ typedef range< Iter_t > range_it;
+ typedef range< value_t * > range_buf;
+ typedef std::function< void(void) > function_t;
+ typedef block< Block_size, Iter_t > block_t;
+
+ //------------------------------------------------------------------------
+ // V A R I A B L E S
+ //------------------------------------------------------------------------
+ // range with all the element to sort
+ util::range< Iter_t > global_range;
+
+ // index vector of block_pos elements
+ std::vector< block_pos > index;
+
+ // Number of elements to sort
+ size_t nelem;
+
+ // Number of blocks to sort
+ size_t nblock;
+
+ // Number of elements in the last block (tail)
+ size_t ntail;
+
+ // object for to compare two elements
+ Compare cmp;
+
+ // range of elements of the last block (tail)
+ range_it range_tail;
+
+ // thread local varible. It is a pointer to the buffer
+ static thread_local value_t *buf;
+
+ // concurrent stack where store the function_t elements
+ stack_cnc< function_t > works;
+
+ // global indicator of error
+ bool error;
+ //
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+ backbone (Iter_t first, Iter_t last, Compare comp);
+
+ //------------------------------------------------------------------------
+ // function : get_block
+ /// @brief obtain the block in the position pos
+ /// @param pos : position of the range
+ /// @return block required
+ //------------------------------------------------------------------------
+ block_t get_block (size_t pos) const
+ {
+ return block_t (global_range.first + (pos * Block_size));
+ };
+ //-------------------------------------------------------------------------
+ // function : get_range
+ /// @brief obtain the range in the position pos
+ /// @param pos : position of the range
+ /// @return range required
+ //-------------------------------------------------------------------------
+ range_it get_range (size_t pos) const
+ {
+ Iter_t it1 = global_range.first + (pos * Block_size);
+ Iter_t it2 =
+ (pos == (nblock - 1)) ? global_range.last : it1 + Block_size;
+ return range_it (it1, it2);
+ };
+ //-------------------------------------------------------------------------
+ // function : get_range_buf
+ /// @brief obtain the auxiliary buffer of the thread
+ //-------------------------------------------------------------------------
+ range_buf get_range_buf ( ) const
+ {
+ return range_buf (buf, buf + Block_size);
+ };
+
+ //-------------------------------------------------------------------------
+ // function : exec
+ /// @brief Initialize the thread local buffer with the ptr_buf pointer,
+ /// and begin with the execution of the functions stored in works
+ //
+ /// @param ptr_buf : Pointer to the memory assigned to the thread_local
+ /// buffer
+ /// @param counter : atomic counter for to invoke to the exec function
+ /// with only 1 parameter
+ //-------------------------------------------------------------------------
+ void exec (value_t *ptr_buf, atomic_t &counter)
+ {
+ buf = ptr_buf;
+ exec (counter);
+ };
+
+ void exec (atomic_t &counter);
+
+//---------------------------------------------------------------------------
+}; // end struct backbone
+//---------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+// initialization of the thread_local pointer to the auxiliary buffer
+template < uint32_t Block_size, class Iter_t, class Compare >
+thread_local typename std::iterator_traits< Iter_t >::value_type
+ *backbone< Block_size, Iter_t, Compare >::buf = nullptr;
+
+//------------------------------------------------------------------------
+// function : backbone
+/// @brief constructor of the class
+//
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+backbone< Block_size, Iter_t, Compare >
+ ::backbone (Iter_t first, Iter_t last, Compare comp)
+ : global_range (first, last), cmp (comp), error (false)
+{
+ assert ((last - first) >= 0);
+ if (first == last) return; // nothing to do
+
+ nelem = size_t (last - first);
+ nblock = (nelem + Block_size - 1) / Block_size;
+ ntail = (nelem % Block_size);
+ index.reserve (nblock + 1);
+
+ for (size_t i = 0; i < nblock; ++i) index.emplace_back (block_pos (i));
+
+ range_tail.first =
+ (ntail == 0) ? last : (first + ((nblock - 1) * Block_size));
+ range_tail.last = last;
+};
+//
+//-------------------------------------------------------------------------
+// function : exec
+/// @brief execute the function_t stored in works, until counter is zero
+//
+/// @param counter : atomic counter. When 0 exits the function
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+void backbone< Block_size, Iter_t, Compare >::exec (atomic_t &counter)
+{
+ function_t func_exec;
+ while (util::atomic_read (counter) != 0) {
+ if (works.pop_move_back (func_exec))
+ func_exec ( );
+ else
+ std::this_thread::yield ( );
+ };
+};
+//
+//****************************************************************************
+}; // End namespace bis
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/detail/bis/block.hpp b/include/boost/sort/parallel/detail/bis/block.hpp
new file mode 100755
index 0000000..adc68b1
--- /dev/null
+++ b/include/boost/sort/parallel/detail/bis/block.hpp
@@ -0,0 +1,186 @@
+//----------------------------------------------------------------------------
+/// @file block.hpp
+/// @brief This file contains the internal data structures used in the
+/// block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_BLOCK_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BIS_BLOCK_HPP
+
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace bis
+{
+//---------------------------------------------------------------------------
+// USING SENTENCES
+//---------------------------------------------------------------------------
+using boost::sort::parallel::detail::util::range;
+//
+//---------------------------------------------------------------------------
+/// @struct block_pos
+/// @brief represent a pair of values, a position represented as an unsigned
+/// variable ( position ), and a bool variable ( side ). They are packed
+/// in a size_t variable. The Least Significant Bit is the bool variable,
+/// and the others bits are the position
+//----------------------------------------------------------------------------
+class block_pos
+{
+ //------------------------------------------------------------------------
+ // VARIABLES
+ //-----------------------------------------------------------------------
+ size_t num; // number which store a position and a bool side
+
+ public:
+ //----------------------------- FUNCTIONS ------------------------------
+ block_pos (void) : num (0){};
+ //
+ //-------------------------------------------------------------------------
+ // function : block_pos
+ /// @brief constructor from a position and a side
+ /// @param position : position to sotre
+ /// @param side : side to store
+ //-------------------------------------------------------------------------
+ block_pos (size_t position, bool side = false)
+ {
+ num = (position << 1) + ((side) ? 1 : 0);
+ };
+ //
+ //-------------------------------------------------------------------------
+ // function : pos
+ /// @brief obtain the position stored inside the block_pos
+ /// @return position
+ //-------------------------------------------------------------------------
+ size_t pos (void) const { return (num >> 1); };
+ //
+ //-------------------------------------------------------------------------
+ // function : pos
+ /// @brief store a position inside the block_pos
+ /// @param position : value to store
+ //-------------------------------------------------------------------------
+ void set_pos (size_t position) { num = (position << 1) + (num & 1); };
+ //
+ //-------------------------------------------------------------------------
+ // function : side
+ /// @brief obtain the side stored inside the block_pos
+ /// @return bool value
+ //-------------------------------------------------------------------------
+ bool side (void) const { return ((num & 1) != 0); };
+ //
+ //-------------------------------------------------------------------------
+ // function : side
+ /// @brief store a bool value the block_pos
+ /// @param sd : bool value to store
+ //-------------------------------------------------------------------------
+ void set_side (bool sd) { num = (num & ~1) + ((sd) ? 1 : 0); };
+}; // end struct block_pos
+
+//
+//---------------------------------------------------------------------------
+/// @struct block
+/// @brief represent a group of Block_size contiguous elements, beginning
+/// with the pointed by first
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t >
+struct block
+{
+ //----------------------------------------------------------------------
+ // VARIABLES
+ //----------------------------------------------------------------------
+ Iter_t first; // iterator to the first element of the block
+
+ //-------------------------------------------------------------------------
+ // function : block
+ /// @brief constructor from an iterator to the first element of the block
+ /// @param it : iterator to the first element of the block
+ //-------------------------------------------------------------------------
+ block (Iter_t it) : first (it){};
+
+ //-------------------------------------------------------------------------
+ // function : get_range
+ /// @brief convert a block in a range
+ /// @return range
+ //-------------------------------------------------------------------------
+ range< Iter_t > get_range (void)
+ {
+ return range_it (first, first + Block_size);
+ };
+
+}; // end struct block
+
+//
+//-------------------------------------------------------------------------
+// function : compare_block
+/// @brief compare two blocks using the content of the pointed by first
+/// @param block1 : first block to compare
+/// @param block2 : second block to compare
+/// @param cmp : comparison operator
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+bool compare_block (block< Block_size, Iter_t > block1,
+ block< Block_size, Iter_t > block2,
+ Compare cmp = Compare ( ))
+{
+ return cmp (*block1.first, *block2.first);
+};
+//
+///---------------------------------------------------------------------------
+/// @struct compare_block_pos
+/// @brief This is a object for to compare two block_pos objects
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+struct compare_block_pos
+{
+ //-----------------------------------------------------------------------
+ // VARIABLES
+ //-----------------------------------------------------------------------
+ Iter_t global_first; // iterator to the first element to sort
+ Compare comp; // comparison object for to compare two elements
+
+ //-------------------------------------------------------------------------
+ // function : compare_block_pos
+ /// @brief constructor
+ /// @param g_first : itertor to the first element to sort
+ /// @param cmp : comparison operator
+ //-------------------------------------------------------------------------
+ compare_block_pos (Iter_t g_first, Compare cmp)
+ : global_first (g_first), comp (cmp){};
+ //
+ //-------------------------------------------------------------------------
+ // function : operator ()
+ /// @brief compare two blocks using the content of the pointed by
+ /// global_first
+ /// @param block_pos1 : first block to compare
+ /// @param block_pos2 : second block to compare
+ //-------------------------------------------------------------------------
+ bool operator( ) (block_pos block_pos1, block_pos block_pos2) const
+ {
+ return comp (*(global_first + (block_pos1.pos ( ) * Block_size)),
+ *(global_first + (block_pos2.pos ( ) * Block_size)));
+ };
+
+}; // end struct compare_block_pos
+
+//****************************************************************************
+}; // End namespace bis
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/bis/merge_blocks.hpp b/include/boost/sort/parallel/detail/bis/merge_blocks.hpp
new file mode 100755
index 0000000..7210cfe
--- /dev/null
+++ b/include/boost/sort/parallel/detail/bis/merge_blocks.hpp
@@ -0,0 +1,407 @@
+//----------------------------------------------------------------------------
+/// @file merge_blocks.hpp
+/// @brief contains the class merge_blocks, which is part of the
+/// block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_MERGE_BLOCKS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BIS_MERGE_BLOCKS_HPP
+
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace bis
+{
+//
+///---------------------------------------------------------------------------
+/// @struct merge_blocks
+/// @brief This class merge the blocks. The blocks to merge are defined by two
+/// ranges of positions in the index of the backbone
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+struct merge_blocks
+{
+ //-----------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //-----------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+ typedef std::atomic< uint32_t > atomic_t;
+ typedef util::range< size_t > range_pos;
+ typedef util::range< Iter_t > range_it;
+ typedef util::range< value_t * > range_buf;
+ typedef std::function< void(void) > function_t;
+ typedef backbone< Block_size, Iter_t, Compare > backbone_t;
+ typedef compare_block_pos< Block_size, Iter_t, Compare >
+ compare_block_pos_t;
+
+ //------------------------------------------------------------------------
+ // V A R I A B L E S
+ //------------------------------------------------------------------------
+ // Object with the elements to sort and all internal data structures of the
+ // algorithm
+ backbone_t &bk;
+ //
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+ merge_blocks (backbone_t &bkb, size_t pos_index1, size_t pos_index2,
+ size_t pos_index3);
+
+ void tail_process (std::vector< block_pos > &vblkpos1,
+ std::vector< block_pos > &vblkpos2);
+
+ void cut_range (range_pos rng);
+
+ void merge_range_pos (range_pos rng);
+
+ void extract_ranges (range_pos range_input);
+ //
+ //------------------------------------------------------------------------
+ // function : function_merge_range_pos
+ /// @brief create a function_t with a call to merge_range_pos, and insert
+ /// in the stack of the backbone
+ //
+ /// @param rng_input : range of positions of blocks in the index to merge
+ /// @param son_counter : atomic variable which is decremented when finish
+ /// the function. This variable is used for to know
+ /// when are finished all the function_t created
+ /// inside an object
+ /// @param error : global indicator of error.
+ ///
+ //------------------------------------------------------------------------
+ void function_merge_range_pos (const range_pos &rng_input,
+ atomic_t &counter, bool &error)
+ {
+ util::atomic_add (counter, 1);
+ function_t f1 = [this, rng_input, &counter, &error]( ) -> void {
+ if (not error) {
+ try
+ {
+ this->merge_range_pos (rng_input);
+ }
+ catch (std::bad_alloc &ba)
+ {
+ error = true;
+ };
+ }
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ };
+ //
+ //------------------------------------------------------------------------
+ // function : function_cut_range
+ /// @brief create a function_t with a call to cut_range, and inser in
+ /// the stack of the backbone
+ //
+ /// @param rng_input : range of positions in the index to cut
+ /// @param counter : atomic variable which is decremented when finish
+ /// the function. This variable is used for to know
+ /// when are finished all the function_t created
+ /// inside an object
+ /// @param error : global indicator of error.
+ //------------------------------------------------------------------------
+ void function_cut_range (const range_pos &rng_input, atomic_t &counter,
+ bool &error)
+ {
+ util::atomic_add (counter, 1);
+ function_t f1 = [this, rng_input, &counter, &error]( ) -> void {
+ if (not error) {
+ try
+ {
+ this->cut_range (rng_input);
+ }
+ catch (std::bad_alloc &)
+ {
+ error = true;
+ };
+ }
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ };
+
+//----------------------------------------------------------------------------
+}; // end struct merge_blocks
+//----------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+// function : merge_blocks
+/// @brief make the indirect merge of the two range_pos defined by their index
+/// position [pos_index1, pos_index2 ) and [ pos_index2, pos_index3 )
+//
+/// @param bkb : backbone with all the data to sort , and the internal data
+/// structures of the algorithm
+/// @param pos_index1 : first position of the first range in the index
+/// @param pos_index2 : last position of the first range and first position
+/// of the second range in the index
+/// @param pos_index3 : last position of the second range in the index
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::merge_blocks (backbone_t &bkb, size_t pos_index1,
+ size_t pos_index2, size_t pos_index3) : bk (bkb)
+{
+ size_t nblock1 = pos_index2 - pos_index1;
+ size_t nblock2 = pos_index3 - pos_index2;
+ if (nblock1 == 0 or nblock2 == 0) return;
+
+ //-----------------------------------------------------------------------
+ // Merging of the two intervals
+ //-----------------------------------------------------------------------
+ std::vector< block_pos > vpos1, vpos2;
+ vpos1.reserve (nblock1 + 1);
+ vpos2.reserve (nblock2 + 1);
+
+ for (size_t i = pos_index1; i < pos_index2; ++i) {
+ vpos1.emplace_back (bk.index[i].pos ( ), true);
+ };
+
+ for (size_t i = pos_index2; i < pos_index3; ++i) {
+ vpos2.emplace_back (bk.index[i].pos ( ), false);
+ };
+ //-------------------------------------------------------------------
+ // tail process
+ //-------------------------------------------------------------------
+ if (vpos2.back ( ).pos ( ) == (bk.nblock - 1) and
+ bk.range_tail.first != bk.range_tail.last)
+ {
+ tail_process (vpos1, vpos2);
+ nblock1 = vpos1.size ( );
+ nblock2 = vpos2.size ( );
+ };
+
+ compare_block_pos_t cmp_blk (bk.global_range.first, bk.cmp);
+ if (bk.error) return;
+ util::full_merge (vpos1.begin ( ), vpos1.end ( ), vpos2.begin ( ),
+ vpos2.end ( ), bk.index.begin ( ) + pos_index1, cmp_blk);
+ if (bk.error) return;
+ // Extracting the ranges for to merge the elements
+ extract_ranges (range_pos (pos_index1, pos_index1 + nblock1 + nblock2));
+};
+
+//
+//-------------------------------------------------------------------------
+// function : tail_process
+/// @brief make the process when the second vector of block_pos to merge is
+/// the last, and have an incomplete block ( tail)
+//
+/// @param vblkpos1 : first vector of block_pos elements to merge
+/// @param vblkpos2 : second vector of block_pos elements to merge
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::tail_process (std::vector< block_pos > &vblkpos1,
+ std::vector< block_pos > &vblkpos2)
+{
+ if (vblkpos1.size ( ) == 0 or vblkpos2.size ( ) == 0) return;
+
+ vblkpos2.pop_back ( );
+
+ size_t posback1 = vblkpos1.back ( ).pos ( );
+ range_it range_back1 = bk.get_range (posback1);
+
+ if (util::is_mergeable (range_back1, bk.range_tail, bk.cmp)) {
+ util::in_place_merge_uncontiguous (range_back1, bk.range_tail,
+ bk.get_range_buf ( ), bk.cmp);
+ if (vblkpos1.size ( ) > 1) {
+ size_t pos_aux = vblkpos1[vblkpos1.size ( ) - 2].pos ( );
+ range_it range_aux = bk.get_range (pos_aux);
+
+ if (util::is_mergeable (range_aux, range_back1, bk.cmp)) {
+ vblkpos2.emplace_back (posback1, false);
+ vblkpos1.pop_back ( );
+ };
+ };
+ };
+};
+//
+//-------------------------------------------------------------------------
+// function : cut_range
+/// @brief when the rng_input is greather than Group_size, this function divide
+/// it in several parts creating function_t elements, which are inserted
+/// in the concurrent stack of the backbone
+//
+/// @param rng_input : range to divide
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::cut_range (range_pos rng_input)
+{
+ if (rng_input.size ( ) < Group_size) {
+ merge_range_pos (rng_input);
+ return;
+ };
+
+ atomic_t counter (0);
+ size_t npart = (rng_input.size ( ) + Group_size - 1) / Group_size;
+ size_t size_part = rng_input.size ( ) / npart;
+
+ size_t pos_ini = rng_input.first;
+ size_t pos_last = rng_input.last;
+
+ while (pos_ini < pos_last) {
+ size_t pos = pos_ini + size_part;
+ while (pos < pos_last and
+ bk.index[pos - 1].side ( ) == bk.index[pos].side ( ))
+ {
+ ++pos;
+ };
+ if (pos < pos_last) {
+ in_place_merge_uncontiguous (
+ bk.get_range (bk.index[pos - 1].pos ( )),
+ bk.get_range (bk.index[pos].pos ( )), bk.get_range_buf ( ),
+ bk.cmp);
+ }
+ else
+ pos = pos_last;
+ if ((pos - pos_ini) > 1) {
+ range_pos rng_aux (pos_ini, pos);
+ function_merge_range_pos (rng_aux, counter, bk.error);
+ };
+ pos_ini = pos;
+ };
+ bk.exec (counter); // wait until finish all the ranges
+};
+
+//
+//-------------------------------------------------------------------------
+// function : merge_range_pos
+/// @brief make the indirect merge of the blocks inside the rng_input
+//
+/// @param rng_input : range of positions of the blocks to merge
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::merge_range_pos (range_pos rng_input)
+{
+ if (rng_input.size ( ) < 2) return;
+ range_buf rbuf = bk.get_range_buf ( );
+
+ range_it rng_prev = bk.get_range (bk.index[rng_input.first].pos ( ));
+ init_move (rbuf, rng_prev);
+ range_it rng_posx (rng_prev);
+
+ for (size_t posx = rng_input.first + 1; posx != rng_input.last; ++posx) {
+ rng_posx = bk.get_range (bk.index[posx].pos ( ));
+ util::merge_flow (rng_prev, rbuf, rng_posx, bk.cmp);
+ rng_prev = rng_posx;
+
+ };
+ init_move (rng_posx, rbuf);
+};
+
+//
+//-------------------------------------------------------------------------
+// function : extract_ranges
+/// @brief from a big range of positions of blocks in the index. Examine which
+/// are mergeable, and generate a couple of ranges for to be merged.
+/// With the ranges obtained generate function_t elements and are
+/// inserted in the concurrent stack.
+/// When the range obtained is smaller than Group_size, generate a
+/// function_t calling to merge_range_pos, when is greater, generate a
+/// function_t calling to cut_range
+//
+/// @param rpos range_input : range of the position in the index, where must
+/// extract the ranges to merge
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::extract_ranges (range_pos range_input)
+{
+ if (range_input.size ( ) < 2) return;
+ atomic_t counter (0);
+
+ // The names with x are positions of the index
+ size_t posx_ini = range_input.first;
+ block_pos bp_posx_ini = bk.index[posx_ini];
+
+ range_it rng_max = bk.get_range (bp_posx_ini.pos ( ));
+ bool side_max = bp_posx_ini.side ( );
+
+ block_pos bp_posx;
+ range_it rng_posx = rng_max;
+ bool side_posx = side_max;
+
+ for (size_t posx = posx_ini + 1; posx <= range_input.last; ++posx) {
+ bool final = (posx == range_input.last);
+ bool mergeable = false;
+
+ if (not final) {
+ bp_posx = bk.index[posx];
+ rng_posx = bk.get_range (bp_posx.pos ( ));
+ side_posx = bp_posx.side ( );
+ mergeable = (side_max != side_posx and
+ is_mergeable (rng_max, rng_posx, bk.cmp));
+ };
+ if (bk.error) return;
+ if (final or not mergeable) {
+ range_pos rp_final (posx_ini, posx);
+ if (rp_final.size ( ) > 1) {
+ if (rp_final.size ( ) > Group_size) {
+ function_cut_range (rp_final, counter, bk.error);
+ }
+ else
+ {
+ function_merge_range_pos (rp_final, counter, bk.error);
+ };
+ };
+ posx_ini = posx;
+ if (not final) {
+ rng_max = rng_posx;
+ side_max = side_posx;
+ };
+ }
+ else
+ {
+ if (bk.cmp (*(rng_max.back ( )), *(rng_posx.back ( )))) {
+ rng_max = rng_posx;
+ side_max = side_posx;
+ };
+ };
+ };
+ bk.exec (counter);
+};
+
+//
+//****************************************************************************
+}; // End namespace bis
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/bis/move_blocks.hpp b/include/boost/sort/parallel/detail/bis/move_blocks.hpp
new file mode 100755
index 0000000..4220921
--- /dev/null
+++ b/include/boost/sort/parallel/detail/bis/move_blocks.hpp
@@ -0,0 +1,278 @@
+//----------------------------------------------------------------------------
+/// @file move_blocks.hpp
+/// @brief contains the class move_blocks, which is part of the
+/// block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_MOVE_BLOCKS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BIS_MOVE_BLOCKS_HPP
+
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace bis
+{
+//
+///---------------------------------------------------------------------------
+/// @struct move_blocks
+/// @brief This class move the blocks, trnasforming a logical sort by an index,
+/// in physical sort
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+struct move_blocks
+{
+ //-------------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //-------------------------------------------------------------------------
+ typedef move_blocks< Block_size, Group_size, Iter_t, Compare > this_type;
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+ typedef std::atomic< uint32_t > atomic_t;
+ typedef util::range< size_t > range_pos;
+ typedef util::range< Iter_t > range_it;
+ typedef util::range< value_t * > range_buf;
+ typedef std::function< void(void) > function_t;
+ typedef backbone< Block_size, Iter_t, Compare > backbone_t;
+
+ //------------------------------------------------------------------------
+ // V A R I A B L E S
+ //------------------------------------------------------------------------
+ // Object with the elements to sort and all internal data structures of the
+ // algorithm
+ backbone_t &bk;
+
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+ move_blocks (backbone_t &bkb);
+
+ void move_sequence (const std::vector< size_t > &init_sequence);
+
+ void move_long_sequence (const std::vector< size_t > &init_sequence);
+ //
+ //------------------------------------------------------------------------
+ // function : function_move_sequence
+ /// @brief create a function_t with a call to move_sequence, and insert
+ /// in the stack of the backbone
+ ///
+ /// @param sequence :sequence of positions for to move the blocks
+ /// @param counter : atomic variable which is decremented when finish
+ /// the function. This variable is used for to know
+ /// when are finished all the function_t created
+ /// inside an object
+ /// @param error : global indicator of error.
+ //------------------------------------------------------------------------
+ void function_move_sequence (std::vector< size_t > &sequence,
+ atomic_t &counter, bool &error)
+ {
+ util::atomic_add (counter, 1);
+ function_t f1 = [this, sequence, &counter, &error]( ) -> void {
+ if (not error) {
+ try
+ {
+ this->move_sequence (sequence);
+ }
+ catch (std::bad_alloc &)
+ {
+ error = true;
+ };
+ }
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ };
+ //
+ //------------------------------------------------------------------------
+ // function : function_move_long_sequence
+ /// @brief create a function_t with a call to move_long_sequence, and
+ /// insert in the stack of the backbone
+ //
+ /// @param sequence :sequence of positions for to move the blocks
+ /// @param counter : atomic variable which is decremented when finish
+ /// the function. This variable is used for to know
+ /// when are finished all the function_t created
+ /// inside an object
+ /// @param error : global indicator of error.
+ //------------------------------------------------------------------------
+ void function_move_long_sequence (std::vector< size_t > &sequence,
+ atomic_t &counter, bool &error)
+ {
+ util::atomic_add (counter, 1);
+ function_t f1 = [this, sequence, &counter, &error]( ) -> void {
+ if (not error) {
+ try
+ {
+ this->move_long_sequence (sequence);
+ }
+ catch (std::bad_alloc &)
+ {
+ error = true;
+ };
+ }
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ };
+//---------------------------------------------------------------------------
+}; // end of struct move_blocks
+//---------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+// function : move_blocks
+/// @brief constructor of the class for to move the blocks to their true
+/// position obtained from the index
+//
+/// @param bkb : backbone with the index and the blocks
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+move_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::move_blocks (backbone_t &bkb): bk (bkb)
+{
+ std::vector< std::vector< size_t > > vsequence;
+ vsequence.reserve (bk.index.size ( ) >> 1);
+ std::vector< size_t > sequence;
+ atomic_t counter (0);
+
+ size_t pos_index_ini = 0, pos_index_src = 0, pos_index_dest = 0;
+ while (pos_index_ini < bk.index.size ( )) {
+ while (pos_index_ini < bk.index.size ( ) and
+ bk.index[pos_index_ini].pos ( ) == pos_index_ini)
+ {
+ ++pos_index_ini;
+ };
+
+ if (pos_index_ini == bk.index.size ( )) break;
+
+ sequence.clear ( );
+ pos_index_src = pos_index_dest = pos_index_ini;
+ sequence.push_back (pos_index_ini);
+
+ while (bk.index[pos_index_dest].pos ( ) != pos_index_ini) {
+ pos_index_src = bk.index[pos_index_dest].pos ( );
+ sequence.push_back (pos_index_src);
+
+ bk.index[pos_index_dest].set_pos (pos_index_dest);
+ pos_index_dest = pos_index_src;
+ };
+
+ bk.index[pos_index_dest].set_pos (pos_index_dest);
+ vsequence.push_back (sequence);
+
+ if (sequence.size ( ) < Group_size) {
+ function_move_sequence (vsequence.back ( ), counter, bk.error);
+ }
+ else
+ {
+ function_move_long_sequence (vsequence.back ( ), counter, bk.error);
+ };
+ };
+ bk.exec (counter);
+};
+//
+//-------------------------------------------------------------------------
+// function : move_sequence
+/// @brief move the blocks, following the positions of the init_sequence
+//
+/// @param init_sequence : vector with the positions from and where move the
+/// blocks
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void move_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::move_sequence (const std::vector< size_t > &init_sequence)
+{
+ range_buf rbuf = bk.get_range_buf ( );
+ size_t pos_range2 = init_sequence[0];
+
+ range_it range2 = bk.get_range (pos_range2);
+ init_move (rbuf, range2);
+
+ for (size_t i = 1; i < init_sequence.size ( ); ++i) {
+ pos_range2 = init_sequence[i];
+ range_it range1 (range2);
+ range2 = bk.get_range (pos_range2);
+ init_move (range1, range2);
+ };
+ init_move (range2, rbuf);
+};
+//
+//-------------------------------------------------------------------------
+// function : move_long_sequence
+/// @brief move the blocks, following the positions of the init_sequence.
+/// if the sequence is greater than Group_size, it is divided in small
+/// sequences, creating function_t elements, for to be inserted in the
+/// concurrent stack
+//
+/// @param init_sequence : vector with the positions from and where move the
+/// blocks
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void move_blocks< Block_size, Group_size, Iter_t, Compare >
+ ::move_long_sequence (const std::vector< size_t > &init_sequence)
+{
+ if (init_sequence.size ( ) < Group_size)
+ return move_sequence (init_sequence);
+
+ size_t npart = (init_sequence.size ( ) + Group_size - 1) / Group_size;
+ size_t size_part = init_sequence.size ( ) / npart;
+ atomic_t son_counter (0);
+
+ std::vector< size_t > sequence;
+ sequence.reserve (size_part);
+
+ std::vector< size_t > index_seq;
+ index_seq.reserve (npart);
+
+ auto it_pos = init_sequence.begin ( );
+ for (size_t i = 0; i < (npart - 1); ++i, it_pos += size_part) {
+ sequence.assign (it_pos, it_pos + size_part);
+ index_seq.emplace_back (*(it_pos + size_part - 1));
+ function_move_sequence (sequence, son_counter, bk.error);
+ };
+
+ sequence.assign (it_pos, init_sequence.end ( ));
+ index_seq.emplace_back (init_sequence.back ( ));
+ function_move_sequence (sequence, son_counter, bk.error);
+
+ bk.exec (son_counter);
+ if (bk.error) return;
+ move_long_sequence (index_seq);
+};
+//
+//****************************************************************************
+}; // End namespace bis
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/bis/parallel_sort.hpp b/include/boost/sort/parallel/detail/bis/parallel_sort.hpp
new file mode 100755
index 0000000..e6509aa
--- /dev/null
+++ b/include/boost/sort/parallel/detail/bis/parallel_sort.hpp
@@ -0,0 +1,216 @@
+//----------------------------------------------------------------------------
+/// @file parallel_sort.hpp
+/// @brief Contains the parallel_sort class, which is part of the
+/// block_indirect_sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BIS_PARALLEL_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BIS_PARALLEL_SORT_HPP
+
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace bis
+{
+//----------------------------------------------------------------------------
+// USING SENTENCES
+//----------------------------------------------------------------------------
+using boost::sort::parallel::detail::util::nbits64;
+//
+///---------------------------------------------------------------------------
+/// @struct parallel_sort
+/// @brief This class do a parallel sort, using the quicksort filtering,
+/// splitting the data until the number of elements is smaller than a
+/// predefined value (max_per_thread)
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+struct parallel_sort
+{
+ //-------------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //-------------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+ typedef std::atomic< uint32_t > atomic_t;
+ typedef std::function< void(void) > function_t;
+ typedef backbone< Block_size, Iter_t, Compare > backbone_t;
+
+ //------------------------------------------------------------------------
+ // V A R I A B L E S
+ //------------------------------------------------------------------------
+ // reference to a object with all the data to sort
+ backbone_t &bk;
+
+ // maximun number of element to sort woth 1 thread
+ size_t max_per_thread;
+
+ // atomic counter for to detect the end of the works created inside
+ // the object
+ atomic_t counter;
+
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+ parallel_sort (backbone_t &bkbn, Iter_t first, Iter_t last);
+
+ void divide_sort (Iter_t first, Iter_t last, uint32_t level);
+ //
+ //------------------------------------------------------------------------
+ // function : function_divide_sort
+ /// @brief create a function_t with a call to divide_sort, and inser in
+ /// the stack of the backbone
+ //
+ /// @param first : iterator to the first element of the range to divide
+ /// @param last : iterator to the next element after the last element of
+ /// the range to divide
+ /// @param level : level of depth in the division.When zero call to
+ /// introsort
+ /// @param counter : atomic variable which is decremented when finish
+ /// the function. This variable is used for to know
+ /// when are finished all the function_t created
+ /// inside an object
+ /// @param error : global indicator of error.
+ //------------------------------------------------------------------------
+ void function_divide_sort (Iter_t first, Iter_t last, uint32_t level,
+ atomic_t &counter, bool &error)
+ {
+ util::atomic_add (counter, 1);
+ function_t f1 = [this, first, last, level, &counter, &error]( ) {
+ if (not error) {
+ try
+ {
+ this->divide_sort (first, last, level);
+ }
+ catch (std::bad_alloc &)
+ {
+ error = true;
+ };
+ };
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ };
+
+//--------------------------------------------------------------------------
+}; // end struct parallel_sort
+//--------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//------------------------------------------------------------------------
+// function : parallel_sort
+/// @brief constructor of the class
+/// @param [in] bkbn : backbone struct with all the information to sort
+/// @param [in] first : iterator to the first element to sort
+/// @param [in] last : iterator to the next element after the last
+//------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+parallel_sort< Block_size, Iter_t, Compare >
+ ::parallel_sort (backbone_t &bkbn, Iter_t first, Iter_t last)
+ : bk (bkbn), counter (0)
+{
+ assert ((last - first) >= 0);
+ size_t nelem = size_t (last - first);
+
+ //------------------- check if sort --------------------------------------
+ bool sorted = true;
+ for (Iter_t it1 = first, it2 = first + 1;
+ it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sorted) return;
+
+ //-------------------max_per_thread ---------------------------
+ uint32_t nbits_size = (nbits64 (sizeof (value_t))) >> 1;
+ if (nbits_size > 5) nbits_size = 5;
+ max_per_thread = 1 << (18 - nbits_size);
+
+ uint32_t level = ((nbits64 (nelem / max_per_thread)) * 3) / 2;
+
+ //---------------- check if only single thread -----------------------
+ if (nelem < (max_per_thread)) {
+ intro_sort (first, last, bk.cmp);
+ return;
+ };
+ if (not bk.error) divide_sort (first, last, level);
+
+ // wait until all the parts are finished
+ bk.exec (counter);
+};
+
+//------------------------------------------------------------------------
+// function : divide_sort
+/// @brief this function divide the data in two part, for to be sorted in
+/// a parallel mode
+/// @param first : iterator to the first element to sort
+/// @param last : iterator to the next element after the last
+/// @param level : level of depth before call to introsort
+//------------------------------------------------------------------------
+template < uint32_t Block_size, class Iter_t, class Compare >
+void parallel_sort< Block_size, Iter_t, Compare >
+ ::divide_sort (Iter_t first, Iter_t last, uint32_t level)
+{
+ //------------------- check if sort -----------------------------------
+ bool sorted = true;
+ for (Iter_t it1 = first, it2 = first + 1;
+ it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sorted) return;
+
+ //---------------- check if finish the subdivision -------------------
+ size_t nelem = last - first;
+ if (level == 0 or nelem < (max_per_thread)) {
+ return intro_sort (first, last, bk.cmp);
+ };
+
+ //-------------------- pivoting ----------------------------------
+ pivot9 (first, last, bk.cmp);
+ const value_t &val = const_cast< value_t & > (*first);
+ Iter_t c_first = first + 1, c_last = last - 1;
+
+ while (bk.cmp (*c_first, val)) ++c_first;
+ while (bk.cmp (val, *c_last)) --c_last;
+
+ while (not(c_first > c_last)) {
+ std::swap (*(c_first++), *(c_last--));
+ while (bk.cmp (*c_first, val)) ++c_first;
+ while (bk.cmp (val, *c_last)) --c_last;
+ };
+
+ std::swap (*first, *c_last);
+
+ // insert the work of the second half in the stack of works
+ function_divide_sort (c_first, last, level - 1, counter, bk.error);
+ if (bk.error) return;
+
+ // The first half is done by the same thread
+ function_divide_sort (first, c_last, level - 1, counter, bk.error);
+};
+//
+//****************************************************************************
+}; // End namespace bis
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/block_indirect_sort.hpp b/include/boost/sort/parallel/detail/block_indirect_sort.hpp
new file mode 100755
index 0000000..2421da2
--- /dev/null
+++ b/include/boost/sort/parallel/detail/block_indirect_sort.hpp
@@ -0,0 +1,339 @@
+//----------------------------------------------------------------------------
+/// @file block_indirect_sort.hpp
+/// @brief block indirect sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+// This value is the minimal number of threads for to use the
+// block_indirect_sort algorithm
+#define BOOST_NTHREAD_BORDER 6
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+///---------------------------------------------------------------------------
+/// @struct block_indirect_sort
+/// @brief This class is the entry point of the block indirect sort. The code
+/// of this algorithm is divided in several classes:
+/// bis/block.hpp : basic structures used in the algorithm
+/// bis/backbone.hpp : data used by all the classes
+/// bis/merge_blocks.hpp : merge the internal blocks
+/// bis/move_blocks.hpp : move the blocks, and obtain all the elements
+/// phisicaly sorted
+/// bis/parallel_sort.hpp : make the parallel sort of each part in the
+/// initial division of the data
+///
+//----------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare = util::compare_iter< Iter_t > >
+struct block_indirect_sort
+{
+ //------------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //------------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+ typedef std::atomic< uint32_t > atomic_t;
+ typedef util::range< size_t > range_pos;
+ typedef util::range< Iter_t > range_it;
+ typedef util::range< value_t * > range_buf;
+ typedef std::function< void(void) > function_t;
+
+ // classes used in the internal operations of the algorithm
+ typedef bis::block_pos block_pos_t;
+ typedef bis::block< Block_size, Iter_t > block_t;
+ typedef bis::backbone< Block_size, Iter_t, Compare > backbone_t;
+ typedef bis::parallel_sort< Block_size, Iter_t, Compare > parallel_sort_t;
+
+ typedef bis::merge_blocks< Block_size, Group_size, Iter_t, Compare >
+ merge_blocks_t;
+ typedef bis::move_blocks< Block_size, Group_size, Iter_t, Compare >
+ move_blocks_t;
+ typedef bis::compare_block_pos< Block_size, Iter_t, Compare >
+ compare_block_pos_t;
+ //
+ //------------------------------------------------------------------------
+ // V A R I A B L E S A N D C O N S T A N T S
+ //------------------------------------------------------------------------
+ // contains the data and the internal data structures of the algorithm for
+ // to be shared between the classes which are part of the algorithm
+ backbone_t bk;
+ // atomic counter for to detect the end of the works created inside
+ // the object
+ atomic_t counter;
+ // pointer to the uninitialized memory used for the thread buffers
+ value_t *ptr;
+ // indicate if the memory pointed by ptr is initialized
+ bool construct;
+ // range from extract the buffers for the threads
+ range_buf rglobal_buf;
+ // number of threads to use
+ uint32_t nthread;
+ //
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+
+ block_indirect_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t nthr);
+
+ block_indirect_sort (Iter_t first, Iter_t last)
+ : block_indirect_sort (first, last, Compare ( ),
+ std::thread::hardware_concurrency ( )){};
+
+ block_indirect_sort (Iter_t first, Iter_t last, Compare cmp)
+ : block_indirect_sort (first, last, cmp,
+ std::thread::hardware_concurrency ( )){};
+
+ block_indirect_sort (Iter_t first, Iter_t last, uint32_t nthread)
+ : block_indirect_sort (first, last, Compare ( ), nthread){};
+
+ //
+ //------------------------------------------------------------------------
+ // function :destroy_all
+ /// @brief destructor all the data structures of the class (if the memory
+ /// is constructed, is destroyed) and return the uninitialized
+ /// memory
+ //------------------------------------------------------------------------
+ void destroy_all (void)
+ {
+ if (ptr != nullptr) {
+ if (construct) {
+ util::destroy (rglobal_buf);
+ construct = false;
+ };
+ std::return_temporary_buffer (ptr);
+ ptr = nullptr;
+ };
+ };
+
+ //
+ //------------------------------------------------------------------------
+ // function :~block_indirect_sort
+ /// @brief destructor of the class (if the memory is constructed, is
+ /// destroyed) and return the uninitialized memory
+ //------------------------------------------------------------------------
+ ~block_indirect_sort (void) { destroy_all ( ); };
+ void split_range (size_t pos_index1, size_t pos_index2,
+ uint32_t level_thread);
+
+ void start_function (void);
+
+ //-------------------------------------------------------------------------
+};
+// End class block_indirect_sort
+//----------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-------------------------------------------------------------------------
+// function : block_indirect_sort
+/// @brief begin with the execution of the functions stored in works
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param nthr : Number of threads to use in the process.When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+block_indirect_sort< Block_size, Group_size, Iter_t,
+ Compare >::block_indirect_sort (Iter_t first, Iter_t last,
+ Compare cmp, uint32_t nthr)
+ : bk (first, last, cmp), counter (0), ptr (nullptr), construct (false),
+ nthread (nthr)
+{
+ try
+ {
+ assert ((last - first) >= 0);
+ size_t nelem = size_t (last - first);
+ if (nelem == 0) return;
+
+ //------------------- check if sort -----------------------------------
+ bool sorted = true;
+ for (Iter_t it1 = first, it2 = first + 1;
+ it2 != last and (sorted = not bk.cmp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sorted) return;
+
+ //---------------- check if only single thread -----------------------
+ size_t nthreadmax = nelem / (Block_size * Group_size) + 1;
+ if (nthread > nthreadmax) nthread = (uint32_t)nthreadmax;
+ uint32_t nbits_size = (util::nbits64 (sizeof (value_t))) >> 1;
+ if (nbits_size > 5) nbits_size = 5;
+ size_t max_per_thread = 1 << (18 - nbits_size);
+
+ if (nelem < (max_per_thread) or nthread < 2) {
+ intro_sort (first, last, bk.cmp);
+ return;
+ };
+
+ //----------- creation of the temporary buffer --------------------
+ ptr = std::get_temporary_buffer< value_t > (Block_size * nthread).first;
+ if (ptr == nullptr) {
+ bk.error = true;
+ throw std::bad_alloc ( );
+ };
+
+ rglobal_buf = range_buf (ptr, ptr + (Block_size * nthread));
+ util::init (rglobal_buf, *first);
+ construct = true;
+
+ // creation of the buffers for the threads
+ std::vector< value_t * > vbuf (nthread);
+ for (uint32_t i = 0; i < nthread; ++i) {
+ vbuf[i] = ptr + (i * Block_size);
+ };
+
+ // Insert the first work in the stack
+ util::atomic_write (counter, 1);
+ function_t f1 = [&]( ) {
+ start_function ( );
+ util::atomic_sub (counter, 1);
+ };
+ bk.works.emplace_back (f1);
+
+ //---------------------------------------------------------------------
+ // PROCESS
+ //---------------------------------------------------------------------
+ std::vector< std::future< void > > vfuture (nthread);
+
+ // The function launched with the futures is "execute the functions of
+ // the stack until this->counter is zero
+ // vbuf[i] is the memory from the main thread for to configure the
+ // thread local buffer
+ for (uint32_t i = 0; i < nthread; ++i) {
+ auto f1 = [=, &vbuf]( ) { bk.exec (vbuf[i], this->counter); };
+ vfuture[i] = std::async (std::launch::async, f1);
+ };
+ for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( );
+ if (bk.error) throw std::bad_alloc ( );
+ }
+ catch (std::bad_alloc &)
+ {
+ destroy_all ( );
+ throw;
+ }
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : split_rage
+/// @brief this function splits a range of positions in the index, and
+/// depending of the size, sort directly or make to a recursive call
+/// to split_range
+/// @param pos_index1 : first position in the index
+/// @param pos_index2 : position after the last in the index
+/// @param level_thread : depth of the call. When 0 sort the blocks
+//-----------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void block_indirect_sort< Block_size, Group_size, Iter_t,
+ Compare >::split_range (size_t pos_index1,
+ size_t pos_index2,
+ uint32_t level_thread)
+{
+ size_t nblock = pos_index2 - pos_index1;
+
+ //-------------------------------------------------------------------------
+ // In the blocks not sorted, the physical position is the logical position
+ //-------------------------------------------------------------------------
+ Iter_t first = bk.get_block (pos_index1).first;
+ Iter_t last = bk.get_range (pos_index2 - 1).last;
+
+ if (nblock < Group_size) {
+ intro_sort (first, last, bk.cmp);
+ return;
+ };
+
+ size_t pos_index_mid = pos_index1 + (nblock >> 1);
+ atomic_t son_counter (1);
+
+ //-------------------------------------------------------------------------
+ // Insert in the stack the work for the second part, and the actual thread,
+ // execute the first part
+ //-------------------------------------------------------------------------
+ if (level_thread != 0) {
+ auto f1 = [=, &son_counter]( ) {
+ split_range (pos_index_mid, pos_index2, level_thread - 1);
+ util::atomic_sub (son_counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ if (bk.error) return;
+ split_range (pos_index1, pos_index_mid, level_thread - 1);
+ }
+ else
+ {
+ Iter_t mid = first + ((nblock >> 1) * Block_size);
+ auto f1 = [=, &son_counter]( ) {
+ parallel_sort_t (bk, mid, last);
+ util::atomic_sub (son_counter, 1);
+ };
+ bk.works.emplace_back (f1);
+ if (bk.error) return;
+ parallel_sort_t (bk, first, mid);
+ };
+ bk.exec (son_counter);
+ if (bk.error) return;
+ merge_blocks_t (bk, pos_index1, pos_index_mid, pos_index2);
+};
+//
+//-----------------------------------------------------------------------------
+// function : start_function
+/// @brief this function init the process. When the number of threads is lower
+/// than a predefined value, sort the elements with a parallel introsort.
+//-----------------------------------------------------------------------------
+template < uint32_t Block_size, uint32_t Group_size, class Iter_t,
+ class Compare >
+void block_indirect_sort< Block_size, Group_size, Iter_t,
+ Compare >::start_function (void)
+{
+ if (nthread < BOOST_NTHREAD_BORDER) {
+ parallel_sort_t (bk, bk.global_range.first, bk.global_range.last);
+ }
+ else
+ {
+ size_t level_thread = nbits64 (nthread - 1);
+ split_range (0, bk.nblock, level_thread - 1);
+ if (bk.error) return;
+ move_blocks_t k (bk);
+ };
+};
+//
+//****************************************************************************
+};// End namespace detail
+};// End namespace parallel
+};// End namespace sort
+};// End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/heap_sort.hpp b/include/boost/sort/parallel/detail/heap_sort.hpp
new file mode 100755
index 0000000..d48312b
--- /dev/null
+++ b/include/boost/sort/parallel/detail/heap_sort.hpp
@@ -0,0 +1,205 @@
+//----------------------------------------------------------------------------
+/// @file heap_sort.hpp
+/// @brief Insertion Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_HEAP_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_HEAP_SORT_HPP
+
+#include
+#include
+#include
+#include
+#include // for std::swap
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+//---------------------------------------------------------------------------
+// struct : heap_sort
+/// @brief : Heap sort algorithm
+/// @remarks This algorithm is O(NLogN)
+//---------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+struct heap_sort
+{
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+
+ //
+ //------------------------------------------------------------------------
+ // function : sort3
+ /// @brief Sort and signal the changes of three values
+ /// @param val_0 : first value to compare
+ /// @param val_1 : second value to compare
+ /// @param val_2 : third value to compare
+ /// @param [out] bool_0 : if true indicates val_0 had been changed
+ /// @param [out] bool_1 : if true indicates val_1 had been changed
+ /// @param [out] bool_2 : if true indicates val_2 had been changed
+ /// @return if true , some value had changed
+ /// @remarks
+ //------------------------------------------------------------------------
+ bool sort3 (value_t &val_0, value_t &val_1, value_t &val_2, bool &bool_0,
+ bool &bool_1, bool &bool_2)
+ {
+ bool_0 = bool_1 = bool_2 = false;
+ int value = 0;
+ if (val_0 < val_1) value += 4;
+ if (val_1 < val_2) value += 2;
+ if (val_0 < val_2) value += 1;
+
+ switch (value)
+ {
+ case 0: break;
+
+ case 2:
+ std::swap (val_1, val_2);
+ bool_1 = bool_2 = true;
+ break;
+
+ case 3:
+ if (not(val_0 > val_1)) {
+ std::swap (val_0, val_2);
+ bool_0 = bool_2 = true;
+ }
+ else
+ {
+ auto aux = std::move (val_2);
+ val_2 = std::move (val_1);
+ val_1 = std::move (val_0);
+ val_0 = std::move (aux);
+ bool_0 = bool_1 = bool_2 = true;
+ };
+ break;
+
+ case 4:
+ std::swap (val_0, val_1);
+ bool_0 = bool_1 = true;
+ break;
+
+ case 5:
+ if (val_1 > val_2) {
+ auto aux = std::move (val_0);
+ val_0 = std::move (val_1);
+ val_1 = std::move (val_2);
+ val_2 = std::move (aux);
+ bool_0 = bool_1 = bool_2 = true;
+ }
+ else
+ {
+ std::swap (val_0, val_2);
+ bool_0 = bool_2 = true;
+ };
+ break;
+
+ case 7:
+ std::swap (val_0, val_2);
+ bool_0 = bool_2 = true;
+ break;
+
+ default: abort ( );
+ };
+ return (bool_0 or bool_1 or bool_2);
+ };
+ //
+ //-----------------------------------------------------------------------
+ // function : make_heap
+ /// @brief Make the heap for to extract the sorted elements
+ /// @param first : iterator to the first element of the range
+ /// @param nelem : number of lements of the range
+ /// @param comp : object for to compare two elements
+ /// @remarks This algorithm is O(NLogN)
+ //------------------------------------------------------------------------
+ void make_heap (Iter_t first, size_t nelem, Compare comp)
+ {
+ size_t pos_father, pos_son;
+ Iter_t iter_father = first, iter_son = first;
+ bool sw = false;
+
+ for (size_t i = 1; i < nelem; ++i) {
+ pos_father = i;
+ iter_father = first + i;
+ sw = false;
+ do
+ {
+ iter_son = iter_father;
+ pos_son = pos_father;
+ pos_father = (pos_son - 1) >> 1;
+ iter_father = first + pos_father;
+ if ((sw = comp (*iter_father, *iter_son)))
+ std::swap (*iter_father, *iter_son);
+ } while (sw and pos_father != 0);
+ };
+ };
+ //
+ //------------------------------------------------------------------------
+ // function : heap_sort
+ /// @brief : Heap sort algorithm
+ /// @param first: iterator to the first element of the range
+ /// @param last : iterator to the next element of the last in the range
+ /// @param comp : object for to do the comparison between the elements
+ /// @remarks This algorithm is O(NLogN)
+ //------------------------------------------------------------------------
+ heap_sort (Iter_t first, Iter_t last, Compare comp)
+ {
+ assert ((last - first) >= 0);
+ size_t nelem = last - first;
+ if (nelem < 2) return;
+
+ //--------------------------------------------------------------------
+ // Creating the initial heap
+ //--------------------------------------------------------------------
+ make_heap (first, nelem, comp);
+
+ //--------------------------------------------------------------------
+ // Sort the heap
+ //--------------------------------------------------------------------
+ size_t pos_father, pos_son;
+ Iter_t iter_father = first, iter_son = first;
+
+ bool sw = false;
+ for (size_t i = 1; i < nelem; ++i) {
+ std::swap (*first, *(first + (nelem - i)));
+ pos_father = 0;
+ pos_son = 1;
+ iter_father = first;
+ sw = true;
+ while (sw and pos_son < (nelem - i)) {
+ // if the father have two sons must select the bigger
+ iter_son = first + pos_son;
+ if ((pos_son + 1) < (nelem - i) and
+ comp (*iter_son, *(iter_son + 1)))
+ {
+ ++pos_son;
+ ++iter_son;
+ };
+ if ((sw = comp (*iter_father, *iter_son)))
+ std::swap (*iter_father, *iter_son);
+ pos_father = pos_son;
+ iter_father = iter_son;
+ pos_son = (pos_father << 1) + 1;
+ };
+ };
+ };
+}; // End class heap_sort
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/indirect.hpp b/include/boost/sort/parallel/detail/indirect.hpp
new file mode 100755
index 0000000..58b6911
--- /dev/null
+++ b/include/boost/sort/parallel/detail/indirect.hpp
@@ -0,0 +1,139 @@
+//----------------------------------------------------------------------------
+/// @file indirect.hpp
+/// @brief Indirect algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_INDIRECT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_INDIRECT_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+//---------------------------------------------------------------------------
+/// @struct less_ptr_no_null
+///
+/// @remarks this is the comparison object for pointers. Compare the objects
+/// pointed by the iterators
+//---------------------------------------------------------------------------
+template < class Iter_t, class Compare = util::compare_iter< Iter_t > >
+struct less_ptr_no_null
+{
+ //----------------------------- Variables -----------------------
+ Compare comp; // comparison object of the elements pointed by Iter_t
+
+ //------------------------------------------------------------------------
+ // function : less_ptr_no_null
+ /// @brief constructor from a Compare object
+ /// @param C1 : comparison object
+ //-----------------------------------------------------------------------
+ inline less_ptr_no_null (Compare C1 = Compare ( )) : comp (C1){};
+
+ //------------------------------------------------------------------------
+ // function : operator ( )
+ /// @brief Make the comparison of the objects pointed by T1 and T2, using
+ // the internal comp
+ //
+ /// @param T1 : first iterator
+ /// @param T2 : second iterator
+ /// @return bool result of the comparison
+ //-----------------------------------------------------------------------
+ inline bool operator( ) (Iter_t T1, Iter_t T2) const
+ {
+ return comp (*T1, *T2);
+ };
+};
+//
+//-----------------------------------------------------------------------------
+// function : create_index
+/// @brief From a vector of objects, create a vector of iterators to
+/// the objects
+///
+/// @param first : iterator to the first element of the range
+/// @param last : iterator to the element after the last of the range
+/// @param index : vector where store the iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void create_index (Iter_t first, Iter_t last, std::vector< Iter_t > &index)
+{
+ auto nelem = last - first;
+ assert (nelem >= 0);
+ index.clear ( );
+ index.reserve (nelem);
+ for (; first != last; ++first) index.push_back (first);
+};
+//
+//-----------------------------------------------------------------------------
+// function : sort_index
+/// @brief This function transform a logical sort of the elements in the index
+/// in a physical sort
+//
+/// @param global_first : iterator to the first element of the data
+/// @param [in] index : vector of the iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void sort_index (Iter_t global_first, std::vector< Iter_t > &index)
+{
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+
+ size_t pos_dest = 0;
+ size_t pos_src = 0;
+ size_t pos_in_vector = 0;
+ size_t nelem = index.size ( );
+ Iter_t it_dest, it_src;
+
+ while (pos_in_vector < nelem) {
+ while (pos_in_vector < nelem and
+ (size_t (index[pos_in_vector] - global_first)) == pos_in_vector)
+ {
+ ++pos_in_vector;
+ };
+
+ if (pos_in_vector == nelem) return;
+ pos_dest = pos_src = pos_in_vector;
+ it_dest = global_first + pos_dest;
+ value_t Aux = std::move (*it_dest);
+
+ while ((pos_src = (size_t (index[pos_dest] - global_first))) !=
+ pos_in_vector)
+ {
+ index[pos_dest] = it_dest;
+ it_src = global_first + pos_src;
+ *it_dest = std::move (*it_src);
+ it_dest = it_src;
+ pos_dest = pos_src;
+ };
+
+ *it_dest = std::move (Aux);
+ index[pos_dest] = it_dest;
+ ++pos_in_vector;
+ };
+};
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/insertion_sort.hpp b/include/boost/sort/parallel/detail/insertion_sort.hpp
new file mode 100755
index 0000000..1fb2d83
--- /dev/null
+++ b/include/boost/sort/parallel/detail/insertion_sort.hpp
@@ -0,0 +1,66 @@
+//----------------------------------------------------------------------------
+/// @file insertion_sort.hpp
+/// @brief Insertion Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_INSERTION_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_INSERTION_SORT_HPP
+
+#include
+#include
+#include // std::swap
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+//-----------------------------------------------------------------------------
+// function : insertion_sort
+/// @brief : Insertion sort algorithm
+/// @param first: iterator to the first element of the range
+/// @param last : iterator to the next element of the last in the range
+/// @param comp : object for to do the comparison between the elements
+/// @remarks This algorithm is O(N²)
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void insertion_sort (Iter_t first, Iter_t last, Compare comp)
+{
+ //--------------------------------------------------------------------
+ // DEFINITIONS
+ //--------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+
+ if ((last - first) < 2) return;
+
+ for (Iter_t it_examine = first + 1; it_examine != last; ++it_examine) {
+ value_t Aux = std::move (*it_examine);
+ Iter_t it_insertion = it_examine;
+
+ while (it_insertion != first and comp (Aux, *(it_insertion - 1))) {
+ *it_insertion = std::move (*(it_insertion - 1));
+ --it_insertion;
+ };
+ *it_insertion = std::move (Aux);
+ };
+};
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/intro_sort.hpp b/include/boost/sort/parallel/detail/intro_sort.hpp
new file mode 100755
index 0000000..0bf6bd1
--- /dev/null
+++ b/include/boost/sort/parallel/detail/intro_sort.hpp
@@ -0,0 +1,203 @@
+//----------------------------------------------------------------------------
+/// @file intro_sort.hpp
+/// @brief Intro Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_ALGORITHM_INTRO_SORT_HPP
+#define __BOOST_SORT_PARALLEL_ALGORITHM_INTRO_SORT_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//---------------------------------------------------------------------------
+// USING SENTENCES
+//---------------------------------------------------------------------------
+using util::compare_iter;
+using util::nbits64;
+//
+//-----------------------------------------------------------------------------
+// function : mid3
+/// @brief : return the iterator to the mid value of the three values passsed
+/// as parameters
+//
+/// @param iter_1 : iterator to the first value
+/// @param iter_2 : iterator to the second value
+/// @param iter_3 : iterator to the third value
+/// @param comp : object for to compare two values
+/// @return iterator to mid value
+//-----------------------------------------------------------------------------
+template < typename Iter_t, typename Compare >
+inline Iter_t mid3 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Compare comp)
+{
+ return comp (*iter_1, *iter_2)
+ ? (comp (*iter_2, *iter_3)
+ ? iter_2
+ : (comp (*iter_1, *iter_3) ? iter_3 : iter_1))
+ : (comp (*iter_3, *iter_2)
+ ? iter_2
+ : (comp (*iter_3, *iter_1) ? iter_3 : iter_1));
+};
+//
+//-----------------------------------------------------------------------------
+// function : pivot3
+/// @brief : receive a range between first and last, calcule the mid iterator
+/// with the first, the previous to the last, and the central
+/// position. With this mid iterator swap with the first position
+//
+/// @param first : iterator to the first element
+/// @param last : iterator to the last element
+/// @param comp : object for to compare two elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline void pivot3 (Iter_t first, Iter_t last, Compare comp)
+{
+ auto N2 = (last - first) >> 1;
+ Iter_t it_val = mid3 (first + 1, first + N2, last - 1, comp);
+ std::swap (*first, *it_val);
+};
+//
+//-----------------------------------------------------------------------------
+// function : mid9
+/// @brief : return the iterator to the mid value of the nine values passsed
+/// as parameters
+//
+/// @param iter_1 : iterator to the first value
+/// @param iter_2 : iterator to the second value
+/// @param iter_3 : iterator to the third value
+/// @param iter_4 : iterator to the fourth value
+/// @param iter_5 : iterator to the fifth value
+/// @param iter_6 : iterator to the sixth value
+/// @param iter_7 : iterator to the seventh value
+/// @param iter_8 : iterator to the eighth value
+/// @param iter_9 : iterator to the ninth value
+/// @return iterator to the mid value
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline Iter_t mid9 (Iter_t iter_1, Iter_t iter_2, Iter_t iter_3, Iter_t iter_4,
+ Iter_t iter_5, Iter_t iter_6, Iter_t iter_7, Iter_t iter_8,
+ Iter_t iter_9, Compare comp)
+{
+ return mid3 (mid3 (iter_1, iter_2, iter_3, comp),
+ mid3 (iter_4, iter_5, iter_6, comp),
+ mid3 (iter_7, iter_8, iter_9, comp), comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : pivot9
+/// @brief : receive a range between first and last, obtain 9 values between
+/// the elements including the first and the previous to the last.
+/// Obtain the iterator to the mid value and swap with the first
+/// position
+//
+/// @param first : iterator to the first element
+/// @param last : iterator to the last element
+/// @param comp : object for to compare two elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+inline void pivot9 (Iter_t first, Iter_t last, Compare comp)
+{
+ size_t cupo = (last - first) >> 3;
+ Iter_t itaux = mid9 (first + 1, first + cupo, first + 2 * cupo,
+ first + 3 * cupo, first + 4 * cupo, first + 5 * cupo,
+ first + 6 * cupo, first + 7 * cupo, last - 1, comp);
+ std::swap (*first, *itaux);
+};
+//
+//-----------------------------------------------------------------------------
+// function : intro_sort_internal
+/// @brief : internal function for to divide and sort the ranges
+//
+/// @param first : iterator to the first element
+/// @param last : iterator to the element after the last in the range
+/// @param level : level of depth from the top level call
+/// @param comp : object for to Compare elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void intro_sort_internal (Iter_t first, Iter_t last, uint32_t level,
+ Compare comp)
+{
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+
+ const uint32_t nmin = 32;
+ size_t nelem = last - first;
+ if (nelem < nmin) return insertion_sort (first, last, comp);
+
+ if (level == 0) {
+ heap_sort< Iter_t, Compare > (first, last, comp);
+ return;
+ };
+
+ pivot3 (first, last, comp);
+
+ const value_t &val = const_cast< value_t & > (*first);
+ Iter_t c_first = first + 1, c_last = last - 1;
+
+ while (comp (*c_first, val)) ++c_first;
+ while (comp (val, *c_last)) --c_last;
+
+ while (not(c_first > c_last)) {
+ std::swap (*(c_first++), *(c_last--));
+ while (comp (*c_first, val)) ++c_first;
+ while (comp (val, *c_last)) --c_last;
+ }; // End while
+
+ std::swap (*first, *c_last);
+ intro_sort_internal (first, c_last, level - 1, comp);
+ intro_sort_internal (c_first, last, level - 1, comp);
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : intro_sort
+/// @brief : function for to sort range [first, last )
+/// @param first : iterator to the first element
+/// @param last : iterator to the element after the last in the range
+/// @param comp : object for to compare elements
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void intro_sort (Iter_t first, Iter_t last, Compare comp)
+{
+ auto nelem = last - first;
+ assert (nelem >= 0);
+
+ //------------------- check if sort --------------------------------------
+ bool sw = true;
+ for (Iter_t it1 = first, it2 = first + 1;
+ it2 != last and (sw = not comp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sw) return;
+
+ uint32_t level = ((nbits64 (nelem) - 4) * 3) / 2;
+ intro_sort_internal (first, last, level, comp);
+};
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/parallel_stable_sort.hpp b/include/boost/sort/parallel/detail/parallel_stable_sort.hpp
new file mode 100755
index 0000000..80bd509
--- /dev/null
+++ b/include/boost/sort/parallel/detail/parallel_stable_sort.hpp
@@ -0,0 +1,188 @@
+//----------------------------------------------------------------------------
+/// @file parallel_stable_sort.hpp
+/// @brief This file contains the class parallel_stable_sort
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_STABLE_SORT_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+///---------------------------------------------------------------------------
+/// @struct parallel_stable_sort
+/// @brief This a structure for to implement a parallel stable sort, exception
+/// safe
+//----------------------------------------------------------------------------
+template < class Iter_t, class Compare = util::compare_iter< Iter_t > >
+struct parallel_stable_sort
+{
+ //-------------------------------------------------------------------------
+ // DEFINITIONS
+ //-------------------------------------------------------------------------
+ typedef typename std::iterator_traits< Iter_t >::value_type value_t;
+
+ //-------------------------------------------------------------------------
+ // VARIABLES
+ //-------------------------------------------------------------------------
+ // Number of elements to sort
+ size_t nelem;
+ // Pointer to the auxiliary memory needed for the algorithm
+ value_t *ptr;
+ // Minimal number of elements for to be sorted in parallel mode
+ const size_t nelem_min = 1 << 16;
+
+ //------------------------------------------------------------------------
+ // F U N C T I O N S
+ //------------------------------------------------------------------------
+ parallel_stable_sort (Iter_t first, Iter_t last)
+ : parallel_stable_sort (first, last, Compare ( ),
+ std::thread::hardware_concurrency ( )){};
+
+ parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp)
+ : parallel_stable_sort (first, last, cmp,
+ std::thread::hardware_concurrency ( )){};
+
+ parallel_stable_sort (Iter_t first, Iter_t last, uint32_t num_thread)
+ : parallel_stable_sort (first, last, Compare ( ), num_thread){};
+
+ parallel_stable_sort (Iter_t first, Iter_t last, Compare cmp,
+ uint32_t num_thread);
+
+ //
+ //-----------------------------------------------------------------------------
+ // function : destroy_all
+ /// @brief The utility is to destroy the temporary buffer used in the
+ /// sorting process
+ //-----------------------------------------------------------------------------
+ void destroy_all ( )
+ {
+ if (ptr != nullptr) std::return_temporary_buffer (ptr);
+ };
+ //
+ //-----------------------------------------------------------------------------
+ // function :~parallel_stable_sort
+ /// @brief destructor of the class. The utility is to destroy the temporary
+ /// buffer used in the sorting process
+ //-----------------------------------------------------------------------------
+ ~parallel_stable_sort ( ) { destroy_all ( ); };
+}; // end struct parallel_stable_sort
+
+//
+//############################################################################
+// ##
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : parallel_stable_sort
+/// @brief constructor of the class
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+parallel_stable_sort< Iter_t, Compare >
+ ::parallel_stable_sort (Iter_t first, Iter_t last, Compare comp,
+ uint32_t nthread): nelem (0), ptr (nullptr)
+{
+ util::range< Iter_t > range_initial (first, last);
+ assert (range_initial.valid ( ));
+
+ nelem = range_initial.size ( );
+ size_t nptr = (nelem + 1) >> 1;
+
+ if (nelem < nelem_min or nthread < 2) {
+ spin_sort< Iter_t, Compare > (range_initial.first, range_initial.last,
+ comp);
+ return;
+ };
+
+ //------------------- check if sort --------------------------------------
+ bool sw = true;
+ for (Iter_t it1 = range_initial.first, it2 = range_initial.first + 1;
+ it2 != range_initial.last and (sw = not comp (*it2, *it1));
+ it1 = it2++)
+ ;
+ if (sw) return;
+
+ ptr = std::get_temporary_buffer< value_t > (nptr).first;
+ if (ptr == nullptr) throw std::bad_alloc ( );
+
+ //---------------------------------------------------------------------
+ // Parallel Process
+ //---------------------------------------------------------------------
+ range< Iter_t > range_first (range_initial.first,
+ range_initial.first + nptr);
+
+ range< Iter_t > range_second (range_initial.first + nptr,
+ range_initial.last);
+
+ range< value_t * > range_buffer (ptr, ptr + nptr);
+
+ try
+ {
+ sample_sort< Iter_t, Compare > (range_initial.first,
+ range_initial.first + nptr, comp,
+ nthread, range_buffer);
+ }
+ catch (std::bad_alloc &)
+ {
+ destroy_all ( );
+ throw std::bad_alloc ( );
+ };
+
+ try
+ {
+ sample_sort< Iter_t, Compare > (range_initial.first + nptr,
+ range_initial.last, comp, nthread,
+ range_buffer);
+ }
+ catch (std::bad_alloc &)
+ {
+ destroy_all ( );
+ throw std::bad_alloc ( );
+ };
+
+ range_buffer = init_move (range_buffer, range_first);
+ range_initial =
+ half_merge (range_initial, range_buffer, range_second, comp);
+}; // end of constructor
+
+//
+//****************************************************************************
+}; // End namespace algorithm
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/sample_sort.hpp b/include/boost/sort/parallel/detail/sample_sort.hpp
new file mode 100755
index 0000000..81a34e4
--- /dev/null
+++ b/include/boost/sort/parallel/detail/sample_sort.hpp
@@ -0,0 +1,438 @@
+//----------------------------------------------------------------------------
+/// @file sample_sort.hpp
+/// @brief contains the class sample_sort
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+///---------------------------------------------------------------------------
+/// @struct sample_sort
+/// @brief This a structure for to implement a sample sort, exception
+/// safe
+/// @tparam
+/// @remarks
+//----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+struct sample_sort
+{
+ //------------------------------------------------------------------------
+ // DEFINITIONS
+ //------------------------------------------------------------------------
+ typedef typename iterator_traits< Iter_t >::value_type value_t;
+ typedef range< Iter_t > range_it;
+ typedef range< value_t * > range_buf;
+ typedef sample_sort< Iter_t, Compare > this_t;
+ typedef spin_sort< Iter_t, Compare > spin_sort_t;
+
+ //------------------------------------------------------------------------
+ // VARIABLES AND CONSTANTS
+ //------------------------------------------------------------------------
+ // minimun numbers of elements for to be sortd in parallel mode
+ static const uint32_t thread_min = (1 << 16);
+
+ // Number of threads to use in the algorithm
+ // Number of intervals for to do the internal division of the data
+ uint32_t nthread, ninterval;
+
+ // Bool variables indicating if the auxiliary memory is constructed
+ // and indicating in the auxiliary memory had been obtained inside the
+ /// algorithm or had been received as a parameter
+ bool construct = false, owner = false;
+
+ // Comparison object for to compare two elements
+ Compare comp;
+
+ // Range with all the elements to sort
+ range_it global_range;
+
+ // range with the auxiliary memory
+ range_buf global_buf;
+
+ // vector of futures
+ std::vector< std::future< void > > vfuture;
+
+ // vector of vectors which contains the ranges to merge obtained in the
+ // subdivision
+ std::vector< std::vector< range_it > > vv_range_it;
+
+ // each vector of ranges of the vv_range_it, need their corresponding buffer
+ // for to do the merge
+ std::vector< std::vector< range_buf > > vv_range_buf;
+
+ // Initial vector of ranges
+ std::vector< range_it > vrange_it_ini;
+
+ // Initial vector of buffers
+ std::vector< range_buf > vrange_buf_ini;
+
+ // atomic counter for to know when are finished the function_t created
+ // inside a function
+ std::atomic< uint32_t > njob;
+
+ // Indicate if an error in the algorithm for to undo all
+ bool error;
+
+ //------------------------------------------------------------------------
+ // FUNCTIONS OF THE STRUCT
+ //------------------------------------------------------------------------
+ void initial_configuration (void);
+
+ sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+ value_t *paux, size_t naux);
+
+ sample_sort (Iter_t first, Iter_t last)
+ : sample_sort (first, last, Compare ( ),
+ std::thread::hardware_concurrency ( ), nullptr, 0){};
+
+ sample_sort (Iter_t first, Iter_t last, Compare cmp)
+ : sample_sort (first, last, cmp, std::thread::hardware_concurrency ( ),
+ nullptr, 0){};
+
+ sample_sort (Iter_t first, Iter_t last, uint32_t num_thread)
+ : sample_sort (first, last, Compare ( ), num_thread, nullptr, 0){};
+
+ sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
+ : sample_sort (first, last, cmp, num_thread, nullptr, 0){};
+
+ sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+ range_buf range_buf_initial)
+ : sample_sort (first, last, cmp, num_thread, range_buf_initial.first,
+ range_buf_initial.size ( )){};
+
+ void destroy_all (void);
+ //
+ //-----------------------------------------------------------------------------
+ // function :~sample_sort
+ /// @brief destructor of the class. The utility is to destroy the temporary
+ /// buffer used in the sorting process
+ //-----------------------------------------------------------------------------
+ ~sample_sort (void) { destroy_all ( ); };
+ //
+ //-----------------------------------------------------------------------
+ // function : execute first
+ /// @brief this a function to assign to each thread in the first merge
+ //-----------------------------------------------------------------------
+ void execute_first (void)
+ {
+ uint32_t job = 0;
+ while ((job = util::atomic_add (njob, 1)) < ninterval) {
+ uninit_merge_level4 (vrange_buf_ini[job], vv_range_it[job],
+ vv_range_buf[job], comp);
+ };
+ };
+ //
+ //-----------------------------------------------------------------------
+ // function : execute
+ /// @brief this is a function to assignt each thread the final merge
+ //-----------------------------------------------------------------------
+ void execute (void)
+ {
+ uint32_t job = 0;
+ while ((job = util::atomic_add (njob, 1)) < ninterval) {
+ merge_vector4 (vrange_buf_ini[job], vrange_it_ini[job],
+ vv_range_buf[job], vv_range_it[job], comp);
+ };
+ };
+ //
+ //-----------------------------------------------------------------------
+ // function : first merge
+ /// @brief Implement the merge of the initially sparse ranges
+ //-----------------------------------------------------------------------
+ void first_merge (void)
+ { //---------------------------------- begin --------------------------
+ njob = 0;
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ vfuture[i] =
+ std::async (std::launch::async, &this_t::execute_first, this);
+ };
+ for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( );
+ };
+ //
+ //-----------------------------------------------------------------------
+ // function : final merge
+ /// @brief Implement the final merge of the ranges
+ //-----------------------------------------------------------------------
+ void final_merge (void)
+ { //---------------------------------- begin --------------------------
+ njob = 0;
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ vfuture[i] =
+ std::async (std::launch::async, &this_t::execute, this);
+ };
+ for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( );
+ };
+
+ //----------------------------------------------------------------------------
+}; // End class sample_sort
+//----------------------------------------------------------------------------
+//
+//############################################################################
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : sample_sort
+/// @brief constructor of the class
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param cmp : object for to compare two elements pointed by Iter_t iterators
+/// @param num_thread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+/// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
+/// created inside the class
+/// @param naux : number of elements of the memory pointed by paux
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+sample_sort< Iter_t, Compare >
+ ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
+ value_t *paux, size_t naux)
+ : nthread (num_thread), owner (false), comp (cmp),
+ global_range (first, last), global_buf (nullptr, nullptr), error (false)
+{
+ assert ((last - first) >= 0);
+ size_t nelem = size_t (last - first);
+ construct = false;
+ njob = 0;
+ vfuture.resize (nthread);
+
+ // Adjust when have many threads and only a few elements
+ while (nelem > thread_min and (nthread * nthread) > (nelem >> 3)) {
+ nthread /= 2;
+ };
+ ninterval = (nthread << 3);
+
+ if (nthread < 2 or nelem <= (thread_min)) {
+ spin_sort< Iter_t, Compare > (first, last, comp);
+ return;
+ };
+
+ //------------------- check if sort --------------------------------------
+ bool sw = true;
+ for (Iter_t it1 = first, it2 = first + 1;
+ it2 != last and (sw = not comp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sw) return;
+
+ if (paux != nullptr) {
+ assert (naux != 0);
+ global_buf.first = paux;
+ global_buf.last = paux + naux;
+ owner = false;
+ }
+ else
+ {
+ value_t *ptr = std::get_temporary_buffer< value_t > (nelem).first;
+ if (ptr == nullptr) throw std::bad_alloc ( );
+ owner = true;
+ global_buf = range_buf (ptr, ptr + nelem);
+ };
+ //------------------------------------------------------------------------
+ // PROCESS
+ //------------------------------------------------------------------------
+ try
+ {
+ initial_configuration ( );
+ }
+ catch (std::bad_alloc &)
+ {
+ error = true;
+ };
+ if (not error) {
+ first_merge ( );
+ construct = true;
+ final_merge ( );
+ };
+ if (error) {
+ destroy_all ( );
+ throw std::bad_alloc ( );
+ };
+};
+//
+//-----------------------------------------------------------------------------
+// function : destroy_all
+/// @brief destructor of the class. The utility is to destroy the temporary
+/// buffer used in the sorting process
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void sample_sort< Iter_t, Compare >::destroy_all (void)
+{
+ if (construct) {
+ destroy (global_buf);
+ construct = false;
+ }
+ if (global_buf.first != nullptr and owner)
+ std::return_temporary_buffer (global_buf.first);
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : initial_configuration
+/// @brief Create the internal data structures, and obtain the inital set of
+/// ranges to merge
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void sample_sort< Iter_t, Compare >::initial_configuration (void)
+{
+ std::vector< range_it > vmem_thread;
+ std::vector< range_buf > vbuf_thread;
+ size_t nelem = global_range.size ( );
+
+ //------------------------------------------------------------------------
+ size_t cupo = nelem / nthread;
+ Iter_t it_first = global_range.first;
+ value_t *buf_first = global_buf.first;
+ vmem_thread.reserve (nthread + 1);
+ vbuf_thread.reserve (nthread + 1);
+
+ for (uint32_t i = 0; i < (nthread - 1);
+ ++i, it_first += cupo, buf_first += cupo)
+ {
+ vmem_thread.emplace_back (it_first, it_first + cupo);
+ vbuf_thread.emplace_back (buf_first, buf_first + cupo);
+ };
+
+ vmem_thread.emplace_back (it_first, global_range.last);
+ vbuf_thread.emplace_back (buf_first, global_buf.last);
+
+ //------------------------------------------------------------------------
+ // Sorting of the ranges
+ //------------------------------------------------------------------------
+ std::vector< std::future< void > > vfuture (nthread);
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ auto func = [=]( ) {
+ spin_sort_t (vmem_thread[i].first, vmem_thread[i].last, comp,
+ vbuf_thread[i]);
+ };
+ vfuture[i] = std::async (std::launch::async, func);
+ };
+
+ for (uint32_t i = 0; i < nthread; ++i) vfuture[i].get ( );
+
+ //------------------------------------------------------------------------
+ // Obtain the vector of milestones
+ //------------------------------------------------------------------------
+ std::vector< Iter_t > vsample;
+ vsample.reserve (nthread * (ninterval - 1));
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ size_t distance = vmem_thread[i].size ( ) / ninterval;
+ for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
+ {
+ vsample.push_back (vmem_thread[i].first + pos);
+ };
+ };
+ typedef less_ptr_no_null< Iter_t, Compare > compare_ptr;
+ typedef typename std::vector< Iter_t >::iterator it_to_it;
+
+ spin_sort< it_to_it, compare_ptr > (vsample.begin ( ), vsample.end ( ),
+ compare_ptr (comp));
+
+ //------------------------------------------------------------------------
+ // Create the final milestone vector
+ //------------------------------------------------------------------------
+ std::vector< Iter_t > vmilestone;
+ vmilestone.reserve (ninterval);
+
+ for (uint32_t pos = nthread >> 1; pos < vsample.size ( ); pos += nthread) {
+ vmilestone.push_back (vsample[pos]);
+ };
+
+ //------------------------------------------------------------------------
+ // Creation of the first vector of ranges
+ //------------------------------------------------------------------------
+ std::vector< std::vector< range< Iter_t > > > vv_range_first (nthread);
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ Iter_t itaux = vmem_thread[i].first;
+
+ for (uint32_t k = 0; k < (ninterval - 1); ++k) {
+ Iter_t it2 = std::upper_bound (itaux, vmem_thread[i].last,
+ *vmilestone[k], comp);
+
+ vv_range_first[i].emplace_back (itaux, it2);
+ itaux = it2;
+ };
+ vv_range_first[i].emplace_back (itaux, vmem_thread[i].last);
+ };
+
+ //------------------------------------------------------------------------
+ // Copy in buffer and creation of the final matrix of ranges
+ //------------------------------------------------------------------------
+ vv_range_it.resize (ninterval);
+ vv_range_buf.resize (ninterval);
+ vrange_it_ini.reserve (ninterval);
+ vrange_buf_ini.reserve (ninterval);
+
+ for (uint32_t i = 0; i < ninterval; ++i) {
+ vv_range_it[i].reserve (nthread);
+ vv_range_buf[i].reserve (nthread);
+ };
+
+ Iter_t it = global_range.first;
+ value_t *it_buf = global_buf.first;
+
+ for (uint32_t k = 0; k < ninterval; ++k) {
+ size_t nelem_interval = 0;
+
+ for (uint32_t i = 0; i < nthread; ++i) {
+ size_t nelem_range = vv_range_first[i][k].size ( );
+ if (nelem_range != 0) {
+ vv_range_it[k].push_back (vv_range_first[i][k]);
+ };
+ nelem_interval += nelem_range;
+ };
+
+ vrange_it_ini.emplace_back (it, it + nelem_interval);
+ vrange_buf_ini.emplace_back (it_buf, it_buf + nelem_interval);
+
+ it += nelem_interval;
+ it_buf += nelem_interval;
+ };
+};
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/select_block_indirect.hpp b/include/boost/sort/parallel/detail/select_block_indirect.hpp
new file mode 100755
index 0000000..229eea4
--- /dev/null
+++ b/include/boost/sort/parallel/detail/select_block_indirect.hpp
@@ -0,0 +1,78 @@
+//----------------------------------------------------------------------------
+/// @file select_block_indirect.hpp
+/// @brief block indirect sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_SELECT_BLOCK_INDIRECT_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_SELECT_BLOCK_INDIRECT_HPP
+
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+using boost::sort::parallel::detail::util::tmsb;
+//
+///---------------------------------------------------------------------------
+// function select_block_indirect
+/// @brief This class is select the block size in the block_indirect_sort
+/// algorithm depending of the type and size of the data to sort
+///
+//----------------------------------------------------------------------------
+template < class Iter_t, class Compare,
+ util::enable_if_string< util::value_iter< Iter_t > > * = nullptr >
+void select_block_indirect (Iter_t first, Iter_t last, Compare cmp,
+ uint32_t nthr = std::thread::hardware_concurrency ())
+{
+ block_indirect_sort< 128, 128, Iter_t, Compare > (first, last, cmp, nthr);
+};
+
+template < size_t Size >
+struct block_size
+{
+ static constexpr const uint32_t BitsSize =
+ (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1];
+ static constexpr const uint32_t sz[10] = {4096, 4096, 4096, 4096, 2048,
+ 1024, 768, 512, 256, 128};
+ static constexpr const uint32_t data = sz[BitsSize];
+};
+//
+///---------------------------------------------------------------------------
+/// @struct select_block_indirect_sort
+/// @brief This class is select the block size in the block_indirect_sort
+/// algorithm depending of the type and size of the data to sort
+///
+//----------------------------------------------------------------------------
+template < class Iter_t, class Compare,
+ util::enable_if_not_string< util::value_iter< Iter_t > >* = nullptr >
+
+void select_block_indirect (Iter_t first, Iter_t last, Compare cmp,
+ uint32_t nthr = std::thread::hardware_concurrency ())
+{
+ block_indirect_sort) >::data,
+ 28, Iter_t, Compare > (first, last, cmp, nthr);
+};
+
+//
+//****************************************************************************
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/spin_sort.hpp b/include/boost/sort/parallel/detail/spin_sort.hpp
new file mode 100755
index 0000000..63736af
--- /dev/null
+++ b/include/boost/sort/parallel/detail/spin_sort.hpp
@@ -0,0 +1,249 @@
+//----------------------------------------------------------------------------
+/// @file spin_sort.hpp
+/// @brief Spin Sort algorithm
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP
+#define __BOOST_SORT_PARALLEL_ALGORITHM_SPIN_SORT_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+//
+//----------------------------------------------------------------------------
+// USING SENTENCES
+//----------------------------------------------------------------------------
+using util::range;
+using util::nbits64;
+using std::iterator_traits;
+
+//-----------------------------------------------------------------------------
+// function : range_sort
+/// @brief this function divide r_input in two parts, sort it,and merge moving
+/// the elements to range_buf
+/// @param range_input : range with the elements to sort
+/// @param range_buffer : range with the elements sorted
+/// @param comp : object for to compare two elements
+/// @param level : when is 1, sort with the insertion_sort algorithm
+/// if not make a recursive call splitting the ranges
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+void range_sort (const range< Iter1_t > &range_input,
+ const range< Iter2_t > &range_buffer, Compare comp,
+ uint32_t level)
+{
+ typedef range< Iter1_t > range_it1;
+ typedef range< Iter2_t > range_it2;
+ assert (range_input.size ( ) == range_buffer.size ( ) and level != 0);
+
+ size_t nelem1 = (range_input.size ( ) + 1) >> 1;
+ range_it1 range_input1 (range_input.first, range_input.first + nelem1),
+ range_input2 (range_input.first + nelem1, range_input.last);
+
+ if (level < 2) {
+ insertion_sort (range_input1.first, range_input1.last, comp);
+ insertion_sort (range_input2.first, range_input2.last, comp);
+ }
+ else
+ {
+ range_sort (range_it2 (range_buffer.first, range_buffer.first + nelem1),
+ range_input1, comp, level - 1);
+
+ range_sort (range_it2 (range_buffer.first + nelem1, range_buffer.last),
+ range_input2, comp, level - 1);
+ };
+
+ full_merge (range_buffer, range_input1, range_input2, comp);
+};
+
+//---------------------------------------------------------------------------
+/// @struct spin_sort
+/// @brief This class implement s stable sort algorithm with 1 thread, with
+/// an auxiliary memory of N/2 elements
+//----------------------------------------------------------------------------
+template < class Iter_t, typename Compare = util::compare_iter< Iter_t > >
+class spin_sort
+{
+ //------------------------------------------------------------------------
+ // DEFINITIONS AND CONSTANTS
+ //------------------------------------------------------------------------
+ typedef typename iterator_traits< Iter_t >::value_type value_t;
+ typedef range< Iter_t > range_it;
+ typedef range< value_t * > range_buf;
+ // When the number of elements to sort is smaller than Sort_min, are sorted
+ // by the insertion sort algorithm
+ static const uint32_t Sort_min = 36;
+
+ //------------------------------------------------------------------------
+ // VARIABLES
+ //------------------------------------------------------------------------
+ // Pointer to the auxiliary memory
+ value_t *ptr;
+
+ // Number of elements in the auxiliary memory
+ size_t nptr;
+
+ // construct indicate if the auxiliary memory in initialized, and owner
+ // indicate if the auxiliary memory had been created inside the object or
+ // had
+ // been received as a parameter
+ bool construct = false, owner = false;
+
+ //------------------------------------------------------------------------
+ // PRIVATE FUNCTIONS
+ //-------------------------------------------------------------------------
+ spin_sort (Iter_t first, Iter_t last, Compare comp, value_t *paux,
+ size_t naux);
+
+public:
+ //------------------------------------------------------------------------
+ // PUBLIC FUNCTIONS
+ //-------------------------------------------------------------------------
+ spin_sort (Iter_t first, Iter_t last, Compare comp = Compare ( ))
+ : spin_sort (first, last, comp, nullptr, 0){};
+
+ spin_sort (Iter_t first, Iter_t last, Compare comp, range_buf range_aux)
+ : spin_sort (first, last, comp, range_aux.first, range_aux.size ( )){};
+ //
+ //-----------------------------------------------------------------------
+ // function :~spin_sort
+ /// @brief destructor of the struct. Destroy the elements if construct is
+ /// true,
+ /// and return the memory if owner is true
+ //-----------------------------------------------------------------------
+ ~spin_sort (void)
+ {
+ if (construct) {
+ destroy (range< value_t * > (ptr, ptr + nptr));
+ construct = false;
+ };
+ if (owner and ptr != nullptr) std::return_temporary_buffer (ptr);
+ };
+}; // End of class spin_sort
+//
+//############################################################################
+// ##
+// N O N I N L I N E F U N C T I O N S ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : spin_sort
+/// @brief constructor of the struct
+//
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param paux : pointer to the auxiliary memory provided. If nullptr, the
+/// memory is created inside the class
+/// @param naux : number of elements pointed by paux
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+spin_sort< Iter_t, Compare >
+ ::spin_sort (Iter_t first, Iter_t last, Compare comp, value_t *paux,
+ size_t naux)
+ : ptr (paux), nptr (naux), construct (false), owner (false)
+{
+ range< Iter_t > range_input (first, last);
+ assert (range_input.valid ( ));
+
+ size_t nelem = range_input.size ( );
+ owner = construct = false;
+
+ nptr = (nelem + 1) >> 1;
+ size_t nelem_1 = nptr;
+ size_t nelem_2 = nelem - nelem_1;
+
+ if (nelem <= (Sort_min << 1)) {
+ insertion_sort (range_input.first, range_input.last, comp);
+ return;
+ };
+ //------------------- check if sort --------------------------------------
+ bool sw = true;
+ for (Iter_t it1 = range_input.first, it2 = range_input.first + 1;
+ it2 != range_input.last and (sw = not comp (*it2, *it1)); it1 = it2++)
+ ;
+ if (sw) return;
+
+ if (ptr == nullptr) {
+ ptr = std::get_temporary_buffer< value_t > (nptr).first;
+ if (ptr == nullptr) throw std::bad_alloc ( );
+ owner = true;
+ };
+ range_buf range_aux (ptr, (ptr + nptr));
+
+ //------------------------------------------------------------------------
+ // Process
+ //------------------------------------------------------------------------
+ uint32_t nlevel = nbits64 (((nelem + Sort_min - 1) / Sort_min) - 1) - 1;
+ assert (nlevel != 0);
+
+ if ((nlevel & 1) == 1) {
+ //---------------------------------------------------------------------
+ // if the number of levels is odd, the data are in the first parameter
+ // of range_sort, and the results appear in the second parameter
+ //---------------------------------------------------------------------
+ range_it range_1 (first, first + nelem_2),
+ range_2 (first + nelem_2, last);
+ range_aux = uninit_move (range_aux, range_2);
+ construct = true;
+
+ range_sort (range_aux, range_2, comp, nlevel);
+ range_buf rng_bx (range_aux.first, range_aux.first + nelem_2);
+
+ range_sort (range_1, rng_bx, comp, nlevel);
+ half_merge (range_input, rng_bx, range_2, comp);
+ }
+ else
+ {
+ //--------------------------------------------------------------------
+ // If the number of levels is even, the data are in the second
+ // parameter of range_sort, and the results are in the same parameter
+ //---------------------------------------------------------------------
+ range_it range_1 (first, first + nelem_1),
+ range_2 (first + nelem_1, last);
+ range_aux = uninit_move (range_aux, range_1);
+ construct = true;
+
+ range_sort (range_1, range_aux, comp, nlevel);
+
+ range_1.last = range_1.first + range_2.size ( );
+ range_sort (range_1, range_2, comp, nlevel);
+ half_merge (range_input, range_aux, range_2, comp);
+ };
+};
+
+//****************************************************************************
+}; // End namespace algorithm
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namepspace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/util/atomic.hpp b/include/boost/sort/parallel/detail/util/atomic.hpp
new file mode 100755
index 0000000..9807d32
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/atomic.hpp
@@ -0,0 +1,93 @@
+//----------------------------------------------------------------------------
+/// @file atomic.hpp
+/// @brief Basic layer for to simplify the use of atomic functions
+/// @author Copyright(c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_ATOMIC_HPP
+
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//
+//-----------------------------------------------------------------------------
+// function : atomic_read
+/// @brief make the atomic read of an atomic variable, using a memory model
+/// @param at_var : atomic variable to read
+/// @return value obtained
+//-----------------------------------------------------------------------------
+template < typename T >
+inline T atomic_read (std::atomic< T > &at_var)
+{
+ return std::atomic_load_explicit< T > (&at_var, std::memory_order_acquire);
+};
+//
+//-----------------------------------------------------------------------------
+// function : atomic_add
+/// @brief Add a number to an atomic variable, using a memory model
+/// @param at_var : variable to add
+/// @param num : value to add to at_var
+/// @return result of the operation
+//-----------------------------------------------------------------------------
+template < typename T, typename T2 >
+inline T atomic_add (std::atomic< T > &at_var, T2 num)
+{
+ static_assert (std::is_integral< T2 >::value, "Bad parameter");
+ return std::atomic_fetch_add_explicit< T > (&at_var, (T)num,
+ std::memory_order_acq_rel);
+};
+//
+//-----------------------------------------------------------------------------
+// function : atomic_sub
+/// @brief Atomic subtract of an atomic variable using memory model
+/// @param at_var : Varibale to subtract
+/// @param num : value to sub to at_var
+/// @return result of the operation
+//-----------------------------------------------------------------------------
+template < typename T, typename T2 >
+inline T atomic_sub (std::atomic< T > &at_var, T2 num)
+{
+ static_assert (std::is_integral< T2 >::value, "Bad parameter");
+ return std::atomic_fetch_sub_explicit< T > (&at_var, (T)num,
+ std::memory_order_acq_rel);
+};
+//
+//-----------------------------------------------------------------------------
+// function : atomic_write
+/// @brief Write a value in an atomic variable using memory model
+/// @param at_var : varible to write
+/// @param num : value to write in at_var
+//-----------------------------------------------------------------------------
+template < typename T, typename T2 >
+inline void atomic_write (std::atomic< T > &at_var, T2 num)
+{
+ static_assert (std::is_integral< T2 >::value, "Bad parameter");
+ std::atomic_store_explicit< T > (&at_var, (T)num,
+ std::memory_order_release);
+};
+
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/detail/util/compare_traits.hpp b/include/boost/sort/parallel/detail/util/compare_traits.hpp
new file mode 100755
index 0000000..68fd55f
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/compare_traits.hpp
@@ -0,0 +1,99 @@
+//----------------------------------------------------------------------------
+/// @file compare_traits.hpp
+/// @brief this file contains the metaprogramming classes compare_iter and
+/// enable_if_not_integral
+/// @author Copyright(c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_COMPARE_TRAITS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_COMPARE_TRAITS_HPP
+
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//----------------------------------------------------------------------------
+// USING SENTENCES
+//----------------------------------------------------------------------------
+using std::iterator_traits;
+
+//---------------------------------------------------------------------------
+/// @class compare_iter
+/// @brief From the iterator, received as template parameter, obtain the type
+/// of the object pointed by the iterator, and with this define the
+/// std::less with this type obtained
+/// @remarks The main utility of this, is simplify the default template
+/// parameter of comparison
+//---------------------------------------------------------------------------
+template < class iter_t >
+using compare_iter =
+ std::less< typename iterator_traits< iter_t >::value_type >;
+//
+//---------------------------------------------------------------------------
+/// @class value_iter
+/// @brief From the iterator, obtain the type pointed by it
+/// @remarks The main utility of this, is simplify the default template
+/// parameter of comparison
+//---------------------------------------------------------------------------
+template < class iter_t >
+using value_iter = typename iterator_traits< iter_t >::value_type ;
+
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_not_integral
+/// @brief This is a SFINAE class for to detect if the third parameter in the
+/// invocation of the parallel sorting algorithms is an integer
+/// representing the number of threads to use or is a comparison object
+/// @remarks
+//---------------------------------------------------------------------------
+template < class T >
+using enable_if_not_integral =
+ typename std::enable_if< !std::is_integral< T >::value >::type;
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_string
+/// @brief This is a SFINAE class for to detect if the parameter is a
+/// std::string for to apply specialized parameters in the invocation
+/// of the block_indirect_sort algorithm
+/// @remarks
+//---------------------------------------------------------------------------
+template < class T >
+using enable_if_string =
+ typename std::enable_if< std::is_same< T, std::string >::value >::type;
+
+//
+//---------------------------------------------------------------------------
+/// @class enable_if_not_string
+/// @brief This is a SFINAE class for to detect if the parameter is a
+/// std::string for to apply specialized parameters in the invocation
+/// of the block_indirect_sort algorithm
+/// @remarks
+//---------------------------------------------------------------------------
+template < class T >
+using enable_if_not_string =
+ typename std::enable_if::value >::type;
+//
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/detail/util/low_level.hpp b/include/boost/sort/parallel/detail/util/low_level.hpp
new file mode 100755
index 0000000..c678396
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/low_level.hpp
@@ -0,0 +1,326 @@
+//----------------------------------------------------------------------------
+/// @file low_level.hpp
+/// @brief low level functions of create, destroy, move and merge functions
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_LOW_LEVEL_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_LOW_LEVEL_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//---------------------------------------------------------------------------
+// USING SENTENCES
+//---------------------------------------------------------------------------
+using std::iterator_traits;
+
+//
+//-----------------------------------------------------------------------------
+// function : construct
+/// @brief create an object in the memory specified by ptr
+///
+/// @param ptr : pointer to the memory where to create the object
+/// @param args : arguments to the constructor
+//-----------------------------------------------------------------------------
+template < class Value_t, class... Args >
+inline void construct (Value_t *ptr, Args &&... args)
+{
+ (::new (static_cast< void * > (ptr))
+ Value_t (std::forward< Args > (args)...));
+};
+//
+//-----------------------------------------------------------------------------
+// function : destroy_object
+/// @brief destroy an object in the memory specified by ptr
+/// @param ptr : pointer to the object to destroy
+//-----------------------------------------------------------------------------
+template < class Value_t >
+inline void destroy_object (Value_t *ptr)
+{
+ ptr->~Value_t ( );
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : init
+/// @brief initialize a range of objects with the object val moving across them
+///
+/// @param first : itertor to the first element to initialize
+/// @param last : iterator to the last element to initialize
+/// @param val : object used for the initialization
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void init (Iter_t first, Iter_t last,
+ typename iterator_traits< Iter_t >::value_type &val)
+{
+ if (first == last) return;
+ construct (&(*first), std::move (val));
+
+ Iter_t it1 = first, it2 = first + 1;
+ while (it2 != last) {
+ construct (&(*(it2++)), std::move (*(it1++)));
+ };
+ val = std::move (*(last - 1));
+};
+//
+//-----------------------------------------------------------------------------
+// function : init_move
+/// @brief Move initialized objets
+/// @param it_dest : iterator to the final place of the objects
+/// @param first : iterator to the first element to move
+/// @param last : iterator to the last element to move
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t >
+Iter2_t init_move (Iter2_t it_dest, Iter1_t first, Iter1_t last)
+{
+ while (first != last) *(it_dest++) = std::move (*(first++));
+ return it_dest;
+};
+//
+//-----------------------------------------------------------------------------
+// function : uninit_move
+/// @brief Move objets to uninitialized memory
+///
+/// @param ptr : pointer to the memory where to create the objects
+/// @param first : iterator to the first element to move
+/// @param last : iterator to the last element to move
+//-----------------------------------------------------------------------------
+template < class Iter_t,
+ class Value_t = typename iterator_traits< Iter_t >::value_type >
+Value_t *uninit_move (Value_t *ptr, Iter_t first, Iter_t last)
+{
+ typedef typename iterator_traits< Iter_t >::value_type value2_t;
+
+ static_assert (std::is_same< Value_t, value2_t >::value,
+ "Incompatible iterators\n");
+
+ while (first != last) {
+ ::new (static_cast< void * > (ptr++)) Value_t (std::move (*(first++)));
+ };
+ return ptr;
+};
+//
+//-----------------------------------------------------------------------------
+// function : destroy
+/// @brief destroy the elements between first and last
+/// @param first : iterator to the first element to destroy
+/// @param last : iterator to the last element to destroy
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void destroy (Iter_t first, const Iter_t last)
+{
+ typedef typename iterator_traits< Iter_t >::value_type value_t;
+ while (first != last) {
+ (&(*(first++)))->~value_t ( );
+ };
+};
+//
+//-----------------------------------------------------------------------------
+// function : full_merge
+/// @brief Merge two contiguous buffers pointed by buf1 and buf2, and put
+/// in the buffer pointed by buf_out
+///
+/// @param buf1 : iterator to the first element in the first buffer
+/// @param end_buf1 : final iterator of first buffer
+/// @param buf2 : iterator to the first iterator to the second buffer
+/// @param end_buf2 : final iterator of the second buffer
+/// @param buf_out : buffer where move the elements merged
+/// @param comp : comparison object
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+Iter2_t full_merge (Iter1_t buf1, const Iter1_t end_buf1, Iter1_t buf2,
+ const Iter1_t end_buf2, Iter2_t buf_out, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type value1_t;
+ typedef typename iterator_traits< Iter2_t >::value_type value2_t;
+ static_assert (std::is_same< value1_t, value2_t >::value,
+ "Incompatible iterators\n");
+
+
+ while ((buf1 != end_buf1) and (buf2 != end_buf2)) {
+ *(buf_out++) = (not comp (*buf2, *buf1)) ? std::move (*(buf1++))
+ : std::move (*(buf2++));
+ };
+ return (buf1 == end_buf1) ? init_move (buf_out, buf2, end_buf2)
+ : init_move (buf_out, buf1, end_buf1);
+};
+//
+//-----------------------------------------------------------------------------
+// function : uninit_full_merge
+/// @brief Merge two contiguous buffers pointed by first1 and first2, and put
+/// in the uninitialized buffer pointed by it_out
+///
+/// @param first1 : iterator to the first element in the first buffer
+/// @param last1 : last iterator of the first buffer
+/// @param first2 : iterator to the first element to the second buffer
+/// @param last2 : final iterator of the second buffer
+/// @param it_out : uninitialized buffer where move the elements merged
+/// @param comp : comparison object
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Value_t, class Compare >
+Value_t *uninit_full_merge (Iter_t first1, const Iter_t last1, Iter_t first2,
+ const Iter_t last2, Value_t *it_out, Compare comp)
+{
+ typedef typename iterator_traits< Iter_t >::value_type type1;
+ static_assert (std::is_same< Value_t, type1 >::value,
+ "Incompatible iterators\n");
+
+ while (first1 != last1 and first2 != last2) {
+ construct ((it_out++), (not comp (*first2, *first1))
+ ? std::move (*(first1++))
+ : std::move (*(first2++)));
+ };
+ return (first1 == last1) ? uninit_move (it_out, first2, last2)
+ : uninit_move (it_out, first1, last1);
+};
+//
+//---------------------------------------------------------------------------
+// function : half_merge
+/// @brief : Merge two buffers. The first buffer is in a separate memory.
+/// The second buffer have a empty space before buf2 of the same size
+/// than the (end_buf1 - buf1)
+///
+/// @param buf1 : iterator to the first element of the first buffer
+/// @param end_buf1 : iterator to the last element of the first buffer
+/// @param buf2 : iterator to the first element of the second buffer
+/// @param end_buf2 : iterator to the last element of the second buffer
+/// @param buf_out : iterator to the first element to the buffer where put
+/// the result
+/// @param comp : object for Compare two elements of the type pointed
+/// by the Iter1_t and Iter2_t
+//---------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+Iter2_t half_merge (Iter1_t buf1, const Iter1_t end_buf1, Iter2_t buf2,
+ const Iter2_t end_buf2, Iter2_t buf_out, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type value1_t;
+ typedef typename iterator_traits< Iter2_t >::value_type value2_t;
+ static_assert (std::is_same< value1_t, value2_t >::value,
+ "Incompatible iterators\n");
+
+ while ((buf1 != end_buf1) and (buf2 != end_buf2)) {
+ *(buf_out++) = (not comp (*buf2, *buf1)) ? std::move (*(buf1++))
+ : std::move (*(buf2++));
+ };
+ return (buf2 == end_buf2) ? init_move (buf_out, buf1, end_buf1) : end_buf2;
+};
+//
+//-----------------------------------------------------------------------------
+// function : in_place_merge_uncontiguous
+/// @brief : merge two uncontiguous buffers, placing the results in the buffers
+/// Use an auxiliary buffer pointed by aux
+///
+/// @param src1 : iterator to the first element of the first buffer
+/// @param end_src1 : last iterator of the first buffer
+/// @param src2 : iterator to the first element of the second buffer
+/// @param end_src2 : last iterator of the second buffer
+/// @param aux : iterator to the first element of the auxiliary buffer
+/// @param comp : object for to Compare elements
+/// @return true : not changes done, false : changes in the buffers
+/// @remarks
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare >
+bool in_place_merge_uncontiguous (Iter1_t src1, const Iter1_t end_src1,
+ Iter2_t src2, const Iter2_t end_src2,
+ Iter3_t aux, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ typedef typename iterator_traits< Iter3_t >::value_type type3;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+ static_assert (std::is_same< type3, type2 >::value,
+ "Incompatible iterators\n");
+
+ if (src1 == end_src1 or src2 == end_src2 or
+ not comp (*src2, *(end_src1 - 1)))
+ return true;
+
+ while (src1 != end_src1 and not comp (*src2, *src1)) ++src1;
+
+ Iter3_t const end_aux = aux + (end_src1 - src1);
+ Iter2_t src2_first = src2;
+ init_move (aux, src1, end_src1);
+
+ while ((src1 != end_src1) and (src2 != end_src2)) {
+ *(src1++) = std::move ((not comp (*src2, *aux)) ? *(aux++) : *(src2++));
+ }
+
+ if (src2 == end_src2) {
+ while (src1 != end_src1) *(src1++) = std::move (*(aux++));
+ init_move (src2_first, aux, end_aux);
+ }
+ else
+ {
+ half_merge (aux, end_aux, src2, end_src2, src2_first, comp);
+ };
+ return false;
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : in_place_merge
+/// @brief : merge two contiguous buffers,using an auxiliary buffer pointed
+/// by buf. The results are in src1 and src2
+///
+/// @param src1: iterator to the first position of the first buffer
+/// @param src2: final iterator of the first buffer and first iterator
+/// of the second buffer
+/// @param end_src2 : final iterator of the second buffer
+/// @param buf : iterator to buffer used as auxiliary memory
+/// @param comp : object for to Compare elements
+/// @return true : not changes done, false : changes in the buffers
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+bool in_place_merge (Iter1_t src1, Iter1_t src2, Iter1_t end_src2, Iter2_t buf,
+ Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ if (src1 == src2 or src2 == end_src2 or not comp (*src2, *(src2 - 1)))
+ return true;
+
+ Iter1_t end_src1 = src2;
+ while (src1 != end_src1 and not comp (*src2, *src1)) ++src1;
+
+ if (src1 == end_src1) return false;
+
+ size_t nx = end_src1 - src1;
+ init_move (buf, src1, end_src1);
+ half_merge (buf, buf + nx, src2, end_src2, src1, comp);
+ return false;
+};
+//
+//****************************************************************************
+}; // End namespace util
+}; // End namespave detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/util/merge_four.hpp b/include/boost/sort/parallel/detail/util/merge_four.hpp
new file mode 100755
index 0000000..e932a2c
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/merge_four.hpp
@@ -0,0 +1,316 @@
+//----------------------------------------------------------------------------
+/// @file merge_four.hpp
+/// @brief This file have the functions for to merge 4 buffers
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_FOUR_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//
+//############################################################################
+// ##
+// F U S I O N O F ##
+// ##
+// F O U R E L E M E N T S R A N G E ##
+// ##
+//############################################################################
+//
+using std::iterator_traits;
+
+//-----------------------------------------------------------------------------
+// function : less_range
+/// @brief Compare the elements pointed by it1 and it2, and if they
+/// are equals, compare their position, doing a stable comparison
+///
+/// @param it1 : iterator to the first element
+/// @param pos1 : position of the object pointed by it1
+/// @param it2 : iterator to the second element
+/// @param pos2 : position of the element pointed by it2
+/// @param comp : comparison object
+/// @return result of the comparison
+//-----------------------------------------------------------------------------
+template < class Iter_t,
+ class Compare = typename compare_iter< Iter_t >::value_type >
+inline bool less_range (Iter_t it1, uint32_t pos1, Iter_t it2, uint32_t pos2,
+ Compare comp = Compare ( ))
+{
+ return (comp (*it1, *it2)) ? true : (pos2 < pos1) ? false
+ : not(comp (*it2, *it1));
+};
+
+//-----------------------------------------------------------------------------
+// function : full_merge4
+/// @brief Merge four ranges
+///
+/// @param dest: range where move the elements merged. Their size must be
+/// greater or equal than the sum of the sizes of the ranges
+/// in vrange_input
+/// @param vrange_input : array of ranges to merge
+/// @param nrange_input : number of ranges in vrange_input
+/// @param comp : comparison object
+/// @return range with all the elements moved with the size adjusted
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+range< Iter1_t > full_merge4 (const range< Iter1_t > &rdest,
+ range< Iter2_t > vrange_input[4],
+ uint32_t nrange_input, Compare comp)
+{
+ typedef range< Iter1_t > range1_t;
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ size_t ndest = 0;
+ uint32_t i = 0;
+ while (i < nrange_input) {
+ if (vrange_input[i].size ( ) != 0) {
+ ndest += vrange_input[i++].size ( );
+ }
+ else
+ {
+ for (uint32_t k = i + 1; k < nrange_input; ++k) {
+ vrange_input[k - 1] = vrange_input[k];
+ };
+ --nrange_input;
+ };
+ };
+
+ if (nrange_input == 0) return range1_t (rdest.first, rdest.first);
+ if (nrange_input == 1) return init_move (rdest, vrange_input[0]);
+ if (nrange_input == 2) {
+ return full_merge (rdest, vrange_input[0], vrange_input[1], comp);
+ };
+
+ //------------------------------------------------------------------------
+ // Initial sort
+ //------------------------------------------------------------------------
+ uint32_t pos[4] = {0, 1, 2, 3}, npos = nrange_input;
+
+ //-----------------------------------------------------------------------
+ // thanks to Steven Ross by their suggestion about the optimal
+ // sorting networks
+ //-----------------------------------------------------------------------
+ if (less_range (vrange_input[pos[1]].first, pos[1],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[1]);
+ };
+ if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[2]].first, pos[2], comp))
+ {
+ std::swap (pos[3], pos[2]);
+ };
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[2]);
+ };
+ if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[3]);
+ };
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[2]);
+ };
+
+ Iter1_t it_dest = rdest.first;
+ while (npos > 2) {
+ *(it_dest++) = std::move (*(vrange_input[pos[0]].first++));
+ if (vrange_input[pos[0]].size ( ) == 0) {
+ pos[0] = pos[1];
+ pos[1] = pos[2];
+ pos[2] = pos[3];
+ --npos;
+ }
+ else
+ {
+ if (less_range (vrange_input[pos[1]].first, pos[1],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[1]);
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[2]);
+ if (npos == 4 and
+ less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[2]].first, pos[2], comp))
+ {
+ std::swap (pos[2], pos[3]);
+ };
+ };
+ };
+ };
+ };
+
+ range1_t raux1 (rdest.first, it_dest), raux2 (it_dest, rdest.last);
+ if (pos[0] < pos[1]) {
+ return concat (raux1, full_merge (raux2, vrange_input[pos[0]],
+ vrange_input[pos[1]], comp));
+ }
+ else
+ {
+ return concat (raux1, full_merge (raux2, vrange_input[pos[1]],
+ vrange_input[pos[0]], comp));
+ };
+};
+
+//-----------------------------------------------------------------------------
+// function : uninit_full_merge4
+/// @brief Merge four ranges and put the result in uninitialized memory
+///
+/// @param dest: range where create and move the elements merged. Their
+/// size must be greater or equal than the sum of the sizes
+/// of the ranges in the array R
+/// @param vrange_input : array of ranges to merge
+/// @param nrange_input : number of ranges in vrange_input
+/// @param comp : comparison object
+/// @return range with all the elements move with the size adjusted
+//-----------------------------------------------------------------------------
+template < class Value_t, class Iter_t, class Compare >
+range< Value_t * > uninit_full_merge4 (const range< Value_t * > &dest,
+ range< Iter_t > vrange_input[4],
+ uint32_t nrange_input, Compare comp)
+{
+ typedef typename iterator_traits< Iter_t >::value_type type1;
+ static_assert (std::is_same< type1, Value_t >::value,
+ "Incompatible iterators\n");
+
+ size_t ndest = 0;
+ uint32_t i = 0;
+ while (i < nrange_input) {
+ if (vrange_input[i].size ( ) != 0) {
+ ndest += vrange_input[i++].size ( );
+ }
+ else
+ {
+ for (uint32_t k = i + 1; k < nrange_input; ++k) {
+ vrange_input[k - 1] = vrange_input[k];
+ };
+ --nrange_input;
+ };
+ };
+ if (nrange_input == 0) return range< Value_t * > (dest.first, dest.first);
+ if (nrange_input == 1) return uninit_move (dest, vrange_input[0]);
+ if (nrange_input == 2) {
+ return uninit_full_merge (dest, vrange_input[0], vrange_input[1], comp);
+ };
+
+ //------------------------------------------------------------------------
+ // Initial sort
+ //------------------------------------------------------------------------
+ uint32_t pos[4] = {0, 1, 2, 3}, npos = nrange_input;
+
+ //-----------------------------------------------------------------------
+ // thanks to Steven Ross by their suggestion about the optimal
+ // sorting networks
+ //-----------------------------------------------------------------------
+ if (less_range (vrange_input[pos[1]].first, pos[1],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[1]);
+ };
+ if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[2]].first, pos[2], comp))
+ {
+ std::swap (pos[3], pos[2]);
+ };
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[2]);
+ };
+ if (npos == 4 and less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[3]);
+ };
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[2]);
+ };
+
+ Value_t *it_dest = dest.first;
+ while (npos > 2) {
+ construct (&(*(it_dest++)),
+ std::move (*(vrange_input[pos[0]].first++)));
+ if (vrange_input[pos[0]].size ( ) == 0) {
+ pos[0] = pos[1];
+ pos[1] = pos[2];
+ pos[2] = pos[3];
+ --npos;
+ }
+ else
+ {
+ if (less_range (vrange_input[pos[1]].first, pos[1],
+ vrange_input[pos[0]].first, pos[0], comp))
+ {
+ std::swap (pos[0], pos[1]);
+ if (less_range (vrange_input[pos[2]].first, pos[2],
+ vrange_input[pos[1]].first, pos[1], comp))
+ {
+ std::swap (pos[1], pos[2]);
+ if (npos == 4 and
+ less_range (vrange_input[pos[3]].first, pos[3],
+ vrange_input[pos[2]].first, pos[2], comp))
+ {
+ std::swap (pos[2], pos[3]);
+ };
+ };
+ };
+ };
+ }; // end while (npos > 2)
+
+ range< Value_t * > raux1 (dest.first, it_dest), raux2 (it_dest, dest.last);
+ if (pos[0] < pos[1]) {
+ return concat (raux1,
+ uninit_full_merge (raux2, vrange_input[pos[0]],
+ vrange_input[pos[1]], comp));
+ }
+ else
+ {
+ return concat (raux1,
+ uninit_full_merge (raux2, vrange_input[pos[1]],
+ vrange_input[pos[0]], comp));
+ };
+};
+
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/util/merge_vector.hpp b/include/boost/sort/parallel/detail/util/merge_vector.hpp
new file mode 100755
index 0000000..a3dab9f
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/merge_vector.hpp
@@ -0,0 +1,194 @@
+//----------------------------------------------------------------------------
+/// @file merge_vector.hpp
+/// @brief In this file have the functions for to do a stable merge of
+// ranges, in a vector
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_MERGE_VECTOR_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//############################################################################
+// ##
+// F U S I O N O F ##
+// ##
+// A V E C T O R O F R A N G E S ##
+// ##
+//############################################################################
+using std::iterator_traits;
+//
+//-----------------------------------------------------------------------------
+// function : merge_level4
+/// @brief merge the ranges in the vector v_input with the full_merge4 function.
+/// The v_output vector is used as auxiliary memory in the internal
+/// process. The final results is in the dest range.
+/// All the ranges of v_output are inside the range dest
+/// @param dest : range where move the elements merged
+/// @param v_input : vector of ranges to merge
+/// @param v_output : vector of ranges obtained
+/// @param comp : comparison object
+/// @return range with all the elements moved
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+void merge_level4 (range< Iter1_t > dest,
+ std::vector< range< Iter2_t > > &v_input,
+ std::vector< range< Iter1_t > > &v_output, Compare comp)
+{
+ typedef range< Iter1_t > range1_t;
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ v_output.clear ( );
+ if (v_input.size ( ) == 0) return;
+ if (v_input.size ( ) == 1) {
+ v_output.emplace_back (init_move (dest, v_input[0]));
+ return;
+ };
+
+ uint32_t nrange = v_input.size ( );
+ uint32_t pos_ini = 0;
+ while (pos_ini < v_input.size ( )) {
+ uint32_t nmerge = (nrange + 3) >> 2;
+ uint32_t nelem = (nrange + nmerge - 1) / nmerge;
+ range1_t rz = full_merge4 (dest, &v_input[pos_ini], nelem, comp);
+ v_output.emplace_back (rz);
+ dest.first = rz.last;
+ pos_ini += nelem;
+ nrange -= nelem;
+ };
+ return;
+};
+//
+//-----------------------------------------------------------------------------
+// function : uninit_merge_level4
+/// @brief merge the ranges moving the objects and constructing them in
+/// uninitialized memory, in the vector v_input
+/// using full_merge4. The v_output vector is used as auxiliary memory
+/// in the internal process. The final results is in the dest range.
+/// All the ranges of v_output are inside the range dest
+///
+/// @param dest : range where move the elements merged
+/// @param v_input : vector of ranges to merge
+/// @param v_output : vector of ranges obtained
+/// @param comp : comparison object
+/// @return range with all the elements moved and constructed
+//-----------------------------------------------------------------------------
+template < class Value_t, class Iter_t, class Compare >
+void uninit_merge_level4 (range< Value_t * > dest,
+ std::vector< range< Iter_t > > &v_input,
+ std::vector< range< Value_t * > > &v_output,
+ Compare comp)
+{
+ typedef range< Value_t * > range1_t;
+ typedef typename iterator_traits< Iter_t >::value_type type1;
+ static_assert (std::is_same< type1, Value_t >::value,
+ "Incompatible iterators\n");
+
+ v_output.clear ( );
+ if (v_input.size ( ) == 0) return;
+ if (v_input.size ( ) == 1) {
+ v_output.emplace_back (uninit_move (dest, v_input[0]));
+ return;
+ };
+
+ uint32_t nrange = v_input.size ( );
+ uint32_t pos_ini = 0;
+ while (pos_ini < v_input.size ( )) {
+ uint32_t nmerge = (nrange + 3) >> 2;
+ uint32_t nelem = (nrange + nmerge - 1) / nmerge;
+ range1_t rz = uninit_full_merge4 (dest, &v_input[pos_ini], nelem, comp);
+ v_output.emplace_back (rz);
+ dest.first = rz.last;
+ pos_ini += nelem;
+ nrange -= nelem;
+ };
+ return;
+};
+//
+//-----------------------------------------------------------------------------
+// function : merge_vector4
+/// @brief merge the ranges in the vector v_input using the merge_level4
+/// function. The v_output vector is used as auxiliary memory in the
+/// internal process
+/// The final results is in the range_output range.
+/// All the ranges of v_output are inside the range range_output
+/// All the ranges of v_input are inside the range range_input
+/// @param range_input : range including all the ranges of v_input
+/// @param ange_output : range including all the elements of v_output
+/// @param v_input : vector of ranges to merge
+/// @param v_output : vector of ranges obtained
+/// @param comp : comparison object
+/// @return range with all the elements moved
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+range< Iter2_t >
+ merge_vector4 (range< Iter1_t > range_input, range< Iter2_t > range_output,
+ std::vector< range< Iter1_t > > &v_input,
+ std::vector< range< Iter2_t > > &v_output, Compare comp)
+{
+ typedef range< Iter2_t > range2_t;
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ v_output.clear ( );
+ if (v_input.size ( ) == 0) {
+ return range2_t (range_output.first, range_output.first);
+ };
+ if (v_input.size ( ) == 1) {
+ return init_move (range_output, v_input[0]);
+ };
+ bool sw = false;
+ uint32_t nrange = v_input.size ( );
+
+ while (nrange > 1) {
+ if (sw) {
+ merge_level4 (range_input, v_output, v_input, comp);
+ sw = false;
+ nrange = v_input.size ( );
+ }
+ else
+ {
+ merge_level4 (range_output, v_input, v_output, comp);
+ sw = true;
+ nrange = v_output.size ( );
+ };
+ };
+ return (sw) ? v_output[0] : init_move (range_output, v_input[0]);
+};
+
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/util/nbits.hpp b/include/boost/sort/parallel/detail/util/nbits.hpp
new file mode 100755
index 0000000..e1c50ea
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/nbits.hpp
@@ -0,0 +1,91 @@
+//----------------------------------------------------------------------------
+/// @file algorithm.hpp
+/// @brief This file contains the description of several low level algorithms
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_NBITS_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_NBITS_HPP
+
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//
+//##########################################################################
+// ##
+// G L O B A L V A R I B L E S ##
+// ##
+//##########################################################################
+//
+// this array represent the number of bits needed for to represent the
+// first 256 numbers
+static constexpr const uint32_t tmsb[256] = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
+//
+//##########################################################################
+// ##
+// I N L I N E F U N C T I O N S ##
+// ##
+//##########################################################################
+//
+//---------------------------------------------------------------------------
+// function : nbits32
+/// @brief Obtain the number of bits of a number equal or greater than num
+/// @param num : Number to examine
+/// @return Number of bits
+//---------------------------------------------------------------------------
+static inline uint32_t nbits32 (uint32_t num) noexcept
+{
+ int Pos = (num & 0xffff0000U) ? 16 : 0;
+ if ((num >> Pos) & 0xff00U) Pos += 8;
+ return (tmsb[num >> Pos] + Pos);
+}
+//
+//---------------------------------------------------------------------------
+// function : nbits64
+/// @brief Obtain the number of bits of a number equal or greater than num
+/// @param num : Number to examine
+/// @exception none
+/// @return Number of bits
+//---------------------------------------------------------------------------
+static inline uint32_t nbits64 (uint64_t num)
+{
+ uint32_t Pos = (num & 0xffffffff00000000ULL) ? 32 : 0;
+ if ((num >> Pos) & 0xffff0000ULL) Pos += 16;
+ if ((num >> Pos) & 0xff00ULL) Pos += 8;
+ return (tmsb[num >> Pos] + Pos);
+}
+
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/detail/util/range.hpp b/include/boost/sort/parallel/detail/util/range.hpp
new file mode 100755
index 0000000..ccd950a
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/range.hpp
@@ -0,0 +1,407 @@
+//----------------------------------------------------------------------------
+/// @file range.hpp
+/// @brief Define a range [first, last), and the associated operations
+///
+/// @author Copyright (c) 2016 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanyingfile LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_RANGE_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//----------------------------------------------------------------------------
+// USING SENTENCES
+//----------------------------------------------------------------------------
+using std::iterator_traits;
+
+///---------------------------------------------------------------------------
+/// @struct range
+/// @brief this represent a range between two iterators
+/// @remarks
+//----------------------------------------------------------------------------
+template < class Iter_t >
+struct range
+{
+ Iter_t first, last;
+ //
+ //------------------------------------------------------------------------
+ // function : range
+ /// @brief empty constructor
+ //------------------------------------------------------------------------
+ range (void){};
+ //
+ //------------------------------------------------------------------------
+ // function : range
+ /// @brief constructor with two parameters
+ /// @param frs : iterator to the first element
+ /// @param lst : iterator to the last element
+ //-----------------------------------------------------------------------
+ range (Iter_t frs, Iter_t lst) : first (frs), last (lst){};
+ //
+ //-----------------------------------------------------------------------
+ // function : empty
+ /// @brief indicate if the range is empty
+ /// @return true : empty false : not empty
+ //-----------------------------------------------------------------------
+ bool empty (void) const { return (first == last); };
+ //
+ //-----------------------------------------------------------------------
+ // function : not_empty
+ /// @brief indicate if the range is not empty
+ /// @return true : not empty false : empty
+ //-----------------------------------------------------------------------
+ bool not_empty (void) const { return (first != last); };
+ //
+ //-----------------------------------------------------------------------
+ // function : valid
+ /// @brief Indicate if the range is well constructed, and valid
+ /// @return true : valid, false : not valid
+ //-----------------------------------------------------------------------
+ bool valid (void) const { return ((last - first) >= 0); };
+ //
+ //-----------------------------------------------------------------------
+ // function : size
+ /// @brief return the size of the range
+ /// @return size
+ //-----------------------------------------------------------------------
+ size_t size (void) const { return (last - first); };
+ //
+ //------------------------------------------------------------------------
+ // function : front
+ /// @brief return an iterator to the first element of the range
+ /// @return iterator
+ //-----------------------------------------------------------------------
+ Iter_t front (void) const { return first; };
+ //
+ //-------------------------------------------------------------------------
+ // function : back
+ /// @brief return an iterator to the last element of the range
+ /// @return iterator
+ //-------------------------------------------------------------------------
+ Iter_t back (void) const { return (last - 1); };
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : concat
+/// @brief concatenate two contiguous ranges
+/// @param it1 : first range
+/// @param it2 : second range
+/// @return range resulting of the concatenation
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+range< Iter_t > concat (const range< Iter_t > &it1, const range< Iter_t > &it2)
+{
+ return range< Iter_t > (it1.first, it2.last);
+};
+//
+//-----------------------------------------------------------------------------
+// function : init_move
+/// @brief Move initialized objets from the range src to dest
+/// @param dest : range where move the objects
+/// @param src : range from where move the objects
+/// @return range with the objects moved and the size adjusted
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t >
+inline range< Iter2_t > init_move (const range< Iter2_t > &dest,
+ const range< Iter1_t > &src)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ if (src.size ( ) == 0) {
+ return range< Iter2_t > (dest.first, dest.first);
+ };
+ init_move (dest.first, src.first, src.last);
+ return range< Iter2_t > (dest.first, dest.first + src.size ( ));
+};
+//-----------------------------------------------------------------------------
+// function : uninit_move
+/// @brief Move uninitialized objets from the range src creating them in dest
+///
+/// @param dest : range where move and create the objects
+/// @param src : range from where move the objects
+/// @return range with the objects moved and the size adjusted
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t >
+inline range< Iter2_t > uninit_move (const range< Iter2_t > &dest,
+ const range< Iter1_t > &src)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ if (src.size ( ) == 0) {
+ return range< Iter2_t > (dest.first, dest.first);
+ };
+ uninit_move (dest.first, src.first, src.last);
+ return range< Iter2_t > (dest.first, dest.first + src.size ( ));
+};
+//
+//-----------------------------------------------------------------------------
+// function : destroy
+/// @brief destroy a range of objects
+/// @param rng : range to destroy
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+inline void destroy (range< Iter_t > rng)
+{
+ detail::util::destroy (rng.first, rng.last);
+};
+//
+//-----------------------------------------------------------------------------
+// function : init
+/// @brief initialize a range of objects with the object val moving across them
+/// @param rng : range of elements not initialized
+/// @param val : object used for the initialization
+/// @return range initialized
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+inline range< Iter_t >
+ init (const range< Iter_t > &rng,
+ typename iterator_traits< Iter_t >::value_type &val)
+{
+ init (rng.first, rng.last, val);
+ return rng;
+};
+//
+//-----------------------------------------------------------------------------
+// function : is_mergeable
+/// @brief : indicate if two ranges have a possible merge
+/// @param src1 : first range
+/// @param src2 : second range
+/// @param comp : object for to compare elements
+/// @return true : they can be merged
+/// false : they can't be merged
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+inline bool is_mergeable (const range< Iter1_t > &src1,
+ const range< Iter2_t > &src2, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ return comp (*(src2.front ( )), *(src1.back ( )));
+};
+//
+//-----------------------------------------------------------------------------
+// function : full_merge
+/// @brief Merge two contiguous ranges src1 and src2, and put the result in
+/// the range dest, returning the range merged
+///
+/// @param dest : range where locate the lements merged. the size of dest
+/// must be greater or equal than the sum of the sizes of
+/// src1 and src2
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param comp : comparison object
+/// @return range with the elements merged and the size adjusted
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare >
+inline range< Iter3_t > full_merge (const range< Iter3_t > &dest,
+ const range< Iter1_t > &src1,
+ const range< Iter2_t > &src2, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ typedef typename iterator_traits< Iter3_t >::value_type type3;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+ static_assert (std::is_same< type3, type2 >::value,
+ "Incompatible iterators\n");
+
+ return range< Iter3_t > (dest.first,
+ full_merge (src1.first, src1.last, src2.first,
+ src2.last, dest.first, comp));
+};
+
+//-----------------------------------------------------------------------------
+// function : uninit_full_merge
+/// @brief Merge two contiguous uninitialized ranges src1 and src2, and create
+/// and move the result in the uninitialized range dest, returning the
+/// range merged
+//
+/// @param dest : range where locate the elements merged. the size of dest
+/// must be greater or equal than the sum of the sizes of
+/// src1 and src2. Initially is uninitialize memory
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param comp : comparison object
+/// @return range with the elements merged and the size adjusted
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Value_t, class Compare >
+inline range< Value_t * > uninit_full_merge (const range< Value_t * > &dest,
+ const range< Iter1_t > &src1,
+ const range< Iter2_t > &src2,
+ Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+ static_assert (std::is_same< Value_t, type2 >::value,
+ "Incompatible iterators\n");
+
+ return range< Value_t * > (
+ dest.first, uninit_full_merge (src1.first, src1.last, src2.first,
+ src2.last, dest.first, comp));
+};
+//
+//---------------------------------------------------------------------------
+// function : half_merge
+/// @brief : Merge two initialized buffers. The first buffer is in a separate
+/// memory
+//
+/// @param dest : range where finish the two buffers merged
+/// @param src1 : first range to merge in a separate memory
+/// @param src2 : second range to merge, in the final part of the
+/// range where deposit the final results
+/// @param comp : object for compare two elements of the type pointed
+/// by the Iter1_t and Iter2_t
+/// @return : range with the two buffers merged
+//---------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+inline range< Iter2_t > half_merge (const range< Iter2_t > &dest,
+ const range< Iter1_t > &src1,
+ const range< Iter2_t > &src2, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ return range< Iter2_t > (dest.first,
+ half_merge (src1.first, src1.last, src2.first,
+ src2.last, dest.first, comp));
+};
+//
+//-----------------------------------------------------------------------------
+// function : in_place_merge_uncontiguous
+/// @brief : merge two non contiguous ranges src1, src2, using the range
+/// aux as auxiliary memory. The results are in the original ranges
+//
+/// @param src1 : first range to merge
+/// @param src2 : second range to merge
+/// @param aux : auxiliary range used in the merge
+/// @param comp : object for to compare elements
+/// @return true : not changes done, false : changes in the buffers
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Iter3_t, class Compare >
+inline bool in_place_merge_uncontiguous (const range< Iter1_t > &src1,
+ const range< Iter2_t > &src2,
+ const range< Iter3_t > &aux,
+ Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ typedef typename iterator_traits< Iter3_t >::value_type type3;
+
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+ static_assert (std::is_same< type3, type2 >::value,
+ "Incompatible iterators\n");
+
+ return in_place_merge_uncontiguous (src1.first, src1.last, src2.first,
+ src2.last, aux.first, comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : in_place_merge
+/// @brief : merge two contiguous ranges ( src1, src2) using buf as
+/// auxiliary memory. The results are in the same ranges
+/// @param src1 : first range to merge
+/// @param src1 : second range to merge
+/// @param buf : auxiliary memory used in the merge
+/// @param comp : object for to compare elements
+/// @return true : not changes done, false : changes in the buffers
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+inline range< Iter1_t >
+ in_place_merge (const range< Iter1_t > &src1, const range< Iter1_t > &src2,
+ const range< Iter2_t > &buf, Compare comp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ in_place_merge (src1.first, src1.last, src2.last, buf.first, comp);
+ return concat (src1, src2);
+};
+
+//
+//-----------------------------------------------------------------------------
+// function : merge_flow
+/// @brief : merge two ranges, as part of a merge the ranges in a list. This
+/// function reduce the number of movements compared with inplace_merge
+/// when you need to merge a sequence of ranges.
+/// This function merge the ranges rbuf and rng2, and the results
+/// are in rng1 and rbuf
+//
+/// @param rng1 : range where locate the first elements of the merge
+/// @param rbuf : range which provide the first elements, and where store
+/// the last results of the merge
+/// @param rng2 : range which provide the last elements to merge
+/// @param comp : object for to compare elements
+/// @return true : not changes done, false : changes in the buffers
+//-----------------------------------------------------------------------------
+template < class Iter1_t, class Iter2_t, class Compare >
+void merge_flow (range< Iter1_t > rng1, range< Iter2_t > rbuf,
+ range< Iter1_t > rng2, Compare cmp)
+{
+ typedef typename iterator_traits< Iter1_t >::value_type type1;
+ typedef typename iterator_traits< Iter2_t >::value_type type2;
+ static_assert (std::is_same< type1, type2 >::value,
+ "Incompatible iterators\n");
+
+ range< Iter2_t > rbx (rbuf);
+ range< Iter1_t > rx1 (rng1), rx2 (rng2);
+ assert (rbx.size ( ) == rx1.size ( ) and rx1.size ( ) == rx2.size ( ));
+ while (rx1.first != rx1.last) {
+ *(rx1.first++) = (cmp (*rbx.first, *rx2.first))
+ ? std::move (*(rbx.first++))
+ : std::move (*(rx2.first++));
+ };
+ if (rx2.first == rx2.last) return;
+ if (rbx.first == rbx.last)
+ util::init_move (rbuf, rng2);
+ else
+ util::half_merge (rbuf, rx2, rbx, cmp);
+};
+
+//****************************************************************************
+}; // End namespace util
+}; // End namespace detail
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/boost/sort/parallel/detail/util/spinlock.hpp b/include/boost/sort/parallel/detail/util/spinlock.hpp
new file mode 100755
index 0000000..ac8ca7f
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/spinlock.hpp
@@ -0,0 +1,96 @@
+//----------------------------------------------------------------------------
+/// @file spinlock_t.hpp
+/// @brief
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanyingfile LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_SPINLOCK_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//
+//---------------------------------------------------------------------------
+/// @class spinlock_t
+/// @brief This class implement, from atomic variables, a spinlock
+/// @remarks This class meet the BasicLockable requirements ( lock, unlock )
+//---------------------------------------------------------------------------
+class spinlock_t
+{
+ private:
+ //------------------------------------------------------------------------
+ // P R I V A T E V A R I A B L E S
+ //------------------------------------------------------------------------
+ std::atomic_flag af;
+
+ public:
+ //
+ //-------------------------------------------------------------------------
+ // function : spinlock_t
+ /// @brief class constructor
+ /// @param [in]
+ //-------------------------------------------------------------------------
+ explicit spinlock_t ( ) noexcept { af.clear ( ); };
+ //
+ //-------------------------------------------------------------------------
+ // function : lock
+ /// @brief Lock the spinlock_t
+ //-------------------------------------------------------------------------
+ void lock ( ) noexcept
+ {
+ if (af.test_and_set (std::memory_order_acquire)) {
+ std::this_thread::yield ( );
+ while (af.test_and_set (std::memory_order_relaxed)) {
+ std::this_thread::yield ( );
+ };
+ };
+ };
+ //
+ //-------------------------------------------------------------------------
+ // function : try_lock
+ /// @brief Try to lock the spinlock_t, if not, return false
+ /// @return true : locked
+ /// false: not previous locked
+ //-------------------------------------------------------------------------
+ bool try_lock ( ) noexcept
+ {
+ return not af.test_and_set (std::memory_order_acquire);
+ };
+ //
+ //-------------------------------------------------------------------------
+ // function : unlock
+ /// @brief unlock the spinlock_t
+ //-------------------------------------------------------------------------
+ void unlock ( ) noexcept { af.clear (std::memory_order_release); };
+
+}; // E N D C L A S S S P I N L O C K
+//
+//***************************************************************************
+}; // end namespace util
+}; // end namespace detail
+}; // end namespace parallel
+}; // end namespace sort
+}; // end namespace boost
+//***************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/detail/util/stack_cnc.hpp b/include/boost/sort/parallel/detail/util/stack_cnc.hpp
new file mode 100755
index 0000000..e262a9c
--- /dev/null
+++ b/include/boost/sort/parallel/detail/util/stack_cnc.hpp
@@ -0,0 +1,131 @@
+//----------------------------------------------------------------------------
+/// @file stack_cnc.hpp
+/// @brief This file contains the implementation concurrent stack
+///
+/// @author Copyright (c) 2010 2015 Francisco José Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanyingfile LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_DETAIL_UTIL_stack_cnc_t_HPP
+#define __BOOST_SORT_PARALLEL_DETAIL_UTIL_stack_cnc_t_HPP
+
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+namespace detail
+{
+namespace util
+{
+//
+//###########################################################################
+// ##
+// ################################################################ ##
+// # # ##
+// # C L A S S # ##
+// # S T A C K _ C N C # ##
+// # # ##
+// ################################################################ ##
+// ##
+//###########################################################################
+//
+//---------------------------------------------------------------------------
+/// @class stack_cnc
+/// @brief This class is a concurrent stack controled by a spin_lock
+/// @remarks
+//---------------------------------------------------------------------------
+template < typename T, typename Allocator = std::allocator< T > >
+class stack_cnc
+{
+ public:
+ //------------------------------------------------------------------------
+ // D E F I N I T I O N S
+ //------------------------------------------------------------------------
+ typedef std::vector< T, Allocator > vector_t;
+ typedef typename vector_t::size_type size_type;
+ typedef typename vector_t::difference_type difference_type;
+ typedef typename vector_t::value_type value_type;
+ typedef typename vector_t::pointer pointer;
+ typedef typename vector_t::const_pointer const_pointer;
+ typedef typename vector_t::reference reference;
+ typedef typename vector_t::const_reference const_reference;
+ typedef typename vector_t::allocator_type allocator_type;
+ typedef Allocator alloc_t;
+
+ protected:
+ //-------------------------------------------------------------------------
+ // INTERNAL VARIABLES
+ //-------------------------------------------------------------------------
+ vector_t v_t;
+ mutable spinlock_t spl;
+
+ public:
+ //
+ //-------------------------------------------------------------------------
+ // function : stack_cnc
+ /// @brief constructor
+ //-------------------------------------------------------------------------
+ explicit stack_cnc (void) : v_t ( ){};
+
+ //
+ //-------------------------------------------------------------------------
+ // function : stack_cnc
+ /// @brief Move constructor
+ //-------------------------------------------------------------------------
+ stack_cnc (stack_cnc &&) = delete;
+ //
+ //-------------------------------------------------------------------------
+ // function : ~stack_cnc
+ /// @brief Destructor
+ //-------------------------------------------------------------------------
+ virtual ~stack_cnc (void) { v_t.clear ( ); };
+
+ //-------------------------------------------------------------------------
+ // function : emplace_back
+ /// @brief Insert one element in the back of the container
+ /// @param args : group of arguments for to build the object to insert. Can
+ /// be values, references or rvalues
+ //-------------------------------------------------------------------------
+ template < class... Args >
+ void emplace_back (Args &&... args)
+ {
+ std::lock_guard< spinlock_t > guard (spl);
+ v_t.emplace_back (std::forward< Args > (args)...);
+ };
+
+ //
+ //-------------------------------------------------------------------------
+ // function :pop_move_back
+ /// @brief if exist, move the last element to P, and delete it
+ /// @param P : reference to a variable where move the element
+ /// @return true - Element moved and deleted
+ /// false - Empty stack_cnc
+ //-------------------------------------------------------------------------
+ bool pop_move_back (value_type &P)
+ {
+ std::lock_guard< spinlock_t > S (spl);
+ if (v_t.size ( ) == 0) return false;
+ P = std::move (v_t.back ( ));
+ v_t.pop_back ( );
+ return true;
+ };
+
+}; // end class stack_cnc
+
+//***************************************************************************
+}; // end namespace util
+}; // end namespace detail
+}; // end namespace parallel
+}; // end namespace sort
+}; // end namespace boost
+//***************************************************************************
+#endif
diff --git a/include/boost/sort/parallel/sort.hpp b/include/boost/sort/parallel/sort.hpp
new file mode 100755
index 0000000..332e782
--- /dev/null
+++ b/include/boost/sort/parallel/sort.hpp
@@ -0,0 +1,349 @@
+//----------------------------------------------------------------------------
+/// @file sort.hpp
+/// @brief This file contains the sort functions of the sort library
+///
+/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
+/// Distributed under the Boost Software License, Version 1.0.\n
+/// ( See accompanying file LICENSE_1_0.txt or copy at
+/// http://www.boost.org/LICENSE_1_0.txt )
+/// @version 0.1
+///
+/// @remarks
+//-----------------------------------------------------------------------------
+#ifndef __BOOST_SORT_PARALLEL_SORT_HPP
+#define __BOOST_SORT_PARALLEL_SORT_HPP
+
+#include
+#include
+#include
+#include
+
+namespace boost
+{
+namespace sort
+{
+namespace parallel
+{
+//
+//****************************************************************************
+// USING AND DEFINITIONS
+//****************************************************************************
+using std::iterator_traits;
+using detail::less_ptr_no_null;
+using detail::util::compare_iter;
+using detail::util::enable_if_not_integral;
+//
+//############################################################################
+// ##
+// S O R T , I N D I R E C T _ S O R T ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : sort
+/// @brief non stable sort, based internally in the intro_sort algorithm
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare = compare_iter< Iter_t > >
+void sort (Iter_t first, Iter_t last, Compare comp = Compare ( ))
+{
+ detail::intro_sort (first, last, comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : indirect_sort
+/// @brief : indirect sort. This function sort a vector of iterators to the
+/// elements. And after, move the elements, for to be phisically
+/// sorted. This is useful with big objects
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare = compare_iter< Iter_t > >
+void indirect_sort (Iter_t first, Iter_t last, Compare comp = Compare ( ))
+{
+ typedef less_ptr_no_null< Iter_t, Compare > compare_ptr;
+
+ std::vector< Iter_t > v_iter;
+ detail::create_index (first, last, v_iter);
+ detail::intro_sort (v_iter.begin ( ), v_iter.end ( ), compare_ptr (comp));
+ detail::sort_index (first, v_iter);
+};
+//
+//############################################################################
+// ##
+// ##
+// P A R A L L E L _ S O R T ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : parallel_sort
+/// @brief this function implement a non stable parallel sort. The number of
+/// threads to use is the HW threads of the machine
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void parallel_sort (Iter_t first, Iter_t last)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::select_block_indirect (first, last, Compare());
+};
+//
+//-----------------------------------------------------------------------------
+// function : parallel_sort
+/// @brief this function implement a non stable parallel sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void parallel_sort (Iter_t first, Iter_t last, uint32_t nthread)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::select_block_indirect (first, last, Compare(), nthread);
+};
+//-----------------------------------------------------------------------------
+// function : parallel_sort
+/// @brief this function implement a non stable parallel sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+void parallel_sort (Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
+{
+ detail::select_block_indirect (first, last, comp, nthread);
+};
+//
+//-----------------------------------------------------------------------------
+// function : parallel_sort
+/// @brief : non stable parallel sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare,
+ enable_if_not_integral< Compare > * = nullptr >
+void parallel_sort (Iter_t first, Iter_t last, Compare comp)
+{
+ detail::select_block_indirect (first, last, comp);
+};
+
+//
+//############################################################################
+// ##
+// ##
+// S T A B L E _ S O R T , I N D I R E C T _ S T A B L E _ S O R T ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : stable_sort
+/// @brief this function implement a single thread stable sort
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare = compare_iter< Iter_t > >
+void stable_sort (Iter_t first, Iter_t last, Compare comp = Compare ( ))
+{
+ detail::spin_sort< Iter_t, Compare > (first, last, comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : indirect_stable_sort
+/// @brief : indirect stable sort. This function make a stable sort of a vector
+/// of iterators to the elements. And after, move the elements, for
+/// to be phisically sorted. This is useful with big objects
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare = compare_iter< Iter_t > >
+void indirect_stable_sort (Iter_t first, Iter_t last,
+ Compare comp = Compare ( ))
+{
+ typedef less_ptr_no_null< Iter_t, Compare > compare_ptr;
+ typedef typename std::vector< Iter_t >::iterator iter_ptr;
+
+ std::vector< Iter_t > v_iter;
+ detail::create_index (first, last, v_iter);
+ detail::spin_sort< iter_ptr, compare_ptr >
+ (v_iter.begin ( ), v_iter.end ( ), compare_ptr (comp));
+ detail::sort_index (first, v_iter);
+};
+//
+//############################################################################
+// ##
+// ##
+// P A R A L L E L _ S T A B L E _ S O R T ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : parallel_stable_sort
+/// @brief : parallel stable sort algorithm.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void parallel_stable_sort (Iter_t first, Iter_t last)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::parallel_stable_sort< Iter_t, Compare > (first, last);
+};
+//
+//-----------------------------------------------------------------------------
+// function : parallel_stable_sort
+/// @brief parallel stable sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void parallel_stable_sort (Iter_t first, Iter_t last, uint32_t nthread)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::parallel_stable_sort< Iter_t, Compare > (first, last, nthread);
+};
+//
+//-----------------------------------------------------------------------------
+// function : parallel_stable_sort
+/// @brief : parallel stable sort.
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare,
+ enable_if_not_integral< Compare > * = nullptr >
+void parallel_stable_sort (Iter_t first, Iter_t last, Compare comp)
+{
+ detail::parallel_stable_sort< Iter_t, Compare > (first, last, comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : parallel_stable_sort
+/// @brief parallel stable sort
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t, typename Compare >
+void parallel_stable_sort (Iter_t first, Iter_t last, Compare comp,
+ uint32_t nthread)
+{
+ detail::parallel_stable_sort< Iter_t, Compare > (first, last, comp,
+ nthread);
+};
+//
+//############################################################################
+// ##
+// ##
+// S A M P L E _ S O R T ##
+// ##
+// ##
+//############################################################################
+//
+//-----------------------------------------------------------------------------
+// function : sample_sort
+/// @brief parallel sample sort algorithm (stable sort)
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void sample_sort (Iter_t first, Iter_t last)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::sample_sort< Iter_t, Compare > (first, last);
+};
+//
+//-----------------------------------------------------------------------------
+// function : sample_sort
+/// @brief parallel sample sort algorithm (stable sort)
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t >
+void sample_sort (Iter_t first, Iter_t last, uint32_t nthread)
+{
+ typedef compare_iter< Iter_t > Compare;
+ detail::sample_sort< Iter_t, Compare > (first, last, nthread);
+};
+//
+//-----------------------------------------------------------------------------
+// function : sample_sort
+/// @brief parallel sample sort algorithm (stable sort)
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare,
+ enable_if_not_integral< Compare > * = nullptr >
+void sample_sort (Iter_t first, Iter_t last, Compare comp)
+{
+ detail::sample_sort< Iter_t, Compare > (first, last, comp);
+};
+//
+//-----------------------------------------------------------------------------
+// function : sample_sort
+/// @brief parallel sample sort algorithm (stable sort)
+///
+/// @param first : iterator to the first element of the range to sort
+/// @param last : iterator after the last element to the range to sort
+/// @param comp : object for to compare two elements pointed by Iter_t
+/// iterators
+/// @param nthread : Number of threads to use in the process. When this value
+/// is lower than 2, the sorting is done with 1 thread
+//-----------------------------------------------------------------------------
+template < class Iter_t, class Compare >
+void sample_sort (Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
+{
+ detail::sample_sort< Iter_t, Compare > (first, last, comp, nthread);
+};
+//
+//****************************************************************************
+}; // End namespace parallel
+}; // End namespace sort
+}; // End namespace boost
+//****************************************************************************
+//
+#endif
diff --git a/include/counting_iterator.hpp b/include/counting_iterator.hpp
new file mode 100644
index 0000000..3562d11
--- /dev/null
+++ b/include/counting_iterator.hpp
@@ -0,0 +1,41 @@
+#pragma once
+
+#include
+
+namespace mrzv
+{
+
+class counting_iterator
+{
+ public:
+ typedef size_t value_type;
+ typedef const size_t& reference;
+ typedef const size_t* pointer;
+ typedef ptrdiff_t difference_type;
+ typedef std::random_access_iterator_tag iterator_category;
+
+ explicit counting_iterator(size_t x): m_inc(x) {}
+ size_t const& base() const { return m_inc; }
+ reference operator*() const { return m_inc; }
+ counting_iterator& operator++() { ++m_inc; return *this; }
+ counting_iterator& operator--() { --m_inc; return *this; }
+
+ counting_iterator& operator+=(difference_type i) { m_inc += i; return *this; }
+ counting_iterator& operator-=(difference_type i) { m_inc -= i; return *this; }
+
+ bool operator==(const counting_iterator& other) const { return m_inc == other.m_inc; }
+ bool operator!=(const counting_iterator& other) const { return m_inc != other.m_inc; }
+
+ counting_iterator operator+(difference_type x) const { counting_iterator result = *this; result += x; return result; }
+
+ bool operator<(const counting_iterator& other) const { return m_inc < other.m_inc; }
+
+ friend
+ difference_type operator-(const counting_iterator& x, const counting_iterator& y) { return x.m_inc - y.m_inc; }
+
+ // and loads of others
+ private:
+ size_t m_inc;
+};
+
+}
diff --git a/include/reclamation.hpp b/include/reclamation.hpp
new file mode 100644
index 0000000..27f3c04
--- /dev/null
+++ b/include/reclamation.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include
+#include
+
+namespace mrzv
+{
+
+template
+struct MemoryManager
+{
+ std::vector retiring_, retired_, to_delete_;
+ atomic_ref counter_;
+ unsigned n_threads_;
+ bool even_epoch_ = false;
+
+ MemoryManager(int& epoch_counter, unsigned n_threads):
+ counter_(epoch_counter), n_threads_(n_threads) {}
+
+ ~MemoryManager()
+ {
+ for(T* p : to_delete_)
+ delete p;
+ for(T* p : retired_)
+ delete p;
+ for(T* p : retiring_)
+ delete p;
+ }
+ bool is_even_epoch(int counter) const { return (counter / n_threads_) % 2 == 0; }
+ void retire(T* ptr) { if(ptr) retiring_.push_back(ptr); }
+ void quiescent()
+ {
+ if (even_epoch_ != is_even_epoch(counter_))
+ {
+ ++counter_;
+ even_epoch_ = !even_epoch_;
+ for(T* p : to_delete_)
+ delete p;
+ to_delete_.clear();
+ retired_.swap(to_delete_);
+ retiring_.swap(retired_);
+ }
+ }
+};
+
+}
diff --git a/include/trivial_concurrent_hash_map.hpp b/include/trivial_concurrent_hash_map.hpp
new file mode 100644
index 0000000..b3c05c6
--- /dev/null
+++ b/include/trivial_concurrent_hash_map.hpp
@@ -0,0 +1,128 @@
+#pragma once
+
+// Atuhor: Dmitriy Morozov
+// Version 2020-03-12
+
+#include
+#include
+
+#include
+
+namespace mrzv
+{
+
+template
+class trivial_concurrent_hash_map
+{
+ public:
+ using key_type = Key;
+ using mapped_type = T;
+ using value_type = std::pair;
+ using Storage = std::vector;
+ using iterator = typename Storage::iterator;
+
+ static constexpr size_t storage_multiplier = 8;
+
+ public:
+ void reserve(size_t hint)
+ {
+ size_t sz = 1;
+ while (sz < hint*storage_multiplier)
+ sz *= 2;
+ storage_.clear();
+ storage_.resize(sz, dummy());
+ }
+
+ inline iterator find(Key k);
+ inline std::pair
+ insert(value_type x);
+
+ // CAS update value
+ inline bool update(iterator it, T& expected, T desired)
+ { return atomic_ref(it->second).compare_exchange_weak(expected, desired); }
+
+ iterator end() { return storage_.end(); }
+
+ Key key(iterator it) { return atomic_ref(it->first).load(); }
+ T value(iterator it) { return atomic_ref(it->second).load(); }
+
+ template
+ void foreach(const F& f) const { for(auto& x : storage_) if (x.first != dummy_key()) f(x); }
+
+ public:
+ static Key dummy_key() { return static_cast(-1); }
+ static T dummy_value() { return static_cast(-1); }
+ static value_type dummy() { return value_type { dummy_key(), dummy_value() }; }
+
+ private:
+ static size_t hash(Key k) { return H()(k); }
+ static bool equal(Key k1, Key k2) { return E()(k1,k2); }
+
+ private:
+ Storage storage_;
+};
+
+}
+
+
+template
+typename mrzv::trivial_concurrent_hash_map::iterator
+mrzv::trivial_concurrent_hash_map::
+find(Key k)
+{
+ auto size = storage_.size();
+ auto idx = hash(k) % size;
+
+ while(true)
+ {
+ iterator it = storage_.begin() + idx;
+
+ atomic_ref ak(it->first);
+ Key kk = ak;
+
+ if (equal(kk, k)) {
+ while(value(it) == dummy_value()); // make sure a value has been written (possibly should add exponential backoff)
+ return it;
+ }
+ else if (equal(kk, dummy_key()))
+ return end();
+
+ idx = (idx + 1) % size;
+ }
+}
+
+template
+std::pair::iterator, bool>
+mrzv::trivial_concurrent_hash_map::
+insert(value_type x)
+{
+ Key k = x.first;
+
+ auto size = storage_.size();
+ auto idx = hash(k) % size;
+
+ while(true)
+ {
+ iterator it = storage_.begin() + idx;
+
+ atomic_ref ak(it->first);
+ Key kk = ak;
+
+ if (equal(kk, k))
+ return { it, false };
+ else if (equal(kk, dummy_key()))
+ {
+ Key dk = dummy_key();
+ if (ak.compare_exchange_weak(dk, k))
+ {
+ atomic_ref av(it->second);
+ av.store(x.second);
+ return { it, true };
+ } else if (dk == k) // somebody overwrote our own key, we lose
+ return { it, false };
+ // else something is written, but not our key, continue scanning
+ }
+
+ idx = (idx + 1) % size;
+ }
+}
diff --git a/ripser.cpp b/ripser.cpp
index cd546e8..0c82aa4 100644
--- a/ripser.cpp
+++ b/ripser.cpp
@@ -38,11 +38,25 @@
//#define USE_COEFFICIENTS
-//#define INDICATE_PROGRESS
-#define PRINT_PERSISTENCE_PAIRS
+#define INDICATE_PROGRESS
+//#define PRINT_PERSISTENCE_PAIRS
+
+//#define USE_SERIAL
+//#define USE_SHUFFLED_SERIAL
+
+#if defined(USE_SERIAL) || defined(USE_SHUFFLED_SERIAL)
+#define USING_SERIAL
+#define USE_SERIAL_ATOMIC_REF
+#endif
+
+#if !(defined USING_SERIAL)
+#define USE_TRIVIAL_CONCURRENT_HASHMAP
+#endif
//#define USE_GOOGLE_HASHMAP
+//#define USE_TBB_HASHMAP
+
#include
#include
#include
@@ -54,6 +68,30 @@
#include
#include
+#include
+#include
+#include
+#include
+
+#include
+
+#ifndef USING_SERIAL
+#include
+#endif
+
+#ifdef USE_SHUFFLED_SERIAL
+#include
+#endif
+
+#ifdef USE_PARALLEL_STL
+#include
+#endif
+
+#ifdef USE_TBB
+#include
+#include
+#endif
+
#ifdef USE_GOOGLE_HASHMAP
#include
template
@@ -62,9 +100,45 @@ class hash_map : public google::dense_hash_map {
explicit hash_map() : google::dense_hash_map() { this->set_empty_key(-1); }
inline void reserve(size_t hint) { this->resize(hint); }
};
+#elif defined(USE_TBB_HASHMAP)
+#include
+template
+class hash_map : public tbb::concurrent_unordered_map
+{
+ public:
+ using Parent = tbb::concurrent_unordered_map;
+ using iterator = typename Parent::iterator;
+
+ Key key(iterator it) const { return it->first; }
+ T value(iterator it) const { return it->second; }
+
+ bool update(iterator it, T& expected, T desired) { it->second = desired; return true; }
+
+ template
+ void foreach(const F& f) const { for(auto& x : (*this)) f(x); }
+
+ void reserve(size_t hint) {}
+};
+#elif defined(USE_TRIVIAL_CONCURRENT_HASHMAP)
+#include
+template
+class hash_map : public mrzv::trivial_concurrent_hash_map {};
#else
template
-class hash_map : public std::unordered_map {};
+class hash_map : public std::unordered_map
+{
+ public:
+ using Parent = std::unordered_map;
+ using iterator = typename Parent::iterator;
+
+ Key key(iterator it) const { return it->first; }
+ T value(iterator it) const { return it->second; }
+
+ bool update(iterator it, T& expected, T desired) { it->second = desired; return true; }
+
+ template
+ void foreach(const F& f) const { for(auto& x : (*this)) f(x); }
+};
#endif
typedef float value_t;
@@ -78,6 +152,7 @@ static const std::chrono::milliseconds time_step(40);
static const std::string clear_line("\r\033[K");
static const size_t num_coefficient_bits = 8;
+static const index_t coefficient_mask = (static_cast(1) << num_coefficient_bits) - 1;
static const index_t max_simplex_index =
(1l << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1;
@@ -134,26 +209,17 @@ std::vector multiplicative_inverse_vector(const coefficient_t m)
#ifdef USE_COEFFICIENTS
-struct __attribute__((packed)) entry_t {
- index_t index : 8 * sizeof(index_t) - num_coefficient_bits;
- coefficient_t coefficient : num_coefficient_bits;
- entry_t(index_t _index, coefficient_t _coefficient)
- : index(_index), coefficient(_coefficient) {}
- entry_t(index_t _index) : index(_index), coefficient(0) {}
- entry_t() : index(0), coefficient(0) {}
-};
-
-static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t");
+typedef index_t entry_t;
-entry_t make_entry(index_t i, coefficient_t c) { return entry_t(i, c); }
-index_t get_index(const entry_t& e) { return e.index; }
-index_t get_coefficient(const entry_t& e) { return e.coefficient; }
-void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; }
+entry_t make_entry(index_t i, coefficient_t c) { return (i << num_coefficient_bits) | c; }
+index_t get_index(const entry_t& e) { return (e >> num_coefficient_bits); }
+index_t get_coefficient(const entry_t& e) { return (e & coefficient_mask); }
+void set_coefficient(entry_t& e, const coefficient_t c) { e = (e & ~coefficient_mask) | c; }
-std::ostream& operator<<(std::ostream& stream, const entry_t& e) {
- stream << get_index(e) << ":" << get_coefficient(e);
- return stream;
-}
+//std::ostream& operator<<(std::ostream& stream, const entry_t& e) {
+// stream << get_index(e) << ":" << get_coefficient(e);
+// return stream;
+//}
#else
@@ -197,7 +263,7 @@ void set_coefficient(diameter_entry_t& p, const coefficient_t c) {
}
template struct greater_diameter_or_smaller_index {
- bool operator()(const Entry& a, const Entry& b) {
+ bool operator()(const Entry& a, const Entry& b) const {
return (get_diameter(a) > get_diameter(b)) ||
((get_diameter(a) == get_diameter(b)) && (get_index(a) < get_index(b)));
}
@@ -263,9 +329,6 @@ struct sparse_distance_matrix {
index_t num_edges;
- mutable std::vector::const_reverse_iterator> neighbor_it;
- mutable std::vector::const_reverse_iterator> neighbor_end;
-
sparse_distance_matrix(std::vector>&& _neighbors,
index_t _num_edges)
: neighbors(std::move(_neighbors)), num_edges(_num_edges) {}
@@ -338,29 +401,31 @@ template T begin(std::pair& p) { return p.first; }
template T end(std::pair& p) { return p.second; }
template class compressed_sparse_matrix {
- std::vector bounds;
- std::vector entries;
-
- typedef typename std::vector::iterator iterator;
- typedef std::pair iterator_pair;
+ std::vector*> columns;
public:
- size_t size() const { return bounds.size(); }
+ using Column = std::vector;
- iterator_pair subrange(const index_t index) {
- return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]),
- entries.begin() + bounds[index]};
- }
+ compressed_sparse_matrix(size_t n):
+ columns(n, nullptr) {}
+ ~compressed_sparse_matrix() { for(Column* x : columns) delete x; }
- void append_column() { bounds.push_back(entries.size()); }
+ size_t size() const { return columns.size(); }
- void push_back(const ValueType e) {
- assert(0 < size());
- entries.push_back(e);
- ++bounds.back();
- }
+ Column* column(const index_t index)
+ { return mrzv::atomic_ref(columns[index]).load(); }
+
+ Column* exchange(const index_t index, Column* desired)
+ { return mrzv::atomic_ref(columns[index]).exchange(desired); }
+
+ void store(const index_t index, Column* desired)
+ { return mrzv::atomic_ref(columns[index]).store(desired); }
+
+ bool update(const index_t index, Column*& expected, Column* desired)
+ { return mrzv::atomic_ref(columns[index]).compare_exchange_weak(expected, desired); }
};
+
template
index_t get_max(index_t top, const index_t bottom, const Predicate pred) {
if (!pred(top)) {
@@ -383,9 +448,9 @@ template class ripser {
const value_t threshold;
const float ratio;
const coefficient_t modulus;
+ const unsigned num_threads;
const binomial_coeff_table binomial_coeff;
const std::vector multiplicative_inverse;
- mutable std::vector cofacet_entries;
struct entry_hash {
std::size_t operator()(const entry_t& e) const {
@@ -399,14 +464,26 @@ template class ripser {
}
};
- typedef hash_map entry_hash_map;
+ typedef hash_map entry_hash_map;
+
+ typedef compressed_sparse_matrix Matrix;
+ typedef typename Matrix::Column MatrixColumn;
+
+ typedef std::priority_queue,
+ greater_diameter_or_smaller_index> WorkingColumn;
public:
ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio,
- coefficient_t _modulus)
+ coefficient_t _modulus, unsigned _num_threads)
: dist(std::move(_dist)), n(dist.size()),
dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold),
- ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2),
+ ratio(_ratio), modulus(_modulus),
+#ifndef USING_SERIAL
+ num_threads((_num_threads == 0 ? std::thread::hardware_concurrency() : _num_threads)),
+#else
+ num_threads(1),
+#endif
+ binomial_coeff(n, dim_max + 2),
multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {}
index_t get_max_vertex(const index_t idx, const index_t k, const index_t n) const {
@@ -444,28 +521,93 @@ template class ripser {
columns_to_reduce.clear();
std::vector next_simplices;
- for (diameter_index_t& simplex : simplices) {
- simplex_coboundary_enumerator cofacets(diameter_entry_t(simplex, 1), dim, *this);
+ const size_t chunk_size = 1024;
+ std::atomic achunk {0};
+#ifdef INDICATE_PROGRESS
+ std::atomic