From 8f6f6e84e4d79741d52b36657a66cbac661ea58d Mon Sep 17 00:00:00 2001 From: "John F. Carr" Date: Wed, 17 Nov 2021 18:58:20 -0500 Subject: [PATCH] Version 2.0 release candidate 2 Co-authored-by: John F. Carr Co-authored-by: TB Schardl --- CMakeLists.txt | 7 +- cmake/Modules/AddCheetah.cmake | 4 +- cmake/base-config-ix.cmake | 1 + cmake/config-ix.cmake | 1 + include/CMakeLists.txt | 19 +- include/cilk/cilk.h | 2 + include/cilk/cilk_api.h | 18 +- include/cilk/holder.h | 1014 +-------- include/cilk/hyperobject_base.h | 54 - include/cilk/metaprogramming.h | 587 ----- include/cilk/opadd_reducer.h | 18 + include/cilk/ostream_reducer.h | 65 + include/cilk/reducer.h | 1866 --------------- include/cilk/reducer_file.h | 48 - include/cilk/reducer_list.h | 1146 ---------- include/cilk/reducer_max.h | 57 - include/cilk/reducer_min.h | 57 - include/cilk/reducer_min_max.h | 3743 ------------------------------- include/cilk/reducer_opadd.h | 702 ------ include/cilk/reducer_opand.h | 617 ----- include/cilk/reducer_opmul.h | 456 ---- include/cilk/reducer_opor.h | 612 ----- include/cilk/reducer_opxor.h | 611 ----- include/cilk/reducer_ostream.h | 496 ---- include/cilk/reducer_string.h | 763 ------- include/cilk/reducer_vector.h | 533 ----- runtime/CMakeLists.txt | 61 +- runtime/c_reducers.c | 7 - runtime/cilk-internal.h | 214 +- runtime/cilk2c.c | 44 +- runtime/cilk2c.h | 3 +- runtime/cilk2c_inlined.c | 174 +- runtime/cilkred_map.c | 71 +- runtime/cilkred_map.h | 14 +- runtime/closure-type.h | 3 + runtime/closure.h | 25 +- runtime/fiber-pool.c | 22 +- runtime/fiber.c | 8 + runtime/fiber.h | 3 + runtime/frame.h | 135 ++ runtime/global.c | 20 +- runtime/global.h | 25 +- runtime/hyperobject_base.h | 34 + runtime/hypertable.c | 976 ++++++++ runtime/hypertable.h | 108 + runtime/init.c | 164 +- runtime/init.h | 3 +- runtime/internal-malloc.h | 6 + runtime/local.h | 9 +- runtime/pedigree-internal.h | 73 + runtime/pedigree_ext.c | 49 + runtime/pedigree_globals.c | 47 +- 
runtime/pedigree_lib.c | 90 + runtime/personality.c | 14 +- runtime/readydeque.h | 135 +- runtime/reducer_api.c | 113 + runtime/reducer_impl.c | 167 +- runtime/rts-config.h | 12 +- runtime/sched_stats.c | 49 +- runtime/sched_stats.h | 4 + runtime/scheduler.c | 622 +++-- runtime/worker.h | 55 + runtime/worker_coord.h | 73 +- runtime/worker_sleep.h | 592 +++++ 64 files changed, 3290 insertions(+), 14431 deletions(-) delete mode 100644 include/cilk/hyperobject_base.h delete mode 100644 include/cilk/metaprogramming.h create mode 100644 include/cilk/opadd_reducer.h create mode 100644 include/cilk/ostream_reducer.h delete mode 100644 include/cilk/reducer.h delete mode 100644 include/cilk/reducer_file.h delete mode 100644 include/cilk/reducer_list.h delete mode 100644 include/cilk/reducer_max.h delete mode 100644 include/cilk/reducer_min.h delete mode 100644 include/cilk/reducer_min_max.h delete mode 100644 include/cilk/reducer_opadd.h delete mode 100644 include/cilk/reducer_opand.h delete mode 100644 include/cilk/reducer_opmul.h delete mode 100644 include/cilk/reducer_opor.h delete mode 100644 include/cilk/reducer_opxor.h delete mode 100644 include/cilk/reducer_ostream.h delete mode 100644 include/cilk/reducer_string.h delete mode 100644 include/cilk/reducer_vector.h delete mode 100644 runtime/c_reducers.c create mode 100644 runtime/frame.h create mode 100644 runtime/hyperobject_base.h create mode 100644 runtime/hypertable.c create mode 100644 runtime/hypertable.h create mode 100644 runtime/pedigree-internal.h create mode 100644 runtime/pedigree_ext.c create mode 100644 runtime/pedigree_lib.c create mode 100644 runtime/reducer_api.c create mode 100644 runtime/worker.h create mode 100644 runtime/worker_sleep.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bacc1ad5..3469bbf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,9 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR CHEETAH_STANDALONE_BUI set_property(GLOBAL PROPERTY USE_FOLDERS ON) 
set(PACKAGE_NAME Cheetah) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 14.0.6) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") - set(PACKAGE_BUGREPORT "bugs@opencilk.org") + set(PACKAGE_BUGREPORT "https://github.com/OpenCilk/cheetah/issues/") endif() # Require out of source build. @@ -109,6 +109,9 @@ if (CHEETAH_STANDALONE_BUILD) endif() set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") set(LLVM_LIT_OUTPUT_DIR "${CHEETAH_EXEC_OUTPUT_DIR}") + + # Define llvm-link path + set(LLVM_LINK_PATH "${LLVM_TOOLS_BINARY_DIR}/llvm-link") endif() construct_cheetah_default_triple() diff --git a/cmake/Modules/AddCheetah.cmake b/cmake/Modules/AddCheetah.cmake index 34b95339..8ee51d51 100644 --- a/cmake/Modules/AddCheetah.cmake +++ b/cmake/Modules/AddCheetah.cmake @@ -466,10 +466,10 @@ function(add_cheetah_bitcode name) set(output_file_${libname} lib${output_name_${libname}}.bc) add_custom_command( OUTPUT ${output_dir_${libname}}/${output_file_${libname}} - COMMAND cp $ ${output_dir_${libname}}/${output_file_${libname}} + COMMAND ${LLVM_LINK_PATH} -o ${output_dir_${libname}}/${output_file_${libname}} $ DEPENDS ${libname}_compile $ COMMENT "Building bitcode ${output_file_${libname}}" - VERBATIM) + VERBATIM COMMAND_EXPAND_LISTS) add_custom_target(${libname} DEPENDS ${output_dir_${libname}}/${output_file_${libname}}) install(FILES ${output_dir_${libname}}/${output_file_${libname}} DESTINATION ${install_dir_${libname}} diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake index 798affc3..7c8f4162 100644 --- a/cmake/base-config-ix.cmake +++ b/cmake/base-config-ix.cmake @@ -6,6 +6,7 @@ include(CheckIncludeFile) include(CheckCXXSourceCompiles) include(TestBigEndian) +include(CMakePushCheckState) check_include_file(unwind.h HAVE_UNWIND_H) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 87eafc3b..cab331f7 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -1,6 +1,7 @@ include(CheckLibraryExists) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(CMakePushCheckState) function(check_linker_flag flag out_var) cmake_push_check_state() diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index f0a4722f..8dbda7b8 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -3,23 +3,8 @@ set(cilk_header_files cilk/cilk_api.h cilk/cilk_stub.h cilk/holder.h - cilk/hyperobject_base.h - cilk/metaprogramming.h - cilk/reducer.h - cilk/reducer_file.h - cilk/reducer_list.h - cilk/reducer_max.h - cilk/reducer_min.h - cilk/reducer_min_max.h - cilk/reducer_opadd.h - cilk/reducer_opand.h - cilk/reducer_opmul.h - cilk/reducer_opor.h - cilk/reducer_opxor.h - cilk/reducer_ostream.h - cilk/reducer_string.h - cilk/reducer_vector.h -) + cilk/opadd_reducer.h + cilk/ostream_reducer.h) set(output_dir ${CHEETAH_OUTPUT_DIR}/include) set(out_files) diff --git a/include/cilk/cilk.h b/include/cilk/cilk.h index 900169a4..7014987a 100644 --- a/include/cilk/cilk.h +++ b/include/cilk/cilk.h @@ -6,4 +6,6 @@ #define cilk_for _Cilk_for #define cilk_scope _Cilk_scope +#define cilk_reducer _Hyperobject + #endif /* _CILK_H */ diff --git a/include/cilk/cilk_api.h b/include/cilk/cilk_api.h index 4f23dd5d..112c4471 100644 --- a/include/cilk/cilk_api.h +++ b/include/cilk/cilk_api.h @@ -1,5 +1,8 @@ #ifndef _CILK_API_H #define _CILK_API_H + +#include /* size_t */ + #ifdef __cplusplus extern "C" { #endif @@ -11,7 +14,6 @@ extern unsigned __cilkrts_get_nworkers(void); extern unsigned __cilkrts_get_worker_number(void) __attribute__((deprecated)); extern int __cilkrts_running_on_workers(void); -#if defined(__cilk_pedigrees__) || defined(ENABLE_CILKRTS_PEDIGREE) #include typedef struct __cilkrts_pedigree { uint64_t rank; @@ -19,10 +21,20 @@ typedef struct __cilkrts_pedigree { } __cilkrts_pedigree; extern __cilkrts_pedigree __cilkrts_get_pedigree(void); extern void __cilkrts_bump_worker_rank(void); +extern void __cilkrts_dprand_set_seed(uint64_t seed); +extern void 
__cilkrts_init_dprng(void); extern uint64_t __cilkrts_get_dprand(void); -#endif // defined(__cilk_pedigrees__) || defined(ENABLE_CILKRTS_PEDIGREE) -#undef VISIBILITY +typedef void (*__cilk_identity_fn)(void *); +typedef void (*__cilk_reduce_fn)(void *, void *); + +extern void *__cilkrts_reducer_lookup(void *key); +extern void __cilkrts_reducer_register(void *key, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) + __attribute__((deprecated)); +extern void __cilkrts_reducer_unregister(void *key) + __attribute__((deprecated)); #ifdef __cplusplus } diff --git a/include/cilk/holder.h b/include/cilk/holder.h index 80bd79da..31ff8294 100644 --- a/include/cilk/holder.h +++ b/include/cilk/holder.h @@ -1,1007 +1,21 @@ -/* - * Copyright (C) 2011-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - * - */ +#ifndef _HOLDER_H +#define _HOLDER_H -/* - * holder.h - * - * Purpose: hyperobject to provide different views of an object to each - * parallel strand. - */ - -#ifndef HOLDER_H_INCLUDED -#define HOLDER_H_INCLUDED - -#include -#include -#include - -#ifdef __cplusplus - -/* C++ Interface - * - * Classes: holder - * - * Description: - * ============ - * This component provides a hyperobject that isolates a parallel uses of a - * common variable where it is not necessary to preserve changes from - * different parallel strands. In effect, a holder acts a bit like - * thread-local storage, but has qualities that work better with the - * fork-join structure of Intel(R) Cilk(TM) Plus. 
In particular, a holder has the - * following qualities: - * - * - The view of a holder before the first spawn within a function is the same - * as the view after each sync (as in the case of a reducer). - * - The view of a holder within the first spawned child of a function (or the - * first child spawned after a sync) is the same as the view on entry to the - * function. - * - The view of a holder before entering a _Cilk_for loop is the same as the - * view during the first iteration of the loop and the view at the end of - * the loop. - * - The view of a holder in the continuation of a spawn or in an arbitrary - * iteration of a _Cilk_for loop is *non-deterministic*. It is generally - * recommended that the holder be explicitly put into a known state in these - * situations. - * - * A holder can be used as an alternative to parameter-passing. They are most - * useful for replacing non-local variables without massive refactoring. A - * holder takes advantage of the fact that, most of the time, a holder view - * does not change after a spawn or from one iteration of a parallel for loop - * to the next (i.e., stealing is the exception, not the rule). When the - * holder view is a large object that is expensive to construct, this - * optimization can save significant time versus creating a separate local - * object for each view. In addition, a holder using the "keep last" policy - * will have the same value after a sync as the serialization of the same - * program. The last quality will often allow the program to avoid - * recomputing a value. - * - * Usage Example: - * ============== - * Function 'compute()' is a complex function that computes a value using a - * memoized algorithm, storing intermediate results in a hash table. Compute - * calls several other functions, each of which calls several other functions, - * all of which share a global hash table. In all, there are over a dozen - * functions with a total of about 60 references to the hash table. - *.. 
- * hash_table memos; - * - * void h(const X& x); // Uses memos - * - * double compute(const X& x) - * { - * memos.clear(); - * // ... - * memos[i] = x; - * ... - * g(i); // Uses memos - * // ... - * std::for_each(c.begin(), c.end(), h); // Call h for each element of c - * } - * - * int main() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * compute(myArray[i]); - * } - * } - *.. - * We would like to replace the 'for' loop in 'main' with a 'cilk_for'. - * Although the hash table is cleared on entry to each call to 'compute()', - * and although the values stored in the hash table are no longer used after - * 'compute()' returns, the use of the hash table as a global variable - * prevents 'compute()' from being called safely in parallel. One way to do - * this would be to make 'memos' a private variable within the cilk_for loop - * and pass it down to the actual computation, so that each loop iteration has - * its own private copy: - *.. - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * hash_table memos; - * compute(myArray[i], memos); - * } - *.. - * The problem with this approach is that it requires changing the signature - * of 'compute', 'h', 'g', and every one of the dozen or so functions that - * reference 'memos' as well as any function that calls those functions. This - * may break the abstraction of 'compute' and other functions, exposing an - * implementation detail that was not part of the interface. In addition, the - * function 'h' is called through a templated algorithm, 'for_each', which - * requires a fixed interface. Finally, there is constructor and destructor - * overhead for 'hash_table' each time through the loop. - * - * The alternative approach is to replace 'memos' with a holder. The holder - * would be available to all of the functions involved, but would not cause a - * race between parallel loop iterations. 
In order to make this work, each - * use of the 'memos' variable must be (mechanically) replaced by a use of the - * holder: - *.. - * cilk::holder > memos_h; - * - * void h(const X& x); // Uses memos_h - * - * double compute(const X& x) - * { - * memos_h().clear(); // operator() used to "dereference" the holder - * // ... - * memos_h()[i] = x; // operator() used to "dereference" the holder - * ... - * g(i); // Uses memos_h - * // ... - * std::for_each(c.begin(), c.end(), h); // Call h for each element of c - * } - *.. - * Note that each reference to the holder must be modified with an empty pair - * of parenthesis. This syntax is needed because there is no facility in C++ - * for a "smart reference" that would allow 'memos_h' to be a perfect - * replacement for 'memos'. One way that a user can avoid this syntax change - * is to wrap the holder in a class that has the same inteface as - * 'hash_table' but redirects all calls to the holder: - *.. - * template - * class hash_table_holder - * { - * private: - * cilk::holder > m_holder; - * public: - * void clear() { m_holder().clear(); } - * V& operator[](const K& x) { return m_holder()[x]; } - * std::size_t size() const { return m_holder().size(); } - * // etc. ... - * }; - *.. - * Using the above wrapper, the original code can be left unchanged except for - * replacing 'hash_table' with 'hash_table_holder' and replacing 'for' with - * 'cilk_for': - *.. - * hash_table_holder memos; - * - * void h(const X& x); // Uses memos - * - * double compute(const X& x) - * { - * memos.clear(); // Calls hash_table_holder::clear(). - * // ... - * } - *.. - * The above changes have no benefit over the use of thread-local storage. - * What if one of the functions has a 'cilk_spawn', however? - *.. 
- * void h(const X& x) - * { - * Y y = x.nested(); - * double d, w; - * if (y) - * { - * w = cilk_spawn compute_width(y); // May use 'memos' - * d = compute_depth(y); // Does not use 'memos' - * cilk_sync; - * compute(y); // recursive call. Uses 'memos'. - * } - * } - *.. - * In the above example, the view of the holder within 'compute_width' is the - * same as the view on entry to 'h'. More importantly, the view of the holder - * within the recursive call to 'compute' is the same as the view on entry to - * 'h', even if a different worker is executing the recursive call. Thus, the - * holder view within a Intel Cilk Plus program has useful qualities not found in - * thread-local storage. - */ +#include namespace cilk { - - /** - * After a sync, the value stored in a holder matches the most recent - * value stored into the holder by one of the starnds entering the sync. - * The holder policy used to instantiate the holder determines which of - * the entering strands determines the final value of the holder. A policy - * of 'holder_keep_indeterminate' (the default) is the most efficient, and - * results in an indeterminate value depending on the runtime schedule - * (see below for more specifics). An indeterminate value after a sync is - * often acceptable, especially if the value of the holder is not reused - * after the sync. All of the remaining policies retain the value of the - * last strand that would be executed in the serialization of the program. - * They differ in the mechanism used to move the value from one view to - * another. A policy of 'holder_keep_last_copy' moves values by - * copy-assignment. A policy of 'holder_keep_last_swap' moves values by - * calling 'swap'. A policy of 'holder_keep_last_move' is available only - * for compilers that support C++0x rvalue references and moves values by - * move-assignment. 
A policy of 'holder_keep_last' attempts to choose the - * most efficient mechanism: member-function 'swap' if the view type - * supports it, otherwise move-assignment if supported, otherwise - * copy-assignment. (The swap member function for a class that provides - * one is almost always as fast or faster than move-assignment or - * copy-assignment.) - * - * The behavior of 'holder_keep_indeterminate', while indeterminate, is - * not random and can be used for advanced programming or debugging. With - * a policy of 'holder_keep_intermediate', values are never copied or - * moved between views. The value of the view after a sync is the same as - * the value set in the last spawned child before a steal occurs or the - * last value set in the continuation if no steal occurs. Using this - * knowledge, a programmer can use a holder to detect the earliest steal - * in a piece of code. An indeterminate holder is also useful for keeping - * cached data similar to the way some applications might use thread-local - * storage. - */ - enum holder_policy { - holder_keep_indeterminate, - holder_keep_last, - holder_keep_last_copy, - holder_keep_last_swap, -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_keep_last_move -#endif - }; - - namespace internal { - - // Private special-case holder policy using the swap member-function - const holder_policy holder_keep_last_member_swap = - (holder_policy) (holder_keep_last_swap | 0x10); - - /* The constant, 'has_member_swap::value', will be 'true' if 'T' - * has a non-static member function with prototype 'void swap(T&)'. - * The mechanism used to detect 'swap' is the most portable among - * present-day compilers, but is not the most robust. Specifically, - * the prototype for 'swap' must exactly match 'void swap(T&)'. - * Near-matches like a 'swap' function that returns 'int' instead of - * 'void' will not be detected. Detection will also fail if 'T' - * inherits 'swap' from a base class. 
- */ - template - class has_member_swap - { - // This technique for detecting member functions was described by - // Rani Sharoni in comp.lang.c++.moderated: - // http://groups.google.com/group/comp.lang.c++.moderated/msg/2b06b2432fddfb60 - - // sizeof(notchar) is guaranteed larger than 1 - struct notchar { char x[2]; }; - - // Instantiationg Q will fail unless U contains a - // non-static member with prototype 'void swap(U&)'. - template struct Q { }; - - // First 'test' is preferred overload if U::swap exists with the - // correct prototype. Second 'test' is preferred overload - // otherwise. - template static char test(Q*); - template static notchar test(...); - - public: - /// 'value' will be true if T has a non-static member function - /// with prototype 'void swap(T&)'. - static const bool value = (1 == sizeof(test(0))); - }; - - template const bool has_member_swap::value; - - /** - * @brief Utility class for exception safety. - * - * The constuctor for this class takes a pointer and an allocator and - * holds on to them. The destructor deallocates the pointed-to - * object, without calling its destructor, typically to recover memory - * in case an exception is thrown. The release member clears the - * pointer so that the deallocation is prevented, i.e., when the - * exception danger has passed. The behavior of this class is similar - * to auto_ptr and unique_ptr. 
- */ - template > - class auto_deallocator - { - Allocator m_alloc; - Type* m_ptr; - - // Non-copiable - auto_deallocator(const auto_deallocator&); - auto_deallocator& operator=(const auto_deallocator&); - - public: - /// Constructor - explicit auto_deallocator(Type* p, const Allocator& a = Allocator()) - : m_alloc(a), m_ptr(p) { } - - /// Destructor - free allocated resources - ~auto_deallocator() { if (m_ptr) m_alloc.deallocate(m_ptr, 1); } - - /// Remove reference to resource - void release() { m_ptr = 0; } - }; - - /** - * Pure-abstract base class to initialize holder views - */ - template - class init_base - { - public: - virtual ~init_base() { } - virtual init_base* clone_self(Allocator& a) const = 0; - virtual void delete_self(Allocator& a) = 0; - virtual void construct_view(Type* p, Allocator& a) const = 0; - }; - - /** - * Class to default-initialize a holder view - */ - template - class default_init : public init_base - { - typedef init_base base; - - /// Private constructor (called from static make() function). - default_init() { } - - // Non-copiable - default_init(const default_init&); - default_init& operator=(const default_init&); - - public: - // Static factory function - static default_init* make(Allocator& a); - - // Virtual function overrides - virtual ~default_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void construct_view(Type* p, Allocator& a) const; - }; - - template - default_init* - default_init::make(Allocator&) - { - // Return a pointer to a singleton. All instances of this class - // are identical, so we need only one. - static default_init self; - return &self; - } - - template - default_init::~default_init() - { - } - - template - init_base* - default_init::clone_self(Allocator& a) const - { - return make(a); - } - - template - void default_init::delete_self(Allocator&) - { - // Since make() returned a shared singleton, there is nothing to - // delete here. 
- } - - template - void - default_init::construct_view(Type* p, - Allocator&) const - { - ::new((void*) p) Type(); - // TBD: In a C++0x library, this should be rewritten - // std::allocator_traits::construct(a, p); - } - - /** - * Class to copy-construct a view from a stored exemplar. - */ - template - class exemplar_init : public init_base - { - typedef init_base base; - - Type* m_exemplar; - - // Private constructors (called from make() functions). - exemplar_init(const Type& val, Allocator& a); -#ifdef __CILKRTS_RVALUE_REFERENCES - exemplar_init(Type&& val, Allocator& a); -#endif - - // Non-copyiable - exemplar_init(const exemplar_init&); - exemplar_init& operator=(const exemplar_init&); - - public: - // Static factory functions - static exemplar_init* make(const Type& val, - Allocator& a = Allocator()); -#ifdef __CILKRTS_RVALUE_REFERENCES - static exemplar_init* make(Type&& val, - Allocator& a = Allocator()); -#endif - - // Virtual function overrides - virtual ~exemplar_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void construct_view(Type* p, Allocator& a) const; - }; - - template - exemplar_init::exemplar_init(const Type& val, - Allocator& a) - { - m_exemplar = a.allocate(1); - auto_deallocator guard(m_exemplar, a); - a.construct(m_exemplar, val); - guard.release(); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - exemplar_init::exemplar_init(Type&& val, - Allocator& a) - { - m_exemplar = a.allocate(1); - auto_deallocator guard(m_exemplar, a); - a.construct(m_exemplar, std::forward(val)); - guard.release(); - } -#endif - - template - exemplar_init* - exemplar_init::make(const Type& val, - Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - exemplar_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Type'. 
- ::new((void*) self) exemplar_init(val, a); - - guard.release(); - - return self; - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - exemplar_init* - exemplar_init::make(Type&& val, - Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - exemplar_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Type'. - ::new((void*) self) exemplar_init(std::forward(val), a); - - guard.release(); - - return self; - } -#endif - - template - exemplar_init::~exemplar_init() - { - // Called only by delete_self, which deleted the exemplar using an - // allocator. - } - - template - init_base* - exemplar_init::clone_self(Allocator& a) const - { - return make(*m_exemplar, a); - } - - template - void exemplar_init::delete_self(Allocator& a) - { - typename Allocator::template rebind::other alloc(a); - - a.destroy(m_exemplar); - a.deallocate(m_exemplar, 1); - m_exemplar = 0; - - this->~exemplar_init(); - alloc.deallocate(this, 1); - } - - template - void - exemplar_init::construct_view(Type* p, - Allocator& a) const - { - a.construct(p, *m_exemplar); - // TBD: In a C++0x library, this should be rewritten - // std::allocator_traits::construct(a, p, *m_exemplar); - } - - /** - * Class to construct a view using a stored functor. The functor, - * 'f', must be be invokable using the expression 'Type x = f()'. 
- */ - template - class functor_init : - public init_base - { - typedef typename Allocator::value_type value_type; - typedef init_base base; - typedef typename Allocator::template rebind::other f_alloc; - - Func *m_functor; - - /// Private constructors (called from make() functions - functor_init(const Func& f, Allocator& a); -#ifdef __CILKRTS_RVALUE_REFERENCES - functor_init(Func&& f, Allocator& a); -#endif - - // Non-copiable - functor_init(const functor_init&); - functor_init& operator=(const functor_init&); - - public: - // Static factory functions - static functor_init* make(const Func& val, - Allocator& a = Allocator()); -#ifdef __CILKRTS_RVALUE_REFERENCES - static functor_init* make(Func&& val, - Allocator& a = Allocator()); -#endif - - // Virtual function overrides - virtual ~functor_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void - construct_view(value_type* p, Allocator& a) const; - }; - - /// Specialization to strip off reference from 'Func&'. - template - struct functor_init - : functor_init { }; - - /// Specialization to strip off reference and cvq from 'const Func&'. 
- template - struct functor_init - : functor_init { }; - - template - functor_init::functor_init(const Func& f, - Allocator& a) - { - f_alloc alloc(a); - - m_functor = alloc.allocate(1); - auto_deallocator guard(m_functor, alloc); - alloc.construct(m_functor, f); - guard.release(); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - functor_init::functor_init(Func&& f, - Allocator& a) - { - f_alloc alloc(a); - - m_functor = alloc.allocate(1); - auto_deallocator guard(m_functor, alloc); - alloc.construct(m_functor, std::forward(f)); - guard.release(); - } -#endif - - template - functor_init* - functor_init::make(const Func& f, Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - functor_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Func'. - ::new((void*) self) functor_init(f, a); - - guard.release(); - - return self; - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - functor_init* - functor_init::make(Func&& f, Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - functor_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Func'. - ::new((void*) self) functor_init(std::forward(f), a); - - guard.release(); - - return self; - } -#endif - - template - functor_init::~functor_init() - { - // Called only by delete_self, which deleted the functor using an - // allocator. 
- } - - template - init_base* - functor_init::clone_self(Allocator& a) const - { - return make(*m_functor, a); - } - - template - inline - void functor_init::delete_self(Allocator& a) - { - typename Allocator::template rebind::other alloc(a); - f_alloc fa(a); - - fa.destroy(m_functor); - fa.deallocate(m_functor, 1); - m_functor = 0; - - this->~functor_init(); - alloc.deallocate(this, 1); - } - - template - void functor_init::construct_view(value_type* p, - Allocator& a) const - { - a.construct(p, (*m_functor)()); - // In C++0x, the above should be written - // std::allocator_traits::construct(a, p, m_functor()); - } - - /** - * Functor called to reduce a holder - */ - template - struct holder_reduce_functor; - - /** - * Specialization to keep the left (first) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { } - }; - - /** - * Specialization to copy-assign from the right (last) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - *left = *right; - } - }; - - /* - * Specialization to keep the right (last) value via swap. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - using std::swap; - swap(*left, *right); - } - }; - -#ifdef __CILKRTS_RVALUE_REFERENCES - /* - * Specialization to move-assign from the right (last) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - *left = std::move(*right); - } - }; -#endif - - /* - * Specialization to keep the right (last) value via the swap member - * function. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - left->swap(*right); - } - }; - - /* - * Specialization to keep the right (last) value by the most efficient - * means detectable. - */ - template - struct holder_reduce_functor : - holder_reduce_functor::value ? 
- holder_keep_last_member_swap : -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_keep_last_move -#else - holder_keep_last_copy -#endif - )> - { - }; - } // end namespace internal - - /** - * Monoid for holders. - * Allocator type is required to be thread-safe. - */ - template > - class holder_monoid : public monoid_base - { - // Allocator is mutable because the copy of the monoid inside the - // reducer is const (to avoid races on the shared state). However, - // the allocator is required to be thread-safe, so it is ok (and - // necessary) to modify. - mutable Allocator m_allocator; - internal::init_base *m_initializer; - - public: - /// This constructor uses default-initialization for both the leftmost - /// view and each identity view. - holder_monoid(const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer( - internal::default_init::make(m_allocator)) - { } - - /// These constructors use 'val' as an exemplar to copy-construct both - /// the leftmost view and each identity view. - holder_monoid(const Type& val, const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer(internal::exemplar_init::make( - val, m_allocator)) { } - /// This constructor uses 'f' as a functor to construct both - /// the leftmost view and each identity view. 
- template - holder_monoid(const Func& f, const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer( - internal::functor_init::make(f,m_allocator)) - { } - - /// Copy constructor - holder_monoid(const holder_monoid& rhs) - : m_allocator(rhs.m_allocator) - , m_initializer(rhs.m_initializer->clone_self(m_allocator)) { } - - /// "Extended" copy constructor with allocator - holder_monoid(const holder_monoid& rhs, const Allocator& a) - : m_allocator(a) - , m_initializer(rhs.m_initializer->clone_self(m_allocator)) { } - -#ifdef __CILKRTS_RVALUE_REFERENCES - /// Move constructor - holder_monoid(holder_monoid&& rhs) - : m_allocator(rhs.m_allocator) - , m_initializer(rhs.m_initializer) { - rhs.m_initializer = - internal::default_init::make(m_allocator); - } - - /// "Extended" move constructor with allocator - holder_monoid(holder_monoid&& rhs, const Allocator& a) - : m_allocator(a) - , m_initializer(0) { - if (a != rhs.m_allocator) - m_initializer = rhs.m_initializer->clone_self(a); - else { - m_initializer = rhs.m_initializer; - rhs.m_initializer = - internal::default_init::make(m_allocator); - } - } -#endif - /// Destructor - ~holder_monoid() { m_initializer->delete_self(m_allocator); } - - holder_monoid& operator=(const holder_monoid& rhs) { - if (this == &rhs) return *this; - m_initializer->delete_self(m_allocator); - m_initializer = rhs.m_initializer->clone_self(m_allocator); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_monoid& operator=(holder_monoid&& rhs) { - if (m_allocator != rhs.m_allocator) - // Delegate to copy-assignment on unequal allocators - return operator=(static_cast(rhs)); - std::swap(m_initializer, rhs.m_initializer); - return *this; - } -#endif - - /// Constructs IDENTITY value into the uninitilized '*p' - void identity(Type* p) const - { m_initializer->construct_view(p, m_allocator); } - - /// Calls the destructor on the object pointed-to by 'p' - void destroy(Type* p) const - { m_allocator.destroy(p); } - - /// Return a pointer to 
size bytes of raw memory - void* allocate(std::size_t s) const { - return m_allocator.allocate(1); - } - - /// Deallocate the raw memory at p - void deallocate(void* p) const { - m_allocator.deallocate(static_cast(p), sizeof(Type)); - } - - void reduce(Type* left, Type* right) const { - internal::holder_reduce_functor()(left, right); - } - - void swap(holder_monoid& other) { - std::swap(m_initializer, other.m_initializer); - } - - Allocator get_allocator() const { - return m_allocator; - } - }; - - // Namespace-scope swap - template - inline void swap(holder_monoid& a, - holder_monoid& b) - { - a.swap(b); - } - - /** - * Hyperobject to provide different views of an object to each - * parallel strand. - */ - template > - class holder : public reducer > - { - typedef holder_monoid monoid_type; - typedef reducer imp; - - // Return a value of Type constructed using the functor Func. - template - Type make_value(const Func& f) const { - struct obj { - union { - char buf[sizeof(Type)]; - void* align1; - double align2; - }; - - obj(const Func& f) { f(static_cast(buf)); } - ~obj() { static_cast(buf)->~Type(); } - - operator Type&() { return *static_cast(buf); } - }; - - return obj(f); - } - - public: - /// Default constructor uses default-initialization for both the - /// leftmost view and each identity view. - holder(const Allocator& alloc = Allocator()) - : imp(monoid_type(alloc)) { } - - /// Construct from an exemplar that is used to initialize both the - /// leftmost view and each identity view. - holder(const Type& v, const Allocator& alloc = Allocator()) - // Alas, cannot use an rvalue reference for 'v' because it is used - // twice in the same expression for initializing imp. - : imp(monoid_type(v, alloc), v) { } - /// Construct from a functor that is used to initialize both the - /// leftmost view and each identity view. The functor, 'f', must be be - /// invokable using the expression 'Type x = f()'. 
- template - holder(const Func& f, const Allocator& alloc = Allocator()) - // Alas, cannot use an rvalue for 'f' because it is used twice in - // the same expression for initializing imp. - : imp(monoid_type(f, alloc), make_value(f)) { } - }; +template static void init(void *view) { + new(view) A; +} +template static void reduce(void *left, void *right) { + if (std::is_destructible::value) + static_cast(right)->~A(); +} -} // end namespace cilk +template +using holder = A _Hyperobject(init, reduce); -#else /* C */ -# error Holders are currently available only for C++ -#endif /* __cplusplus */ +} // namespace cilk -#endif /* HOLDER_H_INCLUDED */ +#endif // _HOLDER_H diff --git a/include/cilk/hyperobject_base.h b/include/cilk/hyperobject_base.h deleted file mode 100644 index 006aa721..00000000 --- a/include/cilk/hyperobject_base.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _CILK_HYPEROBJECT_BASE -#define _CILK_HYPEROBJECT_BASE - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct __cilkrts_hyperobject_base; - -/* Callback function signatures. The first argument always points to the - * reducer itself and is commonly ignored. 
*/ -typedef void (*cilk_reduce_fn_t)(void *r, void *lhs, void *rhs); -typedef void (*cilk_identity_fn_t)(void *r, void *view); -typedef void (*cilk_destroy_fn_t)(void *r, void *view); -typedef void *(*cilk_allocate_fn_t)(struct __cilkrts_hyperobject_base *r, size_t bytes); -typedef void (*cilk_deallocate_fn_t)(struct __cilkrts_hyperobject_base *r, void *view); - -/** Representation of the monoid */ -typedef struct cilk_c_monoid { - cilk_reduce_fn_t reduce_fn; - cilk_identity_fn_t identity_fn; - cilk_destroy_fn_t destroy_fn; - cilk_allocate_fn_t allocate_fn; - cilk_deallocate_fn_t deallocate_fn; -} cilk_c_monoid; - -/** Base of the hyperobject */ -typedef struct __cilkrts_hyperobject_base { - cilk_c_monoid __c_monoid; - uint32_t __id_num; /* for runtime use only, initialize to 0 */ - uint32_t __view_offset; /* offset (in bytes) to leftmost view */ - size_t __view_size; /* Size of each view */ -} __cilkrts_hyperobject_base; - -/* Library interface. - TODO: Add optimization hints like "strand pure" as in Cilk Plus. */ -void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key); -void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key); -#if defined __clang__ && defined __cilk && __cilk >= 300 -__attribute__((strand_pure, strand_malloc)) -#endif -void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key); -void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes); -void __cilkrts_hyper_dealloc(__cilkrts_hyperobject_base *key, void *view); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -#endif /* _CILK_HYPEROBJECT_BASE */ diff --git a/include/cilk/metaprogramming.h b/include/cilk/metaprogramming.h deleted file mode 100644 index 4f8a69a6..00000000 --- a/include/cilk/metaprogramming.h +++ /dev/null @@ -1,587 +0,0 @@ -/* metaprogramming.h -*- C++ -*- - * - * Copyright (C) 2012-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file metaprogramming.h - * - * @brief Defines metaprogramming utility classes used in the Intel(R) Cilk(TM) Plus library. - * - * @ingroup common - */ - -#ifndef METAPROGRAMMING_H_INCLUDED -#define METAPROGRAMMING_H_INCLUDED - -#ifdef __cplusplus - -#include -#include -#include -#ifdef _WIN32 -#include -#endif -#include - -namespace cilk { - -namespace internal { - -/** Test if a class is empty. - * - * If @a Class is an empty (and therefore necessarily stateless) class, then - * the "empty base-class optimization" guarantees that - * `sizeof(check_for_empty_class) == sizeof(char)`. Conversely, if - * `sizeof(check_for_empty_class) > sizeof(char)`, then @a Class is not - * empty, and we must discriminate distinct instances of @a Class. - * - * Typical usage: - * - * // General definition of A for non-empty B: - * template ::value> > - * class A { ... }; - * - * // Specialized definition of A for empty B: - * template - * class A { ... }; - * - * @tparam Class The class to be tested for emptiness. - * - * @result The `value` member will be `true` if @a Class is empty, - * `false` otherwise. - * - * @ingroup common - */ -template -class class_is_empty { - class check_for_empty_class : public Class - { - char m_data; - public: - // Declared but not defined - check_for_empty_class(); - check_for_empty_class(const check_for_empty_class&); - check_for_empty_class& operator=(const check_for_empty_class&); - ~check_for_empty_class(); - }; -public: - - /** Constant is true if and only if @a Class is empty. 
- */ - static const bool value = (sizeof(check_for_empty_class) == sizeof(char)); -}; - - -/** A class containing raw bytes with a specified alignment and size. - * - * An object of type `aligned_storage` will have alignment `A` and - * size at least `S`. Its contents will be uninitialized bytes. - * - * @tparam Size The required minimum size of the resulting class. - * @tparam Alignment The required alignment of the resulting class. - * - * @pre @a Alignment shall be a power of 2 no greater than 64. - * - * @note This is implemented using the `CILK_ALIGNAS` macro, which uses - * the non-standard, implementation-specific features - * `__declspec(align(N))` on Windows, and - * `__attribute__((__aligned__(N)))` on Unix. The `gcc` implementation - * of `__attribute__((__aligned__(N)))` requires a numeric literal `N` - * (_not_ an arbitrary compile-time constant expression). Therefore, - * this class is implemented using specialization on the required - * alignment. - * - * @note The template class is specialized only for the supported - * alignments. An attempt to instantiate it for an unsupported - * alignment will result in a compilation error. - */ -template -struct aligned_storage; - -#define CILK_ALIGNAS(A) __attribute__((aligned(A))) -/// @cond -template class aligned_storage - { CILK_ALIGNAS( 1) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 2) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 4) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 8) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(16) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(32) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(64) char m_bytes[Size]; }; -/// @endcond -#undef CILK_ALIGNAS - -/** A buffer of uninitialized bytes with the same size and alignment as a - * specified type. 
- * - * The class `storage_for_object` will have the same size and alignment - * properties as `Type`, but it will contain only raw (uninitialized) bytes. - * This allows the definition of a data member which can contain a `Type` - * object which is initialized explicitly under program control, rather - * than implicitly as part of the initialization of the containing class. - * For example: - * - * class C { - * storage_for_object _member; - * public: - * C() ... // Does NOT initialize _member - * void initialize(args) - * { new (_member.pointer()) MemberClass(args); } - * const MemberClass& member() const { return _member.object(); } - * MemberClass& member() { return _member.object(); } - * - * @tparam Type The type whose size and alignment are to be reflected - * by this class. - */ -template -class storage_for_object : - aligned_storage< sizeof(Type), __alignof(Type) > -{ -public: - /// Return a typed reference to the buffer. - const Type& object() const { return *reinterpret_cast(this); } - /// Return a typed reference to the buffer. - Type& object() { return *reinterpret_cast(this); } -}; - - -/** Get the functor class corresponding to a binary function type. - * - * The `binary_functor` template class can be instantiated with a binary - * functor class or with a real binary function, and will yield an equivalent - * binary functor class in either case. - * - * @tparam F A binary functor class, a binary function type, or a pointer to - * binary function type. - * - * @result `binary_functor::%type` will be the same as @a F if @a F is - * a class. It will be a `std::pointer_to_binary_function` wrapper - * if @a F is a binary function or binary function pointer type. - * (It will _not_ necessarily be an `Adaptable Binary Function` - * class, since @a F might be a non-adaptable binary functor - * class.) - * - * @ingroup common - */ -template -struct binary_functor { - /// The binary functor class equivalent to @a F. 
- typedef F type; -}; - -/// @copydoc binary_functor -/// Specialization for binary function. -template -struct binary_functor { - /// The binary functor class equivalent to @a F. - typedef std::pointer_to_binary_function type; -}; - -/// @copydoc binary_functor -/// Specialization for pointer to binary function. -template -struct binary_functor { - /// The binary functor class equivalent to @a F. - typedef std::pointer_to_binary_function type; -}; - - -/** Indirect binary function class with specified types. - * - * `typed_indirect_binary_function` is an `Adaptable Binary Function` class - * based on an existing binary functor class or binary function type @a F. If - * @a F is a stateless class, then this class will be empty, and its - * `operator()` will invoke @a F's `operator()`. Otherwise, an object of this - * class will hold a pointer to an object of type @a F, and will refer its - * `operator()` calls to the pointed-to @a F object. - * - * That is, suppose that we have the declarations: - * - * F *p; - * typed_indirect_binary_function ibf(p); - * - * Then: - * - * - `ibf(x, y) == (*p)(x, y)`. - * - `ibf(x, y)` will not do a pointer dereference if `F` is an empty class. - * - * @note Just to repeat: if `F` is an empty class, then - * `typed_indirect_binary_function\' is also an empty class. - * This is critical for its use in the - * @ref cilk::cilk_lib_1_1::min_max_internal::view_base - * "min/max reducer view classes", where it allows the view to - * call a comparison functor in the monoid without actually - * having to allocate a pointer in the view class when the - * comparison class is empty. - * - * @note If you have an `Adaptable Binary Function` class or a binary - * function type, then you can use the - * @ref indirect_binary_function class, which derives the - * argument and result types parameter type instead of requiring - * you to specify them as template arguments. 
- * - * @tparam F A binary functor class, a binary function type, or a pointer to - * binary function type. - * @param A1 The first argument type. - * @param A2 The second argument type. - * @param R The result type. - * - * @see min_max::comparator_base - * @see indirect_binary_function - * - * @ingroup common - */ -template < typename F - , typename A1 - , typename A2 - , typename R - , typename Functor = typename binary_functor::type - , bool FunctorIsEmpty = class_is_empty::value - > -class typed_indirect_binary_function : std::binary_function -{ - const F* f; -public: - /// Constructor captures a pointer to the wrapped function. - typed_indirect_binary_function(const F* f) : f(f) {} - - /// Return the comparator pointer, or `NULL` if the comparator is stateless. - const F* pointer() const { return f; } - - /// Apply the pointed-to functor to the arguments. - R operator()(const A1& a1, const A2& a2) const { return (*f)(a1, a2); } -}; - - -/// @copydoc typed_indirect_binary_function -/// Specialization for an empty functor class. (This is only possible if @a F -/// itself is an empty class. If @a F is a function or pointer-to-function -/// type, then the functor will contain a pointer.) -template -class typed_indirect_binary_function : - std::binary_function -{ -public: - /// Return `NULL` for the comparator pointer of a stateless comparator. - const F* pointer() const { return 0; } - - /// Constructor discards the pointer to a stateless functor class. - typed_indirect_binary_function(const F* f) {} - - /// Create an instance of the stateless functor class and apply it to the arguments. - R operator()(const A1& a1, const A2& a2) const { return F()(a1, a2); } -}; - - -/** Indirect binary function class with inferred types. 
- * - * This is identical to @ref cilk::internal::typed_indirect_binary_function, - * except that it derives the binary function argument and result types from - * the parameter type @a F instead of taking them as additional template - * parameters. If @a F is a class type, then it must be an `Adaptable Binary - * Function`. - * - * @see typed_indirect_binary_function - * - * @ingroup common - */ -template ::type> -class indirect_binary_function : - typed_indirect_binary_function< F - , typename Functor::first_argument_type - , typename Functor::second_argument_type - , typename Functor::result_type - > -{ - typedef typed_indirect_binary_function< F - , typename Functor::first_argument_type - , typename Functor::second_argument_type - , typename Functor::result_type - > - base; -public: - indirect_binary_function(const F* f) : base(f) {} ///< Constructor -}; - - -/** Choose a type based on a boolean constant. - * - * This metafunction is identical to C++11's condition metafunction. - * It needs to be here until we can reasonably assume that users will be - * compiling with C++11. - * - * @tparam Cond A boolean constant. - * @tparam IfTrue A type. - * @tparam IfFalse A type. - * @result The `type` member will be a typedef of @a IfTrue if @a Cond - * is true, and a typedef of @a IfFalse if @a Cond is false. - * - * @ingroup common - */ -template -struct condition -{ - typedef IfTrue type; ///< The type selected by the condition. -}; - -/// @copydoc condition -/// Specialization for @a Cond == `false`. -template -struct condition -{ - typedef IfFalse type; ///< The type selected by the condition. -}; - - -/** @def __CILKRTS_STATIC_ASSERT - * - * @brief Compile-time assertion. - * - * Causes a compilation error if a compile-time constant expression is false. - * - * @par Usage example. 
- * This assertion is used in reducer_min_max.h to avoid defining - * legacy reducer classes that would not be binary-compatible with the - * same classes compiled with earlier versions of the reducer library. - * - * __CILKRTS_STATIC_ASSERT( - * internal::class_is_empty< internal::binary_functor >::value, - * "cilk::reducer_max only works with an empty Compare class"); - * - * @note In a C++11 compiler, this is just the language predefined - * `static_assert` macro. - * - * @note In a non-C++11 compiler, the @a Msg string is not directly included - * in the compiler error message, but it may appear if the compiler - * prints the source line that the error occurred on. - * - * @param Cond The expression to test. - * @param Msg A string explaining the failure. - * - * @ingroup common - */ -#if defined(__INTEL_CXX11_MODE__) || defined(__GXX_EXPERIMENTAL_CXX0X__) -# define __CILKRTS_STATIC_ASSERT(Cond, Msg) static_assert(Cond, Msg) -#else -# define __CILKRTS_STATIC_ASSERT(Cond, Msg) \ - typedef int __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ - [::cilk::internal::static_assert_failure<(Cond)>::Success] - -/// @cond internal - template struct static_assert_failure { }; - template <> struct static_assert_failure { enum { Success = 1 }; }; - -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ - __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(__cilkrts_static_assert_, __LINE__) -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(a, b) \ - __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) a ## b -/// @endcond - -#endif - -/// @cond internal - -/** @name Aligned heap management. - */ -//@{ - -/** Implementation-specific aligned memory allocation function. - * - * @param size The minimum number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). - * @return The address of a block of memory of at least @a size - * bytes. The address will be a multiple of @a alignment. - * `NULL` if the allocation fails. 
- * - * @see deallocate_aligned() - */ -inline void* allocate_aligned(std::size_t size, std::size_t alignment) -{ -#ifdef _WIN32 - return _aligned_malloc(size, alignment); -#else -#if defined(__ANDROID__) || defined(__VXWORKS__) - return memalign(std::max(alignment, sizeof(void*)), size); -#else - void* ptr; - return (posix_memalign(&ptr, std::max(alignment, sizeof(void*)), size) == 0) ? ptr : 0; -#endif -#endif -} - -/** Implementation-specific aligned memory deallocation function. - * - * @param ptr A pointer which was returned by a call to alloc_aligned(). - */ -inline void deallocate_aligned(void* ptr) -{ -#ifdef _WIN32 - _aligned_free(ptr); -#else - std::free(ptr); -#endif -} - -/** Class to allocate and guard an aligned pointer. - * - * A new_aligned_pointer object allocates aligned heap-allocated memory when - * it is created, and automatically deallocates it when it is destroyed - * unless its `ok()` function is called. - * - * @tparam T The type of the object to allocate on the heap. The allocated - * will have the size and alignment of an object of type T. - */ -template -class new_aligned_pointer { - void* m_ptr; -public: - /// Constructor allocates the pointer. - new_aligned_pointer() : - m_ptr(allocate_aligned(sizeof(T), __alignof(T))) {} - /// Destructor deallocates the pointer. - ~new_aligned_pointer() { if (m_ptr) deallocate_aligned(m_ptr); } - /// Get the pointer. - operator void*() { return m_ptr; } - /// Return the pointer and release the guard. - T* ok() { - T* ptr = static_cast(m_ptr); - m_ptr = 0; - return ptr; - } -}; - -//@} - -/// @endcond - -} // namespace internal - -//@{ - -/** Allocate an aligned data structure on the heap. - * - * `cilk::aligned_new([args])` is equivalent to `new T([args])`, except - * that it guarantees that the returned pointer will be at least as aligned - * as the alignment requirements of type `T`. 
- * - * @ingroup common - */ -template -T* aligned_new() -{ - internal::new_aligned_pointer ptr; - new (ptr) T(); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3, x4); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3, x4, x5); - return ptr.ok(); -} - -//@} - - -/** Deallocate an aligned data structure on the heap. - * - * `cilk::aligned_delete(ptr)` is equivalent to `delete ptr`, except that it - * operates on a pointer that was allocated by aligned_new(). 
- * - * @ingroup common - */ -template -void aligned_delete(const T* ptr) -{ - ptr->~T(); - internal::deallocate_aligned((void*)ptr); -} - -} // namespace cilk - -#endif // __cplusplus - -#endif // METAPROGRAMMING_H_INCLUDED diff --git a/include/cilk/opadd_reducer.h b/include/cilk/opadd_reducer.h new file mode 100644 index 00000000..84226f83 --- /dev/null +++ b/include/cilk/opadd_reducer.h @@ -0,0 +1,18 @@ +#ifndef _OPADD_REDUCER_H +#define _OPADD_REDUCER_H + +namespace cilk { + +template static void zero(void *v) { + *static_cast(v) = static_cast(0); +} + +template static void plus(void *l, void *r) { + *static_cast(l) += *static_cast(r); +} + +template using opadd_reducer = T _Hyperobject(zero, plus); + +} // namespace cilk + +#endif // _OPADD_REDUCER_H diff --git a/include/cilk/ostream_reducer.h b/include/cilk/ostream_reducer.h new file mode 100644 index 00000000..fe410068 --- /dev/null +++ b/include/cilk/ostream_reducer.h @@ -0,0 +1,65 @@ +#include +#include + +/* Adapted from Intel Cilk Plus */ + +namespace cilk { + +template +class ostream_view : public std::basic_ostream +{ + typedef std::basic_ostream base; + typedef std::basic_ostream ostream_type; + + // A non-leftmost view is associated with a private string buffer. (The + // leftmost view is associated with the buffer of the reducer's associated + // ostream, so its private buffer is unused.) + // + std::basic_stringbuf m_buffer; + +public: + void reduce(ostream_view* other) + { + // Writing an empty buffer results in failure. Testing `sgetc()` is the + // easiest way of checking for an empty buffer. 
+ if (other->m_buffer.sgetc() != Traits::eof()) { + *this << (&other->m_buffer); + } + } + + static void reduce(void *left_v, void *right_v) { + ostream_view *left = + static_cast *>(left_v); + ostream_view *right = + static_cast *>(right_v); + left->reduce(right); + right->~ostream_view(); + } + + static void identity(void *view) { + new (view) ostream_view(); + } + + /** Non-leftmost (identity) view constructor. The view is associated with + * its internal buffer. Required by @ref monoid_base. + */ + ostream_view() : base(&m_buffer) {} + + /** Leftmost view constructor. The view is associated with an existing + * ostream. + */ + ostream_view(const ostream_type& os) : base(0) + { + base::rdbuf(os.rdbuf()); // Copy stream buffer + base::flags(os.flags()); // Copy formatting flags + base::setstate(os.rdstate()); // Copy error state + } + +}; + +template> + using ostream_reducer = ostream_view + _Hyperobject(&ostream_view>::identity, + &ostream_view>::reduce); + +} diff --git a/include/cilk/reducer.h b/include/cilk/reducer.h deleted file mode 100644 index 8f984b80..00000000 --- a/include/cilk/reducer.h +++ /dev/null @@ -1,1866 +0,0 @@ -/* reducer.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer.h - * - * @brief Defines foundation classes for creating Intel(R) Cilk(TM) Plus - * reducers. 
- * - * @ingroup Reducers - * - * @see @ref pagereducers - * - * @defgroup Reducers Reducers - */ - -#ifndef REDUCER_H_INCLUDED -#define REDUCER_H_INCLUDED - -#include "cilk/hyperobject_base.h" -#include "cilk/metaprogramming.h" -#include - -#define __CILKRTS_STRAND_STALE(fn) fn - -#ifdef __cplusplus - -//===================== C++ interfaces =================================== - -#include - -namespace cilk { - -/** Class for provisionally constructed objects. - * - * The monoid_base::construct() functions manually construct both a - * monoid and a view. If one of these is constructed successfully, and the - * construction of the other (or some other initialization) fails, then the - * first one must be destroyed to avoid a memory leak. Because the - * construction is explicit, the destruction must be explicit, too. - * - * A provisional_guard object wraps a pointer to a newly constructed - * object. A call to its confirm() function confirms that the object is - * really going to be used. If the guard is destroyed without being - * confirmed, then the pointed-to object is destroyed (but not - * deallocated). - * - * Expected usage: - * - * provisional_guard x1_provisional( new (x1) T1 ); - * … more initialization … - * x1_provisional.confirm(); - * - * or - * - * provisional_guard x1_provisional( new (x1) T1 ); - * x1_provisional.confirm_if( new (x2) T2 ); - * - * If an exception is thrown in the "more initialization" code in the - * first example, or in the `T2` constructor in the second example, then - * `x1_provisional` will not be confirmed, so when its destructor is - * called during exception unwinding, the `T1` object that was constructed - * in `x1` will be destroyed. 
- * - * **NOTE**: Do *not* be tempted to chain a `provisional_guard` - * constructor with `confirm_if` as in this example: - * - * // BAD IDEA - * provisional_guard( new (x1) T1 ).confirm_if( new (x2) T2 ); - * - * The code above is problematic because the evaluation of the T2 - * constructor is unsequenced with respect to the call to the - * `provisional_guard` constructor (and with respect the T1 constructor). - * Thus, the compiler may choose to evaluate `new (x2) T2` before - * constructing the guard and leak the T1 object if the `T2` constructor - * throws. - * - * @tparam Type The type of the provisionally constructed object. - */ -template class provisional_guard { - Type *m_ptr; - - public: - /** Constructor. Creates a guard for a provisionally constructed object. - * - * @param ptr A pointer to the provisionally constructed object. - */ - provisional_guard(Type *ptr) : m_ptr(ptr) {} - - /** Destructor. Destroy the object pointed to by the contained pointer - * if it has not been confirmed. - */ - ~provisional_guard() { - if (m_ptr) - m_ptr->~Type(); - } - - /** Confirm the provisional construction. Do *not* delete the contained - * pointer when the guard is destroyed. - */ - void confirm() { m_ptr = 0; } - - /** Confirm provisional construction if argument is non-null. Note that - * if an exception is thrown during evaluation of the argument - * expression, then this function will not be called, and the - * provisional object will not be confirmed. This allows the usage: - * - * x1_provisional.confirm_if( new (x2) T2() ); - * - * @param cond An arbitrary pointer. The provisional object will be - * confirmed if @a cond is not null. - * - * @returns The value of the @a cond argument. - */ - template Cond *confirm_if(Cond *cond) { - if (cond) - m_ptr = 0; - return cond; - } -}; - -/** Base class for defining monoids. - * - * The monoid_base class template is useful for creating classes that model - * the monoid concept. 
It provides the core type and memory management - * functionality. A subclass of monoid_base need only declare and implement - * the `identity` and `reduce` functions. - * - * The monoid_base class also manages the integration between the monoid, the - * reducer class that is based on it, and an optional view class which wraps - * value objects and restricts access to their operations. - * - * @tparam Value The value type for the monoid. - * @tparam View An optional view class that serves as a proxy for the value - * type. - * - * @see monoid_with_view - */ -template class monoid_base { - - public: - /** Value type of the monoid. - */ - typedef Value value_type; - - /** View type of the monoid. Defaults to be the same as the value type. - * @see monoid_with_view - */ - typedef View view_type; - - enum { - /** Should reducers created with this monoid be aligned? - * - * @details - * "Aligned" means that the view is allocated at a cache-line aligned - * offset in the reducer, and the reducer must be cache-line aligned. - * "Unaligned" means that the reducer as a whole is just naturally - * aligned, but it contains a large enough block of uninitialized - * storage for a cache-line aligned view to be allocated in it at - * reducer construction time. - * - * Since the standard heap allocator (new reducer) does not allocate - * cache-line aligned storage, only unaligned reducers can be safely - * allocated on the heap. - * - * Default is false (unaligned) unless overridden in a subclass. - * - * @since 1.02 - * (In Intel Cilk Plus library versions 1.0 and 1.01, the default was - * true. In Intel Cilk Plus library versions prior to 1.0, reducers were - * always aligned, and this data member did not exist.) - */ - align_reducer = false - }; - - /** Destroys a view. Destroys (without deallocating) the @a View object - * pointed to by @a p. - * - * @param p The address of the @a View object to be destroyed. 
- */ - void destroy(view_type *p) const { p->~view_type(); } - - /** Allocates raw memory. Allocate @a s bytes of memory with no - * initialization. - * - * @param s The number of bytes of memory to allocate. - * @return An untyped pointer to the allocated memory. - */ - void *allocate(size_t s) const { return operator new(s); } - - /** Deallocates raw memory pointed to by @a p - * without doing any destruction. - * - * @param p Pointer to the memory to be deallocated. - * - * @pre @a p points to a block of memory that was allocated by a - * call to allocate(). - */ - void deallocate(void *p) const { operator delete(p); } - - /** Creates the identity value. Constructs (without allocating) a @a View - * object representing the default value of the @a Value type. - * - * @param p A pointer to a block of raw memory large enough to hold a - * @a View object. - * - * @post The memory pointed to by @a p contains a @a View object that - * represents the default value of the @a View type. - * - * @deprecated This function constructs the @a View object with its default - * constructor, which will often, but not always, yield the - * appropriate identity value. Monoid classes should declare - * their identity function explicitly, rather than relying on - * this default definition. - */ - void identity(View *p) const { new ((void *)p) View(); } - - /** @name Constructs the monoid and the view with arbitrary arguments. - * - * A @ref reducer object contains monoid and view data members, which are - * declared as raw storage (byte arrays), so that they are not implicitly - * constructed when the reducer is constructed. Instead, a reducer - * constructor calls one of the monoid class's static construct() - * functions with the addresses of the monoid and the view, and the - * construct() function uses placement `new` to construct them. 
- * This allows the monoid to determine the order in which the monoid and - * view are constructed, and to make one of them dependent on the other. - * - * Any arguments to the reducer constructor are just passed on as - * additional arguments to the construct() function (after the monoid - * and view addresses are set). - * - * A monoid whose needs are satisfied by the suite of construct() - * functions below, such as @ref monoid_with_view, can just inherit them - * from monoid_base. Other monoids will need to provide their own versions - * to override the monoid_base functions. - */ - //@{ - - /** Default-constructs the monoid, identity-constructs the view. - * - * @param monoid Address of uninitialized monoid object. - * @param view Address of uninitialized initial view object. - */ - //@{ - template - static void construct(Monoid *monoid, View *view) { - provisional_guard guard(new ((void *)monoid) Monoid()); - monoid->identity(view); - guard.confirm(); - } - //@} - - /** Default-constructs the monoid, and passes one to five const reference - * arguments to the view constructor. 
- */ - //@{ - - template - static void construct(Monoid *monoid, View *view, const T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3, const T4 &x4) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4, x5)); - } - - //@} - - /** Default-constructs the monoid, and passes one non-const reference - * argument to the view constructor. - */ - //@{ - template - static void construct(Monoid *monoid, View *view, T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1)); - } - //@} - - /** Copy-constructs the monoid, and identity-constructs the view - * constructor. - * - * @param monoid Address of uninitialized monoid object. - * @param view Address of uninitialized initial view object. 
- * @param m Object to be copied into `*monoid` - */ - //@{ - template - static void construct(Monoid *monoid, View *view, const Monoid &m) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - monoid->identity(view); - guard.confirm(); - } - //@} - - /** Copy-constructs the monoid, and passes one to four const reference - * arguments to the view constructor. - */ - //@{ - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2, const T3 &x3) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2, x3)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2, const T3 &x3, - const T4 &x4) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4)); - } - - //@} - - //@} -}; - -/** Monoid class that gets its value type and identity and reduce operations - * from its view. - * - * A simple implementation of the monoid-view-reducer architecture would - * distribute knowledge about the type and operations for the reduction - * between the monoid and the view - the identity and reduction operations are - * specified in the monoid, the reduction operations are implemented in the - * view, and the value type is specified in both the monoid and the view. - * This is inelegant. 
- * - * monoid_with_view is a subclass of @ref monoid_base that gets its value type - * and its identity and reduction operations from its view class. No - * customization of the monoid_with_view class itself is needed beyond - * instantiating it with an appropriate view class. (Customized subclasses of - * monoid_with_view may be needed for other reasons, such as to keep some - * state for the reducer.) All of the Intel Cilk Plus predefined reducers use - * monoid_with_view or one of its subclasses. - * - * The view class `View` of a monoid_with_view must provide the following - * public definitions: - * - * Definition | Meaning - * ---------------------------------|-------- - * `value_type` | a typedef of the value type for the - * | reduction - * `View()` | a default constructor which constructs - * | the identity value for the reduction - * `void reduce(const View* other)` | a member function which applies the - * | reduction operation to the values of - * | `this` view and the `other` view, - * | leaving the result as the value of - * | `this` view, and leaving the value of - * | the `other` view undefined (but valid) - * - * @tparam View The view class for the monoid. - * @tparam Align If true, reducers instantiated on this monoid will be - * cache-aligned. By default, library reducers (unlike legacy - * library reducer _wrappers_) are aligned only as required by - * contents. - */ -template -class monoid_with_view : public monoid_base { - public: - /** Should reducers created with this monoid be aligned? - */ - enum { align_reducer = Align }; - - /** Create the identity value. - * - * Implements the monoid `identity` operation by using the @a View class's - * default constructor. - * - * @param p A pointer to a block of raw memory large enough to hold a - * @p View object. - */ - void identity(View *p) const { new ((void *)p) View(); } - - /** Reduce the values of two views. 
- * - * Implements the monoid `reduce` operation by calling the left view's - * `%reduce()` function with the right view as an operand. - * - * @param left The left operand of the reduce operation. - * @param right The right operand of the reduce operation. - * @post The left view contains the result of the reduce - * operation, and the right view is undefined. - */ - void reduce(View *left, View *right) const { left->reduce(right); } -}; - -/** Base class for simple views with (usually) scalar values. - * - * The scalar_view class is intended as a base class which provides about half - * of the required definitions for simple views. It defines the `value_type` - * required by a @ref monoid_with_view (but not the identity constructor and - * reduce operation, which are inherently specific to a particular kind of - * reduction). It also defines the value access functions which will be called - * by the corresponding @ref reducer functions. (It uses copy semantics for - * the view_move_in() and view_move_out() functions, which is appropriate - * for simple scalar types, but not necessarily for more complex types like - * STL containers. - * - * @tparam Type The type of value wrapped by the view. - */ -template class scalar_view { - protected: - Type m_value; ///< The wrapped accumulator variable. - - public: - /** Value type definition required by @ref monoid_with_view. - */ - typedef Type value_type; - - /** Default constructor. - */ - scalar_view() : m_value() {} - - /** Value constructor. - */ - scalar_view(const Type &v) : m_value(v) {} - - /** @name Value functions required by the reducer class. - * - * Note that the move in/out functions use simple assignment semantics. - */ - //@{ - - /** Set the value of the view. - */ - void view_move_in(Type &v) { m_value = v; } - - /** Get the value of the view. - */ - void view_move_out(Type &v) { v = m_value; } - - /** Set the value of the view. 
- */ - void view_set_value(const Type &v) { m_value = v; } - - /** Get the value of the view. - */ - Type const &view_get_value() const { return m_value; } - - /** Type returned by view_get_value. - */ - typedef Type const &return_type_for_get_value; - - /** Get a reference to the value contained in the view. For legacy - * reducer support only. - */ - Type &view_get_reference() { return m_value; } - - /** Get a reference to the value contained in the view. For legacy - * reducer support only. - */ - Type const &view_get_reference() const { return m_value; } - //@} -}; - -/** Wrapper class for move-in construction. - * - * Some types allow their values to be _moved_ as an alternative to copying. - * Moving a value may be much faster than copying it, but may leave the value - * of the move's source undefined. Consider the `swap` operation provided by - * many STL container classes: - * - * list x, y; - * x = y; // Copy - * x.swap(y); // Move - * - * The assignment _copies_ the value of `y` into `x` in time linear in the - * size of `y`, leaving `y` unchanged. The `swap` _moves_ the value of `y` - * into `x` in constant time, but it also moves the value of `x` into `y`, - * potentially leaving `y` undefined. - * - * A move_in_wrapper simply wraps a pointer to an object. It is created by a - * call to cilk::move_in(). Passing a move_in_wrapper to a view constructor - * (actually, passing it to a reducer constructor, which passes it to the - * monoid `construct()` function, which passes it to the view constructor) - * allows, but does not require, the value pointed to by the wrapper to be - * moved into the view instead of copied. - * - * A view class exercises this option by defining a _move-in constructor_, - * i.e., a constructor with a move_in_wrapper parameter. The constructor calls - * the wrapper's `value()` function to get a reference to its pointed-to - * value, and can then use that reference in a move operation. 
- * - * A move_in_wrapper also has an implicit conversion to its pointed-to value, - * so if a view class does not define a move-in constructor, its ordinary - * value constructor will be called with the wrapped value. For example, an - * @ref ReducersAdd "op_add" view does not have a move-in constructor, so - * - * int x; - * reducer< op_add > xr(move_in(x)); - * - * will simply call the `op_add_view(const int &)` constructor. But an - * @ref ReducersList "op_list_append" view does have a move-in constructor, - * so - * - * list x; - * reducer< op_list_append > xr(move_in(x)); - * - * will call the `op_list_append_view(move_in_wrapper)` constructor, - * which can `swap` the value of `x` into the view. - * - * @note Remember that passing the value of a variable to a reducer - * constructor using a move_in_wrapper leaves the variable undefined. - * You cannot assume that the constructor either will or will not copy - * or move the value. - * - * @tparam Type The type of the wrapped value. - * - * @see cilk::move_in() - */ -template class move_in_wrapper { - Type *m_pointer; - - public: - /** Constructor that captures the address of its argument. This is almost - * always called from the @ref move_in function. - */ - explicit move_in_wrapper(Type &ref) : m_pointer(&ref) {} - - /** Implicit conversion to the wrapped value. This allows a move_in_wrapper - * to be used where a value of the wrapped type is expected, in which case - * the wrapper is completely transparent. - */ - operator Type &() const { return *m_pointer; } - - /** Get a reference to the pointed-to value. This has the same effect as - * the implicit conversion, but makes the intent clearer in a move-in - * constructor. - */ - Type &value() const { return *m_pointer; } -}; - -/** Function to create a move_in_wrapper for a value. - * - * @tparam Type The type of the argument, which will be the `type` of the - * created wrapper. 
- * - * @see move_in_wrapper - */ -template inline move_in_wrapper move_in(Type &ref) { - return move_in_wrapper(ref); -} - -/** @copydoc move_in(Type&) - * - * @note Applying a function that is explicitly specified as modifying its - * argument to a const argument is obviously an irrational thing to - * do. This move_in() variant is just provided to allow calling a - * move-in constructor with a function return value, which the - * language treats as a const. Using it for any other purpose will - * probably end in tears. - */ -template inline move_in_wrapper move_in(const Type &ref) { - return move_in_wrapper(ref); -} - -/** Wrapper class to allow implicit downcasts to reducer subclasses. - * - * The Intel Cilk Plus library contains a collection of reducer wrapper classes - * which were created before the `cilk::reducer` style was developed. - * For example, `cilk::reducer_opadd` provided essentially the same - * functionality that is now provided by - * `cilk::reducer< cilk::op_add >`. These legacy reducer classes are - * deprecated, but still supported, and they have been reimplemented as - * subclasses of the corresponding `cilk::reducer` classes. For example: - * - * template - * reducer_opadd : public reducer< op_add > { ... }; - * - * This reimplementation allows transparent conversion between legacy and - * new reducers. That is, a `reducer*` or `reducer&` can be - * used anywhere that a `reducer_opadd*` or `reducer_opadd&` is expected, - * and vice versa. - * - * The conversion from the legacy reducer to the new reducer is just an - * up-cast, which is provided for free by C++. The conversion from the new - * reducer to the legacy reducer is a down-cast, though, which requires an - * explicit conversion member function in the `reducer` class. The challenge - * is to define a function in the reducer template class which will convert - * each cilk::reducer specialization to the corresponding legacy reducer, - * if there is one. 
- * - * The trick is in the legacy_reducer_downcast template class, which provides - * a mapping from `cilk::reducer` specializations to legacy reducer classes. - * `reducer` has a conversion function to convert itself to - * `legacy_reducer_downcast< reducer >::%type`. By default, - * `legacy_reducer_downcast::%type` is just a trivial subclass of - * `Reducer`, which is uninteresting, but a reducer with a legacy counterpart - * will have a specialization of `legacy_reducer_downcast` whose `type` is - * the corresponding legacy reducer. For example: - * - * template - * struct legacy_reducer_downcast< reducer< op_add > > - * { - * typedef reducer_opadd type; - * }; - * - * - * @tparam Reducer The new-style reducer class whose corresponding legacy - * reducer class is `type`, if there is such a legacy reducer - * class. - */ -template struct legacy_reducer_downcast { - /** The related legacy reducer class. - * - * By default, this is just a trivial subclass of Reducer, but it can be - * overridden in the specialization of legacy_reducer_downcast for - * a reducer that has a corresponding legacy reducers. - */ - struct type : Reducer {}; -}; - -namespace internal { -/// @cond internal - -template struct reducer_set_get { - // sizeof(notchar) != sizeof(char) - struct notchar { - char x[2]; - }; - - // `does_view_define_return_type_for_get_value(View*)` returns `char` if - // `View` defines `return_type_for_get_value`, and `notchar` if it doesn't. - - template struct using_type {}; - - template - static char does_view_define_return_type_for_get_value( - using_type *); - - template - static notchar does_view_define_return_type_for_get_value(...); - - // `VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE` is true if `View` defines - // `return_type_for_get_value`. 
- - enum { - VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE = - sizeof(does_view_define_return_type_for_get_value(0)) == - sizeof(char) - }; - - // `return_type_for_get_value` is `View::return_type_for_get_value` - // if it is defined, and just `Value` otherwise. - - template - struct return_type_for_view_get_value { - typedef Value type; - }; - - template - struct return_type_for_view_get_value { - typedef typename InnerView::return_type_for_get_value type; - }; - - public: - typedef typename return_type_for_view_get_value< - View, VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE>::type - return_type_for_get_value; - - static void move_in(View &view, Value &v) { view.view_move_in(v); } - static void move_out(View &view, Value &v) { view.view_move_out(v); } - - static void set_value(View &view, const Value &v) { - view.view_set_value(v); - } - - static return_type_for_get_value get_value(const View &view) { - return view.view_get_value(); - } -}; - -template struct reducer_set_get { - typedef const Value &return_type_for_get_value; - - static void move_in(Value &view, Value &v) { view = v; } - static void move_out(Value &view, Value &v) { v = view; } - - static void set_value(Value &view, const Value &v) { view = v; } - - static return_type_for_get_value get_value(const Value &view) { - return view; - } -}; - -/// @endcond - -/** Base class defining the data layout that is common to all reducers. - */ -template class reducer_base { - typedef typename Monoid::view_type view_type; - - // This makes the reducer a hyper-object. (Partially initialized in - // the derived reducer_content class.) - // - __cilkrts_hyperobject_base m_base; - - // The monoid is allocated here as raw bytes, and is constructed explicitly - // by a call to the monoid_type::construct() function in the constructor of - // the `reducer` subclass. - // - storage_for_object m_monoid; - - // Used for sanity checking at destruction. - // - void *m_initialThis; - - // The leftmost view comes next. 
It is defined in the derived - // reducer_content class. - - /** @name C-callable wrappers for the C++-coded monoid dispatch functions. - */ - //@{ - - static void reduce_wrapper(void *r, void *lhs, void *rhs); - static void identity_wrapper(void *r, void *view); - static void destroy_wrapper(void *r, void *view); - static void *allocate_wrapper(void *r, size_t bytes); - static void deallocate_wrapper(void *r, void *view); - - //@} - - protected: - /** Constructor. - * - * @param leftmost The address of the leftmost view in the reducer. - */ - reducer_base(char* leftmost) - : m_base{{ - (cilk_reduce_fn_t) &reduce_wrapper, - (cilk_identity_fn_t) &identity_wrapper, - (cilk_destroy_fn_t) &destroy_wrapper, - (cilk_allocate_fn_t) &allocate_wrapper, - (cilk_deallocate_fn_t) &deallocate_wrapper - }, - 0, /* Cilk Plus flags or OpenCilk ID */ - (char*)leftmost - (char*)this, /* __view_offset */ - sizeof(view_type) /* __view_size */ - }, - m_initialThis(this) - { - __cilkrts_hyper_create(&m_base); - } - - /** Destructor. - */ - __CILKRTS_STRAND_STALE(~reducer_base()) { - // Make sure we haven't been memcopy'd or corrupted - assert(this == m_initialThis); - __cilkrts_hyper_destroy(&m_base); - } - - /** Monoid data member. - * - * @return A pointer to the reducer's monoid data member. - */ - Monoid *monoid_ptr() { return &m_monoid.object(); } - - /** Leftmost view data member. - * - * @return A pointer to the reducer's leftmost view data member. - * - * @note This function returns the address of the *leftmost* view, - * which is unique for the lifetime of the reducer. It is - * intended to be used in constructors and destructors. - * Use the reducer::view() function to access the per-strand - * view instance. - */ - view_type *leftmost_ptr() { - char *view_addr = (char *)this + m_base.__view_offset; - return reinterpret_cast(view_addr); - } - - public: - /** @name Access the current view. 
- * - * These functions return a reference to the instance of the reducer's - * view that was created for the current strand of a parallel computation - * (and create it if it doesn't already exist). Note the difference from - * the (private) leftmost_ptr() function, which returns a pointer to the - * _leftmost_ view, which is the same in all strands. - */ - //@{ - - /** Per-strand view instance. - * - * @return A reference to the per-strand view instance. - */ - view_type &view() { - return *static_cast(__cilkrts_hyper_lookup(&m_base)); - } - - /** @copydoc view() - */ - const view_type &view() const { - return const_cast(this)->view(); - } - - //@} - - /** Initial view pointer field. - * - * @internal - * - * @return a reference to the m_initialThis field. - * - * @note This function is provided for "white-box" testing of the - * reducer layout code. There is never any reason for user code - * to call it. - */ - const void *const &initial_this() const { return m_initialThis; } -}; - -template -void reducer_base::reduce_wrapper(void *r, void *lhs, void *rhs) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->reduce(static_cast(lhs), - static_cast(rhs)); -} - -template -void reducer_base::identity_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->identity(static_cast(view)); -} - -template -void reducer_base::destroy_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->destroy(static_cast(view)); -} - -template -void *reducer_base::allocate_wrapper(void *r, size_t bytes) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - return monoid->allocate(bytes); -} - -template -void reducer_base::deallocate_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->deallocate(static_cast(view)); -} - -/** Base class defining the data members of a reducer. 
- * - * @tparam Aligned The `m_view` data member, and therefore the entire - * structure, are cache-line aligned if this parameter - * is `true'. - */ -template -class reducer_content; - -/** Base class defining the data members of an aligned reducer. - */ -template -class reducer_content : public reducer_base { - typedef typename Monoid::view_type view_type; - - // The leftmost view is defined as raw bytes. It will be constructed - // by the monoid `construct` function. It is cache-aligned, which - // will push it into a new cache line. Furthermore, its alignment causes - // the reducer as a whole to be cache-aligned, which makes the reducer - // size a multiple of a cache line. Since there is nothing in the reducer - // after the view, all this means that the leftmost view gets one or more - // cache lines all to itself, which prevents false sharing. - // - __attribute__((aligned((64)))) char m_leftmost[sizeof(view_type)]; - - protected: - reducer_content() : reducer_base((char *)&m_leftmost) {} -}; - -/** Base class defining the data members of an unaligned reducer. - */ -template -class reducer_content : public reducer_base { - typedef typename Monoid::view_type view_type; ///< The view type. - - // Reserve space for the leftmost view. The view will be allocated at an - // aligned offset in this space at runtime, to guarantee that the view - // will get one or more cache lines all to itself, to prevent false - // sharing. - // - // The number of bytes to reserve is determined as follows: - // * Start with the view size. - // * Round up to a multiple of the cache line size, to get the total size - // of the cache lines that will be dedicated to the view. - // * Add (cache line size - 1) filler bytes to guarantee that the reserved - // area will contain a cache-aligned block of the required cache lines, - // no matter where the reserved area starts. 
- // - char m_leftmost[((sizeof(view_type) + 63UL) & ~63UL) + 63U]; - // View size rounded up to multiple cache lines - - protected: - /** Constructor. Find the first cache-aligned position in the reserved - * area, and pass it to the base constructor as the leftmost view - * address. - */ - reducer_content() - : reducer_base( - (char *)(((std::size_t)&m_leftmost + 63UL) & ~63UL)) {} -}; - -} // namespace internal - -// The __cilkrts_hyperobject_ functions are defined differently depending on -// whether a file is compiled with or without the CILK_STUB option. Therefore, -// reducers compiled in the two modes should be link-time incompatible, so that -// object files compiled with stubbed reducers won't be linked into an -// unstubbed program, or vice versa. We achieve this by putting the reducer -// class definition into the cilk::stub namespace in a stubbed compilation. - -#ifdef CILK_STUB -namespace stub { -#endif - -/** Reducer class. - * - * A reducer is instantiated on a Monoid. The Monoid provides the value - * type, associative reduce function, and identity for the reducer. - * - * @tparam Monoid The monoid class that the reducer is instantiated on. It - * must model the @ref reducers_monoid_concept "monoid - * concept". - * - * @see @ref pagereducers - */ -template -class reducer : public internal::reducer_content { - typedef internal::reducer_content base; - using base::leftmost_ptr; - using base::monoid_ptr; - - public: - typedef Monoid monoid_type; ///< The monoid type. - typedef typename Monoid::value_type value_type; ///< The value type. - typedef typename Monoid::view_type view_type; ///< The view type. - - private: - typedef internal::reducer_set_get set_get; - - reducer(const reducer &); ///< Disallow copying. - reducer &operator=(const reducer &); ///< Disallow assignment. 
- - public: - /** @name Constructors - * - * All reducer constructors call the static `construct()` function of the - * monoid class to construct the reducer's monoid and leftmost view. - * - * The reducer constructor arguments are simply passed through to the - * construct() function. Thus, the constructor parameters accepted by a - * particular reducer class are determined by its monoid class. - */ - //@{ - - /** 0 – 6 const reference parameters. - */ - //@{ - - reducer() { monoid_type::construct(monoid_ptr(), leftmost_ptr()); } - - template reducer(const T1 &x1) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); - } - - template reducer(const T1 &x1, const T2 &x2) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, - x5); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5, const T6 &x6) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5, - x6); - } - - //@} - - /** 1 non-const reference parameter. - */ - //@{ - - template reducer(T1 &x1) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); - } - - //@} - - /** Destructor. - */ - __CILKRTS_STRAND_STALE(~reducer()) { - leftmost_ptr()->~view_type(); - monoid_ptr()->~monoid_type(); - } - - //@{ - /** Get the monoid. - * - * @return A reference to the monoid object belonging to this reducer. 
- */ - Monoid &monoid() { return *monoid_ptr(); } - - const Monoid &monoid() const { - return const_cast(this)->monoid(); - } - //@} - - //@{ - /** Access the current view. - * - * Return a reference to the instance of the reducer's view that was - * created for the current strand of a parallel computation (and create - * it if it doesn't already exist). - */ - view_type &view() { return base::view(); } - const view_type &view() const { return base::view(); } - //@} - - /** @name Dereference the reducer to get the view. - * - * "Dereferencing" a reducer yields the view for the current strand. The - * view, in turn, acts as a proxy for its contained value, exposing only - * those operations which are consistent with the reducer's monoid. Thus, - * all modifications of the reducer's accumulator variable are written as - * - * *reducer OP ... - * - * or - * - * reducer->func(...) - * - * (The permitted operations on a reducer's accumulator are listed in the - * documentation for that particular kind of reducer.) - * - * @note `*r` is a synonym for `r.view()`. Recommended style is to use - * `*r` (or `r->`) in the common case where code is simply - * updating the accumulator variable wrapped in the view, and to - * use `r.view()` in the unusual case where it is desirable to - * call attention to the view itself. - */ - //@{ - - //@{ - /** Dereference operator. - * - * @return A reference to the per-strand view instance. - */ - view_type &operator*() { return view(); } - view_type const &operator*() const { return view(); } - //@} - - //@{ - /** Pointer operator. - * - * @return A pointer to the per-strand view instance. - */ - view_type *operator->() { return &view(); } - view_type const *operator->() const { return &view(); } - //@} - - //@{ - /** Deprecated view access. - * - * `r()` is a synonym for `*r` which was used with early versions of - * Intel Cilk Plus reducers. `*r` is now the preferred usage. - * - * @deprecated Use operator*() instead of operator()(). 
- * - * @return A reference to the per-strand view instance. - */ - view_type &operator()() { return view(); } - view_type const &operator()() const { return view(); } - //@} - - //@} - - /** @name Set and get the value. - * - * These functions are used to set an initial value for the reducer before - * starting the reduction, or to get the final value after the reduction - * is complete. - * - * @note These functions are completely different from the view - * operations that are made available via operator*() and - * operator->(), which are used to _modify_ the reducer's value - * _during_ the reduction. - * - * @warning These functions _can_ be called at any time, and in - * general, they will refer to the value contained in the view - * for the current strand. However, using them other than to - * set the reduction's initial value or get its final value - * will almost always result in undefined behavior. - */ - //@{ - - /** Move a value into the reducer. - * - * This function is used to set the initial value of the reducer's - * accumulator variable by either copying or _moving_ the value of @a obj - * into it. Moving a value can often be performed in constant time, even - * for large container objects, but has the side effect of leaving the - * value of @a obj undefined. (See the description of the - * @ref move_in_wrapper class for a discussion of moving values.) - * - * @par Usage - * A move_in() call to initialize a reducer is often paired with a - * move_out() call to get its final value: - * - * reducer xr; - * xr.move_in(x); - * … do the reduction … - * xr.move_out(x); - * - * @par Assumptions - * - You cannot assume either that this will function will copy its - * value or that it will move it. - * - You must assume that the value of @a obj will be undefined - * after the call to move_in(). 
- * - You can assume that move_in() will be at least as efficient as - * set_value(), and you should therefore prefer move_in() unless - * you need the value of @a obj to be unchanged after the call. - * (But you should usually prefer the move-in constructor over a - * move_in() call - see the note below.) - * - * @note The behavior of a default constructor followed by move-in - * initialization: - * - * reducer xr; - * xr.move_in(x); - * - * @note is not necessarily the same as a move-in constructor: - * - * reducer xr(move_in(x)); - * - * @note In particular, when @a Type is a container type with a - * non-empty allocator, the move-in constructor will create the - * accumulator variable with the same allocator as the input - * argument @a x, while the default constructor will create the - * accumulator variable with a default allocator. The mismatch of - * allocators in the latter case means that the input argument - * @a x may have to be copied in linear time instead of being - * moved in constant time. - * - * @note Best practice is to prefer the move-in constructor over the - * move-in function unless the move-in function is required for - * some specific reason. - * - * @warning Calling this function other than to set the initial value - * for a reduction will almost always result in undefined - * behavior. - * - * @param obj The object containing the value that will be moved into the - * reducer. - * - * @post The reducer contains the value that was initially in @a obj. - * @post The value of @a obj is undefined. - * - * @see set_value() - */ - void move_in(value_type &obj) { set_get::move_in(view(), obj); } - - /** Move the value out of the reducer. - * - * This function is used to retrieve the final value of the reducer's - * accumulator variable by either copying or _moving_ the value of @a obj - * into it. 
Moving a value can often be performed in constant time, even - * for large container objects, but has the side effect of leaving the - * value of the reducer's accumulator variable undefined. (See the - * description of the @ref move_in_wrapper class for a discussion of - * moving values.) - * - * @par Usage - * A move_in() call to initialize a reducer is often paired with a - * move_out() call to get its final value: - * - * reducer xr; - * xr.move_in(x); - * … do the reduction … - * xr.move_out(x); - * - * @par Assumptions - * - You cannot assume either that this will function will copy its - * value or that it will move it. - * - You must assume that the value of the reducer's accumulator - * variable will be undefined after the call to move_out(). - * - You can assume that move_out() will be at least as efficient as - * get_value(), and you should therefore prefer move_out() unless - * you need the accumulator variable to be preserved after the - * call. - * - * @warning Calling this function other than to retrieve the final - * value of a reduction will almost always result in undefined - * behavior. - * - * @param obj The object that the value of the reducer will be moved into. - * - * @post @a obj contains the value that was initially in the reducer. - * @post The value of the reducer is undefined. - * - * @see get_value() - */ - void move_out(value_type &obj) { set_get::move_out(view(), obj); } - - /** Set the value of the reducer. - * - * This function sets the initial value of the reducer's accumulator - * variable to the value of @a obj. 
- * - * @note The behavior of a default constructor followed by - * initialization: - * - * reducer xr; - * xr.set_value(x); - * - * @note is not necessarily the same as a value constructor: - * - * reducer xr(x); - * - * @note In particular, when @a Type is a container type with a - * non-empty allocator, the value constructor will create the - * accumulator variable with the same allocator as the input - * argument @a x, while the default constructor will create the - * accumulator variable with a default allocator. - * - * @warning Calling this function other than to set the initial value - * for a reduction will almost always result in undefined - * behavior. - * - * @param obj The object containing the value that will be copied into - * the reducer. - * - * @post The reducer contains a copy of the value in @a obj. - * - * @see move_in() - */ - void set_value(const value_type &obj) { set_get::set_value(view(), obj); } - - /** Get the value of the reducer. - * - * This function gets the final value of the reducer's accumulator - * variable. - * - * @warning Calling this function other than to retrieve the final - * value of a reduction will almost always result in undefined - * behavior. - * - * @return A reference to the value contained in the reducer. - * - * @see move_out() - */ - typename set_get::return_type_for_get_value get_value() const { - return set_get::get_value(view()); - } - - //@} - - /** Implicit downcast to legacy reducer wrapper, if any. - * - * @see legacy_reducer_downcast - */ - operator typename legacy_reducer_downcast::type &() { - typedef typename legacy_reducer_downcast::type downcast_type; - return *reinterpret_cast(this); - } - - /** Implicit downcast to legacy reducer wrapper, if any. 
- * - * @see legacy_reducer_downcast - */ - operator const typename legacy_reducer_downcast::type &() const { - typedef typename legacy_reducer_downcast::type downcast_type; - return *reinterpret_cast(this); - } -}; - -#ifdef CILK_STUB -} // namespace stub -using stub::reducer; -#endif - -} // end namespace cilk - -#endif /* __cplusplus */ - -/** @page page_reducers_in_c Creating and Using Reducers in C - * - * @tableofcontents - * - * The Intel Cilk Plus runtime supports reducers written in C as well as in - * C++. The basic logic is the same, but the implementation details are very - * different. The C++ reducer implementation uses templates heavily to create - * very generic components. The C reducer implementation uses macros, which - * are a much blunter instrument. The most immediate consequence is that the - * monoid/view/reducer architecture is mostly implicit rather than explicit - * in C reducers. - * - * @section reducers_c_overview Overview of Using Reducers in C - * - * The basic usage pattern for C reducers is: - * - * 1. Create and initialize a reducer object. - * 2. Tell the Intel Cilk Plus runtime about the reducer. - * 3. Update the value contained in the reducer in a parallel computation. - * 4. Tell the Intel Cilk Plus runtime that you are done with the reducer. - * 5. Retrieve the value from the reducer. - * - * @subsection reducers_c_creation Creating and Initializing a C Reducer - * - * The basic pattern for creating and initializing a reducer object in C is - * - * CILK_C_DECLARE_REDUCER(value-type) reducer-name = - * CILK_C_INIT_REDUCER(value-type, - * reduce-function, - * identity-function, - * destroy-function, - * initial-value); - * - * This is simply an initialized definition of a variable named - * _reducer-name_. 
The @ref CILK_C_DECLARE_REDUCER macro expands to an - * anonymous `struct` declaration for a reducer object containing a view of - * type _value-type_, and the @ref CILK_C_INIT_REDUCER macro expands to a - * struct initializer. - * - * @subsection reducers_c_reduce_func Reduce Functions - * - * The reduce function for a reducer is called when a parallel execution - * strand terminates, to combine the values computed by the terminating - * strand and the strand to its left. It takes three arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* left` - the address of the value for the left strand. - * - `void* right` - the address of the value for the right (terminating) - * strand. - * - * It must apply the reducer's reduction operation to the `left` and `right` - * values, leaving the result in the `left` value. The `right` value is - * undefined after the reduce function call. - * - * @subsection reducers_c_identity_func Identity Functions - * - * The identity function for a reducer is called when a parallel execution - * strand begins, to initialize its value to the reducer's identity value. It - * takes two arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* v` - the address of a freshly allocated block of memory of size - * `sizeof(value-type)`. - * - * It must initialize the memory pointed to by `v` so that it contains the - * reducer's identity value. - * - * @subsection reducers_c_destroy_func Destroy Functions - * - * The destroy function for a reducer is called when a parallel execution - * strand terminates, to do any necessary cleanup before its value is - * deallocated. It takes two arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* p` - the address of the value for the terminating strand. - * - * It must release any resources belonging to the value pointed to by `p`, to - * avoid a resource leak when the memory containing the value is deallocated. 
- * - * A null pointer can be used for the destructor function if the reducer's - * values do not need any cleanup. - * - * @subsection reducers_c_register Tell the Intel Cilk Plus Runtime About the - * Reducer - * - * Call the @ref CILK_C_REGISTER_REDUCER macro to register the reducer with - * the Intel Cilk Plus runtime: - * - * CILK_C_REGISTER_REDUCER(reducer-name); - * - * The runtime will manage reducer values for all registered reducers when - * parallel execution strands begin and end. - * - * @subsection reducers_c_update Update the Value Contained in the Reducer - * - * The @ref REDUCER_VIEW macro returns a reference to the reducer's value for - * the current parallel strand: - * - * REDUCER_VIEW(reducer-name) = REDUCER_VIEW(reducer-name) OP x; - * - * C++ reducer views restrict access to the wrapped value so that it can only - * be modified in ways consistent with the reducer's operation. No such - * protection is provided for C reducers. It is entirely the responsibility - * of the user to avoid modifying the value in any inappropriate way. - * - * @subsection c_reducers_unregister Tell the Intel Cilk Plus Runtime That You - * Are Done with the Reducer - * - * When the parallel computation is complete, call the @ref - * CILK_C_UNREGISTER_REDUCER macro to unregister the reducer with the - * Intel Cilk Plus runtime: - * - * CILK_C_UNREGISTER_REDUCER(reducer-name); - * - * The runtime will stop managing reducer values for the reducer. - * - * @subsection c_reducers_retrieve Retrieve the Value from the Reducer - * - * When the parallel computation is complete, use the @ref REDUCER_VIEW macro - * to retrieve the final value computed by the reducer. - * - * @subsection reducers_c_example_custom Example - Creating and Using a - * Custom C Reducer - * - * The `IntList` type represents a simple list of integers. 
- * - * struct _intListNode { - * int value; - * _intListNode* next; - * } IntListNode; - * typedef struct { IntListNode* head; IntListNode* tail; } IntList; - * - * // Initialize a list to be empty - * void IntList_init(IntList* list) { list->head = list->tail = 0; } - * - * // Append an integer to the list - * void IntList_append(IntList* list, int x) - * { - * IntListNode* node = (IntListNode*) malloc(sizeof(IntListNode)); - * if (list->tail) list->tail->next = node; else list->head = node; - * list->tail = node; - * } - * - * // Append the right list to the left list, and leave the right list - * // empty - * void IntList_concat(IntList* left, IntList* right) - * { - * if (left->head) { - * left->tail->next = right->head; - * if (right->tail) left->tail = right->tail; - * } - * else { - * *left = *right; - * } - * IntList_init(*right); - * } - * - * This code creates a reducer that supports creating an `IntList` by - * appending values to it. - * - * void identity_IntList(void* reducer, void* list) - * { - * IntList_init((IntList*)list); - * } - * - * void reduce_IntList(void* reducer, void* left, void* right) - * { - * IntList_concat((IntList*)left, (IntList*)right); - * } - * - * CILK_C_DECLARE_REDUCER(IntList) my_list_int_reducer = - * CILK_C_INIT_REDUCER(IntList, - * reduce_int_list, - * identity_int_list, - * 0); - * // Initial value omitted // - * ListInt_init(&REDUCER_VIEW(my_int_list_reducer)); - * - * CILK_C_REGISTER_REDUCER(my_int_list_reducer); - * cilk_for (int i = 0; i != n; ++i) { - * IntList_append(&REDUCER_VIEW(my_int_list_reducer), a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(my_int_list_reducer); - * - * IntList result = REDUCER_VIEW(my_int_list_reducer); - * - * @section reducers_c_predefined Predefined C Reducers - * - * Some of the predefined reducer classes in the Intel Cilk Plus library come - * with a set of predefined macros to provide the same capabilities in C. 
In - * general, two macros are provided for each predefined reducer family: - * - * - `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)` - - * Declares a reducer object named _reducer-name_ with initial value - * _initial-value_ to perform a reduction using the _operation_ on values - * of the type specified by _type-name_. This is the equivalent of the - * general code described in @ref reducers_c_creation : - * - * CILK_C_DECLARE_REDUCER(type) reducer-name = - * CILK_C_INIT_REDUCER(type, ..., initial-value); - * - * where _type_ is the C type corresponding to _type_name_. See @ref - * reducers_c_type_names below for the _type-names_ that you can use. - * - * - `CILK_C_REDUCER_operation_TYPE(type-name)` - Expands to the `typedef` - * name for the type of the reducer object declared by - * `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)`. - * - * See @ref reducers_c_example_predefined. - * - * The predefined C reducers are: - * - * | Operation | Name | Documentation | - * |-------------------|---------------|-------------------------------| - * | addition | `OPADD` | @ref ReducersAdd | - * | bitwise AND | `OPAND` | @ref ReducersAnd | - * | bitwise OR | `OPOR` | @ref ReducersOr | - * | bitwise XOR | `OPXOR` | @ref ReducersXor | - * | multiplication | `OPMUL` | @ref ReducersMul | - * | minimum | `MIN` | @ref ReducersMinMax | - * | minimum & index | `MIN_INDEX` | @ref ReducersMinMax | - * | maximum | `MAX` | @ref ReducersMinMax | - * | maximum & index | `MAX_INDEX` | @ref ReducersMinMax | - * - * @subsection reducers_c_type_names Numeric Type Names - * - * The type and function names created by the C reducer definition macros - * incorporate both the reducer kind (`opadd`, `opxor`, etc.) and the value - * type of the reducer (`int`, `double`, etc.). The value type is represented - * by a _numeric type name_ string. 
The types supported in C reducers, and - * their corresponding numeric type names, are given in the following table: - * - * | Type | Numeric Type Name | - * |-----------------------|-------------------------------| - * | `char` | `char` | - * | `unsigned char` | `uchar` | - * | `signed char` | `schar` | - * | `wchar_t` | `wchar_t` | - * | `short` | `short` | - * | `unsigned short` | `ushort` | - * | `int` | `int` | - * | `unsigned int` | `uint` | - * | `unsigned int` | `unsigned` (alternate name) | - * | `long` | `long` | - * | `unsigned long` | `ulong` | - * | `long long` | `longlong` | - * | `unsigned long long` | `ulonglong` | - * | `float` | `float` | - * | `double` | `double` | - * | `long double` | `longdouble` | - * - * @subsection reducers_c_example_predefined Example - Using a Predefined C - * Reducer - * - * To compute the sum of all the values in an array of `unsigned int`: - * - * CILK_C_REDUCER_OPADD(sum, uint, 0); - * CILK_C_REGISTER_REDUCER(sum); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(sum) += a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(sum); - * printf("The sum is %u\n", REDUCER_VIEW(sum)); - */ - -/** @name C language reducer macros - * - * These macros are used to declare and work with reducers in C code. - * - * @see @ref page_reducers_in_c - */ -//@{ - -/// @cond internal - -/** @name Compound identifier macros. - * - * These macros are used to construct an identifier by concatenating two or - * three identifiers. - */ -//@{ - -/** Expand to an identifier formed by concatenating two identifiers. - */ -#define __CILKRTS_MKIDENT(a, b) __CILKRTS_MKIDENT_IMP(a, b, ) - -/** Expand to an identifier formed by concatenating three identifiers. - */ -#define __CILKRTS_MKIDENT3(a, b, c) __CILKRTS_MKIDENT_IMP(a, b, c) - -/** Helper macro to do the concatenation. - */ -#define __CILKRTS_MKIDENT_IMP(a, b, c) a##b##c - -//@} - -/** Compiler-specific keyword for the "type of" operator. 
- */ -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -#define _Typeof __typeof__ -#endif - -/** @name Predefined reducer function declaration macros. - * - * These macros are used to create the function headers for the identity, - * reduction, and destructor functions for a builtin reducer family. The - * macro can be followed by a semicolon to create a declaration, or by a - * brace-enclosed body to create a definition. - */ -//@{ - -/** Create an identity function header. - * - * @note The name of the function's value pointer parameter will always be `v`. - * - * @param name The reducer family name. - * @param tn The type name. - */ -#define __CILKRTS_DECLARE_REDUCER_IDENTITY(name, tn) \ - void __CILKRTS_MKIDENT3(name, _identity_, tn)(void *key, void *v) - -/** Create a reduction function header. - * - * @param name The reducer family name. - * @param tn The type name. - * @param l The name to use for the function's left value pointer parameter. - * @param r The name to use for the function's right value pointer - * parameter. - */ -#define __CILKRTS_DECLARE_REDUCER_REDUCE(name, tn, l, r) \ - void __CILKRTS_MKIDENT3(name, _reduce_, tn)(void *key, void *l, void *r) - -/** Create a destructor function header. - * - * @param name The reducer family name. - * @param tn The type name. - * @param p The name to use for the function's value pointer parameter. - */ -#define __CILKRTS_DECLARE_REDUCER_DESTROY(name, tn, p) \ - void __CILKRTS_MKIDENT3(name, _destroy_, tn)(void *key, void *p) - -//@} - -/// @endcond - -/*************************************************************************** - * Real implementation - ***************************************************************************/ - -/** Declaration of a C reducer structure type. - * - * This macro expands into an anonymous structure declaration for a C reducer - * structure which contains a @a Type value. 
For example: - * - * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = - * CILK_C_INIT_REDUCER(int, …); - * - * @param Type The type of the value contained in the reducer object. - * - * @see @ref reducers_c_creation - */ -#define CILK_C_DECLARE_REDUCER(Type) \ - struct { \ - __cilkrts_hyperobject_base __cilkrts_hyperbase; \ - Type __attribute__((aligned(64))) value; \ - } - -/** Initializer for a C reducer structure. - * - * This macro expands into a brace-enclosed structure initializer for a C - * reducer structure that was declared with - * `CILK_C_DECLARE_REDUCER(Type)`. For example: - * - * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = - * CILK_C_INIT_REDUCER(int, - * add_int_reduce, - * add_int_identity, - * 0, - * 0); - * - * @param Type The type of the value contained in the reducer object. Must - * be the same as the @a Type argument of the - * CILK_C_DECLARE_REDUCER macro call that created the - * reducer. - * @param Reduce The address of the @ref reducers_c_reduce_func - * "reduce function" for the reducer. - * @param Identity The address of the @ref reducers_c_identity_func - * "identity function" for the reducer. - * @param Destroy The address of the @ref reducers_c_destroy_func - * "destroy function" for the reducer. - * @param ... The initial value for the reducer. (A single expression if - * @a Type is a scalar type; a list of values if @a Type is a - * struct or array type.) - * - * @see @ref reducers_c_creation - */ - -#define CILK_C_INIT_REDUCER(Type, Reduce, Identity, Destroy, ...) \ - { \ - {{Reduce, Identity, Destroy, __cilkrts_hyper_alloc, \ - __cilkrts_hyper_dealloc}, \ - 0, \ - 64, /* TODO: Assert that this really is 64. */ \ - sizeof(Type)}, \ - __VA_ARGS__ \ - } - -/** Register a reducer with the Intel Cilk Plus runtime. - * - * The runtime will manage reducer values for all registered reducers when - * parallel execution strands begin and end. 
For example: - * - * CILK_C_REGISTER_REDUCER(my_add_int_reducer); - * cilk_for (int i = 0; i != n; ++i) { - * … - * } - * - * @param Expr The reducer to be registered. - * - * @see @ref page_reducers_in_c - */ -#define CILK_C_REGISTER_REDUCER(Expr) \ - __cilkrts_hyper_create(&(Expr).__cilkrts_hyperbase) - -/** Unregister a reducer with the Intel Cilk Plus runtime. - * - * The runtime will stop managing reducer values for a reducer after it is - * unregistered. For example: - * - * cilk_for (int i = 0; i != n; ++i) { - * … - * } - * CILK_C_UNREGISTER_REDUCER(my_add_int_reducer); - * - * @param Expr The reducer to be unregistered. - * - * @see @ref page_reducers_in_c - */ -#define CILK_C_UNREGISTER_REDUCER(Expr) \ - __cilkrts_hyper_destroy(&(Expr).__cilkrts_hyperbase) - -/** Get the current view for a reducer. - * - * The `REDUCER_VIEW(reducer-name)` returns a reference to the reducer's - * value for the current parallel strand. This can be used to initialize the - * value of the reducer before it is used, to modify the value of the reducer - * on the current parallel strand, or to retrieve the final value of the - * reducer at the end of the parallel computation. - * - * REDUCER_VIEW(my_add_int_reducer) = REDUCER_VIEW(my_add_int_reducer) + x; - * - * @note C++ reducer views restrict access to the wrapped value so that it - * can only be modified in ways consistent with the reducer's operation. No - * such protection is provided for C reducers. It is entirely the - * responsibility of the user to refrain from modifying the value in any - * inappropriate way. - * - * @param Expr The reducer whose value is to be returned. 
- * - * @see @ref page_reducers_in_c - */ -#define REDUCER_VIEW(Expr) \ - (*(_Typeof((Expr).value) *)__cilkrts_hyper_lookup( \ - &(Expr).__cilkrts_hyperbase)) - -//@} C language reducer macros - -#undef __CILKRTS_STRAND_STALE - -#endif // CILK_REDUCER_H_INCLUDED diff --git a/include/cilk/reducer_file.h b/include/cilk/reducer_file.h deleted file mode 100644 index 73aacda8..00000000 --- a/include/cilk/reducer_file.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - * - */ - - diff --git a/include/cilk/reducer_list.h b/include/cilk/reducer_list.h deleted file mode 100644 index 73ff2247..00000000 --- a/include/cilk/reducer_list.h +++ /dev/null @@ -1,1146 +0,0 @@ -/* reducer_list.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. 
- */ - -/** @file reducer_list.h - * - * @brief Defines classes for parallel list creation by appending or - * prepending reducers. - * - * @ingroup ReducersList - * - * @see ReducersList - */ - -#ifndef REDUCER_LIST_H_INCLUDED -#define REDUCER_LIST_H_INCLUDED - -#include -#include - -/** @defgroup ReducersList List Reducers - * - * List-append and list-prepend reducers create standard lists by - * concatenating a set of lists or values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers" - * (from file `reducers.md`) and particularly with @ref reducers_using, before - * trying to use the information in this file. - * - * @section redlist_usage Usage Example - * - * // Create a list containing the labels of the nodes of a tree in - * // "inorder" (left subtree, root, right subtree). - * - * struct Tree { Tree* left; Tree* right; string label; ... }; - * - * list x; - * cilk::reducer< cilk::op_list_append > xr(cilk::move_in(x)); - * collect_labels(tree, xr); - * xr.move_out(x); - * - * void collect_labels(Tree* node, - * cilk::reducer< cilk::op_list_append >& xr) - * { - * if (node) { - * cilk_spawn collect_labels(node->left, xr); - * xr->push_back(node->label); - * collect_labels(node->right, xr); - * cilk_sync; - * } - * } - * - * @section redlist_monoid The Monoid - * - * @subsection redlist_monoid_values Value Set - * - * The __value set__ of a list reducer is the set of values of the class - * `std::list`, which we refer to as the reducer's _list - * type_. 
- * - * @subsection redlist_monoid_operator Operator - * - * The operator of a list-append reducer is defined as - * - * x CAT y == (every element of x, followed by every element of y) - * - * The operator of a list-prepend reducer is defined as - * - * x RCAT y == (every element of y, followed by every element of x) - * - * @subsection redlist_monoid_identity Identity - * - * The identity value of a list reducer is the empty list, which is the value - * of the expression `std::list([allocator])`. - * - * @section redlist_operations Operations - * - * In the operation descriptions below, the type name `List` refers to the - * reducer's string type, `std::list`. - * - * @subsection redlist_constructors Constructors - * - * Any argument list which is valid for a `std::list` constructor is valid for - * a list reducer constructor. The usual move-in constructor is also provided: - * - * reducer(move_in(List& variable)) - * - * A list reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty list. 
- * - * @subsection redlist_get_set Set and Get - * - * r.set_value(const List& value) - * const List& = r.get_value() const - * r.move_in(List& variable) - * r.move_out(List& variable) - * - * @subsection redlist_view_ops View Operations - * - * The view of a list-append reducer provides the following member functions: - * - * void push_back(const Type& element) - * void insert_back(List::size_type n, const Type& element) - * template void insert_back(Iter first, Iter last) - * void splice_back(List& x) - * void splice_back(List& x, List::iterator i) - * void splice_back(List& x, List::iterator first, List::iterator last) - * - * The view of a list-prepend reducer provides the following member functions: - * - * void push_front(const Type& element) - * void insert_front(List::size_type n, const Type& element) - * template void insert_front(Iter first, Iter last) - * void splice_front(List& x) - * void splice_front(List& x, List::iterator i) - * void splice_front(List& x, List::iterator first, List::iterator last) - * - * The `push_back` and `push_front` functions are the same as the - * corresponding `std::list` functions. The `insert_back`, `splice_back`, - * `insert_front`, and `splice_front` functions are the same as the - * `std::list` `insert` and `splice` functions, with the first parameter - * fixed to the end or beginning of the list, respectively. - * - * @section redlist_performance Performance Considerations - * - * An efficient reducer requires that combining the values of two views (using - * the view `reduce()` function) be a constant-time operations. Two lists can - * be merged in constant time using the `splice()` function if they have the - * same allocator. Therefore, the lists for new views are created (by the view - * identity constructor) using the same allocator as the list that was created - * when the reducer was constructed. 
- * - * The performance of adding elements to a list reducer depends on the view - * operations that are used: - * - * * The `push` functions add a single element to the list, and therefore - * take constant time. - * * An `insert` function that inserts _N_ elements adds each of them - * individually, and therefore takes _O(N)_ time. - * * A `splice` function that inserts _N_ elements just adjusts a couple of - * pointers, and therefore takes constant time, _if the splice is from a - * list with the same allocator as the reducer_. Otherwise, it is - * equivalent to an `insert`, and takes _O(N)_ time. - * - * This means that for best performance, if you will be adding elements to a - * list reducer in batches, you should `splice` them from a list having the - * same allocator as the reducer. - * - * The reducer `move_in` and `move_out` functions do a constant-time `swap` if - * the variable has the same allocator as the reducer, and a linear-time copy - * otherwise. - * - * Note that the allocator of a list reducer is determined when the reducer is - * constructed. The following two examples may have very different behavior: - * - * list a_list; - * - * reducer< list_append reducer1(move_in(a_list)); - * ... parallel computation ... - * reducer1.move_out(a_list); - * - * reducer< list_append reducer2; - * reducer2.move_in(a_list); - * ... parallel computation ... - * reducer2.move_out(a_list); - * - * * `reducer1` will be constructed with the same allocator as `a_list`, - * because the list was specified in the constructor. The `move_in` - * and `move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator, - * "`Allocator()`", which may or may not be the same as the allocator of - * `a_list`. Therefore, the `move_in` and `move_out` may have to be done - * with a copy in _O(N)_ time. - * - * (All instances of an allocator type with no internal state (like - * `std::allocator`) are "the same". 
You only need to worry about the "same - * allocator" issue when you create list reducers with custom allocator types.) - * - * @section redlist_types Type and Operator Requirements - * - * `std::list` must be a valid type. - */ - - -namespace cilk { - -namespace internal { - -/** @ingroup ReducersList */ -//@{ - -/** Base class for list-append and prepend view classes. - * - * @note This class provides the definitions that are required for a class - * that will be used as the parameter of a @ref list_monoid_base - * specialization. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list's allocator class. - * - * @see ReducersList - * @see list_monoid_base - */ -template -class list_view_base -{ -protected: - /// The type of the contained list. - typedef std::list list_type; - - /// The list accumulator variable. - list_type m_value; - -public: - - /** @name Monoid support. - */ - //@{ - - /// Required by @ref monoid_with_view - typedef list_type value_type; - - /// Required by @ref list_monoid_base - Allocator get_allocator() const - { - return m_value.get_allocator(); - } - - //@} - - - /** @name Constructors. - */ - //@{ - - /// Standard list constructor. - explicit list_view_base(const Allocator& a = Allocator()) : m_value(a) {} - explicit list_view_base( - typename list_type::size_type n, - const Type& value = Type(), - const Allocator& a = Allocator() ) : m_value(n, value, a) {} - template - list_view_base(Iter first, Iter last, const Allocator& a = Allocator()) : - m_value(first, last, a) {} - list_view_base(const list_type& list) : m_value(list) {} - - /// Move-in constructor. - explicit list_view_base(move_in_wrapper w) - : m_value(w.value().get_allocator()) - { - m_value.swap(w.value()); - } - - //@} - - /** @name Reducer support. - */ - //@{ - - /// Required by reducer::move_in() - void view_move_in(value_type& v) - { - if (m_value.get_allocator() == v.get_allocator()) - // Equal allocators. Do a (fast) swap. 
- m_value.swap(v); - else - // Unequal allocators. Do a (slow) copy. - m_value = v; - v.clear(); - } - - /// Required by reducer::move_out() - void view_move_out(value_type& v) - { - if (m_value.get_allocator() == v.get_allocator()) - // Equal allocators. Do a (fast) swap. - m_value.swap(v); - else - // Unequal allocators. Do a (slow) copy. - v = m_value; - m_value.clear(); - } - - /// Required by reducer::set_value() - void view_set_value(const value_type& v) { m_value = v; } - - /// Required by reducer::get_value() - value_type const& view_get_value() const { return m_value; } - - /// Type returned by view_get_value. - typedef value_type const& return_type_for_get_value; - - // Required by legacy wrapper get_reference() - value_type & view_get_reference() { return m_value; } - value_type const& view_get_reference() const { return m_value; } - - //@} -}; - - -/** Base class for list-append and prepend monoid classes. - * - * The key to efficient reducers is that the `identity` operation, which - * creates a new per-strand view, and the `reduce` operation, which combines - * two per-strand views, must be constant-time operations. Two lists can be - * concatenated in constant time only if they have the same allocator. - * Therefore, all the per-strand list accumulator variables must be created - * with the same allocator as the leftmost view list. - * - * This means that a list reduction monoid must have a copy of the allocator - * of the leftmost view's list, so that it can use it in the `identity` - * operation. This, in turn, requires that list reduction monoids have a - * specialized `construct()` function, which constructs the leftmost view - * before the monoid, and then passes the leftmost view's allocator to the - * monoid constructor. - * - * @tparam View The list-append or prepend view class. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). 
If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see list_view_base - */ -template -class list_monoid_base : public monoid_with_view -{ - typedef typename View::value_type list_type; - typedef typename list_type::allocator_type allocator_type; - typedef provisional_guard view_guard; - - allocator_type m_allocator; - -public: - - /** Constructor. - * - * There is no default constructor for list monoids, because the allocator - * must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - list_monoid_base(const allocator_type& allocator = allocator_type()) : - m_allocator(allocator) {} - - /** Creates an identity view. - * - * List view identity constructors take the list allocator as an argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(View *v) const { ::new((void*) v) View(m_allocator); } - - /** @name construct functions - * - * All `construct()` functions first construct the leftmost view, using - * the optional @a x1, @a x2, and @a x3 arguments that were passed in from - * the reducer constructor. They then call the view's `get_allocator()` - * function to get the list allocator from its contained list, and pass it - * to the monoid constructor. 
- */ - //@{ - - template - static void construct(Monoid* monoid, View* view) - { - view_guard vg( new((void*) view) View() ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, const T1& x1) - { - view_guard vg( new((void*) view) View(x1) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) View(x1, x2) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) View(x1, x2, x3) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - //@} -}; - -//@} - -} // namespace internal - - -/** @ingroup ReducersList */ -//@{ - -/** The list-append reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_list_append >`. It holds the - * accumulator variable for the reduction, and allows only append operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `push_back` operation would be used in an expression like - * `r->push_back(a)`, where `r` is a list-append reducer variable. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list allocator type. - * - * @see ReducersList - * @see op_list_append - */ -template ::allocator_type> -class op_list_append_view : public internal::list_view_base -{ - typedef internal::list_view_base base; - typedef std::list list_type; - typedef typename list_type::iterator iterator; - - iterator end() { return this->m_value.end(); } - -public: - - /** @name Constructors. 
- * - * All op_list_append_view constructors simply pass their arguments on to - * the @ref internal::list_view_base base class constructor. - * - * @ref internal::list_view_base supports all the std::list constructor - * forms, as well as the reducer move_in constructor form. - */ - //@{ - - op_list_append_view() : base() {} - - template - op_list_append_view(const T1& x1) : base(x1) {} - - template - op_list_append_view(const T1& x1, const T2& x2) : base(x1, x2) {} - - template - op_list_append_view(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Adds an element at the end of the list. - * - * This is equivalent to `list.push_back(element)` - */ - void push_back(const Type& element) - { this->m_value.push_back(element); } - - /** Inserts elements at the end of the list. - * - * This is equivalent to `list.insert(list.end(), n, element)` - */ - void insert_back(typename list_type::size_type n, const Type& element) - { this->m_value.insert(end(), n, element); } - - /** Inserts elements at the end of the list. - * - * This is equivalent to `list.insert(list.end(), first, last)` - */ - template - void insert_back(Iter first, Iter last) - { this->m_value.insert(end(), first, last); } - - /** Splices elements at the end of the list. - * - * This is equivalent to `list.splice(list.end(), x)` - */ - void splice_back(list_type& x) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x); - else { - insert_back(x.begin(), x.end()); - x.clear(); - } - } - - /** Splices elements at the end of the list. - * - * This is equivalent to `list.splice(list.end(), x, i)` - */ - void splice_back(list_type& x, iterator i) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x, i); - else { - push_back(*i); - x.erase(i); - } - } - - /** Splices elements at the end of the list. 
- * - * This is equivalent to `list.splice(list.end(), x, first, last)` - */ - void splice_back(list_type& x, iterator first, iterator last) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x, first, last); - else { - insert_back(first, last); - x.erase(first, last); - } - } - - //@} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_list_append monoid to combine - * the views of two strands when the right strand merges with the left - * one. It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_list_append monoid to implement the - * monoid reduce operation. - */ - void reduce(op_list_append_view* right) - { - this->m_value.splice(end(), right->m_value); - } -}; - - -/** The list-prepend reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_list_prepend >`. It holds the - * accumulator variable for the reduction, and allows only prepend operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `push_front` operation would be used in an expression like - * `r->push_front(a)`, where `r` is a list-prepend reducer variable. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list allocator type. 
- * - * @see ReducersList - * @see op_list_prepend - */ -template ::allocator_type> -class op_list_prepend_view : public internal::list_view_base -{ - typedef internal::list_view_base base; - typedef std::list list_type; - typedef typename list_type::iterator iterator; - - iterator begin() { return this->m_value.begin(); } - -public: - - /** @name Constructors. - * - * All op_list_prepend_view constructors simply pass their arguments on to - * the @ref internal::list_view_base base class constructor. - * - * @ref internal::list_view_base supports all the std::list constructor - * forms, as well as the reducer move_in constructor form. - * - */ - //@{ - - op_list_prepend_view() : base() {} - - template - op_list_prepend_view(const T1& x1) : base(x1) {} - - template - op_list_prepend_view(const T1& x1, const T2& x2) : base(x1, x2) {} - - template - op_list_prepend_view(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Adds an element at the beginning of the list. - * - * This is equivalent to `list.push_front(element)` - */ - void push_front(const Type& element) - { this->m_value.push_front(element); } - - /** Inserts elements at the beginning of the list. - * - * This is equivalent to `list.insert(list.begin(), n, element)` - */ - void insert_front(typename list_type::size_type n, const Type& element) - { this->m_value.insert(begin(), n, element); } - - /** Inserts elements at the beginning of the list. - * - * This is equivalent to `list.insert(list.begin(), first, last)` - */ - template - void insert_front(Iter first, Iter last) - { this->m_value.insert(begin(), first, last); } - - /** Splices elements at the beginning of the list. 
- * - * This is equivalent to `list.splice(list.begin(), x)` - */ - void splice_front(list_type& x) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x); - else { - insert_front(x.begin(), x.begin()); - x.clear(); - } - } - - /** Splices elements at the beginning of the list. - * - * This is equivalent to `list.splice(list.begin(), x, i)` - */ - void splice_front(list_type& x, iterator i) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x, i); - else { - push_front(*i); - x.erase(i); - } - } - - /** Splices elements at the beginning of the list. - * - * This is equivalent to `list.splice(list.begin(), x, first, last)` - */ - void splice_front(list_type& x, iterator first, iterator last) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x, first, last); - else { - insert_front(first, last); - x.erase(first, last); - } - } - - //@} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_list_prepend monoid to combine - * the views of two strands when the right strand merges with the left - * one. It prepends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_list_prepend monoid to implement the - * monoid reduce operation. - */ - /** Reduce operation. - * - * Required by @ref monoid_base. - */ - void reduce(op_list_prepend_view* right) - { - this->m_value.splice(begin(), right->m_value); - } -}; - - - -/** Monoid class for list-append reductions. Instantiate the cilk::reducer - * template class with a op_list_append monoid to create a list-append reducer - * class. 
For example, to create a list of strings: - * - * cilk::reducer< cilk::op_list_append > r; - * - * @tparam Type The list element type (not the list type). - * @tparam Alloc The list allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see op_list_append_view - */ -template ::allocator_type, - bool Align = false> -struct op_list_append : - public internal::list_monoid_base, Align> -{ - /// Construct with default allocator. - op_list_append() {} - /// Construct with specified allocator. - op_list_append(const Allocator& alloc) : - internal::list_monoid_base, Align>(alloc) {} -}; - -/** Monoid class for list-prepend reductions. Instantiate the cilk::reducer - * template class with a op_list_prepend monoid to create a list-prepend - * reducer class. For example, to create a list of strings: - * - * cilk::reducer< cilk::op_list_prepend > r; - * - * @tparam Type The list element type (not the list type). - * @tparam Alloc The list allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see op_list_prepend_view - */ -template ::allocator_type, - bool Align = false> -struct op_list_prepend : - public internal::list_monoid_base, Align> -{ - /// Construct with default allocator. - op_list_prepend() {} - /// Construct with specified allocator. 
- op_list_prepend(const Allocator& alloc) : - internal::list_monoid_base, Align>(alloc) {} -}; - - -/** Deprecated list-append reducer wrapper class. - * - * reducer_list_append is the same as - * @ref reducer<@ref op_list_append>, except that reducer_list_append is a - * proxy for the contained view, so that accumulator variable update - * operations can be applied directly to the reducer. For example, an element - * is appended to a `reducer<%op_list_append>` with `r->push_back(a)`, but an - * element can be appended to a `%reducer_list_append` with `r.push_back(a)`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_list_append. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_list_append` - * and `reducer<%op_list_append>`. This allows incremental code - * conversion: old code that used `%reducer_list_append` can pass a - * `%reducer_list_append` to a converted function that now expects a - * pointer or reference to a `reducer<%op_list_append>`, and vice - * versa. - * - * @tparam Type The value type of the list. - * @tparam Allocator The allocator type of the list. - * - * @see op_list_append - * @see reducer - * @see ReducersList - */ -template > -class reducer_list_append : - public reducer > -{ - typedef reducer > base; - using base::view; -public: - - /// The reducer's list type. - typedef typename base::value_type list_type; - - /// The list's element type. - typedef Type list_value_type; - - /// The reducer's primitive component type. - typedef Type basic_value_type; - - /// The monoid type. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - //@{ - - /** Constructs a reducer with an empty list. 
- */ - reducer_list_append() {} - - /** Constructs a reducer with a specified initial list value. - */ - reducer_list_append(const std::list &initial_value) : - base(initial_value) {} - - //@} - - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - //@{ - - /// @copydoc op_list_append_view::push_back(const Type&) - void push_back(const Type& element) { view().push_back(element); } - - //@} - - /** Allows mutable access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A mutable reference to the list within the current view. - */ - list_type &get_reference() { return view().view_get_reference(); } - - /** Allows read-only access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A const reference to the list within the current view. - */ - list_type const &get_reference() const { return view().view_get_reference(); } - - /// @name Dereference - //@{ - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. 
That is: - * - * reducer< op_list_append > r; - * r->push_back(a); // *r returns the view - * // push_back is a view member function - * - * reducer_list_append w; - * w->push_back(a); // *w returns the wrapper - * // push_back is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_list_append& operator*() { return *this; } - reducer_list_append const& operator*() const { return *this; } - - reducer_list_append* operator->() { return this; } - reducer_list_append const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_list_append >& () - { - return *reinterpret_cast< - reducer< op_list_append >* - >(this); - } - operator const reducer< op_list_append >& () const - { - return *reinterpret_cast< - const reducer< op_list_append >* - >(this); - } - //@} - -}; - - -/** Deprecated list-prepend reducer wrapper class. - * - * reducer_list_prepend is the same as - * @ref reducer<@ref op_list_prepend>, except that reducer_list_prepend is a - * proxy for the contained view, so that accumulator variable update operations - * can be applied directly to the reducer. For example, an element is prepended - * to a `reducer` with `r->push_back(a)`, but an element is - * prepended to a `reducer_list_prepend` with `r.push_back(a)`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_list_prepend. 
- * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_list_prepend` - * and `reducer<%op_list_prepend>`. This allows incremental code - * conversion: old code that used `%reducer_list_prepend` can pass a - * `%reducer_list_prepend` to a converted function that now expects a - * pointer or reference to a `reducer<%op_list_prepend>`, and vice - * versa. - * - * @tparam Type The value type of the list. - * @tparam Allocator The allocator type of the list. - * - * @see op_list_prepend - * @see reducer - * @see ReducersList - */ -template > -class reducer_list_prepend : - public reducer > -{ - typedef reducer > base; - using base::view; -public: - - /** The reducer's list type. - */ - typedef typename base::value_type list_type; - - /** The list's element type. - */ - typedef Type list_value_type; - - /** The reducer's primitive component type. - */ - typedef Type basic_value_type; - - /** The monoid type. - */ - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - //@{ - - /** Constructs a reducer with an empty list. - */ - reducer_list_prepend() {} - - /** Constructs a reducer with a specified initial list value. - */ - reducer_list_prepend(const std::list &initial_value) : - base(initial_value) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. - */ - //@{ - - /// @copydoc op_list_prepend_view::push_front(const Type&) - void push_front(const Type& element) { view().push_front(element); } - - //@} - - /** Allows mutable access to the list within the current view. 
- * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A mutable reference to the list within the current view. - */ - list_type &get_reference() { return view().view_get_reference(); } - - /** Allows read-only access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A const reference to the list within the current view. - */ - list_type const &get_reference() const { return view().view_get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_list_prepend > r; - * r->push_front(a); // *r returns the view - * // push_front is a view member function - * - * reducer_list_prepend w; - * w->push_front(a); // *w returns the wrapper - * // push_front is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_list_prepend& operator*() { return *this; } - reducer_list_prepend const& operator*() const { return *this; } - - reducer_list_prepend* operator->() { return this; } - reducer_list_prepend const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. 
- * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_list_prepend >& () - { - return *reinterpret_cast< - reducer< op_list_prepend >* - >(this); - } - operator const reducer< op_list_prepend >& () const - { - return *reinterpret_cast< - const reducer< op_list_prepend >* - >(this); - } - //@} - -}; - -/// @cond internal - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_list_append >` - * class to have an `operator reducer_list_append& ()` - * conversion operator that statically downcasts the `reducer` - * to the corresponding `reducer_list_append` type. (The reverse conversion, - * from `reducer_list_append` to `reducer`, is just an upcast, - * which is provided for free by the language.) - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_list_append type; -}; - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the - * `reducer< op_list_prepend >` class to have an - * `operator reducer_list_prepend& ()` conversion operator - * that statically downcasts the `reducer` to the - * corresponding `reducer_list_prepend` type. (The reverse conversion, from - * `reducer_list_prepend` to `reducer`, is just an upcast, - * which is provided for free by the language.) 
- */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_list_prepend type; -}; - -/// @endcond - -//@} - -} // Close namespace cilk - -#endif // REDUCER_LIST_H_INCLUDED diff --git a/include/cilk/reducer_max.h b/include/cilk/reducer_max.h deleted file mode 100644 index fa4d0c50..00000000 --- a/include/cilk/reducer_max.h +++ /dev/null @@ -1,57 +0,0 @@ -/* reducer_max.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_max.h - * - * @brief Defines classes for doing parallel maximum reductions. - * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#include "reducer_min_max.h" diff --git a/include/cilk/reducer_min.h b/include/cilk/reducer_min.h deleted file mode 100644 index 521a4d32..00000000 --- a/include/cilk/reducer_min.h +++ /dev/null @@ -1,57 +0,0 @@ -/* reducer_min.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_min.h - * - * @brief Defines classes for doing parallel minimum reductions. 
- * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#include "reducer_min_max.h" diff --git a/include/cilk/reducer_min_max.h b/include/cilk/reducer_min_max.h deleted file mode 100644 index 947dad09..00000000 --- a/include/cilk/reducer_min_max.h +++ /dev/null @@ -1,3743 +0,0 @@ -/* reducer_min_max.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_min_max.h - * - * @brief Defines classes for doing parallel minimum and maximum reductions. - * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#ifndef REDUCER_MIN_MAX_H_INCLUDED -#define REDUCER_MIN_MAX_H_INCLUDED - -#include - -#ifdef __cplusplus - -#include -#include - -/** @defgroup ReducersMinMax Minimum and Maximum Reducers - * - * Minimum and maximum reducers allow the computation of the minimum or - * maximum of a set of values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus - * reducers", described in file `reducers.md`, and particularly with @ref - * reducers_using, before trying to use the information in this file. 
- * - * @section redminmax_usage Usage Examples - * - * cilk::reducer< cilk::op_max > rm; - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rm->calc_max(a[i]); // or *rm = cilk::max_of(*max, a[i]) - * } - * std::cout << "maximum value is " << rm.get_value() << std::endl; - * - * and - * - * cilk::reducer< cilk::op_min_index > rmi; - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rmi->calc_min(i, a[i]) // or *rmi = cilk::min_of(*rmi, i, a[i]); - * } - * std::cout << "minimum value a[" << rmi.get_value().first << "] = " - * << rmi.get_value().second << std::endl; - * - * @section redminmax_monoid The Monoid - * - * @subsection redminmax_monoid_values Value Set - * - * The value set of a minimum or maximum reducer is the set of values of - * `Type`, augmented with a "special identity value" which is not a value of - * `Type`, but which is defined to be greater than (less than) any value of - * `Type`. - * - * @subsection redminmax_monoid_operator Operator - * - * By default, the operator of a minimum reducer is defined as - * - * x MIN y == (x < y) ? x : y - * - * Thus, `a1 MIN a2 MIN … an` is the first `ai` which is not greater than any - * other `ai`. - * - * The operator of a maximum reducer is defined as - * - * x MAX y == (x > y) ? x : y - * - * Thus, `a1 MAX a2 MAX … an` is the first `ai` which is not less than any - * other `ai`. - * - * @subsection redminmax_monoid_comparators Comparators - * - * Min/max reducers are not limited to finding the minimum or maximum value - * determined by the `<` or `>` operator. In fact, all min/max reducers use a - * _comparator_, which is either a function or an object of a function class - * that defines a [strict weak ordering] - * (http://en.wikipedia.org/wiki/Strict_weak_ordering#Strict_weak_orderings) - * on a set of values. (This is exactly the same as the requirement for the - * comparison predicate for STL associative containers and sorting - * algorithms.) 
- * - * Just as with STL algorithms and containers, the comparator type parameter - * for min/max reducers is optional. If it is omitted, it defaults to - * `std::less`, which gives the behavior described in the previous section. - * Using non-default comparators (anything other than `std::less`) with - * min/max reducers is just like using them with STL containers and - * algorithms. - * - * Taking comparator objects into account, the reduction operation `MIN` for a - * minimum reducer is defined as - * - * x MIN y == compare(x, y) ? x : y - * - * where `compare()` is the reducer's comparator. Similarly, the reduction - * operation MAX for a maximum reducer is defined as - * - * x MAX y == compare(y, x) ? x : y - * - * (If `compare(x, y) == x < y`, then `compare(y, x) == x > y`.) - * - * @subsection redminmax_monoid_identity Identity - * - * The identity value of a min/max reducer is its monoid's - * ["special identity value"](#redminmax_monoid_values), which is not a value - * of the reducer's data type. (See @ref redminmax_initial.) - * - * @section redminmax_index Value and Index Reducers - * - * Min/max reducers come in two families. The _value_ reducers, with the - * `op_min` and `op_max` monoids, simply find the smallest or largest value - * from a set of values. The _index_ reducers, with the `op_min_index` and - * `op_max_index` monoids, also record an index value associated with the - * first occurrence of the smallest or largest value. - * - * In the `%op_min_index` usage example [above](#redminmax_usage), the values - * are taken from an array, and the index of a value is the index of the array - * element it comes from. More generally, though, an index can be any sort of - * key which identifies a particular value in a collection of values. For - * example, if the values were taken from the nodes of a tree, then the - * "index" of a value might be a pointer to the node containing that value. 
- * - * A min/max index reducer is essentially the same as a min/max value reducer - * whose value type is an (index, value) pair, and whose comparator ignores - * the index part of the pair. (index, value) pairs are represented by - * `std::pair` objects. This has the consequence that wherever - * the interface of a min/max value reducer has a `Type`, the interface of a - * min/max index reducer has a `std::pair`. (There are - * convenience variants of the `reducer(Type)` constructor and the - * `calc_min()`, `calc_max()`, `%min_of()`, and `%max_of()` functions that - * take an index argument and a value argument instead of a single index/value - * pair argument.) - * - * @section redminmax_operations Operations - * - * @subsection redminmax_constructors Constructors - * - * @subsubsection redminmax_constructors_value Min/Max Value Reducers - * - * reducer() // identity - * reducer(const Compare& compare) // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * reducer(const Type& value, const Compare& compare) - * reducer(move_in(Type& variable), const Compare& compare) - * - * @subsubsection redminmax_constructors_index Min/Max Index Reducers - * - * reducer() // identity - * reducer(const Compare& compare) // identity - * reducer(const std::pair& pair) - * reducer(const Index& index, const Type& value) - * reducer(move_in(std::pair& variable)) - * reducer(const std::pair& pair, const Compare& compare) - * reducer(const Index& index, const Type& value, const Compare& compare) - * reducer(move_in(std::pair& variable), const Compare& - * compare) - * - * See the explanation of the following two constructors in - * @ref redminmax_index_vector. 
- * - * reducer(const Index& index) - * reducer(const Index& index, const Compare& compare) - * - * @subsection redminmax_get_set Set and Get - * - * r.set_value(const Type& value) - * Type = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * Note that for an index reducer, the `Type` in these operations is actually a - * `std::pair`. (See @ref redminmax_index.) There is _not_ a - * `set_value(value, index)` operation. - * - * @subsection redminmax_initial Initial Values and is_set() - * - * The initial value of the leftmost view of a default-initialized min/max - * reducer, or of a non-leftmost view (created for a stolen parallel strand) - * is the special identity value, which is not a value of the reducer's value - * type. - * - * A view will have a real (non-identity) value if: - * - * - it is the leftmost view of a reducer that was constructed with an - * initial value, or - * - it was assigned a value with a call to `reducer.set_value()` or - * `reducer.move_in()`, or - * - it has been updated with a call to `reducer->calc_min()` or - * `reducer->calc_max()`, or - * - it has been updated with an assignment `*reducer = min_of(*reducer, x)` - * or `*reducer = max_of(*reducer, x)`. - * - * Calling `get_value()` or `move_out()` on a reducer whose view has the - * special identity value will yield an undefined result. The `is_set()` - * function can be used to test whether a view has the special identity value - * or a real value. If a reducer's current view has the special identity - * value, then `reducer()->is_set()` will return `false` (and - * `reducer.get_value()` will return an undefined value); if the view has a - * real value, them `reducer->is_set()` will return `true` and - * `reducer.get_value()` will return the value. 
- * - * @subsubsection redminmax_index_vector Special Issues with Min/Max Index - * Reducers - * - * The index portion of the computed index/value pair will be wrong in the - * following special case: - * - * - The reducer's value type is a simple numeric type. - * - The reducer uses the default comparator (`std::less`). - * - The reducer is updated at least once with a call to `calc_min()` or - * `calc_max()` or an assignment with `min_of()` or `max_of()`. - * - The value in _every_ update to the reducer is the maximum value of the - * value type (for a min_index reducer) or the minimum value of the value - * type (for a max_index reducer). - * - * In this case, `reducer.get_value().first` should be the index argument from - * the first reducer update, but it will actually be the default value of the - * `Index` type. Now, in the common case where the index type is an integer - * type and the reducer is finding the smallest or largest element in an - * array, the default value of the index type will be zero, which is the - * index of the first element in the array, so everything will work out: - * - * unsigned a[3] = {0, 0, 0}; - * reducer< op_max_index > r; - * for (int i = 0; i < 3; ++i) r->calc_max(i, a[i]); - * // r.get_value() = (0, 0) - * - * However, it doesn't always work out so well: - * - * typedef std::map my_map; - * my_map a; - * a["first"] = 0; - * a["second"] = 0; - * a["third"] = 0; - * reducer< op_max_index > r; - * for (typename my_map::iterator i = a.begin(); i != a.end(); ++i) - * r.calc_max(i->first, i->second); - * // r.get_value() = ("", 0), should be ("first", 0) - * - * If you know that no data value is associated with the default index value, - * then you can treat the default index value as a flag meaning "use the index - * of the first data value." But suppose that you don't know whether there is - * an element in the map with index `""`. Then you won't know what to do when - * `r.get_value().first == ""`. 
- * - * As a workaround for this conundrum, you can specify an alternative - * "default" index value. Either provide an index argument, _but not a - * value argument_, to the reducer constructor: - * - * reducer< op_max_index > - * r(a.empty() ? std::string() : a.begin()->first); - * - * or specify the default index with the view `set_default_index()` function: - * - * reducer< op_max_index > r; - * if (!a.empty()) r->set_default_index(a.begin()->first); - * - * Note that setting a default index, unlike setting an initial value, does - * not mark the view as having a non-identity value: - * - * reducer< op_min_index > r; - * r->set_default_index(-1); - * // r->is_set() = false - * // r.get_value() is undefined - * - * @subsection redminmax_view_ops View Operations - * - * The basic reduction operation is `x = x MIN a` for a minimum reducer, or - * `x = x MAX a` for a maximum reducer. The basic syntax for these operations - * uses the `calc_min()` and `calc_max()` member functions of the view class. - * An assignment syntax is also provided, using the `%cilk::min_of()` and - * `%cilk::max_of()` global functions: - * - * Class | Modifier | Assignment - * ---------------|---------------------|----------- - * `op_min` | `r->calc_min(x)` | `*r = min_of(*r, x)` or `*r = min_of(x, *r)` - * `op_max` | `r->calc_max(x)` | `*r = max_of(*r, x)` or `*r = max_of(x, *r)` - * `op_min_index` | `r->calc_min(i, x)` | `*r = min_of(*r, i, x)` or `*r = min_of(i, x, *r)` - * `op_max_index` | `r->calc_max(i, x)` | `*r = max_of(*r, i, x)` or `*r = max_of(i, x, *r)` - * - * Wherever an "`i`, `x`" argument pair is shown in the table above, a single - * pair argument may be passed instead. For example: - * - * Index index; - * Type value; - * std::pair ind_val(index, value); - * // The following statements are all equivalent. 
- * r->calc_min(index, value); - * r->calc_min(ind_val); - * *r = min_of(*r, index, value); - * *r = min_of(*r, ind_val); - * - * The `calc_min()` and `calc_max()` member functions return a reference to - * the view, so they can be chained: - * - * r->calc_max(x).calc_max(y).calc_max(z); - * - * In a `%min_of()` or `%max_of()` assignment, the view on the left-hand side - * of the assignment must be the same as the view argument in the call. - * Otherwise, the behavior is undefined (but an assertion error will occur if - * the code is compiled with debugging enabled). - * - * *r = max_of(*r, x); // OK - * *r1 = max_of(*r2, y); // ERROR - * - * `%min_of()` and `%max_of()` calls can be nested: - * - * *r = max_of(max_of(max_of(*r, x), y), z); - * *r = min_of(i, a[i], min_of(j, a[j], min_of(k, a[k], *r))); - * - * @section redminmax_compatibility Binary Compatibility Issues - * - * Most Intel Cilk Plus library reducers provide binary compatibility between - * `reducer_KIND` reducers compiled with Intel Cilk Plus library version 0.9 - * (distributed with Intel® C++ Composer XE version 13.0 and earlier) and the - * ame reducers compiled with Intel Cilk Plus library version 1.0 and later. - * - * Because of implementation changes that were needed to allow vectorization - * of loops containing min/max reducers, this binary compatibility is _not_ - * generally available for min/max reducers, either between Intel Cilk Plus - * library versions 0.9 and 1.0, or between versions 1.0 and 1.1. (Code compiled - * with different versions can be linked together safely, but min/max reducers - * in different library versions are in different namespaces, so reducer objects - * cannot be shared between them.) - * - * If this is an inconvenience, the simplest solution is just to recompile any - * existing code you may have that uses min/max reducers. 
If that is - * impossible, you can define the `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro (on - * the compiler command line, or in your source code before including - * `reducer_min_max.h`) when compiling with the new library. This will cause - * it to generate numeric reducers that will be link-time and run-time - * compatible with the 0.9 library. - * - * @subsection redminmax_compatibility_stateful Non-empty Comparators - * - * The representation of min/max reducers with non-empty comparator objects or - * with comparator functions is so different in between the 0.9 and 1.1 - * libraries that there is no way to make them binary compatible, even when - * compiling with `CILK_LIBRARY_0_9_REDUCER_MINMAX`. Therefore, the - * `reducer_{min|max}[_index]` wrapper classes have been coded in the 1.0 and - * later library so that they will not even compile when instantiated with a - * non-empty comparator class. - * - * This is not a problem when using an empty comparator class, such as the - * default `std::less`. - * - * @section redminmax_types Type Requirements - * - * `Type` and `Index` must be `Copy Constructible`, `Default Constructible`, - * and `Assignable`. - * - * `Compare` must be `Copy Constructible` if the reducer is constructed with a - * `compare` argument, and `Default Constructible` otherwise. - * - * The `Compare` function must induce a strict weak ordering on the elements - * of `Type`. 
- * - * @section redminmax_in_c Minimum and Maximum Reducers in C - * - * These macros can be used to do minimum and maximum reductions in C: - * - * Declaration | Type | Operation - * -----------------------------|-----------------------------------|---------- - * @ref CILK_C_REDUCER_MIN |@ref CILK_C_REDUCER_MIN_TYPE |@ref CILK_C_REDUCER_MIN_CALC - * @ref CILK_C_REDUCER_MAX |@ref CILK_C_REDUCER_MAX_TYPE |@ref CILK_C_REDUCER_MAX_CALC - * @ref CILK_C_REDUCER_MIN_INDEX |@ref CILK_C_REDUCER_MIN_INDEX_TYPE |@ref CILK_C_REDUCER_MIN_INDEX_CALC - * @ref CILK_C_REDUCER_MAX_INDEX |@ref CILK_C_REDUCER_MAX_INDEX_TYPE |@ref CILK_C_REDUCER_MAX_INDEX_CALC - * - * For example: - * - * CILK_C_REDUCER_MIN(r, int, INT_MAX); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * CILK_C_REDUCER_MIN_CALC(r, a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The smallest value in a is %d\n", REDUCER_VIEW(r)); - * - * - * CILK_C_REDUCER_MAX_INDEX(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * CILK_C_REDUCER_MAX_INDEX_CALC(r, i, a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The largest value in a is %u at %d\n", - * REDUCER_VIEW (r).value, REDUCER_VIEW(r).index); - * - * See @ref reducers_c_predefined. - */ - -namespace cilk { - -/** @defgroup ReducersMinMaxBinComp Binary compatibility - * - * If the macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then we generate - * reducer code and data structures which are binary-compatible with code that - * was compiled with the old min/max wrapper definitions, so we want the - * mangled names of the legacy min/max reducer wrapper classes to be the - * same as the names produced by the old definitions. - * - * Conversely, if the macro is not defined, then we generate binary- - * incompatible code, so we want different mangled names, to make sure that - * the linker does not allow new and old compiled legacy wrappers to be passed - * to one another. 
(Global variables are a different, and probably insoluble, - * problem.) - * - * Similarly, min/max classes compiled with and without - * CILK_LIBRARY_0_9_REDUCER_MINMAX are binary-incompatible, and must get - * different mangled names. - * - * The trick is, when compiling in normal (non-compatibility) mode, wrap - * everything in an extra namespace, and then `use` it into the top-level cilk - * namespace. Then - * - * * Classes and functions compiled in normal mode will be in - * different namespaces from the same classes and functions compiled in - * compatibility mode. - * * The legacy wrapper classes and functions will be in the same namespace - * as the same classes and functions compiled with the 0.9 library if and - * only if they are compiled in compatibility mode. - * - * @ingroup ReducersMinMax - */ - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -/** Namespace to wrap min/max reducer definitions when not compiling in "binary - * compatibility" mode. - * - * By default, all of the min/max reducer definitions are defined in this - * namespace and then imported into namespace ::cilk, so that they do not - * clash with the legacy definitions with the same names. However, if the - * macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then the min/max - * definitions go directly into namespace ::cilk, so that, for example, - * cilk::reducer_max defined with the 1.0 library is equivalent (to the - * linker) to cilk::reducer_max defined with the 0.9 library. - * - * @ingroup ReducersMinMaxBinComp - * @ingroup ReducersMinMax - */ -namespace cilk_lib_1_1 { -#endif - -/** Namespace containing internal implementation classes and functions for - * min/max reducers. 
- * - * @ingroup ReducersMinMax - */ -namespace min_max_internal { - -using ::cilk::internal::binary_functor; -using ::cilk::internal::class_is_empty; -using ::cilk::internal::typed_indirect_binary_function; - -/** @defgroup ReducersMinMaxIsSet The "is_set optimization" - * - * The obvious definition of the identity value for a max or min reducer is as - * the smallest (or largest) value of the value type. However, for an - * arbitrary comparator and/or an arbitrary value type, the largest / smallest - * value may not be known. It may not even be defined - what is the largest - * string? - * - * Therefore, min/max reducers represent their value internally as a pair - * `(value, is_set)`. When `is_set` is true, the pair represents the known - * value `value`; when `is_set` is false, the pair represents the identity - * value. - * - * This is an effective solution, but the most common use of min/max reducers - * is probably with numeric types and the default definition of minimum or - * maximum (using `std::less`), in which case there are well-defined, knowable - * smallest and largest values. Testing `is_set` for every comparison is then - * unnecessary and wasteful. - * - * The "is_set optimization" just means generating code that doesn't use - * `is_set` when it isn't needed. It is implemented using two metaprogramming - * classes: - * - * - do_is_set_optimization tests whether the optimization is applicable. - * - identity_value gets the appropriate identity value for a type. - * - * The is_set optimization is the reason that min/max reducers compiled with - * Intel Cilk Plus library 1.0 are binary-incompatible with the same reducers - * compiled with library 0.9, and therefore the optimization is suppressed when - * compiling in - * ReducersMinMaxBinComp "binary compatibility mode". - * - * @ingroup ReducersMinMax - */ - -/** Tests whether the ReducersMinMaxIsSet "is_set optimization" is - * applicable. 
- * - * The @ref do_is_set_optimization class is used to test whether the is_set - * optimization should be applied for a particular reducer. It is instantiated - * with a value type and a comparator, and defines a boolean constant, - * `value`. Then `%do_is_set_optimization::%value` can be used as - * a boolean template parameter to control the specialization of another - * class. - * - * In ReducersMinMaxBinComp "binary compatibility mode" (i.e., when the - * `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro is defined), `value` will always - * be false. - * - * @tparam Type The value type for the reducer. - * @tparam Compare The comparator type for the reducer. - * - * @result The `value` data member will be `true` if @a Type is a numeric - * type, @a Compare is `std::less`, and - * `CILK_LIBRARY_0_9_REDUCER_MINMAX` is not defined. - * - * @see ReducersMinMaxIsSet - * @see @ref view_content - * - * @ingroup ReducersMinMaxIsSet - */ -template struct do_is_set_optimization { - /// `True` if the is_set optimization should be applied to min/max reducers - /// with this value type and comparator; `false` otherwise. - static const bool value = false; -}; - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -/// @cond -template struct do_is_set_optimization> { - /// True in the special case where optimization is possible. - static const bool value = std::numeric_limits::is_specialized; -}; -/// @endcond -#endif - -/** Gets the identity value when using the ReducersMinMaxIsSet - * "is_set optimization". - * - * This class defines a function which assigns the appropriate identity value - * to a variable when the is_set optimization is applicable. - * - * @tparam Type The value type for the reducer. - * @tparam Compare The comparator type for the reducer. - * @tparam ForMax `true` to get the identity value for a max reducer (i.e., - * the smallest value of @a Type), `false` to get the identity - * value for a min reducer (i.e., the largest value of - * @a Type). 
- * - * @result If @a Type and @a Compare qualify for the is_set optimization, the - * `set_identity()' function will set its argument variable to the - * smallest or largest value of @a Type, depending on @a ForMax. - * Otherwise, `set_identity()` will be a no-op. - * - * @see ReducersMinMaxIsSet - * - * @ingroup ReducersMinMaxIsSet - * @see @ref view_content - */ -template ::is_specialized, - bool = std::numeric_limits::has_infinity> -struct identity_value { - /// Assign the identity value to the reference parameter. - static void set_identity(Type &) {} -}; - -/// @cond -template -struct identity_value, true, true, true> { - /// Floating max identity is negative infinity. - static void set_identity(Type &id) { - id = -std::numeric_limits::infinity(); - } -}; - -template -struct identity_value, true, true, false> { - /// Integer max identity is minimum value of type. - static void set_identity(Type &id) { - id = std::numeric_limits::min(); - } -}; - -template -struct identity_value, false, true, true> { - /// Floating min identity is positive infinity. - static void set_identity(Type &id) { - id = std::numeric_limits::infinity(); - } -}; - -template -struct identity_value, false, true, false> { - /// Integer min identity is maximum value of type. - static void set_identity(Type &id) { - id = std::numeric_limits::max(); - } -}; - -/// @endcond - -/** Adapter class to reverse the arguments of a predicate. - * - * Observe that: - * - * (x < y) == (y > x) - * max(x, y) == (x < y) ? y : x - * min(x, y) == (y < x) ? y : x == (x > y) ? y : x - * - * More generally, if `c` is a predicate defining a `Strict Weak Ordering`, - * and `c*(x, y) == c(y, x)`, then - * - * max(x, y, c) == c(x, y) ? y : x - * min(x, y, c) == c(y, x) ? y : x == c*(x, y) ? y : x == max(x, y, c*) - * - * For any predicate `C` with argument type `T`, the template class - * `%reverse_predicate` defines a predicate which is identical to `C`, - * except that its arguments are reversed. 
Thus, for example, we could - * implement `%op_min_view` as - * `%op_max_view >`. - * (Actually, op_min_view and op_max_view are both implemented as subclasses - * of a common base class, view_base.) - * - * @note If `C` is an empty functor class, then `reverse_predicate(C)` will - * also be an empty functor class. - * - * @tparam Predicate The predicate whose arguments are to be reversed. - * @tparam Argument @a Predicate's argument type. - * - * @ingroup ReducersMinMax - */ -template -class reverse_predicate : private binary_functor::type { - typedef typename binary_functor::type base; - - public: - /// Default constructor - reverse_predicate() : base() {} - /// Constructor with predicate object - reverse_predicate(const Predicate &p) : base(p) {} - /// The reversed predicate operation - bool operator()(const Argument &x, const Argument &y) const { - return base::operator()(y, x); - } -}; - -/** Class to represent the comparator for a min/max view class. - * - * This class is intended to accomplish two objectives in the implementation - * of min/max views. - * - * 1. To minimize data bloat, when we have a reducer with a non-stateless - * comparator, we want to keep a single instance of the comparator object - * in the monoid, and just call it from the views. - * 2. In ReducersMinMaxBinComp "binary compatibility mode", views for - * reducers with a stateless comparator must have the same content as in - * Intel Cilk Plus library 0.9 - that is, they must contain only `value` - * and `is_set` data members. - * - * To achieve the first objective, we use the - * @ref internal::typed_indirect_binary_function class defined in - * metaprogramming.h to wrap a pointer to the actual comparator. If no - * pointer is needed because the actual comparator is stateless, the - * `typed_indirect_binary_function` class will be empty, too. 
- * - * To achieve the second objective, we make the - * `typed_indirect_binary_function` class a base class of the view rather than - * a data member, so the "empty base class" rule will ensure no that no - * additional space is allocated in the view unless it is needed. - * - * We could simply use typed_indirect_binary_function as the base class of the - * view, but this would mean writing comparisons as `(*this)(x, y)`, which is - * just weird. So, instead, we comparator_base as a subclass of - * typed_indirect_binary_function which provides function `compare()` - * as a synonym for `operator()`. - * - * @tparam Type The value type of the comparator class. - * @tparam Compare A predicate class. - * - * @see internal::typed_indirect_binary_function - * - * @ingroup ReducersMinMax - */ -template -class comparator_base - : private typed_indirect_binary_function { - typedef typed_indirect_binary_function base; - - protected: - comparator_base(const Compare *f) : base(f) {} ///< Constructor. - - /// Comparison function. - bool compare(const Type &a, const Type &b) const { - return base::operator()(a, b); - } - - /// Get the comparator pointer. - const Compare *compare_pointer() const { return base::pointer(); } -}; - -/** @defgroup ReducersMinMaxViewContent Content classes for min/max views - * - * @ingroup ReducersMinMax - * - * Minimum and maximum reducer view classes inherit from a "view content" - * class. The content class defines the actual data members for the view, - * and provides typedefs and member functions for accessing the data members - * as needed to support the view functionality. 
- * - * There are two content classes, which encapsulate the differences between - * simple min/max reducers and min/max with index reducers: - * - * - view_content - * - index_view_content - * - * @note An obvious, and arguably simpler, encapsulation strategy would be - * to just let the `Type` of a min/max view be an (index, value) pair - * structure for min_index and max_index reducers. Then all views - * would just have a `Type` data member and an `is_set` data member, - * and the comparator for min_index and max_index views could be - * customized to consider only the value component of the (index, - * value) `Type` pair. Unfortunately, this would break binary - * compatibility with reducer_max_index and reducer_min_index in - * Intel Cilk Plus library 0.9, because the memory layout of an - * (index, value) pair followed by a `bool` is different from the - * memory layout of an index data member followed by a value data - * member followed by a `bool` data member. The content class is - * designed to exactly replicate the layout of the views in library 0.9 - * reducers. - * - * A content class `C`, and its objects `c`, must define the following: - * - * Definition | Meaning - * ------------------------------------|-------- - * `C::value_type` | A typedef for `Type` of the view. (A `std::pair` for min_index and max_index views). - * `C::comp_value_type` | A typedef for the type of value compared by the view's `compare()` function. - * `C()` | Constructs the content with the identity value. - * `C(const value_type&)` | Constructs the content with a specified value. - * `c.is_set()` | Returns true if the content has a known value. - * `c.value()` | Returns the content's value. - * `c.set_value(const value_type&)` | Sets the content's value. (The value becomes known.) - * `c.comp_value()` | Returns a const reference to the value or component of the value that is to be compared by the view's comparator. 
- * `C::comp_value(const value_type&)` | Returns a const reference to a value or component of a value that is to be compared by the view's comparator. - * - * @see view_base - */ - -/** Content class for op_min_view and op_max_view. - * - * @tparam Type The value type of the op_min_view or op_max_view. - * @tparam Compare The comparator class specified for the op_min_view or - * op_max_view. (_Not_ the derived comparator class actually - * used by the view_base. For example, the view_content of an - * `op_min_view` will have `Compare = std::less`, - * but its comparator_base will have - * `Compare = reverse_predicate< std::less >`.) - * @tparam ForMax `true` if this is the content class for an op_max_view, - * `false` if it is for an op_min_view. - * - * @note The general implementation of view_content uses an `is_set` data - * member. There is also a specialization which implements the - * ReducersMinMaxIsSet "is_set optimization". View classes that - * inherit from view_content do not need to know anything about the - * difference, though; the details are abstracted away in the - * view_content interface. - * - * @see ReducersMinMaxViewContent - * - * @ingroup ReducersMinMaxViewContent - * @ingroup ReducersMinMax - */ -template ::value> -class view_content { - protected: - /// @cond - Type m_value; - bool m_is_set; - /// @endcond - public: - /// The value type of the view. - typedef Type value_type; - - /// The type compared by the view's `compare()` function (which is the same - /// as the value type for view_content). - typedef Type comp_value_type; - - /// Construct with the identity value. - view_content() : m_value(), m_is_set(false) {} - - /// Construct with a defined value. - view_content(const value_type &value) : m_value(value), m_is_set(true) {} - - /// Gets the value. - value_type value() const { return m_value; } - - /// Sets the value. - void set_value(const value_type &value) { m_value = value; } - - /// Sets the is_set flag. 
- void set_is_set() { m_is_set = true; } - - /// Sets the index part of the value (which is meaningless for non-index - /// reducers, but required for view_base). - void set_default_index(const value_type &) {} - - /// Gets the comparison value (which, for view_content, is the same as the - /// value). - const comp_value_type &comp_value() const { return m_value; } - - /// Given an arbitrary value, gets the corresponding comparison value - /// (which, for view_content, is the same as the value). - static const comp_value_type &comp_value(const value_type &value) { - return value; - } - - /// Gets a const reference to value part of the value (which is the same as - /// the value for view_content). - const Type &get_reference() const { return m_value; } - - /// Gets a const reference to the index part of the value (which is - /// meaningless for non-index reducers, but required for view_base. - const Type &get_index_reference() const { return m_value; } - - /// Tests if the value is defined. - bool is_set() const { return m_is_set; } - - /// Tests if the view has a comparable value. - bool has_value() const { return is_set(); } -}; - -/// @cond - -/* This is the specialization of the view_content class for cases where - * the is_set optimization is applicable). - */ -template -class view_content - : public view_content { - typedef view_content base; - typedef identity_value Identity; - - public: - typedef typename base::value_type value_type; - ; - typedef typename base::comp_value_type comp_value_type; - ; - - view_content() : base() { Identity::set_identity(this->m_value); } - - view_content(const value_type &value) : base(value) {} - - bool has_value() const { return true; } -}; - -/// @endcond - -/** Content class for op_min_index_view and op_max_index_view. - * - * @tparam Index The index type of the op_min_index_view or - op_max_index_view. - * @tparam Type The value type of the op_min_view or op_max_view. 
(_Not_ - * the value type of the view, which will be - * `std::pair`.) - * @tparam Compare The comparator class specified for the op_min_index_view or - * op_max_index_view. (_Not_ the derived comparator class - * actually used by the view_base. For example, the - * index_view_content of an `op_min_index_view` will have - * `Compare = std::less`, but its comparator_base will - * have `Compare = reverse_predicate< std::less >`.) - * @tparam ForMax `true` if this is the content class for an - * op_max_index_view, `false` if it is for an - * op_min_index_view. - * - * @see ReducersMinMaxViewContent - * - * @ingroup ReducersMinMaxViewContent - * @ingroup ReducersMinMax - */ -template ::value> -class index_view_content { - protected: - /// @cond - Index m_index; - Type m_value; - bool m_is_set; - /// @endcond - public: - /// The value type of the view (which is an pair for - /// index_view_content). - typedef std::pair value_type; - - /// The type compared by the view's `compare()` function (which is the data - /// value type for index_view_content). - typedef Type comp_value_type; - - /// Construct with the identity value. - index_view_content() : m_index(), m_value(), m_is_set(false) {} - - /// Construct with an index/value pair. - index_view_content(const value_type &value) - : m_index(value.first), m_value(value.second), m_is_set(true) {} - - /// Construct with an index and a value. - index_view_content(const Index &index, const Type &value) - : m_index(index), m_value(value), m_is_set(true) {} - - /// Construct with just an index. - index_view_content(const Index &index) - : m_index(index), m_value(), m_is_set(false) {} - - /// Gets the value. - value_type value() const { return value_type(m_index, m_value); } - - /// Sets the value. - void set_value(const value_type &value) { - m_index = value.first; - m_value = value.second; - } - - /// Sets the is_set flag. 
- void set_is_set() { m_is_set = true; } - - /// Sets the (initial) index, without marking the view as set. - void set_default_index(const Index &index) { m_index = index; } - - /// Gets the comparison value (which, for index_view_content, is the value - /// component of the index/value pair). - const comp_value_type &comp_value() const { return m_value; } - - /// Given an arbitrary value (i.e., index/value pair), gets the - /// corresponding comparison value (which, for index_view_content, is the - /// value component of the index/value pair). - static const comp_value_type &comp_value(const value_type &value) { - return value.second; - } - - /// Gets a const reference to the value part of the value. - const Type &get_reference() const { return m_value; } - - /// Gets a const reference to the index part of the value. - const Index &get_index_reference() const { return m_index; } - - /// Tests if the value is defined. - bool is_set() const { return m_is_set; } - - /// Tests if the view has a comparable value. - bool has_value() const { return is_set(); } -}; - -/// @cond - -/* This is the specialization of the index_view_content class for cases where - * the is_set optimization is applicable). - */ -template -class index_view_content - : public index_view_content { - typedef index_view_content base; - typedef identity_value Identity; - - public: - typedef typename base::value_type value_type; - ; - typedef typename base::comp_value_type comp_value_type; - ; - - index_view_content() : base() { Identity::set_identity(this->m_value); } - - index_view_content(const value_type &value) : base(value) {} - - index_view_content(const Index &index, const Type &value) - : base(index, value) {} - - index_view_content(const Index &index) : base() { - Identity::set_identity(this->m_value); - this->m_index = index; - } - - /// Test if the view has a comparable value. 
- bool has_value() const { return true; } -}; - -/// @endcond - -template class rhs_proxy; - -/** Creates an rhs_proxy. - */ -template -inline rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - -template class view_base; - -/** Class to represent the right-hand side of - * `*reducer = {min|max}_of(*reducer, value)`. - * - * The only assignment operator for a min/max view class takes a rhs_proxy as - * its operand. This results in the syntactic restriction that the only - * expressions that can be assigned to a min/max view are ones which generate - * an rhs_proxy - that is, expressions of the form `max_of(view, value)` and - * `min_of(view, value)`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; otherwise, - * the behavior will be undefined. (I.e., `*r1 = min_of(*r1, x)` is legal; - * `*r1 = min_of(*r2, x)` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @tparam View The view class (op_{min|max}[_index]_view) that this proxy - * was created from. - * - * @see view_base - * - * @ingroup ReducersMinMax - */ -template class rhs_proxy { - typedef typename View::less_type less_type; - typedef typename View::compare_type compare_type; - typedef typename View::value_type value_type; - typedef typename View::content_type content_type; - typedef typename content_type::comp_value_type comp_value_type; - - friend class view_base; - friend rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - - typed_indirect_binary_function - m_comp; - const View *m_view; - value_type m_value; - - rhs_proxy &operator=(const rhs_proxy &); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - // Constructor (called from view_base::make_proxy). 
- rhs_proxy(const View *view, const value_type &value, - const compare_type *compare) - : m_view(view), m_value(value), m_comp(compare) {} - - // Checks matching view, then return value (called from view_base::assign). - value_type value(const typename View::base *view) const { - return m_value; - } - - public: - /** Supports max_of(max_of(view, value), value) and the like. - */ - rhs_proxy calc(const value_type &x) const { - return rhs_proxy(m_view, - m_comp(content_type::comp_value(m_value), - content_type::comp_value(x)) - ? x - : m_value, - m_comp.pointer()); - } -}; - -template -inline rhs_proxy make_proxy(const typename View::value_type &value, - const View &view) { - return rhs_proxy(&view, value, view.compare_pointer()); -} - -//@} - -/** Base class for min and max view classes. - * - * This class accumulates the minimum or maximum of a set of values which have - * occurred as arguments to the `calc()` function, as determined by a - * comparator. The accumulated value will be the first `calc()` argument value - * `x` such that `compare(x, y)` is false for every `calc()` argument value - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. Similarly, if the comparator is `reverse_predicate`, - * which is equivalent to `std::greater`, then the accumulated value is the - * first argument value which is not greater than any other argument value, - * i.e., the minimum. - * - * @note This class provides the definitions that are required for a class - * that will be used as the parameter of a - * min_max_internal::monoid_base specialization. - * - * @tparam Content A content class that provides the value types and data - * members for the view. - * @tparam Less A "less than" binary predicate that defines the min or - * max function. - * @tparam Compare A binary predicate to be used to compare the values. 
- * (The same as @a Less for max reducers; its reversal for - * min reducers.) - * - * @see ReducersMinMaxViewContent - * @see op_max_view - * @see op_min_view - * @see op_max_index_view - * @see op_min_index_view - * @see monoid_base - * - * @ingroup ReducersMinMax - */ -template -class view_base : - // comparator_base comes first to ensure that it will get empty base class - // treatment - private comparator_base, - private Content { - typedef comparator_base base; - using base::compare; - using Content::comp_value; - using Content::has_value; - using Content::set_is_set; - using Content::set_value; - using Content::value; - typedef Content content_type; - - template friend class rhs_proxy; - template - friend rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - - public: - /** @name Monoid support. - */ - //@{ - - /** Value type. Required by @ref monoid_with_view. - */ - typedef typename Content::value_type value_type; - - /** The type of the comparator specified by the user, that defines the - * ordering on @a Type. Required by min_max::monoid_base. - */ - typedef Less less_type; - - /** The type of the comparator actually used by the view. Required by - * min_max::monoid_base. (This is the same as the @ref less_type for a - * max reducer, or `reverse_predicate` for a min reducer.) - */ - typedef Compare compare_type; - - /** Reduces two views. Required by @ref monoid_with_view. - */ - void reduce(view_base *other) { - if (other->is_set() && - (!this->is_set() || - compare(this->comp_value(), other->comp_value()))) { - this->set_value(other->value()); - this->set_is_set(); - } - } - - //@} - - /** Default constructor. Initializes to identity value. - */ - explicit view_base(const compare_type *compare) - : base(compare), Content() {} - - /** Value constructor. - */ - template - view_base(const T1 &x1, const compare_type *compare) - : base(compare), Content(x1) {} - - /** Value constructor. 
- */ - template - view_base(const T1 &x1, const T2 &x2, const compare_type *compare) - : base(compare), Content(x1, x2) {} - - /** Move-in constructor. - */ - explicit view_base(move_in_wrapper w, - const compare_type *compare) - : base(compare), Content(w.value()) {} - - /** @name Reducer support. - */ - //@{ - - void view_move_in(value_type &v) { - set_value(v); - set_is_set(); - } - void view_move_out(value_type &v) { v = value(); } - void view_set_value(const value_type &v) { - set_value(v); - set_is_set(); - } - value_type view_get_value() const { return value(); } - // view_get_reference() NOT SUPPORTED - - //@} - - /** Sets the contained index data member, without marking the view as set. - * (Meaningless for non-index reducers.) - */ - using Content::set_default_index; - - /** Is the value defined? - */ - using Content::is_set; - - /** Reference to contained value data member. - * @deprecated For legacy reducers only. - */ - using Content::get_reference; - - /** Reference to contained index data member. - * (Meaningless for non-index reducers.) - * @deprecated For legacy reducers only. - */ - using Content::get_index_reference; - - protected: - /** Updates the min/max value. - */ - void calc(const value_type &x) { - if (!has_value() || compare(comp_value(), comp_value(x))) - set_value(x); - set_is_set(); - } - - /** Assigns the result of a `{min|max}_of(view, value)` expression to the - * view. - * - * @see rhs_proxy - */ - template void assign(const rhs_proxy &rhs) { - calc(rhs.value(this)); - } -}; - -/** Base class for min and max monoid classes. - * - * The unique characteristic of minimum and maximum reducers is that they - * incorporate a comparator functor that defines what "minimum" or "maximum" - * means. The monoid for a reducer contains the comparator that will be used - * for the reduction. If the comparator is a function or a class with state, - * then each view will have a pointer to the comparator. 
- * - * This means that the `construct()` functions first construct the monoid - * (possibly with an explicit comparator argument), and then construct the - * view with a pointer to the monoid's comparator. - * - * @tparam View The view class. - * @tparam Align If true, reducers instantiated on this monoid will be - * aligned. By default, library reducers (unlike legacy - * library reducer _wrappers_) are unaligned. - * - * @see view_base - * - * @ingroup ReducersMinMax - */ -template -class monoid_base : public monoid_with_view { - typedef typename View::compare_type compare_type; - typedef typename View::less_type less_type; - - const compare_type m_compare; - - const compare_type *compare_pointer() const { return &m_compare; } - - public: - /** Default constructor uses default comparator. - */ - monoid_base() : m_compare() {} - - /** Constructor. - * - * @param compare The comparator to use. - */ - monoid_base(const compare_type &compare) : m_compare(compare) {} - - /** Creates an identity view. - * - * List view identity constructors take the list allocator as an argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(View *v) const { ::new ((void *)v) View(compare_pointer()); } - - /** @name construct functions - * - * Min/max monoid `construct()` functions optionally take one or two value - * arguments, a @ref move_in argument, and/or a comparator argument. 
- */ - //@{ - - template - static void construct(Monoid *monoid, View *view) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) View(monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) View(x1, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) - View(x1, x2, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, - const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) View(monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) View(x1, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) - View(x1, x2, monoid->compare_pointer())); - } - - //@} -}; - -} // namespace min_max_internal - -/** @defgroup ReducersMinMaxMaxValue Maximum reducers (value only) - * - * These reducers will find the largest value from a set of values. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The maximum reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_max >`. It accumulates the maximum, - * as determined by a comparator, of a set of values which have occurred as - * arguments to the `calc_max()` function. 
The accumulated value will be the - * first argument `x` such that `compare(x, y)` is false for every argument - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_max()` function would be used in an expression like - * `r->calc_max(a)` where `r` is an op_max reducer variable. - * - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It - * defines the "less than" relation used to compute the - * maximum. - * - * @see ReducersMinMax - * @see op_max - */ -template -class op_max_view : public min_max_internal::view_base< - min_max_internal::view_content, - Compare, Compare> { - typedef min_max_internal::view_base< - min_max_internal::view_content, Compare, Compare> - base; - using base::assign; - using base::calc; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_max_view constructors simply pass their arguments on to the - * @ref view_base base class. - */ - //@{ - - template op_max_view(const T1 &x1) : base(x1) {} - - template - op_max_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Maximizes with a value. - * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_max()` - * or `= max_of()`), then the value of the view is set to @a x. - * - * @param x The value to maximize the view's value with. - * - * @return A reference to the view. 
(Allows chaining - * `view.comp_max(a).comp_max(b)…`.) - */ - op_max_view &calc_max(const Type &x) { - calc(x); - return *this; - } - - /** Assigns the result of a `max_of(view, value)` expression to the view. - * - * @param rhs An rhs_proxy value created by a `max_of(view, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_max_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } - - //@} -}; - -/** Computes the maximum of the value in an op_max_view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. For example, - * - * *reducer = max_of(*reducer, x); - * *reducer = max_of(x, *reducer); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_view &view, const Type &value) { - return min_max_internal::make_proxy(value, view); -} - -/// @copydoc max_of(const op_max_view&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, const op_max_view &view) { - return min_max_internal::make_proxy(value, view); -} - -/** Computes nested maximum. - * - * Compute the maximum of the result of a max_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or wrapper, or used in another max_of() call. 
For example, - * - * *reducer = max_of(x, max_of(y, *reducer)); - * wrapper = max_of(max_of(wrapper, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const min_max_internal::rhs_proxy> &proxy, - const Type &value) { - return proxy.calc(value); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_view >&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, - const min_max_internal::rhs_proxy> &proxy) { - return proxy.calc(value); -} - -/** Monoid class for maximum reductions. Instantiate the cilk::reducer template - * class with an op_max monoid to create a maximum reducer class. For example, - * to compute the maximum of a set of `int` values: - * - * cilk::reducer< cilk::op_max > r; - * - * @see ReducersMinMax - * @see op_max_view - */ -template , bool Align = false> -class op_max - : public min_max_internal::monoid_base, Align> { - typedef min_max_internal::monoid_base, Align> - base; - - public: - /// Construct with default comparator. - op_max() {} - /// Construct with specified comparator. - op_max(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMinValue Minimum reducers (value only) - * - * These reducers will find the smallest value from a set of values. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The minimum reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_min >`. It accumulates the minimum, - * as determined by a comparator, of a set of values which have occurred as - * arguments to the `calc_min()` function. The accumulated value will be the - * first argument `x` such that `compare(y, x)` is false for every argument - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which no other argument value is less than, i.e., the - * minimum. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_min()` function would be used in an expression like - * `r->calc_min(a)` where `r` is an op_min reducer variable. - * - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It - * defines the "less than" relation used to compute the - * minimum. - * - * @see ReducersMinMax - * @see op_min - */ -template -class op_min_view - : public min_max_internal::view_base< - min_max_internal::view_content, Compare, - min_max_internal::reverse_predicate> { - typedef min_max_internal::view_base< - min_max_internal::view_content, Compare, - min_max_internal::reverse_predicate> - base; - using base::assign; - using base::calc; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_min_view constructors simply pass their arguments on to the - * @ref view_base base class. - */ - //@{ - - template op_min_view(const T1 &x1) : base(x1) {} - - template - op_min_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Minimizes with a value. - * - * If @a x is less than the current value of the view (as defined by the - * reducer's comparator), or if the view was created without an initial - * value and its value has never been updated (with `calc_min()` or - * `= min_of()`), then the value of the view is set to @a x. - * - * @param x The value to minimize the view's value with. - * - * @return A reference to the view. (Allows chaining - * `view.comp_min(a).comp_min(b)…`.) - */ - op_min_view &calc_min(const Type &x) { - calc(x); - return *this; - } - - /** Assigns the result of a `min_of(view, value)` expression to the view. 
- * - * @param rhs An rhs_proxy value created by a `min_of(view, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_min_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the minimum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. For example, - * - * *reducer = min_of(*reducer, x); - * *reducer = min_of(x, *reducer); - * - * @see min_max_internal::view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_view &view, const Type &value) { - return min_max_internal::make_proxy(value, view); -} - -/// @copydoc min_of(const op_min_view&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, const op_min_view &view) { - return min_max_internal::make_proxy(value, view); -} - -/** Computes nested minimum. - * - * Compute the minimum of the result of a min_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or wrapper, or used in another min_of() call. For example, - * - * *reducer = min_of(x, min_of(y, *reducer)); - * wrapper = min_of(min_of(wrapper, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const min_max_internal::rhs_proxy> &proxy, - const Type &value) { - return proxy.calc(value); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_view >&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, - const min_max_internal::rhs_proxy> &proxy) { - return proxy.calc(value); -} - -/** Monoid class for minimum reductions. Instantiate the cilk::reducer template - * class with an op_min monoid to create a minimum reducer class. 
For example, - * to compute the minimum of a set of `int` values: - * - * cilk::reducer< cilk::op_min > r; - * - * @see ReducersMinMax - * @see op_min_view - */ -template , bool Align = false> -class op_min - : public min_max_internal::monoid_base, Align> { - typedef min_max_internal::monoid_base, Align> - base; - - public: - /// Construct with default comparator. - op_min() {} - /// Construct with specified comparator. - op_min(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMaxIndex Maximum reducers (value and index) - * - * These reducers will find the largest value from a set of values, and its - * index in the set. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The maximum index reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_max_index >`. It accumulates - * the maximum, as determined by a comparator, of a set of values which have - * occurred as arguments to the `calc_max()` function, and records the index - * of the maximum value. The accumulated value will be the first argument `x` - * such that `compare(x, y)` is false for every argument `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_max()` function would be used in an expression like - * `r->calc_max(i, a)`where `r` is an op_max_index reducer - * variable. - * - * @note The word "index" suggests an integer index into an array, but there - * is no restriction on the index type or how it should be used. In - * general, it may be convenient to use it for any kind of key that - * can be used to locate the maximum value in the collection that it - * came from - for example: - * - An index into an array. 
- * - A key into an STL map. - * - An iterator into any STL container. - * - * @note A max_index reducer is essentially a max reducer whose value type - * is a `std::pair`. This fact is camouflaged in the view - * `calc_max` function, the global `max_of` functions, and the reducer - * value constructor, which can all take an index argument and a value - * argument as an alternative to a single `std::pair` argument. - * However, the reducer `set_value()`, `get_value()`, `move_in()`, and - * `move_out()` functions work only with pairs, not with individual - * value and/or index arguments. - * - * @tparam Index The type of the indices associated with the values. - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare Used to compare the values. It must be a binary predicate. - * If it is omitted, then the view computes the conventional - * arithmetic maximum. - * - * @see ReducersMinMax - * @see op_max_index - */ -template -class op_max_index_view - : public min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, Compare> { - typedef min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, Compare> - base; - using base::assign; - using base::calc; - typedef std::pair pair_type; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_max_index_view constructors simply pass their arguments on to the - * @ref view_base base class, except for the `(index, value [, compare])` - * constructors, which create a `std::pair` containing the index and value. 
- */ - //@{ - - op_max_index_view() : base() {} - - template op_max_index_view(const T1 &x1) : base(x1) {} - - template - op_max_index_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - template - op_max_index_view(const T1 &x1, const T2 &x2, const T3 &x3) - : base(x1, x2, x3) {} - - op_max_index_view(const Index &i, const Type &v) : base(pair_type(i, v)) {} - - op_max_index_view(const Index &i, const Type &v, - const typename base::compare_type *c) - : base(pair_type(i, v), c) {} - - //@} - - /** Maximizes with a value and index. - * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_max()` - * or `= max_of()`), then the value of the view is set to @a x, and the - * index is set to @a i.. - * - * @param i The index of the value @a x. - * @param x The value to maximize the view's value with. - * - * @return A reference to the view. (Allows - * `view.comp_max(i, a).comp_max(j, b)…`.) - */ - op_max_index_view &calc_max(const Index &i, const Type &x) { - calc(pair_type(i, x)); - return *this; - } - - /** Maximizes with an index/value pair. - * - * If @a pair.second is greater than the current value of the view (as - * defined by the reducer's comparator), or if the view was created - * without an initial value and its value has never been updated (with - * `calc_max()` or `= max_of()`), then the value of the view is set to - * @a pair.second, and the index is set to @a pair.first. - * - * @param pair A pair containing a value to maximize the view's value - * with and its associated index. - * - * @return A reference to the view. (Allows - * `view.comp_max(p1).comp_max(p2)…`.) - */ - op_max_index_view &calc_max(const pair_type &pair) { - calc(pair); - return *this; - } - - /** Assigns the result of a `max_of(view, index, value)` expression to the - * view. 
- * - * @param rhs An rhs_proxy value created by a `max_of(view, index, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_max_index_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the maximum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. For example, - * - * *reducer = max_of(*reducer, i, x); - * *reducer = max_of(i, x, *reducer); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_index_view &view, const Index &index, - const Type &value) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Index &index, const Type &value, - const op_max_index_view &view) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_index_view &view, - const std::pair &pair) { - return min_max_internal::make_proxy(pair, view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const std::pair &pair, - const op_max_index_view &view) { - return min_max_internal::make_proxy(pair, view); -} - -/** Computes the nested maximum between the value in a view and other values. - * - * Compute the maximum of the result of a max_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. 
For example, - * - * *reducer = max_of(x, max_of(y, *reducer)); - * *reducer = max_of(max_of(*reducer, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of( - const min_max_internal::rhs_proxy> - &proxy, - const Index &index, const Type &value) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const Index &index, const Type &value, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const min_max_internal::rhs_proxy> - &proxy, - const std::pair &pair) { - return proxy.calc(pair); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const std::pair &pair, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(pair); -} - -/** Monoid class for maximum reductions with index. Instantiate the - * cilk::reducer template class with an op_max_index monoid to create a - * max_index reducer class. For example, to compute the maximum of an array of - * `double` values and the array index of the max value: - * - * cilk::reducer< cilk::op_max_index > r; - * - * @see ReducersMinMax - * @see op_max_index_view - */ -template , - bool Align = false> -class op_max_index : public min_max_internal::monoid_base< - op_max_index_view, Align> { - typedef min_max_internal::monoid_base< - op_max_index_view, Align> - base; - - public: - /// Construct with default comparator. - op_max_index() {} - /// Construct with specified comparator. 
- op_max_index(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMinIndex Minimum reducers (value and index) - * - * These reducers will find the smallest value from a set of values, and its - * index in the set. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The minimum index reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer >`. It accumulates - * the minimum, as determined by a comparator, of a set of values which have - * occurred as arguments to the `calc_min()` function, and records the index - * of the minimum value. The accumulated value will be the first argument `x` - * such that `compare(y, x)` is false for every argument `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which no other argument value is less than, i.e., the - * minimum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_min()` function would be - * used in an expression like `r->calc_min(i, a)`where `r` is an - * op_min_index reducer variable. - * - * @note The word "index" suggests an integer index into an array, but there - * is no restriction on the index type or how it should be used. In - * general, it may be convenient to use it for any kind of key that - * can be used to locate the minimum value in the collection that it - * came from - for example: - * - An index into an array. - * - A key into an STL map. - * - An iterator into any STL container. - * - * @note A min_index reducer is essentially a min reducer whose value type - * is a `std::pair`. This fact is camouflaged in the view - * `calc_min` function, the global `min_of` functions, and the reducer - * value constructor, which can all take an index argument and a value - * argument as an alternative to a single `std::pair` argument. 
- * However, the reducer `set_value()`, `get_value()`, `move_in()`, and - * `move_out()` functions work only with pairs, not with individual - * value and/or index arguments. - * - * @tparam Index The type of the indices associated with the values. - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare Used to compare the values. It must be a binary predicate. - * If it is omitted, then the view computes the conventional - * arithmetic minimum. - * - * @see ReducersMinMax - * @see op_min_index - */ -template -class op_min_index_view - : public min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, min_max_internal::reverse_predicate> { - typedef min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, min_max_internal::reverse_predicate> - base; - using base::assign; - using base::calc; - typedef std::pair pair_type; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_min_index_view constructors simply pass their arguments on to the - * @ref view_base base class, except for the `(index, value [, compare])` - * constructors, which create a `std::pair` containing the index and value. - */ - //@{ - - op_min_index_view() : base() {} - - template op_min_index_view(const T1 &x1) : base(x1) {} - - template - op_min_index_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - template - op_min_index_view(const T1 &x1, const T2 &x2, const T3 &x3) - : base(x1, x2, x3) {} - - op_min_index_view(const Index &i, const Type &v) : base(pair_type(i, v)) {} - - op_min_index_view(const Index &i, const Type &v, - const typename base::compare_type *c) - : base(pair_type(i, v), c) {} - - //@} - - /** Minimizes with a value and index. 
- * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_min()` - * or `= min_of()`), then the value of the view is set to @a x, and the - * index is set to @a i.. - * - * @param i The index of the value @a x. - * @param x The value to minimize the view's value with. - * - * @return A reference to the view. (Allows - * `view.comp_min(i, a).comp_min(j, b)…`.) - */ - op_min_index_view &calc_min(const Index &i, const Type &x) { - calc(pair_type(i, x)); - return *this; - } - - /** Maximizes with an index/value pair. - * - * If @a pair.second is less than the current value of the view (as - * defined by the reducer's comparator), or if the view was created - * without an initial value and its value has never been updated (with - * `calc_min()` or `= min_of()`), then the value of the view is set to - * @a pair.second, and the index is set to @a pair.first. - * - * @param pair A pair containing a value to minimize the view's value - * with and its associated index. - * - * @return A reference to the view. (Allows - * `view.comp_min(p1).comp_min(p2)…`.) - */ - op_min_index_view &calc_min(const pair_type &pair) { - calc(pair); - return *this; - } - - /** Assigns the result of a `min_of(view, index, value)` expression to the - * view. - * - * @param rhs An rhs_proxy value created by a `min_of(view, index, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_min_index_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the minimum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. 
For example, - * - * *reducer = min_of(*reducer, i, x); - * *reducer = min_of(i, x, *reducer); - * - * @see min_max_internal::min_min_view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_index_view &view, const Index &index, - const Type &value) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Index &index, const Type &value, - const op_min_index_view &view) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_index_view &view, - const std::pair &pair) { - return min_max_internal::make_proxy(pair, view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const std::pair &pair, - const op_min_index_view &view) { - return min_max_internal::make_proxy(pair, view); -} - -/** Computes nested minimum between the value in a view and other values. - * - * Compute the minimum of the result of a min_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. 
For example, - * - * *reducer = min_of(x, min_of(y, *reducer)); - * *reducer = min_of(min_of(*reducer, x), y); - * - * @see min_max_internal::min_min_view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of( - const min_max_internal::rhs_proxy> - &proxy, - const Index &index, const Type &value) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const Index &index, const Type &value, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const min_max_internal::rhs_proxy> - &proxy, - const std::pair &pair) { - return proxy.calc(pair); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const std::pair &pair, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(pair); -} - -/** Monoid class for minimum reductions with index. Instantiate the - * cilk::reducer template class with an op_min_index monoid to create a - * min_index reducer class. For example, to compute the minimum of an array of - * `double` values and the array index of the min value: - * - * cilk::reducer< cilk::op_min_index > r; - * - * @see ReducersMinMax - * @see op_min_index_view - */ -template , - bool Align = false> -class op_min_index : public min_max_internal::monoid_base< - op_min_index_view, Align> { - typedef min_max_internal::monoid_base< - op_min_index_view, Align> - base; - - public: - /// Construct with default comparator. - op_min_index() {} - /// Construct with specified comparator. 
- op_min_index(const Compare &compare) : base(compare) {} -}; - -//@} - -/** Deprecated maximum reducer wrapper class. - * - * reducer_max is the same as @ref reducer<@ref op_max>, except that - * reducer_max is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is maximized with a `reducer<%op_max>` with - * `r->calc_max(a)`, but a value can be maximized with a `%reducer_max` with - * `r.calc_max(a)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_max. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_max` - * and `reducer<%op_max>`. This allows incremental code - * conversion: old code that used `%reducer_max` can pass a - * `%reducer_max` to a converted function that now expects a - * pointer or reference to a `reducer<%op_max>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_max - * @see op_max_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMaxValue - */ -template > -class reducer_max : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_max only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_max. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. 
- typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (either `!is_set()`, or - /// `value() == identity value`). - reducer_max() : base() {} - - /// Constructs the wrapper with a specified initial value. - explicit reducer_max(const Type &initial_value) : base(initial_value) {} - - /// Constructs the wrapper in its identity state with a specified - /// comparator. - explicit reducer_max(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial value and a specified - /// comparator. - reducer_max(const Type &initial_value, const Compare &comp) - : base(initial_value, comp) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_max_view. */ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_max_view::calc_max(const Type&) - reducer_max &calc_max(const Type &x) { - view().calc_max(x); - return *this; - } - - /// @copydoc op_max_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_max &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /** Allows read-only access to the value within the current view. - * - * @returns A const reference to the value within the current view. - */ - const Type &get_reference() const { return view().get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. 
- * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_max > r; - * r->calc_max(a); // *r returns the view - * // calc_max is a view member function - * - * reducer_max w; - * w->calc_max(a); // *w returns the wrapper - * // calc_max is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_max &operator*() { return *this; } - reducer_max const &operator*() const { return *this; } - - reducer_max *operator->() { return this; } - reducer_max const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast> *>(this); - } - - operator const reducer> &() const { - return *reinterpret_cast> *>( - this); - } - //@} -}; - -/// @cond internal -// The legacy definition of max_of(reducer_max, value) has different -// behavior and a different return type than this definition. We add an -// unused third argument to this version of the function to give it a different -// signature, so that they won't end up sharing a single object file entry. -struct max_of_1_0_t {}; -const max_of_1_0_t max_of_1_0 = {}; -/// @endcond - -/** Computes the maximum of the value in a reducer_max and another value. 
- * - * @deprecated Because reducer_max is deprecated. - * - * The result of this computation can only be assigned back to the original - * reducer or used in another max_of() call. For example, - * - * reducer = max_of(reducer, x); - * reducer = max_of(x, reducer); - * - * @see min_max_internal::rhs_proxy - * - * @ingroup ReducersMinMaxMaxValue - */ -template -inline min_max_internal::rhs_proxy> -max_of(const reducer_max &r, const Type &value, - const max_of_1_0_t & = max_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -/// @copydoc max_of(const reducer_max&, const Type&, const -/// max_of_1_0_t&) -/// @ingroup ReducersMinMaxMaxValue -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, const reducer_max &r, - const max_of_1_0_t & = max_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -/** Deprecated minimum reducer wrapper class. - * - * reducer_min is the same as @ref reducer<@ref op_min>, except that - * reducer_min is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is minimized with a `reducer<%op_min>` with - * `r->calc_min(a)`, but a value can be minimized with a `%reducer_min` with - * `r.calc_min(a)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_min. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_min` - * and `reducer<%op_min>`. This allows incremental code - * conversion: old code that used `%reducer_min` can pass a - * `%reducer_min` to a converted function that now expects a - * pointer or reference to a `reducer<%op_min>`, and vice - * versa. 
**But see @ref redminmax_compatibility.** - * - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_min - * @see op_min_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMinValue - */ -template > -class reducer_min : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_min only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_min. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (either `!is_set()`, or - /// `value() == identity value`). - reducer_min() : base() {} - - /// Constructs the wrapper with a specified initial value. - explicit reducer_min(const Type &initial_value) : base(initial_value) {} - - /// Constructs the wrapper in its identity state with a specified - /// comparator. - explicit reducer_min(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial value and a specified - /// comparator. - reducer_min(const Type &initial_value, const Compare &comp) - : base(initial_value, comp) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_min_view. 
*/ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_min_view::calc_min(const Type&) - reducer_min &calc_min(const Type &x) { - view().calc_min(x); - return *this; - } - - /// @copydoc op_min_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_min &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /** Allows read-only access to the value within the current view. - * - * @returns A const reference to the value within the current view. - */ - const Type &get_reference() const { return view().get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_min > r; - * r->calc_min(a); // *r returns the view - * // calc_min is a view member function - * - * reducer_min w; - * w->calc_min(a); // *w returns the wrapper - * // calc_min is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_min &operator*() { return *this; } - reducer_min const &operator*() const { return *this; } - - reducer_min *operator->() { return this; } - reducer_min const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. 
The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast> *>(this); - } - - operator const reducer> &() const { - return *reinterpret_cast> *>( - this); - } - //@} -}; - -/** Computes the minimum of a reducer and a value. - * - * @deprecated Because reducer_min is deprecated. - */ -//@{ -// The legacy definition of min_of(reducer_min, value) has different -// behavior and a different return type than this definition. We add an -// unused third argument to this version of the function to give it a different -// signature, so that they won't end up sharing a single object file entry. -struct min_of_1_0_t {}; -const min_of_1_0_t min_of_1_0 = {}; - -template -inline min_max_internal::rhs_proxy> -min_of(const reducer_min &r, const Type &value, - const min_of_1_0_t & = min_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, const reducer_min &r, - const min_of_1_0_t & = min_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} -//@} - -/** Deprecated maximum with index reducer wrapper class. - * - * reducer_max_index is the same as @ref reducer<@ref op_max_index>, except - * that reducer_max_index is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. For example, a value is maximized with a `reducer<%op_max_index>` - * with `r->calc_max(i, a)`, but a value can be maximized with a - * `%reducer_max` with `r.calc_max(i, aa)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_max. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. 
- * - * @note Implicit conversions are provided between `%reducer_max_index` - * and `reducer<%op_max_index>`. This allows incremental code - * conversion: old code that used `%reducer_max_index` can pass a - * `%reducer_max_index` to a converted function that now expects a - * pointer or reference to a `reducer<%op_max_index>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Index The index type of the reducer. - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_max_index - * @see op_max_index_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMaxIndex - */ -template > -class reducer_max_index - : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_max_index only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_max_index. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (`!is_set()`). - reducer_max_index() : base() {} - - /// Construct with a specified initial index and value. - reducer_max_index(const Index &initial_index, const Type &initial_value) - : base(initial_index, initial_value) {} - - /// Constructs the wrapper with a specified comparator. 
- explicit reducer_max_index(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial index, value, - /// and comparator. - reducer_max_index(const Index &initial_index, const Type &initial_value, - const Compare &comp) - : base(initial_index, initial_value, comp) {} - - //@} - - /** @name Set / Get - */ - //@{ - - /// Sets the index and value of this object. - void set_value(const Index &index, const Type &value) { - base::set_value(std::make_pair(index, value)); - } - - /// Returns the maximum value. - const Type &get_value() const { return view().get_reference(); } - - /// Returns the maximum index. - const Index &get_index() const { return view().get_index_reference(); } - - /// Returns a const reference to value data member in the view. - const Type &get_reference() const { return view().get_reference(); } - - /// Returns a const reference to index data member in the view. - const Index &get_index_reference() const { - return view().get_index_reference(); - } - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_max_view. */ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_max_index_view::calc_max(const Index&, const Type&) - reducer_max_index &calc_max(const Index &i, const Type &x) { - view().calc_max(i, x); - return *this; - } - - /// @copydoc op_max_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_max_index &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. 
- * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_max_index > r; - * r->calc_max(i, a); // *r returns the view - * // calc_max is a view member function - * - * reducer_max_index w; - * w->calc_max(i, a); // *w returns the wrapper - * // calc_max is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_max_index &operator*() { return *this; } - reducer_max_index const &operator*() const { return *this; } - - reducer_max_index *operator->() { return this; } - reducer_max_index const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast< - reducer> *>(this); - } - - operator const reducer> &() - const { - return *reinterpret_cast< - const reducer> *>(this); - } - //@} -}; - -/** Deprecated minimum with index reducer wrapper class. - * - * reducer_min_index is the same as @ref reducer<@ref op_min_index>, except - * that reducer_min_index is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. 
For example, a value is minimized with a `reducer<%op_min_index>` - * with `r->calc_min(i, a)`, but a value can be minimized with a - * `%reducer_min` with `r.calc_min(i, aa)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_min. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_min_index` - * and `reducer<%op_min_index>`. This allows incremental code - * conversion: old code that used `%reducer_min_index` can pass a - * `%reducer_min_index` to a converted function that now expects a - * pointer or reference to a `reducer<%op_min_index>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Index The index type of the reducer. - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_min_index - * @see op_min_index_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMinIndex - */ -template > -class reducer_min_index - : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_min_index only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_min_index. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. 
- typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (`!is_set()`). - reducer_min_index() : base() {} - - /// Construct with a specified initial index and value. - reducer_min_index(const Index &initial_index, const Type &initial_value) - : base(initial_index, initial_value) {} - - /// Constructs the wrapper with a specified comparator. - explicit reducer_min_index(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial index, value, - /// and comparator. - reducer_min_index(const Index &initial_index, const Type &initial_value, - const Compare &comp) - : base(initial_index, initial_value, comp) {} - - //@} - - /** @name Set / Get - */ - //@{ - - /// Sets the index and value of this object. - void set_value(const Index &index, const Type &value) { - base::set_value(std::make_pair(index, value)); - } - - /// Returns the minimum value. - const Type &get_value() const { return view().get_reference(); } - - /// Returns the minimum index. - const Index &get_index() const { return view().get_index_reference(); } - - /// Returns a const reference to value data member in the view. - const Type &get_reference() const { return view().get_reference(); } - - /// Returns a const reference to index data member in the view. - const Index &get_index_reference() const { - return view().get_index_reference(); - } - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_min_view. 
*/ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_min_index_view::calc_min(const Index&, const Type&) - reducer_min_index &calc_min(const Index &i, const Type &x) { - view().calc_min(i, x); - return *this; - } - - /// @copydoc op_min_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_min_index &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_min_index > r; - * r->calc_min(i, a); // *r returns the view - * // calc_min is a view member function - * - * reducer_min_index w; - * w->calc_min(i, a); // *w returns the wrapper - * // calc_min is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_min_index &operator*() { return *this; } - reducer_min_index const &operator*() const { return *this; } - - reducer_min_index *operator->() { return this; } - reducer_min_index const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. 
- */ - //@{ - operator reducer> &() { - return *reinterpret_cast< - reducer> *>(this); - } - - operator const reducer> &() - const { - return *reinterpret_cast< - const reducer> *>(this); - } - //@} -}; - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -} // namespace cilk_lib_1_1 -using namespace cilk_lib_1_1; -#endif - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * These specializations of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes each `reducer< op_xxxx >` classes to have - * an `operator reducer_xxxx& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_xxxx` type. - * (The reverse conversion, from `reducer_xxxx` to `reducer`, is just - * an upcast, which is provided for free by the language.) - */ -template -struct legacy_reducer_downcast>> { - typedef reducer_max type; -}; - -template -struct legacy_reducer_downcast>> { - typedef reducer_min type; -}; - -template -struct legacy_reducer_downcast< - reducer>> { - typedef reducer_max_index type; -}; - -template -struct legacy_reducer_downcast< - reducer>> { - typedef reducer_min_index type; -}; -/// @endcond - -} // namespace cilk - -#endif // __cplusplus - -/** @name C language reducer macros - * - * These macros are used to declare and work with numeric minimum and maximum - * reducers in C code. 
- * - * @see @ref page_reducers_in_c - */ -//@{ - -#ifdef CILK_C_DEFINE_REDUCERS - -/* Integer min/max constants */ -#include - -/* Wchar_t min/max constants */ -#if defined(_MSC_VER) || defined(__ANDROID__) -#include -#else -#include -#endif - -/* Floating-point min/max constants */ -#include -#ifndef HUGE_VALF -static const unsigned int __huge_valf[] = {0x7f800000}; -#define HUGE_VALF (*((const float *)__huge_valf)) -#endif - -#ifndef HUGE_VALL -static const unsigned int __huge_vall[] = {0, 0, 0x00007f80, 0}; -#define HUGE_VALL (*((const long double *)__huge_vall)) -#endif - -#endif - -/** Declares max reducer type name. - * - * This macro expands into the identifier which is the name of the max reducer - * type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_TYPE(tn) __CILKRTS_MKIDENT(cilk_c_reducer_max_, tn) - -/** Declares a max reducer object. - * - * This macro expands into a declaration of a max reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MAX(my_reducer, double, -DBL_MAX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX(obj, tn, v) \ - CILK_C_REDUCER_MAX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_, tn), \ - 0, v) - -/** Maximizes with a value. - * - * `CILK_C_REDUCER_MAX_CALC(reducer, v)` sets the current view of the - * reducer to the max of its previous value and a specified new value. 
- * This is equivalent to - * - * REDUCER_VIEW(reducer) = max(REDUCER_VIEW(reducer), v) - * - * @param reducer The reducer whose contained value is to be updated. - * @param v The value that it is to be maximized with. - */ -#define CILK_C_REDUCER_MAX_CALC(reducer, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view < __value) { \ - *view = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the max reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the max reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_DECLARATION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max, tn); - -/** Defines the max reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the max reducer type for a specified numeric - * type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_DEFINITION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max, tn, l, r) { \ - if (*(t *)l < *(t *)r) \ - *(t *)l = *(t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max, tn) { *(t *)v = id; } - -//@{ -/** @def CILK_C_REDUCER_MAX_INSTANCE - * @brief Declare or define implementation functions for a reducer type. 
- * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MAX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MAX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declare or define an instance of the reducer type and its functions for each - * numeric type. - */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MAX_INSTANCE(char, char, CHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned char, uchar, 0) -CILK_C_REDUCER_MAX_INSTANCE(signed char, schar, SCHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(short, short, SHRT_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned short, ushort, 0) -CILK_C_REDUCER_MAX_INSTANCE(int, int, INT_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned int, uint, 0) -CILK_C_REDUCER_MAX_INSTANCE(unsigned int, unsigned, 0) // alternate name -CILK_C_REDUCER_MAX_INSTANCE(long, long, LONG_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned long, ulong, 0) -CILK_C_REDUCER_MAX_INSTANCE(long long, longlong, LLONG_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned long long, ulonglong, 0) -CILK_C_REDUCER_MAX_INSTANCE(float, float, -HUGE_VALF) -CILK_C_REDUCER_MAX_INSTANCE(double, double, -HUGE_VAL) -CILK_C_REDUCER_MAX_INSTANCE(long double, longdouble, -HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Max_index reducer type name. - * - * This macro expands into the identifier which is the name of the max_index - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. 
- * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_, tn) - -/** Declares an op_max_index reducer object. - * - * This macro expands into a declaration of a max_index reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MAX_INDEX(my_reducer, double, -DBL_MAX_INDEX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_INDEX(obj, tn, v) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_, tn), \ - 0, {0, v}) - -/** Maximizes with a value. - * - * `CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v)` sets the current view of the - * reducer to the max of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = max_index(REDUCER_VIEW(reducer), v) - * - * If the value of the reducer is changed to @a v, then the index of the - * reducer is changed to @a i. - * - * @param reducer The reducer whose contained value and index are to be - * updated. - * @param i The index associated with the new value. - * @param v The value that it is to be maximized with. - */ -#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value < __value) { \ - view->index = (i); \ - view->value = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the max_index view type. 
- * - * The view of a max_index reducer is a structure containing both the - * maximum value for the reducer and the index that was associated with - * that value in the sequence of input values. - */ -#define CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn) \ - typedef struct { \ - ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) - -/** Declares the max_index reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the max_index reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_INDEX_DECLARATION(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn)) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index, tn); - -/** Defines the max_index reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the max_index reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. 
- */ -#define CILK_C_REDUCER_MAX_INDEX_DEFINITION(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn)) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index, tn, l, r) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) view_t; \ - if (((view_t *)l)->value < ((view_t *)r)->value) \ - *(view_t *)l = *(view_t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index, tn) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) view_t; \ - ((view_t *)v)->index = 0; \ - ((view_t *)v)->value = id; \ - } - -//@{ -/** @def CILK_C_REDUCER_MAX_INDEX_INSTANCE - * @brief Declare or define implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MAX_INDEX_INSTANCE(char, char, CHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char, uchar, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char, schar, SCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(short, short, SHRT_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short, ushort, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(int, int, INT_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, uint, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, unsigned, 0) // alternate name -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long, long, LONG_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long, ulong, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long, longlong, LLONG_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long, ulonglong, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(float, float, -HUGE_VALF) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(double, double, -HUGE_VAL) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double, longdouble, -HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Declares min reducer type name. - * - * This macro expands into the identifier which is the name of the min reducer - * type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_TYPE(tn) __CILKRTS_MKIDENT(cilk_c_reducer_min_, tn) - -/** Declares a min reducer object. - * - * This macro expands into a declaration of a min reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MIN(my_reducer, double, DBL_MAX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. 
(A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN(obj, tn, v) \ - CILK_C_REDUCER_MIN_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_, tn), \ - 0, v) - -/** Minimizes with a value. - * - * `CILK_C_REDUCER_MIN_CALC(reducer, v)` sets the current view of the - * reducer to the min of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = min(REDUCER_VIEW(reducer), v) - * - * @param reducer The reducer whose contained value is to be updated. - * @param v The value that it is to be minimized with. - */ -#define CILK_C_REDUCER_MIN_CALC(reducer, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view > __value) { \ - *view = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the min reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the min reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_DECLARATION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min, tn); - -/** Defines the min reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the min reducer type for a specified numeric - * type. - * - * @param t The value type of the reducer. 
- * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_DEFINITION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min, tn, l, r) { \ - if (*(t *)l > *(t *)r) \ - *(t *)l = *(t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min, tn) { *(t *)v = id; } - -//@{ -/** @def CILK_C_REDUCER_MIN_INSTANCE - * @brief Declare or define implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MIN_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MIN_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MIN_INSTANCE(char, char, CHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned char, uchar, CHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(signed char, schar, SCHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(short, short, SHRT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned short, ushort, USHRT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(int, int, INT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned int, uint, UINT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INSTANCE(long, long, LONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned long, ulong, ULONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(long long, longlong, LLONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(float, float, HUGE_VALF) -CILK_C_REDUCER_MIN_INSTANCE(double, double, HUGE_VAL) -CILK_C_REDUCER_MIN_INSTANCE(long double, longdouble, HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Declares `min_index` reducer type name. - * - * This macro expands into the identifier which is the name of the min_index - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_, tn) - -/** Declares an op_min_index reducer object. - * - * This macro expands into a declaration of a min_index reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MIN_INDEX(my_reducer, double, -DBL_MIN_INDEX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. 
(A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_INDEX(obj, tn, v) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_, tn), \ - 0, {0, v}) - -/** Minimizes with a value. - * - * `CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v)` sets the current view of the - * reducer to the min of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = min_index(REDUCER_VIEW(reducer), v) - * - * If the value of the reducer is changed to @a v, then the index of the - * reducer is changed to @a i. - * - * @param reducer The reducer whose contained value and index are to be - * updated. - * @param i The index associated with the new value. - * @param v The value that it is to be minimized with. - */ -#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value > __value) { \ - view->index = (i); \ - view->value = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the min_index view type. - * - * The view of a min_index reducer is a structure containing both the - * minimum value for the reducer and the index that was associated with - * that value in the sequence of input values. - */ -#define CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn) \ - typedef struct { \ - ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) - -/** Declares the min_index reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the min_index reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. 
- * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_INDEX_DECLARATION(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn)) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index, tn); - -/** Defines the min_index reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the min_index reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_INDEX_DEFINITION(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn)) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index, tn, l, r) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) view_t; \ - if (((view_t *)l)->value > ((view_t *)r)->value) \ - *(view_t *)l = *(view_t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index, tn) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) view_t; \ - ((view_t *)v)->index = 0; \ - ((view_t *)v)->value = id; \ - } - -//@{ -/** @def CILK_C_REDUCER_MIN_INDEX_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. 
Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. - */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MIN_INDEX_INSTANCE(char, char, CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char, uchar, CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char, schar, SCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(short, short, SHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short, ushort, USHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(int, int, INT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, uint, UINT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, unsigned, - UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long, long, LONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long, ulong, ULONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long, longlong, LLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(float, float, HUGE_VALF) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(double, double, HUGE_VAL) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double, longdouble, HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -//@} - -#endif // defined REDUCER_MIN_MAX_H_INCLUDED diff --git a/include/cilk/reducer_opadd.h b/include/cilk/reducer_opadd.h deleted file mode 100644 index 832ae356..00000000 --- a/include/cilk/reducer_opadd.h +++ /dev/null @@ -1,702 +0,0 @@ -/* reducer_opadd.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel 
Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opadd.h - * - * @brief Defines classes for doing parallel addition reductions. - * - * @ingroup ReducersAdd - * - * @see ReducersAdd - */ - -#ifndef REDUCER_OPADD_H_INCLUDED -#define REDUCER_OPADD_H_INCLUDED - -#include - -/** @defgroup ReducersAdd Addition Reducers - * - * Addition reducers allow the computation of the sum of a set of values in - * parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopadd_usage Usage Example - * - * cilk::reducer< cilk::op_add > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r += a[i]; - * } - * return r.get_value(); - * - * @section redopadd_monoid The Monoid - * - * @subsection redopadd_monoid_values Value Set - * - * The value set of an addition reducer is the set of values of `Type`, which - * is expected to be a builtin numeric type (or something like it, such as - * `std::complex`). - * - * @subsection redopadd_monoid_operator Operator - * - * The operator of an addition reducer is the addition operator, defined by - * the "`+`" binary operator on `Type`. - * - * @subsection redopadd_monoid_identity Identity - * - * The identity value of the reducer is the numeric value "`0`". This is - * expected to be the value of the default constructor `Type()`. 
- * - * @section redopadd_operations Operations - * - * @subsection redopadd_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopadd_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopadd_initial Initial Values - * - * If an addition reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopadd_types. - * - * @subsection redopadd_view_ops View Operations - * - * *r += a - * *r -= a - * ++*r - * --*r - * (*r)++ - * (*r)-- - * *r = *r + a - * *r = *r - a - * *r = *r ± a1 ± a2 … ± an - * - * The post-increment and post-decrement operations do not return a value. (If - * they did, they would expose the value contained in the view, which is - * non-deterministic in the middle of a reduction.) - * - * Note that subtraction operations are allowed on an addition reducer because - * subtraction is equivalent to addition with a negated operand. It is true - * that `(x - y) - z` is not equivalent to `x - (y - z)`, but - * `(x + (-y)) + (-z)` _is_ equivalent to `x + ((-y) + (-z))`. - * - * @section redopadd_floating_point Issues with Floating-Point Types - * - * Because of precision and round-off issues, floating-point addition is not - * really associative. For example, `(1e30 + -1e30) + 1 == 1`, but - * `1e30 + (-1e30 + 1) == 0`. - * - * In many cases, this won't matter, but computations which have been - * carefully ordered to control round-off errors may not deal well with - * being reassociated. In general, you should be sure to understand the - * floating-point behavior of your program before doing any transformation - * that will reassociate its computations. 
- * - * @section redopadd_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`+=`" must be defined on `Type`, with `x += a` having the - * same meaning as `x = x + a`. In addition, if the code uses the "`-=`", - * pre-increment, post-increment, pre-decrement, or post-decrement operators, - * then the corresponding operators must be defined on `Type`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose numeric value is zero). - * - * @section redopadd_in_c Addition Reducers in C - * - * The @ref CILK_C_REDUCER_OPADD and @ref CILK_C_REDUCER_OPADD_TYPE macros can - * be used to do addition reductions in C. For example: - * - * CILK_C_REDUCER_OPADD(r, double, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) += a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The sum of the elements of a is %f\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The addition reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_add >`. It holds the accumulator variable - * for the reduction, and allows only addition and subtraction operations to - * be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `+=` operation would be used in an expression like `*r += a`, where - * `r` is an op_add reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. 
- * - * @see ReducersAdd - * @see op_add - * - * @ingroup ReducersAdd - */ -template -class op_add_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of - * `*reducer = *reducer ± value`. - * - * The only assignment operator for the op_add_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_add_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_add_view ± value ... ± value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 + x` is - * legal; `v1 = v2 + x` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @see op_add_view - */ - class rhs_proxy { - friend class op_add_view; - - const op_add_view* m_view; - Type m_value; - - // Constructor is invoked only from op_add_view::operator+() and - // op_add_view::operator-(). - // - rhs_proxy(const op_add_view* view, const Type& value) : - m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - ///@{ - /** Adds or subtracts an additional rhs value. If `v` is an op_add_view - * and `a1` is a value, then the expression `v + a1` invokes the view's - * `operator+()` to create an rhs_proxy for `(v, a1)`; then - * `v + a1 + a2` invokes the rhs_proxy's `operator+()` to create a new - * rhs_proxy for `(v, a1+a2)`. This allows the right-hand side of an - * assignment to be not just `view ± value`, but - * `view ± value ± value ... ± value`. The effect is that - * - * v = v ± a1 ± a2 ... ± an; - * - * is evaluated as - * - * v = v ± (±a1 ± a2 ... 
± an); - */ - rhs_proxy& operator+(const Type& x) { m_value += x; return *this; } - rhs_proxy& operator-(const Type& x) { m_value -= x; return *this; } - ///@} - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`, which is expected to be the identity value - * for addition on `Type`. - */ - op_add_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_add_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_add monoid to combine the views - * of two strands when the right strand merges with the left one. It adds - * the value contained in the right-strand view to the value contained in - * the left-strand view, and leaves the value in the right-strand view - * undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_add monoid to implement the monoid - * reduce operation. - */ - void reduce(op_add_view* right) { this->m_value += right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for incrementing or - * decrementing the accumulator variable contained in the view. - */ - ///@{ - - /** Increments the accumulator variable by @a x. - */ - op_add_view& operator+=(const Type& x) { this->m_value += x; return *this; } - - /** Decrements the accumulator variable by @a x. - */ - op_add_view& operator-=(const Type& x) { this->m_value -= x; return *this; } - - /** Pre-increment. - */ - op_add_view& operator++() { ++this->m_value; return *this; } - - /** Post-increments. - * - * @note Conventionally, post-increment operators return the old value - * of the incremented variable. However, reducer views do not - * expose their contained values, so `view++` does not have a - * return value. - */ - void operator++(int) { this->m_value++; } - - /** Pre-decrements. 
- */ - op_add_view& operator--() { --this->m_value; return *this; } - - /** Post-decrements. - * - * @note Conventionally, post-decrement operators return the old value - * of the decremented variable. However, reducer views do not - * expose their contained values, so `view--` does not have a - * return value. - */ - void operator--(int) { this->m_value--; } - - /** Creates an object representing `*this + x`. - * - * @see rhs_proxy - */ - rhs_proxy operator+(const Type& x) const { return rhs_proxy(this, x); } - - /** Creates an object representing `*this - x`. - * - * @see rhs_proxy - */ - rhs_proxy operator-(const Type& x) const { return rhs_proxy(this, -x); } - - /** Assigns the result of a `view ± value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_add_view& operator=(const rhs_proxy& rhs) { - this->m_value += rhs.m_value; - return *this; - } - - ///@} -}; - - -/** Monoid class for addition reductions. Instantiate the cilk::reducer - * template class with an op_add monoid to create an addition reducer class. - * For example, to compute - * the sum of a set of `int` values: - * - * cilk::reducer< cilk::op_add > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersAdd - * @see op_add_view - * - * @ingroup ReducersAdd - */ -template -struct op_add : public monoid_with_view, Align> {}; - -/** **Deprecated** addition reducer wrapper class. - * - * reducer_opadd is the same as @ref reducer<@ref op_add>, except that - * reducer_opadd is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. 
For - * example, a value is added to a `reducer<%op_add>` with `*r += a`, but a - * value can be added to a `%reducer_opadd` with `r += a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opadd. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opadd` - * and `reducer<%op_add>`. This allows incremental code - * conversion: old code that used `%reducer_opadd` can pass a - * `%reducer_opadd` to a converted function that now expects a - * pointer or reference to a `reducer<%op_add>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_add - * @see reducer - * @see ReducersAdd - * - * @ingroup ReducersAdd - */ -template -class reducer_opadd : public reducer< op_add > -{ - typedef reducer< op_add > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opadd() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opadd(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_add_view. 
*/ - ///@{ - - /// @copydoc op_add_view::operator+=(const Type&) - reducer_opadd& operator+=(const Type& x) { view() += x; return *this; } - - /// @copydoc op_add_view::operator-=(const Type&) - reducer_opadd& operator-=(const Type& x) { view() -= x; return *this; } - - /// @copydoc op_add_view::operator++() - reducer_opadd& operator++() { ++view(); return *this; } - - /// @copydoc op_add_view::operator++(int) - void operator++(int) { view()++; } - - /// @copydoc op_add_view::operator-\-() - reducer_opadd& operator--() { --view(); return *this; } - - /// @copydoc op_add_view::operator-\-(int) - void operator--(int) { view()--; } - - // The legacy definitions of reducer_opadd::operator+() and - // reducer_opadd::operator-() have different behavior and a different - // return type than this definition. The legacy version is defined as a - // member function, so this new version is defined as a free function to - // give it a different signature, so that they won't end up sharing a - // single object file entry. - - /// @copydoc op_add_view::operator+(const Type&) const - friend rhs_proxy operator+(const reducer_opadd& r, const Type& x) - { - return r.view() + x; - } - /// @copydoc op_add_view::operator-(const Type&) const - friend rhs_proxy operator-(const reducer_opadd& r, const Type& x) - { - return r.view() - x; - } - /// @copydoc op_add_view::operator=(const rhs_proxy&) - reducer_opadd& operator=(const rhs_proxy& temp) - { - view() = temp; - return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. 
That is: - * - * reducer< op_add > r; - * *r += a; // *r returns the view - * // operator += is a view member function - * - * reducer_opadd w; - * *w += a; // *w returns the wrapper - * // operator += is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opadd& operator*() { return *this; } - reducer_opadd const& operator*() const { return *this; } - - reducer_opadd* operator->() { return this; } - reducer_opadd const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_add >& () - { - return *reinterpret_cast< reducer< op_add >* >(this); - } - operator const reducer< op_add >& () const - { - return *reinterpret_cast< const reducer< op_add >* >(this); - } - ///@} -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_add >` class to have an - * `operator reducer_opadd& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opadd` type. - * (The reverse conversion, from `reducer_opadd` to `reducer`, is just - * an upcast, which is provided for free by the language.) 
- * - * @ingroup ReducersAdd - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opadd type; -}; -/// @endcond - -} // namespace cilk - -extern "C" { - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C Language Reducer Macros - * - * These macros are used to declare and work with numeric op_add reducers in - * C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -/** Declares opadd reducer type name. - * - * This macro expands into the identifier which is the name of the op_add - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersAdd - */ -#define CILK_C_REDUCER_OPADD_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn) - -/** Declares an op_add reducer object. - * - * This macro expands into a declaration of an op_add reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPADD(my_reducer, double, 0.0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersAdd - */ -#define CILK_C_REDUCER_OPADD(obj,tn,v) \ - CILK_C_REDUCER_OPADD_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_add reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_add reducer type for a - * specified numeric type. 
- * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPADD_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn); - -/** Defines the op_add reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_add reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPADD_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r) \ - { *(t*)l += *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPADD_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, - * and this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPADD_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPADD_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. 
- */ -CILK_C_REDUCER_OPADD_INSTANCE(char, char) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPADD_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPADD_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPADD_INSTANCE(short, short) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPADD_INSTANCE(int, int) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPADD_INSTANCE(long, long) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPADD_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long, ulonglong) -CILK_C_REDUCER_OPADD_INSTANCE(float, float) -CILK_C_REDUCER_OPADD_INSTANCE(double, double) -CILK_C_REDUCER_OPADD_INSTANCE(long double, longdouble) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPADD_H_INCLUDED */ diff --git a/include/cilk/reducer_opand.h b/include/cilk/reducer_opand.h deleted file mode 100644 index 46c39fcd..00000000 --- a/include/cilk/reducer_opand.h +++ /dev/null @@ -1,617 +0,0 @@ -/* reducer_opand.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opand.h - * - * @brief Defines classes for doing parallel bitwise AND reductions. - * - * @ingroup ReducersAnd - * - * @see ReducersAnd - */ - -#ifndef REDUCER_OPAND_H_INCLUDED -#define REDUCER_OPAND_H_INCLUDED - -#include - -/** @defgroup ReducersAnd Bitwise AND Reducers - * - * Bitwise AND reducers allow the computation of the bitwise AND of a set of - * values in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopand_usage Usage Example - * - * cilk::reducer< cilk::op_and > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r &= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopand_monoid The Monoid - * - * @subsection redopand_monoid_values Value Set - * - * The value set of a bitwise AND reducer is the set of values of `Type`, - * which is expected to be a builtin integer type which has a representation - * as a sequence of bits (or something like it, such as `bool` or - * `std::bitset`). - * - * @subsection redopand_monoid_operator Operator - * - * The bitwise AND operator is defined by the "`&`" binary operator on `Type`. - * - * @subsection redopand_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 1-bits. This is expected to be the value of the expression - * `~Type()` (i.e., the bitwise negation operator applied to the default value - * of the value type). - * - * @section redopand_operations Operations - * - * @subsection redopand_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopand_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopand_initial Initial Values - * - * If a bitwise AND reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopand_types. 
- * - * @subsection redopand_view_ops View Operations - * - * *r &= a - * *r = *r & a - * *r = *r & a1 & a2 … & an - * - * @section redopand_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`&=`" must be defined on `Type`, with `x &= a` having the - * same meaning as `x = x & a`. - * - * The expression `~ Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 1-bits). - * - * @section redopand_in_c Bitwise AND Reducers in C - * - * The @ref CILK_C_REDUCER_OPAND and @ref CILK_C_REDUCER_OPAND_TYPE macros can - * be used to do bitwise AND reductions in C. For example: - * - * CILK_C_REDUCER_OPAND(r, uint, ~0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) &= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise AND of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise AND reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_and >`. It holds the accumulator variable - * for the reduction, and allows only AND operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `&=` operation would be used in an expression like `*r &= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. 
- * - * @see ReducersAnd - * @see op_and - * - * @ingroup ReducersAnd - */ -template -class op_and_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer & value`. - * - * The only assignment operator for the op_and_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_and_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_and_view & value ... & value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 & x` is - * legal; `v1 = v2 & x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_and_view - */ - class rhs_proxy { - private: - friend class op_and_view; - - const op_and_view* m_view; - Type m_value; - - // Constructor is invoked only from op_and_view::operator&(). - // - rhs_proxy(const op_and_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** Bitwise AND with an additional `rhs` value. If `v` is an op_and_view - * and `a1` is a value, then the expression `v & a1` invokes the - * view's `operator&()` to create an rhs_proxy for `(v, a1)`; then - * `v & a1 & a2` invokes the rhs_proxy's `operator&()` to create a new - * rhs_proxy for `(v, a1&a2)`. This allows the right-hand side of an - * assignment to be not just `view & value`, but - * `view & value & value ... & value`. The effect is that - * - * v = v & a1 & a2 ... & an; - * - * is evaluated as - * - * v = v & (a1 & a2 ... & an); - */ - rhs_proxy& operator&(const Type& x) { m_value &= x; return *this; } - }; - - - /** Default/identity constructor. 
This constructor initializes the - * contained value to `~ Type()`. - */ - op_and_view() : base(~Type()) {} - - /** Construct with a specified initial value. - */ - explicit op_and_view(const Type& v) : base(v) {} - - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_and monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "ANDs" the value contained in the left-strand view with the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_and monoid to implement the monoid - * reduce operation. - */ - void reduce(op_and_view* right) { this->m_value &= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "ANDing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Performs AND between the accumulator variable and @a x. - */ - op_and_view& operator&=(const Type& x) { this->m_value &= x; return *this; } - - /** Creates an object representing `*this & x`. - * - * @see rhs_proxy - */ - rhs_proxy operator&(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view & value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_and_view& operator=(const rhs_proxy& rhs) { - this->m_value &= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise AND reductions. Instantiate the cilk::reducer - * template class with an op_and monoid to create a bitwise AND reducer - * class. For example, to compute the bitwise AND of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_and > r; - * - * @tparam Type The reducer value type. 
- * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersAnd - * @see op_and_view - * - * @ingroup ReducersAnd - */ -template -struct op_and : public monoid_with_view, Align> {}; - -/** Deprecated bitwise AND reducer class. - * - * reducer_opand is the same as @ref reducer<@ref op_and>, except that - * reducer_opand is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "ANDed" with a `reducer<%op_and>` with `*r &= a`, but a - * value can be "ANDed" with a `%reducer_opand` with `r &= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opand. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opand` - * and `reducer<%op_and>`. This allows incremental code - * conversion: old code that used `%reducer_opand` can pass a - * `%reducer_opand` to a converted function that now expects a - * pointer or reference to a `reducer<%op_and>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_and - * @see reducer - * @see ReducersAnd - * - * @ingroup ReducersAnd - */ -template -class reducer_opand : public reducer< op_and > -{ - typedef reducer< op_and > base; - using base::view; - -public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. 
- typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default constructor. - * - * Constructs the wrapper with the default initial value of `Type()` - * (not the identity value). - */ - reducer_opand() : base(Type()) {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opand(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_and_view::operator&=(const Type&) - reducer_opand& operator&=(const Type& x) - { - view() &= x; - return *this; - } - - // The legacy definition of reducer_opand::operator&() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. - - /// @copydoc op_and_view::operator&(const Type&) const - friend rhs_proxy operator&(const reducer_opand& r, const Type& x) - { - return r.view() & x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opand& operator=(const rhs_proxy& temp) - { - view() = temp; - return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. 
That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opand& operator*() { return *this; } - reducer_opand const& operator*() const { return *this; } - - reducer_opand* operator->() { return this; } - reducer_opand const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_and >& () - { - return *reinterpret_cast< reducer< op_and >* >(this); - } - operator const reducer< op_and >& () const - { - return *reinterpret_cast< const reducer< op_and >* >(this); - } - ///@} -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_and >` class to have an - * `operator reducer_opand& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opand` type. - * (The reverse conversion, from `reducer_opand` to `reducer`, is just - * an upcast, which is provided for free by the language.) 
- * - * @ingroup ReducersAnd - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opand type; -}; -/// @endcond - -} // namespace cilk - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_and reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares `opand` reducer type name. - * - * This macro expands into the identifier which is the name of the op_and - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersAnd - */ -#define CILK_C_REDUCER_OPAND_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn) - -/** Declares an op_and reducer object. - * - * This macro expands into a declaration of an op_and reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPAND(my_reducer, ulong, ~0UL); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersAnd - */ -#define CILK_C_REDUCER_OPAND(obj,tn,v) \ - CILK_C_REDUCER_OPAND_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_and reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_and reducer type for a - * specified numeric type. 
- * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPAND_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn); - -/** Defines the op_and reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_and reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPAND_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r) \ - { *(t*)l &= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn) \ - { *(t*)v = ~((t)0); } - -///@{ -/** @def CILK_C_REDUCER_OPAND_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPAND_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPAND_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -CILK_C_REDUCER_OPAND_INSTANCE(char, char) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPAND_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPAND_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPAND_INSTANCE(short, short) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPAND_INSTANCE(int, int) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPAND_INSTANCE(long, long) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPAND_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPAND_H_INCLUDED */ diff --git a/include/cilk/reducer_opmul.h b/include/cilk/reducer_opmul.h deleted file mode 100644 index d2139b24..00000000 --- a/include/cilk/reducer_opmul.h +++ /dev/null @@ -1,456 +0,0 @@ -/* reducer_opmul.h -*- C++ -*- - * - * Copyright (C) 2012-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opmul.h - * - * @brief Defines classes for doing parallel multiplication reductions. - * - * @ingroup ReducersMul - * - * @see ReducersMul - */ - -#ifndef REDUCER_OPMUL_H_INCLUDED -#define REDUCER_OPMUL_H_INCLUDED - -#include - -/** @defgroup ReducersMul Multiplication Reducers - * - * Multiplication reducers allow the computation of the product of a set of - * values in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopmul_usage Usage Example - * - * cilk::reducer< cilk::op_mul > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r *= a[i]; - * } - * double product; - * r.move_out(product); - * - * @section redopmul_monoid The Monoid - * - * @subsection redopmul_monoid_values Value Set - * - * The value set of a multiplication reducer is the set of values of `Type`, - * which is expected to be a builtin numeric type (or something like it, such - * as `std::complex`). - * - * @subsection redopmul_monoid_operator Operator - * - * The operator of a multiplication reducer is the multiplication operation, - * defined by the "`*`" binary operator on `Type`. - * - * @subsection redopmul_monoid_identity Identity - * - * The identity value of the reducer is the numeric value "`1`". This is - * expected to be the value of the expression `Type(1)`. - * - * @section redopmul_operations Operations - * - * @subsection redopmul_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopmul_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopmul_initial Initial Values - * - * If a multiplication reducer is constructed without an explicit initial - * value, then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopmul_types. 
- * - * @subsection redopmul_view_ops View Operations - * - * *r *= a - * *r = *r * a - * *r = *r * a1 * a2 … * an - * - * @section redopmul_floating_point Issues with Floating-Point Types - * - * Because of overflow and underflow issues, floating-point multiplication is - * not really associative. For example, `(1e200 * 1e-200) * 1e-200 == 1e-200`, - * but `1e200 * (1e-200 * 1e-200 == 0. - * - * In many cases, this won't matter, but computations which have been - * carefully ordered to control overflow and underflow may not deal well with - * being reassociated. In general, you should be sure to understand the - * floating-point behavior of your program before doing any transformation - * that will reassociate its computations. - * - * @section redopmul_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`*=`" must be defined on `Type`, with `x *= a` having the same - * meaning as `x = x * a`. - * - * The expression `Type(1)` must be a valid expression which yields the - * identity value (the value of `Type` whose numeric value is `1`). - * - * @section redopmul_in_c Multiplication Reducers in C - * - * The @ref CILK_C_REDUCER_OPMUL and @ref CILK_C_REDUCER_OPMUL_TYPE macros can - * be used to do multiplication reductions in C. For example: - * - * CILK_C_REDUCER_OPMUL(r, double, 1); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) *= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The product of the elements of a is %f\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The multiplication reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_mul >`. It holds the accumulator variable - * for the reduction, and allows only multiplication operations to be - * performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `*=` operation would be used in an expression like `*r *= a`, where - * `r` is an op_mul reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersMul - * @see op_mul - * - * @ingroup ReducersMul - */ -template -class op_mul_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer * value`. - * - * The only assignment operator for the op_mul_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_mul_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_mul_view * value ... * value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 * x` is - * legal; `v1 = v2 * x` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @see op_mul_view - */ - class rhs_proxy { - friend class op_mul_view; - - const op_mul_view* m_view; - Type m_value; - - // Constructor is invoked only from op_mul_view::operator*(). - // - rhs_proxy(const op_mul_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** Multiplies by an additional `rhs` value. 
If `v` is an op_mul_view and - * `a1` is a value, then the expression `v * a1` invokes the view's - * `operator*()` to create an rhs_proxy for `(v, a1)`; then - * `v * a1 * a2` invokes the rhs_proxy's `operator*()` to create a - * new rhs_proxy for `(v, a1*a2)`. This allows the right-hand side of - * an assignment to be not just `view * value`, but - * `view * value * value ... * value`. The effect is that - * - * v = v * a1 * a2 ... * an; - * - * is evaluated as - * - * v = v * (a1 * a2 ... * an); - */ - rhs_proxy& operator*(const Type& x) { m_value *= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type(1)`, which is expected to be the identity - * value for multiplication on `Type`. - */ - op_mul_view() : base(Type(1)) {} - - /** Construct with a specified initial value. - */ - explicit op_mul_view(const Type& v) : base(v) {} - - /** Reduces two strand views. - * - * This function is invoked by the @ref op_mul monoid to combine the views - * of two strands when the right strand merges with the left one. It - * multiplies the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_mul monoid to implement the monoid - * reduce operation. - */ - void reduce(op_mul_view* right) { this->m_value *= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for multiplying the - * accumulator variable contained in the view by some value. - */ - ///@{ - - /** Multiplies the accumulator variable by @a x. - */ - op_mul_view& operator*=(const Type& x) { this->m_value *= x; return *this; } - - /** Creates an object representing `*this * x`. 
- * - * @see rhs_proxy - */ - rhs_proxy operator*(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view * value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_mul_view& operator=(const rhs_proxy& rhs) { - this->m_value *= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for multiplication reductions. Instantiate the cilk::reducer - * template class with an op_mul monoid to create a multiplication reducer - * class. For example, to compute the product of a set of `double` values: - * - * cilk::reducer< cilk::op_mul > r; - * - * @see ReducersMul - * @see op_mul_view - * - * @ingroup ReducersMul - */ -template -struct op_mul : public monoid_with_view< op_mul_view > {}; - -} // namespace cilk - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with numeric op_mul reducers in - * C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares `opmul` reducer type name. - * - * This macro expands into the identifier which is the name of the op_mul - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersMul - */ -#define CILK_C_REDUCER_OPMUL_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_,tn) - -/** Declares an op_mul reducer object. - * - * This macro expands into a declaration of an op_mul reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPMUL(my_reducer, double, 1.0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. 
- * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersMul - */ -#define CILK_C_REDUCER_OPMUL(obj,tn,v) \ - CILK_C_REDUCER_OPMUL_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_mul reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_mul reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn); - -/** Defines the op_mul reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_mul reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r) \ - { *(t*)l *= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn) \ - { *(t*)v = 1; } - -///@{ -/** @def CILK_C_REDUCER_OPMUL_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. 
- * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. - */ -CILK_C_REDUCER_OPMUL_INSTANCE(char, char) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPMUL_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPMUL_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPMUL_INSTANCE(short, short) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPMUL_INSTANCE(int, int) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPMUL_INSTANCE(long, long) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPMUL_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long long, ulonglong) -CILK_C_REDUCER_OPMUL_INSTANCE(float, float) -CILK_C_REDUCER_OPMUL_INSTANCE(double, double) -CILK_C_REDUCER_OPMUL_INSTANCE(long double, longdouble) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPMUL_H_INCLUDED */ diff --git a/include/cilk/reducer_opor.h b/include/cilk/reducer_opor.h deleted file mode 100644 index 20ae2d1e..00000000 --- a/include/cilk/reducer_opor.h +++ /dev/null @@ -1,612 +0,0 @@ -/* reducer_opor.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opor.h - * - * @brief Defines classes for doing parallel bitwise OR reductions. - * - * @ingroup ReducersOr - * - * @see ReducersOr - */ - -#ifndef REDUCER_OPOR_H_INCLUDED -#define REDUCER_OPOR_H_INCLUDED - -#include - -/** @defgroup ReducersOr Bitwise `OR` Reducers - * - * Bitwise `OR` reducers allow the computation of the bitwise `OR` of a set of - * values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopor_usage Usage Example - * - * cilk::reducer< cilk::op_or > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r |= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopor_monoid The Monoid - * - * @subsection redopor_monoid_values Value Set - * - * The value set of a bitwise `OR` reducer is the set of values of `Type`, which - * is expected to be a builtin integer type which has a representation as a - * sequence of bits (or something like it, such as `bool` or `std::bitset`). - * - * @subsection redopor_monoid_operator Operator - * - * The operator of a bitwise `OR` reducer is the bitwise OR operator, defined by - * the "`|`" binary operator on `Type`. - * - * @subsection redopor_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 0-bits. This is expected to be the value of the default - * constructor `Type()`. 
- * - * @section redopor_operations Operations - * - * @subsection redopor_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopor_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopor_initial Initial Values - * - * If a bitwise OR reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopor_types. - * - * @subsection redopor_view_ops View Operations - * - * *r |= a - * *r = *r | a - * *r = *r | a1 | a2 … | an - * - * @section redopor_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`|=`" must be defined on `Type`, with `x |= a` having the - * same meaning as `x = x | a`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 0-bits). - * - * @section redopor_in_c Bitwise OR Reducers in C - * - * The @ref CILK_C_REDUCER_OPOR and @ref CILK_C_REDUCER_OPOR_TYPE macros can - * be used to do bitwise OR reductions in C. For example: - * - * CILK_C_REDUCER_OPOR(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) |= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise OR of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise OR reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_or >`. It holds the accumulator variable for - * the reduction, and allows only `or` operations to be performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `|=` operation would be used in an expression like `*r |= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersOr - * @see op_or - * - * @ingroup ReducersOr - */ -template -class op_or_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer | value`. - * - * The only assignment operator for the op_or_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_or_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_or_view | value ... | value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 | x` is - * legal; `v1 = v2 | x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_or_view - */ - class rhs_proxy { - friend class op_or_view; - - const op_or_view* m_view; - Type m_value; - - // Constructor is invoked only from op_or_view::operator|(). - // - rhs_proxy(const op_or_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** bitwise OR with an additional rhs value. If `v` is an op_or_view - * and `a1` is a value, then the expression `v | a1` invokes the - * view's `operator|()` to create an rhs_proxy for `(v, a1)`; then - * `v | a1 | a2` invokes the rhs_proxy's `operator|()` to create a new - * rhs_proxy for `(v, a1|a2)`. 
This allows the right-hand side of an - * assignment to be not just `view | value`, but - ( `view | value | value ... | value`. The effect is that - * - * v = v | a1 | a2 ... | an; - * - * is evaluated as - * - * v = v | (a1 | a2 ... | an); - */ - rhs_proxy& operator|(const Type& x) { m_value |= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`. - */ - op_or_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_or_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_or monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "ORs" the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_or monoid to implement the monoid - * reduce operation. - */ - void reduce(op_or_view* right) { this->m_value |= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "ORing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Perfoms an OR operation between the accumulator variable and @a x. - */ - op_or_view& operator|=(const Type& x) { this->m_value |= x; return *this; } - - /** Creates an object representing `*this | x`. - * - * @see rhs_proxy - */ - rhs_proxy operator|(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view | value` expression to the view. Note that - * this is the only assignment operator for this class. 
- * - * @see rhs_proxy - */ - op_or_view& operator=(const rhs_proxy& rhs) { - this->m_value |= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise OR reductions. Instantiate the cilk::reducer - * template class with an op_or monoid to create a bitwise OR reducer - * class. For example, to compute the bitwise OR of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_or > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersOr - * @see op_or_view - * - * @ingroup ReducersOr - */ -template -struct op_or : public monoid_with_view, Align> {}; - -/** Deprecated bitwise OR reducer class. - * - * reducer_opor is the same as @ref reducer<@ref op_or>, except that - * reducer_opor is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "ORed" with a `reducer<%op_or>` with `*r |= a`, but a - * value can be "ORed" with a `%reducer_opor` with `r |= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opor. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opor` - * and `reducer<%op_or>`. This allows incremental code - * conversion: old code that used `%reducer_opor` can pass a - * `%reducer_opor` to a converted function that now expects a - * pointer or reference to a `reducer<%op_or>`, and vice - * versa. 
- * - * @tparam Type The value type of the reducer. - * - * @see op_or - * @see reducer - * @see ReducersOr - * - * @ingroup ReducersOr - */ -template -class reducer_opor : public reducer< op_or > -{ - typedef reducer< op_or > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opor() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opor(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_or_view::operator|=(const Type&) - reducer_opor& operator|=(const Type& x) - { - view() |= x; return *this; - } - - // The legacy definition of reducer_opor::operator|() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. - - /// @copydoc op_or_view::operator|(const Type&) const - friend rhs_proxy operator|(const reducer_opor& r, const Type& x) - { - return r.view() | x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opor& operator=(const rhs_proxy& temp) - { - view() = temp; return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. 
It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opor& operator*() { return *this; } - reducer_opor const& operator*() const { return *this; } - - reducer_opor* operator->() { return this; } - reducer_opor const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_or >& () - { - return *reinterpret_cast< reducer< op_or >* >(this); - } - operator const reducer< op_or >& () const - { - return *reinterpret_cast< const reducer< op_or >* >(this); - } - ///@} - -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_or >` class to have an - * `operator reducer_opor& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opor` type. 
- * (The reverse conversion, from `reducer_opor` to `reducer`, is just - * an upcast, which is provided for free by the language.) - * - * @ingroup ReducersOr - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opor type; -}; -/// @endcond - -} // namespace cilk - -#endif /* __cplusplus */ - - -/** @ingroup ReducersOr - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_or reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares OPOR reducer type name. - * - * This macro expands into the identifier which is the name of the op_or - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersOr - */ -#define CILK_C_REDUCER_OPOR_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn) - -/** Declares an op_or reducer object. - * - * This macro expands into a declaration of an op_or reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPOR(my_reducer, ulong, 0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersOr - */ -#define CILK_C_REDUCER_OPOR(obj,tn,v) \ - CILK_C_REDUCER_OPOR_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_or reducer functions for a numeric type. 
- * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_or reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPOR_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn); - -/** Defines the op_or reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_or reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPOR_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r) \ - { *(t*)l |= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPOR_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. 
- */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPOR_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPOR_DECLARATION(t,tn) -#endif -///@} - -/* Declare or define an instance of the reducer type and its functions for each - * numeric type. - */ -CILK_C_REDUCER_OPOR_INSTANCE(char, char) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPOR_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPOR_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPOR_INSTANCE(short, short) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPOR_INSTANCE(int, int) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPOR_INSTANCE(long, long) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPOR_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPOR_H_INCLUDED */ diff --git a/include/cilk/reducer_opxor.h b/include/cilk/reducer_opxor.h deleted file mode 100644 index 2e724a5c..00000000 --- a/include/cilk/reducer_opxor.h +++ /dev/null @@ -1,611 +0,0 @@ -/* reducer_opxor.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opxor.h - * - * @brief Defines classes for doing parallel bitwise or reductions. 
- * - * @ingroup ReducersXor - * - * @see ReducersXor - */ - -#ifndef REDUCER_OPXOR_H_INCLUDED -#define REDUCER_OPXOR_H_INCLUDED - -#include - -/** @defgroup ReducersXor Bitwise XOR Reducers - * - * Bitwise XOR reducers allow the computation of the bitwise XOR of a set of - * values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopxor_usage Usage Example - * - * cilk::reducer< cilk::op_xor > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r ^= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopxor_monoid The Monoid - * - * @subsection redopxor_monoid_values Value Set - * - * The value set of a bitwise XOR reducer is the set of values of `Type`, which - * is expected to be a builtin integer type which has a representation as a - * sequence of bits (or something like it, such as `bool` or `std::bitset`). - * - * @subsection redopxor_monoid_operator Operator - * - * The bitwise XOR operator is defined by the "`^`" binary operator on `Type`. - * - * @subsection redopxor_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 0-bits. This is expected to be the value of the default - * constructor `Type()`. 
- * - * @section redopxor_operations Operations - * - * @subsection redopxor_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopxor_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopxor_initial Initial Values - * - * If a bitwise XOR reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopxor_types. - * - * @subsection redopxor_view_ops View Operations - * - * *r ^= a - * *r = *r ^ a - * *r = *r ^ a1 ^ a2 … ^ an - * - * @section redopxor_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`^=`" must be defined on `Type`, with `x ^= a` having the - * same meaning as `x = x ^ a`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 0-bits). - * - * @section redopxor_in_c Bitwise XOR Reducers in C - * - * The @ref CILK_C_REDUCER_OPXOR and @ref CILK_C_REDUCER_OPXOR_TYPE macros can - * be used to do bitwise XOR reductions in C. For example: - * - * CILK_C_REDUCER_OPXOR(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) ^= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise XOR of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise XOR reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_xor >`. It holds the accumulator variable - * for the reduction, and allows only `xor` operations to be performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `^=` operation would be used in an expression like `*r ^= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersXor - * @see op_xor - * - * @ingroup ReducersXor - */ -template -class op_xor_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer ^ value`. - * - * The only assignment operator for the op_xor_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_xor_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_xor_view ^ value ... ^ value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 ^ x` is - * legal; `v1 = v2 ^ x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_xor_view - */ - class rhs_proxy { - friend class op_xor_view; - - const op_xor_view* m_view; - Type m_value; - - // Constructor is invoked only from op_xor_view::operator^(). - // - rhs_proxy(const op_xor_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** bitwise XOR with an additional rhs value. 
If `v` is an op_xor_view - * and `a1` is a value, then the expression `v ^ a1` invokes the - * view's `operator^()` to create an rhs_proxy for `(v, a1)`; then - * `v ^ a1 ^ a2` invokes the rhs_proxy's `operator^()` to create a new - * rhs_proxy for `(v, a1^a2)`. This allows the right-hand side of an - * assignment to be not just `view ^ value`, but - ( `view ^ value ^ value ... ^ value`. The effect is that - * - * v = v ^ a1 ^ a2 ... ^ an; - * - * is evaluated as - * - * v = v ^ (a1 ^ a2 ... ^ an); - */ - rhs_proxy& operator^(const Type& x) { m_value ^= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`. - */ - op_xor_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_xor_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_xor monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "XORs" the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_xor monoid to implement the monoid - * reduce operation. - */ - void reduce(op_xor_view* right) { this->m_value ^= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "XORing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Performs XOR operation between the accumulator variable and @a x. - */ - op_xor_view& operator^=(const Type& x) { this->m_value ^= x; return *this; } - - /** Creates an object representing `*this ^ x`. 
- * - * @see rhs_proxy - */ - rhs_proxy operator^(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view ^ value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_xor_view& operator=(const rhs_proxy& rhs) { - this->m_value ^= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise XOR reductions. Instantiate the cilk::reducer - * template class with an op_xor monoid to create a bitwise XOR reducer - * class. For example, to compute the bitwise XOR of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_xor > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersXor - * @see op_xor_view - * - * @ingroup ReducersXor - */ -template -struct op_xor : public monoid_with_view, Align> {}; - -/** Deprecated bitwise XOR reducer class. - * - * reducer_opxor is the same as @ref reducer<@ref op_xor>, except that - * reducer_opxor is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "XORed" with a `reducer<%op_xor>` with `*r ^= a`, but a - * value can be "XORed" with a `%reducer_opxor` with `r ^= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opand. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. 
- * - * @note Implicit conversions are provided between `%reducer_opxor` - * and `reducer<%op_xor>`. This allows incremental code - * conversion: old code that used `%reducer_opxor` can pass a - * `%reducer_opxor` to a converted function that now expects a - * pointer or reference to a `reducer<%op_xor>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_xor - * @see reducer - * @see ReducersXor - * - * @ingroup ReducersXor - */ -template -class reducer_opxor : public reducer< op_xor > -{ - typedef reducer< op_xor > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opxor() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opxor(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_xor_view::operator^=(const Type&) - reducer_opxor& operator^=(const Type& x) - { - view() ^= x; return *this; - } - - // The legacy definition of reducer_opxor::operator^() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. 
- - /// @copydoc op_xor_view::operator^(const Type&) const - friend rhs_proxy operator^(const reducer_opxor& r, const Type& x) - { - return r.view() ^ x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opxor& operator=(const rhs_proxy& temp) - { - view() = temp; return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opxor& operator*() { return *this; } - reducer_opxor const& operator*() const { return *this; } - - reducer_opxor* operator->() { return this; } - reducer_opxor const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. 
- */ - ///@{ - operator reducer< op_xor >& () - { - return *reinterpret_cast< reducer< op_xor >* >(this); - } - operator const reducer< op_xor >& () const - { - return *reinterpret_cast< const reducer< op_xor >* >(this); - } - ///@} - -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_xor >` class to have an - * `operator reducer_opxor& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opxor` type. - * (The reverse conversion, from `reducer_opxor` to `reducer`, is just - * an upcast, which is provided for free by the language.) - * - * @ingroup ReducersXor - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opxor type; -}; -/// @endcond - -} // namespace cilk - -#endif /* __cplusplus */ - - -/** @ingroup ReducersXor - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_xor reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares OPXOR reducer type name. - * - * This macro expands into the identifier which is the name of the op_xor - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersXor - */ -#define CILK_C_REDUCER_OPXOR_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn) - -/** Declares an op_xor reducer object. - * - * This macro expands into a declaration of an op_xor reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPXOR(my_reducer, ulong, 0); - * - * @param obj The variable name to be used for the declared reducer object. 
- * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersXor - */ -#define CILK_C_REDUCER_OPXOR(obj,tn,v) \ - CILK_C_REDUCER_OPXOR_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_xor reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_xor reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn); - -/** Defines the op_xor reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_xor reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. 
- */ -#define CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r) \ - { *(t*)l ^= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPXOR_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. 
- */ -CILK_C_REDUCER_OPXOR_INSTANCE(char, char) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPXOR_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPXOR_INSTANCE(short, short) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPXOR_INSTANCE(int, int) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPXOR_INSTANCE(long, long) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPXOR_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPXOR_H_INCLUDED */ diff --git a/include/cilk/reducer_ostream.h b/include/cilk/reducer_ostream.h deleted file mode 100644 index b839ea45..00000000 --- a/include/cilk/reducer_ostream.h +++ /dev/null @@ -1,496 +0,0 @@ -/* reducer_ostream.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_ostream.h - * - * @brief Defines a class for writing to an ostream in parallel. - * - * @ingroup ReducersOstream - * - * @see @ref ReducersOstream - */ - -#ifndef REDUCER_OSTREAM_H_INCLUDED -#define REDUCER_OSTREAM_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersOstream Ostream Reducers - * - * Ostream reducers allow multiple strands to write to an ostream in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file reducers.md, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redostream_usage Usage Example - * - * One of the most common debugging techniques is adding `print` statements - * to the code being debugged. When the code is parallelized, the results can - * be less than satisfactory, as output from multiple strands is mingled in an - * unpredictable way. Like other reducers, an ostream reducer requires minimal - * recoding to guarantee that the output from parallelized computation will be - * ordered the same as though the computation were executed serially. - * - * cilk::reducer r(std::cerr); - * cilk_for (int i = 0; i != data.size(); ++i) { - * *r << "Iteration " << i << ":\n"; - * ... some computation ... - * *r << " Step 1:" << some information; - * ... some more computation ... - * *r << " Step 2:" << some more information; - * ... still more computation ... - * *r << " Step 3:" << still more information; - * } - * - * Output on standard error: - * - * Iteration 1: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * Iteration 2: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * Iteration 3: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * ... - * - * @section redostream_overview Overview - * - * An "ostream reducer" is not really a reducer. It uses the reducer - * technology to coordinate operations on parallel strands to achieve - * the same behavior in a parallel computation that would be seen in a - * serial computation, but it does not have a monoid. It has a "monoid - * class," because that is part of the implementation framework, but it - * does not represent a mathematical monoid: there is no value type, no - * associative operation, and no identity value. The reducer is used for - * its side effect rather than to construct a value. 
- * - * You might think of an ostream reducer as a relative of a - * @ref ReducersString "string reducer" which uses stream output - * syntax (`stream << value`) instead of string append syntax - * (`string += value`), and which writes its result string to an - * ostream instead of making it available as the reducer value. - * - * Another difference is that "real" reducers protect their contained - * value quite strongly from improper access by the user. Ostream reducers, - * on the other hand, pretty much have to expose the ostream, since normal - * use of an ostream involves accessing its internal state. Furthermore, - * the ostream reducer just coordinates output to an existing ostream - - * there is nothing to keep the user from writing directly to the attached - * stream, with unpredictable results. - * - * @section redostream_operations Operations - * - * In the operation descriptions below, the type name `Ostream` refers to the - * reducer's ostream type, `std::basic_ostream`. - * - * @subsection redostream_constructors Constructors - * - * The only constructor is - * - * reducer(const Ostream& os) - * - * This creates a reducer that is associated with the existing ostream `os`. - * Anything "written to" the reducer will (eventually) be written to `os`. - * - * @subsection redostream_get_set Set and Get - * - * Just as a stream does not have a "value," neither does an ostream - * reducer. Therefore, none of the usual `set_value`, `get_value`, - * `move_in`, or `move_out` functions are available for ostream reducers. - * - * @subsection redostream_initial Initial Values - * - * Ostream reducers do not have default constructors. - * - * @subsection redostream_view_ops View Operations - * - * An ostream reducer view is actually a kind of `std::ostream`. Therefore, - * any operation that can be used on an ostream can be used on an ostream - * reducer view. 
For example: - * - * reducer r(cout); - * *r << setw(5) << (x=1) << endl; - * - * - * @section redostream_performance Performance Considerations - * - * Ostream reducers work by creating a string stream for each non-leftmost - * view. When two strands are merged, the contents of the string buffer of the - * right view are written to the left view. Since all non-leftmost strands are - * eventually merged, all output is eventually written to the associated - * ostream. - * - * This implementation has two consequences. - * - * First, all output written to an ostream reducer on a stolen strand is kept - * in memory (in a string buffer) until the strand is merged with the leftmost - * strand. This means that some portion of the output written to an ostream - * reducer during a parallel computation - half of the total output, on - * average - will temporarily be held in memory during the computation. - * Obviously, ostream reducers will work better for small and moderate amounts - * of output. - * - * Second, buffered ostream reducer content must be copied at every merge. - * The total amount of copying is potentially proportional to the total amount - * of output multiplied by the number of strands stolen during the computation. - * - * In short, writing to an ostream in a parallel computation with an ostream - * reducer will always be less efficient than writing the same output directly - * to the ostream in a serial computation. The value of the ostream - * reducer is not in the writing of the ostream itself, but in removing the - * race and serialization obstacles that the ostream output would cause in an - * otherwise parallelizable computation. - * - * - * @section redostream_state Stream State - * - * The reducer implementation can correctly order the output that is written - * to an ostream. However, an ostream has additional state that controls its - * behavior, such as its formatting attributes, error state, extensible arrays, * and registered callbacks. 
If these are modified during the computation, the * reducer implementation cannot guarantee that they will be the same in a - * parallel computation as in a serial computation. In particular: - * - * - In the serial execution, the ostream state in the continuation of a - * spawn will be the same as the state at the end of the spawned function. - * In the parallel execution, if the continuation is stolen, its view will - * contain a newly created ostream with the default initial state. - * - In the serial execution, the ostream state following a sync is the same - * as the state before the sync. In the parallel execution, if the - * continuation is stolen, then the state following the sync will be the - * same as the state at the end of some spawned function. - * - * In short, you must not make any assumptions about the stream state of an - * ostream reducer: - * - * - Following a `cilk_spawn`. - * - Following a `cilk_sync`. - * - At the start of an iteration of a `cilk_for` loop. - * - Following the completion of a `cilk_for` loop. - * - * @section redostream_types Type and Operator Requirements - * - * `std::basic_ostream` must be a valid type. -*/ - -namespace cilk { - -/** @ingroup ReducersOstream */ -//@{ - -/** The ostream reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_basic_ostream >`. It holds the - * actual ostream for a parallel strand, and allows only stream output - * operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view - * class's `<<` operation would be used in an expression like - * `*r << "x = " << x`, where `r` is an ostream reducer. - * - * @tparam Char The ostream element type (not the ostream type). - * @tparam Traits The character traits type. 
- * - * @see ReducersOstream - * @see op_basic_ostream - */ -template -class op_basic_ostream_view : public std::basic_ostream -{ - typedef std::basic_ostream base; - typedef std::basic_ostream ostream_type; - - // A non-leftmost view is associated with a private string buffer. (The - // leftmost view is associated with the buffer of the reducer's associated - // ostream, so its private buffer is unused.) - // - std::basic_stringbuf m_buffer; - -public: - - /** Value type. Required by @ref monoid_with_view. - */ - typedef ostream_type value_type; - - /** Reduce operation. Required by @ref monoid_with_view. - */ - void reduce(op_basic_ostream_view* other) - { - // Writing an empty buffer results in failure. Testing `sgetc()` is the - // easiest way of checking for an empty buffer. - if (other->m_buffer.sgetc() != Traits::eof()) { - *this << (&other->m_buffer); - } - } - - /** Non-leftmost (identity) view constructor. The view is associated with - * its internal buffer. Required by @ref monoid_base. - */ - op_basic_ostream_view() : base(&m_buffer) {} - - /** Leftmost view constructor. The view is associated with an existing - * ostream. - */ - op_basic_ostream_view(const ostream_type& os) : base(0) - { - base::rdbuf(os.rdbuf()); // Copy stream buffer - base::flags(os.flags()); // Copy formatting flags - base::setstate(os.rdstate()); // Copy error state - } - - /** Sets/gets. - * - * These are all no-ops. - */ - //@{ - - void view_set_value(const value_type&) - { assert("set_value() is not allowed on ostream reducers" && 0); } - const value_type& view_get_value() const - { assert("get_value() is not allowed on ostream reducers" && 0); - return *this; } - typedef value_type const& return_type_for_get_value; - void view_move_in(const value_type&) - { assert("move_in() is not allowed on ostream reducers" && 0); } - void view_move_out(const value_type&) - { assert("move_out() is not allowed on ostream reducers" && 0); } - - //@} -}; - -/** Ostream monoid class. 
Instantiate the cilk::reducer template class with an - * op_basic_ostream monoid to create an ostream reducer class: - * - * cilk::reducer< cilk::op_basic_string > r; - * - * @tparam Char The stream element type (not the stream type). - * @tparam Traits The character traits type. - * - * @see ReducersOstream - * @see op_basic_ostream_view - * @see reducer_ostream - * @see op_ostream - * @see op_wostream - */ -template, - bool Align = false> -class op_basic_ostream : - public monoid_with_view< op_basic_ostream_view, Align > -{ - typedef monoid_with_view< op_basic_ostream_view, Align > - base; - typedef std::basic_ostream ostream_type; - typedef provisional_guard view_guard; - -public: - - /** View type of the monoid. - */ - typedef typename base::view_type view_type; - - /** @name Construct function. - * - * The only supported ostream reducer constructor takes a reference to - * an existing ostream. - * - * @param os The ostream destination for receive all data written to the - * reducer. - */ - static void construct( - op_basic_ostream* monoid, - view_type* view, - const ostream_type& os) - { - view_guard vg( new((void*) view) view_type(os) ); - vg.confirm_if( new((void*) monoid) op_basic_ostream ); - } -}; - - -/** - * Convenience typedef for narrow ostreams. - */ -typedef op_basic_ostream op_ostream; - -/** - * Convenience typedef for wide ostreams. - */ -typedef op_basic_ostream op_wostream; - -/// @cond internal - -class reducer_ostream; - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer >` class - * to have an `operator reducer_ostream& ()` conversion operator that - * statically downcasts the `reducer >` to - * `reducer_ostream`. (The reverse conversion, from `reducer_ostream` to - * `reducer >`, is just an upcast, which is provided - * for free by the language.) 
- */ -template -struct legacy_reducer_downcast< - reducer, Align> > > -{ - typedef reducer_ostream type; -}; - -/// @endcond - -/** Deprecated ostream reducer class. - * - * reducer_ostream is the same as @ref cilk::reducer<@ref op_ostream>, except - * that reducer_ostream is a proxy for the contained view, so that ostream - * operations can be applied directly to the reducer. For example, a number is - * written to a `reducer` with `*r << x`, but a number can be - * written to a `reducer_ostream` with `r << x`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_ostream. The - * `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_ostream` - * and `reducer<%op_ostream>`. This allows incremental code - * conversion: old code that used `%reducer_ostream` can pass a - * `%reducer_ostream` to a converted function that now expects a - * pointer or reference to a `reducer<%op_ostream>`, and vice versa. - * - * @tparam Char The stream element type (not the stream type). - * @tparam Traits The character traits type. - * - * @see op_ostream - * @see reducer - * @see ReducersOstream - */ -class reducer_ostream : - public reducer, true> > -{ - typedef reducer, true> > base; - using base::view; -public: - - /// The view type for the reducer. - typedef base::view_type View; - - /// The monoid type for the reducer. - typedef base::monoid_type Monoid; - - /** Constructs an initial `reducer_ostream` from a `std::ostream`. The - * specified stream is used as the eventual destination for all text - * streamed to this hyperobject. - */ - explicit reducer_ostream(const std::ostream &os) : base(os) {} - - /** Returns a modifiable reference to the underlying 'ostream' object. 
- */ - std::ostream& get_reference() { return view(); } - - /** Writes to the ostream. - */ - template - std::ostream& operator<< (const T &v) - { - return view() << v; - } - - /** - * Calls a manipulator. - * - * @param _Pfn Pointer to the manipulator function. - */ - reducer_ostream& operator<< (std::ostream &(*_Pfn)(std::ostream &)) - { - (*_Pfn)(view()); - return *this; - } - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer r; - * *r << "a"; // *r returns the view - * // operator<<() is a view member function - * - * reducer_ostream w; - * *w << "a"; // *w returns the wrapper - * // operator<<() is a wrapper member function - * // that calls the corresponding view function - */ - //@{ - reducer_ostream& operator*() { return *this; } - reducer_ostream const& operator*() const { return *this; } - - reducer_ostream* operator->() { return this; } - reducer_ostream const* operator->() const { return this; } - //@} -}; - -} // namespace cilk - -#endif // REDUCER_OSTREAM_H_INCLUDED diff --git a/include/cilk/reducer_string.h b/include/cilk/reducer_string.h deleted file mode 100644 index 376b0bc5..00000000 --- a/include/cilk/reducer_string.h +++ /dev/null @@ -1,763 +0,0 @@ -/* reducer_string.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. 
- */ - -/** @file reducer_string.h - * - * @brief Defines classes for doing parallel string creation by appending. - * - * @ingroup ReducersString - * - * @see ReducersString - */ - -#ifndef REDUCER_STRING_H_INCLUDED -#define REDUCER_STRING_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersString String Reducers - * - * String reducers allow the creation of a string by concatenating a set of - * strings or characters in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file reducers.md, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redstring_usage Usage Example - * - * vector data; - * void expensive_string_computation(const Data& x, string& s); - * cilk::reducer r; - * cilk_for (int i = 0; i != data.size(); ++i) { - * string temp; - * expensive_string_computation(data[i], temp); - * *r += temp; - * } - * string result; - * r.move_out(result); - * - * @section redstring_monoid The Monoid - * - * @subsection redstring_monoid_values Value Set - * - * The value set of a string reducer is the set of values of the class - * `std::basic_string`, which we refer to as "the - * reducer's string type". - * - * @subsection redstring_monoid_operator Operator - * - * The operator of a string reducer is the string concatenation operator, - * defined by the "`+`" binary operator on the reducer's string type. - * - * @subsection redstring_monoid_identity Identity - * - * The identity value of a string reducer is the empty string, which is the - * value of the expression - * `std::basic_string([allocator])`. - * - * @section redstring_operations Operations - * - * In the operation descriptions below, the type name `String` refers to the - * reducer's string type, `std::basic_string`. 
- * - * @subsection redstring_constructors Constructors - * - * Any argument list which is valid for a `std::basic_string` constructor is - * valid for a string reducer constructor. The usual move-in constructor is - * also provided: - * - * reducer(move_in(String& variable)) - * - * @subsection redstring_get_set Set and Get - * - * r.set_value(const String& value) - * const String& = r.get_value() const - * r.move_in(String& variable) - * r.move_out(String& variable) - * - * @subsection redstring_initial Initial Values - * - * A string reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty string. - * - * @subsection redstring_view_ops View Operations - * - * *r += a - * r->append(a) - * r->append(a, b) - * r->push_back(a) - * - * These operations on string reducer views are the same as the corresponding - * operations on strings. - * - * @section redstring_performance Performance Considerations - * - * String reducers work by creating a string for each view, collecting those - * strings in a list, and then concatenating them into a single result string - * at the end of the computation. This last step takes place in serial code, - * and necessarily takes time proportional to the length of the result string. - * Thus, a parallel string reducer cannot actually speed up the time spent - * directly creating the string. This trivial example would probably be slower - * (because of reducer overhead) than the corresponding serial code: - * - * vector a; - * reducer r; - * cilk_for (int i = 0; i != a.length(); ++i) { - * *r += a[i]; - * } - * string result; - * r.move_out(result); - * - * What a string reducer _can_ do is to allow the _remainder_ of the - * computation to be done in parallel, without having to worry about managing - * the string computation. 
- * - * The strings for new views are created (by the view identity constructor) - * using the same allocator as the string that was created when the reducer - * was constructed. Note that this allocator is determined when the reducer is - * constructed. The following two examples may have very different behavior: - * - * string a_string; - * - * reducer< op_string reducer1(move_in(a_string)); - * ... parallel computation ... - * reducer1.move_out(a_string); - * - * reducer< op_string reducer2; - * reducer2.move_in(a_string); - * ... parallel computation ... - * reducer2.move_out(a_string); - * - * * `reducer1` will be constructed with the same allocator as `a_string`, - * because the string was specified in the constructor. The `move_in` - * and `move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator of type - * `Allocator`, which may not be the same as the allocator of `a_string`. - * Therefore, the `move_in` and `move_out` may have to be done with a copy - * in _O(N)_ time. - * - * (All instances of an allocator type with no internal state (like - * `std::allocator`) are "the same". You only need to worry about the "same - * allocator" issue when you create string reducers with custom allocator - * types.) - * - * @section redstring_types Type and Operator Requirements - * - * `std::basic_string` must be a valid type. -*/ - -namespace cilk { - -/** @ingroup ReducersString */ -//@{ - -/** The string append reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_basic_string >`. It holds - * the accumulator variable for the reduction, and allows only append - * operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. 
Thus, for example, the view class's - * `append` operation would be used in an expression like - * `r->append(a)`, where `r` is a string append reducer variable. - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * - * @see ReducersString - * @see op_basic_string - */ -template -class op_basic_string_view -{ - typedef std::basic_string string_type; - typedef std::list list_type; - typedef typename string_type::size_type size_type; - - // The view's value is represented by a list of strings and a single - // string. The value is the concatenation of the strings in the list with - // the single string at the end. All string operations apply to the single - // string; reduce operations cause lists of partial strings from multiple - // strands to be combined. - // - mutable string_type m_string; - mutable list_type m_list; - - // Before returning the value of the reducer, concatenate all the strings - // in the list with the single string. - // - void flatten() const - { - if (m_list.empty()) return; - - typename list_type::iterator i; - - size_type len = m_string.size(); - for (i = m_list.begin(); i != m_list.end(); ++i) - len += i->size(); - - string_type result(get_allocator()); - result.reserve(len); - - for (i = m_list.begin(); i != m_list.end(); ++i) - result += *i; - m_list.clear(); - - result += m_string; - result.swap(m_string); - } - -public: - - /** @name Monoid support. - */ - //@{ - - /// Required by @ref monoid_with_view - typedef string_type value_type; - - /// Required by @ref op_string - Alloc get_allocator() const - { - return m_string.get_allocator(); - } - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_basic_string monoid to combine - * the views of two strands when the right strand merges with the left - * one. 
It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_basic_string monoid to implement the - * monoid reduce operation. - */ - void reduce(op_basic_string_view* right) - { - if (!right->m_string.empty() || !right->m_list.empty()) { - // (list, string) + (right_list, right_string) => - // (list + {string} + right_list, right_string) - if (!m_string.empty()) { - // simulate m_list.push_back(std::move(m_string)) - m_list.push_back(string_type(get_allocator())); - m_list.back().swap(m_string); - } - m_list.splice(m_list.end(), right->m_list); - m_string.swap(right->m_string); - } - } - - //@} - - /** @name Passes constructor arguments to the string constructor. - */ - //@{ - - op_basic_string_view() : m_string() {} - - template - op_basic_string_view(const T1& x1) : m_string(x1) {} - - template - op_basic_string_view(const T1& x1, const T2& x2) : m_string(x1, x2) {} - - template - op_basic_string_view(const T1& x1, const T2& x2, const T3& x3) : m_string(x1, x2, x3) {} - - template - op_basic_string_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - m_string(x1, x2, x3, x4) {} - - //@} - - /** Move-in constructor. - */ - explicit op_basic_string_view(move_in_wrapper w) - : m_string(w.value().get_allocator()) - { - m_string.swap(w.value()); - } - - /** @name @ref reducer support. - */ - //@{ - - void view_move_in(string_type& s) - { - m_list.clear(); - if (m_string.get_allocator() == s.get_allocator()) - // Equal allocators. Do a (fast) swap. - m_string.swap(s); - else - // Unequal allocators. Do a (slow) copy. - m_string = s; - s.clear(); - } - - void view_move_out(string_type& s) - { - flatten(); - if (m_string.get_allocator() == s.get_allocator()) - // Equal allocators. Do a (fast) swap. 
- m_string.swap(s); - else - // Unequal allocators. Do a (slow) copy. - s = m_string; - m_string.clear(); - } - - void view_set_value(const string_type& s) - { m_list.clear(); m_string = s; } - - string_type const& view_get_value() const - { flatten(); return m_string; } - - typedef string_type const& return_type_for_get_value; - - string_type & view_get_reference() - { flatten(); return m_string; } - - string_type const& view_get_reference() const - { flatten(); return m_string; } - - //@} - - /** @name View modifier operations. - * - * @details These simply wrap the corresponding operations on the underlying string. - */ - //@{ - - template - op_basic_string_view& operator +=(const T& x) - { m_string += x; return *this; } - - template - op_basic_string_view& append(const T1& x1) - { m_string.append(x1); return *this; } - - template - op_basic_string_view& append(const T1& x1, const T2& x2) - { m_string.append(x1, x2); return *this; } - - template - op_basic_string_view& append(const T1& x1, const T2& x2, const T3& x3) - { m_string.append(x1, x2, x3); return *this; } - - void push_back(const Char x) { m_string.push_back(x); } - - //@} -}; - - -/** String append monoid class. Instantiate the cilk::reducer template class - * with an op_basic_string monoid to create a string append reducer class. For - * example, to concatenate a collection of standard strings: - * - * cilk::reducer< cilk::op_basic_string > r; - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. 
- * - * @see ReducersString - * @see op_basic_string_view - * @see reducer_basic_string - * @see op_string - * @see op_wstring - */ -template, - typename Alloc = std::allocator, - bool Align = false> -class op_basic_string : - public monoid_with_view< op_basic_string_view, Align > -{ - typedef monoid_with_view< op_basic_string_view, Align > - base; - typedef provisional_guard view_guard; - - Alloc m_allocator; - -public: - - /** View type of the monoid. - */ - typedef typename base::view_type view_type; - - /** Constructor. - * - * There is no default constructor for string monoids, because the - * allocator must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - op_basic_string(const Alloc& allocator = Alloc()) : m_allocator(allocator) - {} - - /** Creates an identity view. - * - * String view identity constructors take the string allocator as an - * argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(view_type *v) const - { ::new((void*) v) view_type(m_allocator); } - - /** @name Construct functions - * - * A string append reduction monoid must have a copy of the allocator of - * the leftmost view's string, so that it can use it in the `identity` - * operation. This, in turn, requires that string reduction monoids have a - * specialized `construct()` function. - * - * All string reducer monoid `construct()` functions first construct the - * leftmost view, using the arguments that were passed in from the reducer - * constructor. They then call the view's `get_allocator()` function to - * get the string allocator from the string in the leftmost view, and pass - * that to the monoid constructor. 
- */ - //@{ - - static void construct(op_basic_string* monoid, view_type* view) - { - view_guard vg( new((void*) view) view_type() ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1) - { - view_guard vg( new((void*) view) view_type(x1) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) view_type(x1, x2) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3, - const T4& x4) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3, x4) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - //@} -}; - - -/** Convenience typedef for 8-bit strings - */ -typedef op_basic_string op_string; - -/** Convenience typedef for 16-bit strings - */ -typedef op_basic_string op_wstring; - - -/** Deprecated string append reducer class. - * - * reducer_basic_string is the same as @ref reducer<@ref op_basic_string>, - * except that reducer_basic_string is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. For example, a value is appended to a `reducer<%op_basic_string>` - * with `r->push_back(a)`, but a value can be appended to a `%reducer_opand` - * with `r.push_back(a)`. 
- * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_basic_string. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_basic_string` - * and `reducer<%op_basic_string>`. This allows incremental code - * conversion: old code that used `%reducer_basic_string` can pass a - * `%reducer_basic_string` to a converted function that now expects a - * pointer or reference to a `reducer<%op_basic_string>`, and vice - * versa. - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * - * @see op_basic_string - * @see reducer - * @see ReducersString - */ -template, - typename Alloc = std::allocator > -class reducer_basic_string : - public reducer< op_basic_string > -{ - typedef reducer< op_basic_string > base; - using base::view; -public: - - /// The reducer's string type. - typedef typename base::value_type string_type; - - /// The reducer's primitive component type. - typedef Char basic_value_type; - - /// The string size type. - typedef typename string_type::size_type size_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - - /** @name Constructors - */ - //@{ - - /** @name Forward constructor calls to the base class. - * - * All basic_string constructor forms are supported. 
- */ - //@{ - reducer_basic_string() {} - - template - reducer_basic_string(const T1& x1) : - base(x1) {} - - template - reducer_basic_string(const T1& x1, const T2& x2) : - base(x1, x2) {} - - template - reducer_basic_string(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - template - reducer_basic_string(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - base(x1, x2, x3, x4) {} - //@} - - /** Allows mutable access to the string within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the string returned by this method will be a - * partial result. - * - * @returns A mutable reference to the string within the current view. - */ - string_type &get_reference() - { return view().view_get_reference(); } - - /** Allows read-only access to the string within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the string returned by this method will be a - * partial result. - * - * @returns A const reference to the string within the current view. - */ - string_type const &get_reference() const - { return view().view_get_reference(); } - - /** @name Appends to the string. - * - * These operations are simply forwarded to the view. 
- */ - //@{ - void append(const Char *ptr) - { view().append(ptr); } - void append(const Char *ptr, size_type count) - { view().append(ptr, count); } - void append(const string_type &str, size_type offset, size_type count) - { view().append(str, offset, count); } - void append(const string_type &str) - { view().append(str); } - void append(size_type count, Char ch) - { view().append(count, ch); } - - // Appends to the string - reducer_basic_string &operator+=(Char ch) - { view() += ch; return *this; } - reducer_basic_string &operator+=(const Char *ptr) - { view() += ptr; return *this; } - reducer_basic_string &operator+=(const string_type &right) - { view() += right; return *this; } - //@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer r; - * r->push_back(a); // r-> returns the view - * // push_back() is a view member function - * - * reducer_string w; - * w->push_back(a); // *w returns the wrapper - * // push_back() is a wrapper member function - * // that calls the corresponding view function - */ - //@{ - reducer_basic_string& operator*() { return *this; } - reducer_basic_string const& operator*() const { return *this; } - - reducer_basic_string* operator->() { return this; } - reducer_basic_string const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. 
- * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_basic_string >& () - { - return *reinterpret_cast< reducer< - op_basic_string >* - >(this); - } - operator const reducer< op_basic_string >& () const - { - return *reinterpret_cast< const reducer< - op_basic_string >* - >(this); - } - //@} -}; - - -/** Convenience typedef for 8-bit strings - */ -typedef reducer_basic_string reducer_string; - -/** Convenience typedef for 16-bit strings - */ -typedef reducer_basic_string reducer_wstring; - -/// @cond internal - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_basic_string >` class to - * have an `operator reducer_basic_string& ()` conversion operator that - * statically downcasts the `reducer` to the corresponding - * `reducer_basic_string` type. (The reverse conversion, from - * `reducer_basic_string` to `reducer`, is just an upcast, - * which is provided for free by the language.) - * - * @ingroup ReducersString - */ -template -struct legacy_reducer_downcast< - reducer > > -{ - typedef reducer_basic_string type; -}; - -/// @endcond - -//@} - -} // namespace cilk - -#endif // REDUCER_STRING_H_INCLUDED diff --git a/include/cilk/reducer_vector.h b/include/cilk/reducer_vector.h deleted file mode 100644 index a5f00419..00000000 --- a/include/cilk/reducer_vector.h +++ /dev/null @@ -1,533 +0,0 @@ -/* reducer_vector.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_vector.h - * - * @brief Defines classes for doing parallel vector creation by appending. - * - * @ingroup ReducersVector - * - * @see ReducersVector - */ - -#ifndef REDUCER_VECTOR_H_INCLUDED -#define REDUCER_VECTOR_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersVector Vector Reducers - * - * Vector reducers allow the creation of a standard vector by - * appending a set of elements in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redvector_usage Usage Example - * - * typedef ... SourceData; - * typedef ... ResultData; - * vector input; - * ResultData expensive_computation(const SourceData& x); - * cilk::reducer< cilk::op_vector > r; - * cilk_for (int i = 0; i != input.size(); ++i) { - * r->push_back(expensive_computation(input[i])); - * } - * vector result; - * r.move_out(result); - * - * @section redvector_monoid The Monoid - * - * @subsection redvector_monoid_values Value Set - * - * The value set of a vector reducer is the set of values of the class - * `std::vector`, which we refer to as "the reducer's vector - * type". - * - * @subsection redvector_monoid_operator Operator - * - * The operator of a vector reducer is vector concatenation. - * - * @subsection redvector_monoid_identity Identity - * - * The identity value of a vector reducer is the empty vector, which is the - * value of the expression `std::vector([allocator])`. 
- * - * @section redvector_operations Operations - * - * In the operation descriptions below, the type name `Vector` refers to - * the reducer's vector type, `std::vector`. - * - * @subsection redvector_constructors Constructors - * - * Any argument list which is valid for a `std::vector` constructor is valid - * for a vector reducer constructor. The usual move-in constructor is also - * provided: - * - * reducer(move_in(Vector& variable)) - * - * @subsection redvector_get_set Set and Get - * - * void r.set_value(const Vector& value) - * const Vector& = r.get_value() const - * void r.move_in(Vector& variable) - * void r.move_out(Vector& variable) - * - * @subsection redvector_initial Initial Values - * - * A vector reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty vector. - * - * @subsection redvector_view_ops View Operations - * - * The view of a vector reducer provides the following member functions: - * - * void push_back(const Type& element) - * void insert_back(const Type& element) - * void insert_back(Vector::size_type n, const Type& element) - * template void insert_back(Iter first, Iter last) - * - * The `push_back` functions is the same as the corresponding `std::vector` - * function. The `insert_back` function is the same as the `std::vector` - * `insert` function, with the first parameter fixed to the end of the vector. - * - * @section redvector_performance Performance Considerations - * - * Vector reducers work by creating a vector for each view, collecting those - * vectors in a list, and then concatenating them into a single result vector - * at the end of the computation. This last step takes place in serial code, - * and necessarily takes time proportional to the length of the result vector. - * Thus, a parallel vector reducer cannot actually speed up the time spent - * directly creating the vector. 
This trivial example would probably be slower - * (because of reducer overhead) than the corresponding serial code: - * - * vector a; - * reducer > r; - * cilk_for (int i = 0; i != a.length(); ++i) { - * r->push_back(a[i]); - * } - * vector result; - * r.move_out(result); - * - * What a vector reducer _can_ do is to allow the _remainder_ of the - * computation to be done in parallel, without having to worry about - * managing the vector computation. - * - * The vectors for new views are created (by the view identity constructor) - * using the same allocator as the vector that was created when the reducer - * was constructed. Note that this allocator is determined when the reducer - * is constructed. The following two examples may have very different - * behavior: - * - * vector a_vector; - * - * reducer< op_vector reducer1(move_in(a_vector)); - * ... parallel computation ... - * reducer1.move_out(a_vector); - * - * reducer< op_vector reducer2; - * reducer2.move_in(a_vector); - * ... parallel computation ... - * reducer2.move_out(a_vector); - * - * * `reducer1` will be constructed with the same allocator as `a_vector`, - * because the vector was specified in the constructor. The `move_in` - * and`move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator of type - * `Allocator`, which may not be the same as the allocator of `a_vector`. - * Therefore, the `move_in` and `move_out` may have to be done with a - * copy in _O(N)_ time. - * - * (All instances of an allocator class with no internal state (like - * `std::allocator`) are "the same". You only need to worry about the "same - * allocator" issue when you create vector reducers with a custom allocator - * class that has data members.) - * - * @section redvector_types Type and Operator Requirements - * - * `std::vector` must be a valid type. 
-*/ - -namespace cilk { - -/** @ingroup ReducersVector */ -//@{ - -/** @brief The vector reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_vector >`. It holds the - * accumulator variable for the reduction, and allows only append operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view - * class's `push_back` operation would be used in an expression like - * `r->push_back(a)`, where `r` is a vector reducer variable. - * - * @tparam Type The vector element type (not the vector type). - * @tparam Alloc The vector allocator type. - * - * @see @ref ReducersVector - * @see op_vector - */ -template -class op_vector_view -{ - typedef std::vector vector_type; - typedef std::list::other> - list_type; - typedef typename vector_type::size_type size_type; - - // The view's value is represented by a list of vectors and a single - // vector. The value is the concatenation of the vectors in the list with - // the single vector at the end. All vector operations apply to the single - // vector; reduce operations cause lists of partial vectors from multiple - // strands to be combined. - // - mutable vector_type m_vector; - mutable list_type m_list; - - // Before returning the value of the reducer, concatenate all the vectors - // in the list with the single vector. - // - void flatten() const - { - if (m_list.empty()) return; - - typename list_type::iterator i; - - size_type len = m_vector.size(); - for (i = m_list.begin(); i != m_list.end(); ++i) - len += i->size(); - - vector_type result(get_allocator()); - result.reserve(len); - - for (i = m_list.begin(); i != m_list.end(); ++i) - result.insert(result.end(), i->begin(), i->end()); - m_list.clear(); - - result.insert(result.end(), m_vector.begin(), m_vector.end()); - result.swap(m_vector); - } - -public: - - /** @name Monoid support. 
- */ - //@{ - - /// Required by cilk::monoid_with_view - typedef vector_type value_type; - - /// Required by @ref op_vector - Alloc get_allocator() const - { - return m_vector.get_allocator(); - } - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_vector monoid to combine - * the views of two strands when the right strand merges with the left - * one. It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param other A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_vector monoid to implement the - * monoid reduce operation. - */ - void reduce(op_vector_view* other) - { - if (!other->m_vector.empty() || !other->m_list.empty()) { - // (list, string) + (other_list, other_string) => - // (list + {string} + other_list, other_string) - if (!m_vector.empty()) { - // simulate m_list.push_back(std::move(m_vector)) - m_list.push_back(vector_type(get_allocator())); - m_list.back().swap(m_vector); - } - m_list.splice(m_list.end(), other->m_list); - m_vector.swap(other->m_vector); - } - } - - //@} - - /** @name Passes constructor arguments to the vector constructor. - */ - //@{ - - op_vector_view() : - m_vector(), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1) : - m_vector(x1), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2) : - m_vector(x1, x2), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2, const T3& x3) : - m_vector(x1, x2, x3), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - m_vector(x1, x2, x3, x4), m_list(get_allocator()) {} - - //@} - - /** Move-in constructor. 
- */ - explicit op_vector_view(cilk::move_in_wrapper w) : - m_vector(w.value().get_allocator()), - m_list(w.value().get_allocator()) - { - m_vector.swap(w.value()); - } - - /** @name Reducer support. - */ - //@{ - - void view_move_in(vector_type& v) - { - m_list.clear(); - if (get_allocator() == v.get_allocator()) { - // Equal allocators. Do a (fast) swap. - m_vector.swap(v); - } - else { - // Unequal allocators. Do a (slow) copy. - m_vector = v; - } - v.clear(); - } - - void view_move_out(vector_type& v) - { - flatten(); - if (get_allocator() == v.get_allocator()) { - // Equal allocators. Do a (fast) swap. - m_vector.swap(v); - } - else { - // Unequal allocators. Do a (slow) copy. - v = m_vector; - m_vector.clear(); - } - } - - void view_set_value(const vector_type& v) - { - m_list.clear(); - m_vector = v; - } - - vector_type const& view_get_value() const - { - flatten(); - return m_vector; - } - - typedef vector_type const& return_type_for_get_value; - - //@} - - /** @name View modifier operations. - * - * @details These simply wrap the corresponding operations on the - * underlying vector. - */ - //@{ - - /** Adds an element at the end of the list. - * - * Equivalent to `vector.push_back(…)` - */ - void push_back(const Type x) - { - m_vector.push_back(x); - } - - /** @name Insert elements at the end of the vector. - * - * Equivalent to `vector.insert(vector.end(), …)` - */ - //@{ - - void insert_back(const Type& element) - { m_vector.insert(m_vector.end(), element); } - - void insert_back(typename vector_type::size_type n, const Type& element) - { m_vector.insert(m_vector.end(), n, element); } - - template - void insert_back(Iter first, Iter last) - { m_vector.insert(m_vector.end(), first, last); } - - //@} - - //@} -}; - - -/** @brief The vector append monoid class. - * - * Instantiate the cilk::reducer template class with an op_vector monoid to - * create a vector reducer class. 
For example, to concatenate a - * collection of integers: - * - * cilk::reducer< cilk::op_vector > r; - * - * @tparam Type The vector element type (not the vector type). - * @tparam Alloc The vector allocator type. - * - * @see ReducersVector - * @see op_vector_view - * @ingroup ReducersVector - */ -template > -class op_vector : - public cilk::monoid_with_view< op_vector_view, false > -{ - typedef cilk::monoid_with_view< op_vector_view, false > base; - typedef provisional_guard view_guard; - - // The allocator to be used when constructing new views. - Alloc m_allocator; - -public: - - /// View type. - typedef typename base::view_type view_type; - - /** Constructor. - * - * There is no default constructor for vector monoids, because the - * allocator must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - op_vector(const Alloc& allocator = Alloc()) : m_allocator(allocator) {} - - /** Creates an identity view. - * - * Vector view identity constructors take the vector allocator as an - * argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(view_type *v) const - { - ::new((void*) v) view_type(m_allocator); - } - - /** @name construct functions - * - * A vector append monoid must have a copy of the allocator of - * the leftmost view's vector, so that it can use it in the `identity` - * operation. This, in turn, requires that vector append monoids have a - * specialized `construct()` function. - * - * All vector append monoid `construct()` functions first construct the - * leftmost view, using the arguments that were passed in from the reducer - * constructor. They then call the view's `get_allocator()` function to - * get the vector allocator from the vector in the leftmost view, and pass - * that to the monoid constructor. 
- */ - //@{ - - static void construct(op_vector* monoid, view_type* view) - { - view_guard vg( new((void*) view) view_type() ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, const T1& x1) - { - view_guard vg( new((void*) view) view_type(x1) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) view_type(x1, x2) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - //@} -}; - - -} // namespace cilk - -#endif // REDUCER_VECTOR_H_INCLUDED diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 42235995..19b47769 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -2,7 +2,6 @@ set(CHEETAH_LIB_CMAKEFILES_DIR "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTO # Get sources set(CHEETAH_SOURCES - c_reducers.c cilk2c.c cilk2c_inlined.c cilkred_map.c @@ -10,9 +9,12 @@ set(CHEETAH_SOURCES fiber.c fiber-pool.c global.c + hypertable.c init.c internal-malloc.c + pedigree_globals.c personality.c + reducer_api.c reducer_impl.c sched_stats.c scheduler.c @@ -24,8 +26,8 @@ set(CHEETAH_ABI_SOURCE cilk2c_inlined.c ) -set(CHEETAH_PEDIGREE_GLOBALS_SOURCES - pedigree_globals.c +set(CHEETAH_PEDIGREE_LIB_SOURCES + pedigree_lib.c ) set(CHEETAH_PERSONALITY_C_SOURCES @@ -52,6 +54,8 @@ set(CHEETAH_DYNAMIC_LIBS ${CHEETAH_COMMON_LIBS}) add_flags_if_supported(-g3) add_flags_if_supported(-Wno-covered-switch-default) +add_flags_if_supported(-fdebug-default-version=4) +add_flags_if_supported(-Werror=int-conversion) if 
(CHEETAH_HAS_FOMIT_FRAME_POINTER_FLAG) set_source_files_properties(invoke-main.c PROPERTIES COMPILE_FLAGS -fno-omit-frame-pointer) endif() @@ -70,8 +74,6 @@ set(CHEETAH_BITCODE_ABI_COMPILE_DEFS ${CHEETAH_COMPILE_DEFS} "CHEETAH_INTERNAL=" "CHEETAH_INTERNAL_NORETURN=__attribute__((noreturn))" "CILK_DEBUG=0") -set(CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} - "ENABLE_CILKRTS_PEDIGREE=1") # Set compile flags, compile defs, and link flags for ASan build set(CHEETAH_ASAN_COMPILE_FLAGS ${CHEETAH_COMPILE_FLAGS}) @@ -80,9 +82,6 @@ set(CHEETAH_ASAN_LINK_FLAGS ${CHEETAH_LINK_FLAGS} -fsanitize=address) set(CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS}) set(CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} "CILK_ENABLE_ASAN_HOOKS=1") -set(CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS - ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - "CILK_ENABLE_ASAN_HOOKS=1") set(CHEETAH_BUILD_ASAN_VER OFF) if (CHEETAH_ENABLE_ASAN AND (CHEETAH_HAS_ASAN OR TARGET asan)) @@ -104,14 +103,6 @@ if (APPLE) DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} PARENT_TARGET cheetah) - add_cheetah_bitcode(opencilk-pedigrees-abi - OS ${CHEETAH_SUPPORTED_OS} - ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - PARENT_TARGET cheetah) - if (CHEETAH_BUILD_ASAN_VER) add_cheetah_bitcode(opencilk-asan-abi OS ${CHEETAH_SUPPORTED_OS} @@ -120,14 +111,6 @@ if (APPLE) CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} DEFS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS} PARENT_TARGET cheetah) - - add_cheetah_bitcode(opencilk-pedigrees-asan-abi - OS ${CHEETAH_SUPPORTED_OS} - ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS} - PARENT_TARGET cheetah) endif() if 
(CHEETAH_ENABLE_SHARED) @@ -174,7 +157,7 @@ if (APPLE) SHARED OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -226,7 +209,7 @@ if (APPLE) SHARED OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -275,7 +258,7 @@ if (APPLE) STATIC OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -320,7 +303,7 @@ if (APPLE) STATIC OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -337,13 +320,6 @@ else() # Not APPLE DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} PARENT_TARGET cheetah) - add_cheetah_bitcode(opencilk-pedigrees-abi - ARCHS ${arch} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - PARENT_TARGET cheetah) - if (CHEETAH_BUILD_ASAN_VER) add_cheetah_bitcode(opencilk-asan-abi ARCHS ${arch} @@ -351,13 +327,6 @@ else() # Not APPLE CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} DEFS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS} PARENT_TARGET cheetah) - - add_cheetah_bitcode(opencilk-pedigrees-asan-abi - ARCHS ${arch} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} - DEFS 
${CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS} - PARENT_TARGET cheetah) endif() if (CHEETAH_ENABLE_SHARED) @@ -400,7 +369,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees SHARED ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -449,7 +418,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees-asan SHARED ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -493,7 +462,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees STATIC ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -534,7 +503,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees-asan STATIC ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} diff --git a/runtime/c_reducers.c b/runtime/c_reducers.c deleted file mode 100644 index f13597cd..00000000 --- a/runtime/c_reducers.c +++ /dev/null @@ -1,7 +0,0 @@ -#define CILK_C_DEFINE_REDUCERS -#include -#include -#include -#include -#include -#include diff --git a/runtime/cilk-internal.h b/runtime/cilk-internal.h index e4371713..a3e0f67c 100644 --- a/runtime/cilk-internal.h +++ b/runtime/cilk-internal.h @@ -12,11 +12,13 @@ extern "C" { #include "debug.h" #include "fiber.h" +#include "frame.h" #include "internal-malloc.h" #include "jmpbuf.h" #include "rts-config.h" #include "sched_stats.h" #include "types.h" +#include "worker.h" #if defined __i386__ || defined __x86_64__ #ifdef __SSE__ @@ -28,186 +30,6 @@ 
struct global_state; typedef struct global_state global_state; typedef struct local_state local_state; -//=============================================== -// Cilk stack frame related defs -//=============================================== - - - -/** - * Every spawning function has a frame descriptor. A spawning function - * is a function that spawns or detaches. Only spawning functions - * are visible to the Cilk runtime. - */ -struct __cilkrts_stack_frame { - // Flags is a bitfield with values defined below. Client code - // initializes flags to 0 before the first Cilk operation. - uint32_t flags; - // The magic number includes the ABI version and a hash of the - // layout of this structure. - uint32_t magic; - - // call_parent points to the __cilkrts_stack_frame of the closest - // ancestor spawning function, including spawn helpers, of this frame. - // It forms a linked list ending at the first stolen frame. - __cilkrts_stack_frame *call_parent; - - // The client copies the worker from TLS here when initializing - // the structure. The runtime ensures that the field always points - // to the __cilkrts_worker which currently "owns" the frame. - // - // TODO: Remove this pointer? This pointer only seems to be needed for - // debugging purposes. When the worker structure is genuinely needed, it - // seems to be accessible by calling __cilkrts_get_tls_worker(), which will - // be inlined and optimized to a simple move from TLS. - _Atomic(__cilkrts_worker *) worker; - - // Before every spawn and nontrivial sync the client function - // saves its continuation here. - jmpbuf ctx; - -#ifdef ENABLE_CILKRTS_PEDIGREE - __cilkrts_pedigree pedigree; // Fields for pedigrees. 
- int64_t rank; - uint64_t dprng_dotproduct; - int64_t dprng_depth; -#endif -}; - -//=========================================================== -// Value defines for the flags field in cilkrts_stack_frame -//=========================================================== - -/* CILK_FRAME_STOLEN is set if the frame has ever been stolen. */ -#define CILK_FRAME_STOLEN 0x001 - -/* CILK_FRAME_UNSYNCHED is set if the frame has been stolen and - is has not yet executed _Cilk_sync. It is technically a misnomer in that a - frame can have this flag set even if all children have returned. */ -#define CILK_FRAME_UNSYNCHED 0x002 - -/* Is this frame detached (spawned)? If so the runtime needs - to undo-detach in the slow path epilogue. */ -#define CILK_FRAME_DETACHED 0x004 - -/* CILK_FRAME_EXCEPTION_PENDING is set if the frame has an exception - to handle after syncing. */ -#define CILK_FRAME_EXCEPTION_PENDING 0x008 - -/* Is this frame excepting, meaning that a stolen continuation threw? */ -#define CILK_FRAME_EXCEPTING 0x010 - -/* Is this the last (oldest) Cilk frame? */ -#define CILK_FRAME_LAST 0x080 - -/* Is this frame in the epilogue, or more generally after the last - sync when it can no longer do any Cilk operations? */ -#define CILK_FRAME_EXITING 0x100 - -/* Is this frame handling an exception? */ -// TODO: currently only used when throwing an exception from the continuation -// (i.e. from the personality function). Used in scheduler.c to disable -// asserts that fail if trying to longjmp back to the personality -// function. 
-#define CILK_FRAME_SYNC_READY 0x200 - -static const uint32_t frame_magic = - ((((((((((((__CILKRTS_ABI_VERSION * 13) + - offsetof(struct __cilkrts_stack_frame, worker)) * - 13) + - offsetof(struct __cilkrts_stack_frame, ctx)) * - 13) + - offsetof(struct __cilkrts_stack_frame, magic)) * - 13) + - offsetof(struct __cilkrts_stack_frame, flags)) * - 13) + - offsetof(struct __cilkrts_stack_frame, call_parent)) - )) - ; - -#define CHECK_CILK_FRAME_MAGIC(G, F) (frame_magic == (F)->magic) - -//=========================================================== -// Helper functions for the flags field in cilkrts_stack_frame -//=========================================================== - -/* A frame is set to be stolen as long as it has a corresponding Closure */ -static inline void __cilkrts_set_stolen(__cilkrts_stack_frame *sf) { - sf->flags |= CILK_FRAME_STOLEN; -} - -/* A frame is set to be unsynced only if it has parallel subcomputation - * underneathe, i.e., only if it has spawned children executing on a different - * worker - */ -static inline void __cilkrts_set_unsynced(__cilkrts_stack_frame *sf) { - sf->flags |= CILK_FRAME_UNSYNCHED; -} - -static inline void __cilkrts_set_synced(__cilkrts_stack_frame *sf) { - sf->flags &= ~CILK_FRAME_UNSYNCHED; -} - -/* Returns nonzero if the frame is not synched. */ -static inline int __cilkrts_unsynced(__cilkrts_stack_frame *sf) { - return (sf->flags & CILK_FRAME_UNSYNCHED); -} - -/* Returns nonzero if the frame has been stolen. */ -static inline int __cilkrts_stolen(__cilkrts_stack_frame *sf) { - return (sf->flags & CILK_FRAME_STOLEN); -} - -/* Returns nonzero if the frame is synched. */ -static inline int __cilkrts_synced(__cilkrts_stack_frame *sf) { - return ((sf->flags & CILK_FRAME_UNSYNCHED) == 0); -} - -/* Returns nonzero if the frame has never been stolen. 
*/ -static inline int __cilkrts_not_stolen(__cilkrts_stack_frame *sf) { - return ((sf->flags & CILK_FRAME_STOLEN) == 0); -} - -//=============================================== -// Worker related definition -//=============================================== - -// Actual declaration - -enum __cilkrts_worker_state { - WORKER_IDLE = 10, - WORKER_SCHED, - WORKER_STEAL, - WORKER_RUN -}; - -struct __cilkrts_worker { - // T and H pointers in the THE protocol - _Atomic(__cilkrts_stack_frame **) tail; - _Atomic(__cilkrts_stack_frame **) head; - _Atomic(__cilkrts_stack_frame **) exc; - - // Limit of the Lazy Task Queue, to detect queue overflow - __cilkrts_stack_frame **ltq_limit; - - // Worker id, a small integer - worker_id self; - - // Global state of the runtime system, opaque to the client. - global_state *g; - - // Additional per-worker state hidden from the client. - local_state *l; - - // A slot that points to the currently executing Cilk frame. - __cilkrts_stack_frame *current_stack_frame; - - // Map from reducer names to reducer values - cilkred_map *reducer_map; -} __attribute__((aligned(256))); // This alignment reduces false sharing induced - // by hardware prefetchers on some systems, - // such as Intel CPUs. 
- struct cilkrts_callbacks { unsigned last_init; unsigned last_exit; @@ -218,11 +40,39 @@ struct cilkrts_callbacks { extern CHEETAH_INTERNAL struct cilkrts_callbacks cilkrts_callbacks; -extern __thread __cilkrts_worker *tls_worker; +extern bool __cilkrts_use_extension; +#if ENABLE_EXTENSION +#define USE_EXTENSION __cilkrts_use_extension +#else +#define USE_EXTENSION false +#endif +extern __thread __cilkrts_worker *__cilkrts_tls_worker; +CHEETAH_INTERNAL extern __thread bool is_boss_thread; static inline __attribute__((always_inline)) __cilkrts_worker * __cilkrts_get_tls_worker(void) { - return tls_worker; + return __cilkrts_tls_worker; +} + +void __cilkrts_register_extension(void *extension); +void *__cilkrts_get_extension(void); +void __cilkrts_extend_spawn(__cilkrts_worker *w, void **parent_extension, + void **child_extension); +void __cilkrts_extend_return_from_spawn(__cilkrts_worker *w, void **extension); +void __cilkrts_extend_sync(void **extension); + +static inline __attribute__((always_inline)) void * +__cilkrts_push_ext_stack(__cilkrts_worker *w, size_t size) { + uint8_t *ext_stack_ptr = ((uint8_t *)w->ext_stack) - size; + w->ext_stack = (void *)ext_stack_ptr; + return ext_stack_ptr; +} + +static inline __attribute__((always_inline)) void * +__cilkrts_pop_ext_stack(__cilkrts_worker *w, size_t size) { + uint8_t *ext_stack_ptr = ((uint8_t *)w->ext_stack) + size; + w->ext_stack = (void *)ext_stack_ptr; + return ext_stack_ptr; } #ifdef __cplusplus diff --git a/runtime/cilk2c.c b/runtime/cilk2c.c index 39a16ada..d9353f96 100644 --- a/runtime/cilk2c.c +++ b/runtime/cilk2c.c @@ -73,9 +73,10 @@ void __cilkrts_check_exception_raise(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); char *exn 
= t->user_exn.exn; @@ -85,7 +86,7 @@ void __cilkrts_check_exception_raise(__cilkrts_stack_frame *sf) { sf->flags &= ~CILK_FRAME_EXCEPTION_PENDING; Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (exn != NULL) { _Unwind_RaiseException((struct _Unwind_Exception *)exn); // noreturn } @@ -99,9 +100,10 @@ void __cilkrts_check_exception_resume(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); char *exn = t->user_exn.exn; @@ -111,7 +113,7 @@ void __cilkrts_check_exception_resume(__cilkrts_stack_frame *sf) { sf->flags &= ~CILK_FRAME_EXCEPTION_PENDING; Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (exn != NULL) { _Unwind_Resume((struct _Unwind_Exception *)exn); // noreturn } @@ -127,9 +129,10 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); // If t->parent_rsp is non-null, then the Cilk personality function executed // __cilkrts_sync(sf), which implies that sf is at the top of the deque. @@ -140,7 +143,7 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { // non-null. 
if (NULL == t->parent_rsp) { - deque_unlock_self(w); + deque_unlock_self(deques, w); return; } @@ -152,7 +155,7 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { t->saved_throwing_fiber = NULL; } - deque_unlock_self(w); + deque_unlock_self(deques, w); __builtin_longjmp(sf->ctx, 1); // Does not return return; } @@ -173,3 +176,24 @@ void __cilkrts_sync(__cilkrts_stack_frame *sf) { longjmp_to_runtime(w); } } + +/////////////////////////////////////////////////////////////////////////// +/// Methods for handling extensions + +static inline __cilkrts_worker *get_worker_or_default(void) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + if (NULL == w) + w = default_cilkrts->workers[0]; + return w; +} + +void __cilkrts_register_extension(void *extension) { + __cilkrts_use_extension = true; + __cilkrts_worker *w = get_worker_or_default(); + w->extension = extension; +} + +void *__cilkrts_get_extension(void) { + __cilkrts_worker *w = get_worker_or_default(); + return w->extension; +} diff --git a/runtime/cilk2c.h b/runtime/cilk2c.h index 18b1c9ca..2821f5f6 100644 --- a/runtime/cilk2c.h +++ b/runtime/cilk2c.h @@ -98,6 +98,5 @@ CHEETAH_INTERNAL uint64_t __cilkrts_cilk_for_grainsize_64(uint64_t n); // Not marked as CHEETAH_API as it may be deprecated soon unsigned __cilkrts_get_nworkers(void); -//CHEETAH_API int64_t* __cilkrts_get_pedigree(void); -//void __cilkrts_pedigree_bump_rank(void); + #endif diff --git a/runtime/cilk2c_inlined.c b/runtime/cilk2c_inlined.c index bc6ece4d..9abdd842 100644 --- a/runtime/cilk2c_inlined.c +++ b/runtime/cilk2c_inlined.c @@ -17,40 +17,25 @@ #include "readydeque.h" #include "scheduler.h" -#ifdef ENABLE_CILKRTS_PEDIGREE -extern __cilkrts_pedigree cilkrts_root_pedigree_node; -extern uint64_t DPRNG_PRIME; -extern uint64_t* dprng_m_array; -extern uint64_t dprng_m_X; - -uint64_t __cilkrts_dprng_swap_halves(uint64_t x); -uint64_t __cilkrts_dprng_mix(uint64_t x); -uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x); -uint64_t 
__cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b); -void __cilkrts_init_dprng(void); - -uint64_t __cilkrts_get_dprand(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - __cilkrts_bump_worker_rank(); - return __cilkrts_dprng_mix_mod_p(w->current_stack_frame->dprng_dotproduct); -} +#include "pedigree_ext.c" -#endif +// This variable encodes the alignment of a __cilkrts_stack_frame, both in its +// value and in its own alignment. Because LLVM IR does not associate +// alignments with types, this variable communicates the desired alignment to +// the compiler instead. +_Alignas(__cilkrts_stack_frame) +size_t __cilkrts_stack_frame_align = __alignof__(__cilkrts_stack_frame); // Begin a Cilkified region. The routine runs on a Cilkifying thread to // transfer the execution of this function to the workers in global_state g. // This routine must be inlined for correctness. static inline __attribute__((always_inline)) void -cilkify(global_state *g, __cilkrts_stack_frame *sf) { -#ifdef ENABLE_CILKRTS_PEDIGREE - __cilkrts_init_dprng(); -#endif - +cilkify(__cilkrts_stack_frame *sf) { // After inlining, the setjmp saves the processor state, including the frame // pointer, of the Cilk function. 
if (__builtin_setjmp(sf->ctx) == 0) { sysdep_save_fp_ctrl_state(sf); - __cilkrts_internal_invoke_cilkified_root(g, sf); + __cilkrts_internal_invoke_cilkified_root(sf); } else { sanitizer_finish_switch_fiber(); } @@ -74,32 +59,6 @@ uncilkify(global_state *g, __cilkrts_stack_frame *sf) { } } -#ifdef ENABLE_CILKRTS_PEDIGREE -__attribute__((always_inline)) __cilkrts_pedigree __cilkrts_get_pedigree(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - if (w == NULL) { - return cilkrts_root_pedigree_node; - } else { - __cilkrts_pedigree ret_ped; - ret_ped.parent = &(w->current_stack_frame->pedigree); - ret_ped.rank = w->current_stack_frame->rank; - return ret_ped; - } -} - -__attribute__((always_inline)) void __cilkrts_bump_worker_rank(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - if (w == NULL) { - cilkrts_root_pedigree_node.rank++; - } else { - w->current_stack_frame->rank++; - } - w->current_stack_frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - w->current_stack_frame->dprng_dotproduct, - dprng_m_array[w->current_stack_frame->dprng_depth]); -} -#endif - // Enter a new Cilk function, i.e., a function that contains a cilk_spawn. This // function must be inlined for correctness. __attribute__((always_inline)) void @@ -107,7 +66,7 @@ __cilkrts_enter_frame(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); sf->flags = 0; if (NULL == w) { - cilkify(default_cilkrts, sf); + cilkify(sf); w = __cilkrts_get_tls_worker(); } cilkrts_alert(CFRAME, w, "__cilkrts_enter_frame %p", (void *)sf); @@ -117,25 +76,6 @@ __cilkrts_enter_frame(__cilkrts_stack_frame *sf) { atomic_store_explicit(&sf->worker, w, memory_order_relaxed); w->current_stack_frame = sf; // WHEN_CILK_DEBUG(sf->magic = CILK_STACKFRAME_MAGIC); - -#ifdef ENABLE_CILKRTS_PEDIGREE - // Pedigree maintenance. 
- if (sf->call_parent != NULL && !(sf->flags & CILK_FRAME_LAST)) { - sf->pedigree.rank = sf->call_parent->rank++; - sf->pedigree.parent = &(sf->call_parent->pedigree); - sf->dprng_depth = sf->call_parent->dprng_depth + 1; - sf->call_parent->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - sf->call_parent->dprng_dotproduct, - dprng_m_array[sf->call_parent->dprng_depth]); - sf->dprng_dotproduct = sf->call_parent->dprng_dotproduct; - } else { - sf->pedigree.rank = 0; - sf->pedigree.parent = NULL; - sf->dprng_depth = 0; - sf->dprng_dotproduct = dprng_m_X; - } - sf->rank = 0; -#endif } // Enter a spawn helper, i.e., a fucntion containing code that was cilk_spawn'd. @@ -152,25 +92,6 @@ __cilkrts_enter_frame_helper(__cilkrts_stack_frame *sf) { sf->call_parent = w->current_stack_frame; atomic_store_explicit(&sf->worker, w, memory_order_relaxed); w->current_stack_frame = sf; - -#ifdef ENABLE_CILKRTS_PEDIGREE - // Pedigree maintenance. - if (sf->call_parent != NULL && !(sf->flags & CILK_FRAME_LAST)) { - sf->pedigree.rank = sf->call_parent->rank++; - sf->pedigree.parent = &(sf->call_parent->pedigree); - sf->dprng_depth = sf->call_parent->dprng_depth + 1; - sf->call_parent->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - sf->call_parent->dprng_dotproduct, - dprng_m_array[sf->call_parent->dprng_depth]); - sf->dprng_dotproduct = sf->call_parent->dprng_dotproduct; - } else { - sf->pedigree.rank = 0; - sf->pedigree.parent = NULL; - sf->dprng_depth = 0; - sf->dprng_dotproduct = dprng_m_X; - } - sf->rank = 0; -#endif } __attribute__((always_inline)) int @@ -183,7 +104,8 @@ __cilk_prepare_spawn(__cilkrts_stack_frame *sf) { return res; } -static inline __cilkrts_worker *get_tls_worker(__cilkrts_stack_frame *sf) { +static inline +__cilkrts_worker *get_worker_from_stack(__cilkrts_stack_frame *sf) { // In principle, we should be able to get the worker efficiently by calling // __cilkrts_get_tls_worker(). 
But code-generation on many systems assumes // that the thread on which a function runs never changes. As a result, it @@ -200,7 +122,7 @@ static inline __cilkrts_worker *get_tls_worker(__cilkrts_stack_frame *sf) { // parent frame. __attribute__((always_inline)) void __cilkrts_detach(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_detach %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); @@ -208,6 +130,11 @@ __cilkrts_detach(__cilkrts_stack_frame *sf) { CILK_ASSERT(w, w->current_stack_frame == sf); struct __cilkrts_stack_frame *parent = sf->call_parent; + + if (USE_EXTENSION) { + __cilkrts_extend_spawn(w, &parent->extension, &w->extension); + } + sf->flags |= CILK_FRAME_DETACHED; struct __cilkrts_stack_frame **tail = atomic_load_explicit(&w->tail, memory_order_relaxed); @@ -220,44 +147,58 @@ __cilkrts_detach(__cilkrts_stack_frame *sf) { } __attribute__((always_inline)) void __cilk_sync(__cilkrts_stack_frame *sf) { - if (sf->flags & CILK_FRAME_UNSYNCHED) { - if (__builtin_setjmp(sf->ctx) == 0) { - sysdep_save_fp_ctrl_state(sf); - __cilkrts_sync(sf); - } else { - sanitizer_finish_switch_fiber(); - if (sf->flags & CILK_FRAME_EXCEPTION_PENDING) { - __cilkrts_check_exception_raise(sf); + if (sf->flags & CILK_FRAME_UNSYNCHED || USE_EXTENSION) { + if (sf->flags & CILK_FRAME_UNSYNCHED) { + if (__builtin_setjmp(sf->ctx) == 0) { + sysdep_save_fp_ctrl_state(sf); + __cilkrts_sync(sf); + } else { + sanitizer_finish_switch_fiber(); + if (sf->flags & CILK_FRAME_EXCEPTION_PENDING) { + __cilkrts_check_exception_raise(sf); + } } } + if (USE_EXTENSION) { + __cilkrts_worker *w = get_worker_from_stack(sf); + __cilkrts_extend_sync(&w->extension); + } } } __attribute__((always_inline)) void __cilk_sync_nothrow(__cilkrts_stack_frame *sf) { - if (sf->flags & CILK_FRAME_UNSYNCHED) { - if (__builtin_setjmp(sf->ctx) == 0) { - sysdep_save_fp_ctrl_state(sf); - 
__cilkrts_sync(sf); - } else { - sanitizer_finish_switch_fiber(); + if (sf->flags & CILK_FRAME_UNSYNCHED || USE_EXTENSION) { + if (sf->flags & CILK_FRAME_UNSYNCHED) { + if (__builtin_setjmp(sf->ctx) == 0) { + sysdep_save_fp_ctrl_state(sf); + __cilkrts_sync(sf); + } else { + sanitizer_finish_switch_fiber(); + } + } + if (USE_EXTENSION) { + __cilkrts_worker *w = get_worker_from_stack(sf); + __cilkrts_extend_sync(&w->extension); } } } __attribute__((always_inline)) void __cilkrts_leave_frame(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_leave_frame %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); // WHEN_CILK_DEBUG(sf->magic = ~CILK_STACKFRAME_MAGIC); + __cilkrts_stack_frame *parent = sf->call_parent; + // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. - w->current_stack_frame = sf->call_parent; + w->current_stack_frame = parent; sf->call_parent = NULL; // Check if sf is the final stack frame, and if so, terminate the Cilkified @@ -290,7 +231,7 @@ __cilkrts_leave_frame(__cilkrts_stack_frame *sf) { __attribute__((always_inline)) void __cilkrts_leave_frame_helper(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_leave_frame_helper %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); @@ -300,7 +241,12 @@ __cilkrts_leave_frame_helper(__cilkrts_stack_frame *sf) { // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. 
- w->current_stack_frame = sf->call_parent; + __cilkrts_stack_frame *parent = sf->call_parent; + w->current_stack_frame = parent; + if (USE_EXTENSION) { + __cilkrts_extend_return_from_spawn(w, &w->extension); + w->extension = parent->extension; + } sf->call_parent = NULL; CILK_ASSERT(w, sf->flags & CILK_FRAME_DETACHED); @@ -347,21 +293,27 @@ void __cilkrts_enter_landingpad(__cilkrts_stack_frame *sf, int32_t sel) { __attribute__((always_inline)) void __cilkrts_pause_frame(__cilkrts_stack_frame *sf, char *exn) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_pause_frame %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + __cilkrts_stack_frame *parent = sf->call_parent; + // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. - w->current_stack_frame = sf->call_parent; + w->current_stack_frame = parent; sf->call_parent = NULL; // A __cilkrts_pause_frame may be reached before the spawn-helper frame has // detached. In that case, THE is not required. 
if (sf->flags & CILK_FRAME_DETACHED) { + if (USE_EXTENSION) { + __cilkrts_extend_return_from_spawn(w, &w->extension); + w->extension = parent->extension; + } __cilkrts_stack_frame **tail = atomic_load_explicit(&w->tail, memory_order_relaxed); --tail; diff --git a/runtime/cilkred_map.c b/runtime/cilkred_map.c index 902df357..8432246a 100644 --- a/runtime/cilkred_map.c +++ b/runtime/cilkred_map.c @@ -13,23 +13,19 @@ static inline void swap_views(ViewInfo *v1, ViewInfo *v2) { } static inline void swap_vals(ViewInfo *v1, ViewInfo *v2) { - void *val = v1->val; - v1->val = v2->val; - v2->val = val; + void *val = v1->view; + v1->view = v2->view; + v2->view = val; } -static inline void clear_view(ViewInfo *view) { - __cilkrts_hyperobject_base *key = view->key; +static void clear_view(ViewInfo *view) { + hyperobject_base *hyper = view->hyper; - if (key != NULL) { - cilk_destroy_fn_t destroy = key->__c_monoid.destroy_fn; - if (destroy) { - key->__c_monoid.destroy_fn(key, view->val); // calls destructor - } - key->__c_monoid.deallocate_fn(key, view->val); // free the memory + if (hyper != NULL) { + __cilkrts_hyper_dealloc(view->view, hyper->view_size); } - view->key = NULL; - view->val = NULL; + view->view = NULL; + view->hyper = NULL; } // ================================================================= @@ -61,8 +57,8 @@ void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, CILK_ASSERT(w, this_map->num_of_vinfo <= this_map->spa_cap); CILK_ASSERT(w, id < this_map->spa_cap); - this_map->vinfo[id].key = NULL; - this_map->vinfo[id].val = NULL; + this_map->vinfo[id].hyper = NULL; + this_map->vinfo[id].view = NULL; this_map->num_of_vinfo--; if (this_map->num_of_vinfo == 0) { @@ -71,18 +67,16 @@ void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, } /** @brief Return element mapped to 'key' or null if not found. 
*/ -ViewInfo *cilkred_map_lookup(cilkred_map *this_map, - __cilkrts_hyperobject_base *key) { - hyper_id_t id = key->__id_num; - if (__builtin_expect(!(id & HYPER_ID_VALID), 0)) { +ViewInfo *cilkred_map_lookup(cilkred_map *this_map, hyperobject_base *hyper) { + hyper_id_t id = hyper->id_num; + if (__builtin_expect(!hyper->valid, 0)) { return NULL; } - id &= ~HYPER_ID_VALID; if (id >= this_map->spa_cap) { return NULL; /* TODO: grow map */ } ViewInfo *ret = this_map->vinfo + id; - if (ret->key == NULL && ret->val == NULL) { + if (ret->hyper == NULL && ret->view == NULL) { return NULL; } @@ -132,7 +126,7 @@ void cilkred_map_destroy_map(__cilkrts_worker *w, cilkred_map *h) { } if (DEBUG_ENABLED(REDUCER)) { for (hyper_id_t i = 0; i < h->spa_cap; ++i) - CILK_ASSERT(w, !h->vinfo[i].val); + CILK_ASSERT(w, !h->vinfo[i].view); } free(h->vinfo); h->vinfo = NULL; @@ -160,6 +154,7 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, if (other_map->num_of_vinfo == 0) { cilkred_map_destroy_map(w, other_map); + this_map->merging = false; return; } @@ -168,20 +163,26 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, for (i = 0; i < other_map->num_of_logs; i++) { hyper_id_t vindex = other_map->log[i]; - __cilkrts_hyperobject_base *key = other_map->vinfo[vindex].key; + hyperobject_base *hyper = other_map->vinfo[vindex].hyper; - if (this_map->vinfo[vindex].key != NULL) { - CILK_ASSERT(w, key == this_map->vinfo[vindex].key); + if (hyper == NULL) { + /* The other map's hyperobject was deleted. + The corresponding index in this map may + belong to a different hyperobject. 
*/ + continue; + } + if (this_map->vinfo[vindex].hyper != NULL) { + CILK_ASSERT(w, hyper == this_map->vinfo[vindex].hyper); if (kind == MERGE_INTO_RIGHT) { // other_map is the left val swap_vals(&other_map->vinfo[vindex], &this_map->vinfo[vindex]); } // updated val is stored back into the left - key->__c_monoid.reduce_fn(key, this_map->vinfo[vindex].val, - other_map->vinfo[vindex].val); + hyper->reduce_fn(this_map->vinfo[vindex].view, + other_map->vinfo[vindex].view); clear_view(&other_map->vinfo[vindex]); } else { - CILK_ASSERT(w, this_map->vinfo[vindex].val == NULL); + CILK_ASSERT(w, this_map->vinfo[vindex].view == NULL); swap_views(&other_map->vinfo[vindex], &this_map->vinfo[vindex]); cilkred_map_log_id(w, this_map, vindex); } @@ -190,20 +191,20 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, } else { hyper_id_t i; for (i = 0; i < other_map->spa_cap; i++) { - if (other_map->vinfo[i].key != NULL) { - __cilkrts_hyperobject_base *key = other_map->vinfo[i].key; + if (other_map->vinfo[i].hyper != NULL) { + hyperobject_base *hyper = other_map->vinfo[i].hyper; - if (this_map->vinfo[i].key != NULL) { - CILK_ASSERT(w, key == this_map->vinfo[i].key); + if (this_map->vinfo[i].hyper != NULL) { + CILK_ASSERT(w, hyper == this_map->vinfo[i].hyper); if (kind == MERGE_INTO_RIGHT) { // other_map is the left val swap_vals(&other_map->vinfo[i], &this_map->vinfo[i]); } // updated val is stored back into the left - key->__c_monoid.reduce_fn(key, this_map->vinfo[i].val, - other_map->vinfo[i].val); + hyper->reduce_fn(this_map->vinfo[i].view, + other_map->vinfo[i].view); clear_view(&other_map->vinfo[i]); } else { // the 'this_map' page does not contain view - CILK_ASSERT(w, this_map->vinfo[i].val == NULL); + CILK_ASSERT(w, this_map->vinfo[i].view == NULL); // transfer the key / val over swap_views(&other_map->vinfo[i], &this_map->vinfo[i]); cilkred_map_log_id(w, this_map, i); diff --git a/runtime/cilkred_map.h b/runtime/cilkred_map.h index 2d0d5717..7bafae54 100644 
--- a/runtime/cilkred_map.h +++ b/runtime/cilkred_map.h @@ -3,14 +3,11 @@ #include "cilk-internal.h" #include "debug.h" -#include +#include "hyperobject_base.h" #include #include #include -typedef uint32_t hyper_id_t; /* must match cilk/hyperobject_base.h */ -#define HYPER_ID_VALID 0x80000000 - enum merge_kind { MERGE_UNORDERED, ///< Assertion fails MERGE_INTO_LEFT, ///< Merges the argument from the right into the left @@ -19,9 +16,8 @@ enum merge_kind { typedef enum merge_kind merge_kind; typedef struct view_info { - void *val; // pointer to the actual view for the reducer - // pointer to the hyperbase object for a given reducer - __cilkrts_hyperobject_base *key; + void *view; + hyperobject_base *hyper; } ViewInfo; /** @@ -45,14 +41,12 @@ void cilkred_map_log_id(__cilkrts_worker *const w, cilkred_map *this_map, hyper_id_t id); CHEETAH_INTERNAL void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, - hyper_id_t id); /* Calling this function potentially invalidates any older ViewInfo pointers from the same map. */ CHEETAH_INTERNAL -ViewInfo *cilkred_map_lookup(cilkred_map *this_map, - __cilkrts_hyperobject_base *key); +ViewInfo *cilkred_map_lookup(cilkred_map *this_map, hyperobject_base *hyper); /** * Construct an empty reducer map from the memory pool associated with the * given worker. 
This reducer map must be destroyed before the worker's diff --git a/runtime/closure-type.h b/runtime/closure-type.h index d636aea7..55137e51 100644 --- a/runtime/closure-type.h +++ b/runtime/closure-type.h @@ -39,6 +39,9 @@ struct Closure { struct cilk_fiber *fiber; struct cilk_fiber *fiber_child; + struct cilk_fiber *ext_fiber; + struct cilk_fiber *ext_fiber_child; + worker_id owner_ready_deque; /* debug only */ worker_id mutex_owner; /* debug only */ diff --git a/runtime/closure.h b/runtime/closure.h index c0f7b1ce..dbdd7fcf 100644 --- a/runtime/closure.h +++ b/runtime/closure.h @@ -156,6 +156,8 @@ static inline void Closure_init(Closure *t) { t->frame = NULL; t->fiber = NULL; t->fiber_child = NULL; + t->ext_fiber = NULL; + t->ext_fiber_child = NULL; t->orig_rsp = NULL; @@ -340,32 +342,35 @@ void Closure_remove_callee(__cilkrts_worker *const w, Closure *caller) { /* This function is used for steal, the next function for sync. The invariants are slightly different. */ -static inline -void Closure_suspend_victim(__cilkrts_worker *thief, __cilkrts_worker *victim, - Closure *cl) { +static inline void Closure_suspend_victim(struct ReadyDeque *deques, + __cilkrts_worker *thief, + __cilkrts_worker *victim, + Closure *cl) { Closure *cl1; + worker_id victim_id = victim->self; CILK_ASSERT(thief, !cl->user_rmap); Closure_checkmagic(thief, cl); Closure_assert_ownership(thief, cl); - deque_assert_ownership(thief, victim->self); + deque_assert_ownership(deques, thief, victim_id); CILK_ASSERT(thief, cl == thief->g->root_closure || cl->spawn_parent || cl->call_parent); Closure_change_status(thief, cl, CLOSURE_RUNNING, CLOSURE_SUSPENDED); - cl1 = deque_xtract_bottom(thief, victim->self); + cl1 = deque_xtract_bottom(deques, thief, victim_id); CILK_ASSERT(thief, cl == cl1); USE_UNUSED(cl1); } -static inline -void Closure_suspend(__cilkrts_worker *const w, Closure *cl) { +static inline void Closure_suspend(struct ReadyDeque *deques, + __cilkrts_worker *const w, Closure *cl) { 
Closure *cl1; + worker_id self = w->self; CILK_ASSERT(w, !cl->user_rmap); @@ -373,18 +378,18 @@ void Closure_suspend(__cilkrts_worker *const w, Closure *cl) { Closure_checkmagic(w, cl); Closure_assert_ownership(w, cl); - deque_assert_ownership(w, w->self); + deque_assert_ownership(deques, w, self); CILK_ASSERT(w, cl == w->g->root_closure || cl->spawn_parent || cl->call_parent); CILK_ASSERT(w, cl->frame != NULL); CILK_ASSERT(w, __cilkrts_stolen(cl->frame)); - CILK_ASSERT(w, cl->frame->worker->self == w->self); + CILK_ASSERT(w, cl->frame->worker->self == self); Closure_change_status(w, cl, CLOSURE_RUNNING, CLOSURE_SUSPENDED); atomic_store_explicit(&cl->frame->worker, INVALID, memory_order_relaxed); - cl1 = deque_xtract_bottom(w, w->self); + cl1 = deque_xtract_bottom(deques, w, self); CILK_ASSERT(w, cl == cl1); USE_UNUSED(cl1); diff --git a/runtime/fiber-pool.c b/runtime/fiber-pool.c index 8d3ae800..b56e0171 100644 --- a/runtime/fiber-pool.c +++ b/runtime/fiber-pool.c @@ -9,7 +9,7 @@ #include "local.h" #include "mutex.h" -// Whent the pool becomes full (empty), free (allocate) this fraction +// When the pool becomes full (empty), free (allocate) this fraction // of the pool back to (from) parent / the OS. #define BATCH_FRACTION 2 #define GLOBAL_POOL_RATIO 10 // make global pool this much larger @@ -90,6 +90,10 @@ static void fiber_pool_init(struct cilk_fiber_pool *pool, size_t stacksize, static void fiber_pool_destroy(struct cilk_fiber_pool *pool) { CILK_ASSERT_G(pool->size == 0); cilk_mutex_destroy(&pool->lock); + // pool->fibers might be NULL if the fiber pool was never actually + // initialized, e.g., because no Cilk code was run. + if (pool->fibers == NULL) + return; free(pool->fibers); pool->parent = NULL; pool->fibers = NULL; @@ -282,6 +286,20 @@ void cilk_fiber_pool_global_destroy(global_state *g) { fiber_pool_destroy(&g->fiber_pool); // worker 0 should have freed everything } +/** + * Per-worker fiber pool zero initialization. 
Initializes the fiber pool to a + * safe zero state, in case that worker is created by + * cilk_fiber_pool_per_worker_init() never gets called on that worker. Should + * initialize the fiber bool sufficiently for calls to + * cilk_fiber_pool_per_worker_terminate() and + * cilk_fiber_pool_per_worker_destroy() to succeed. + */ +void cilk_fiber_pool_per_worker_zero_init(__cilkrts_worker *w) { + struct cilk_fiber_pool *pool = &(w->l->fiber_pool); + pool->size = 0; + pool->fibers = NULL; +} + /** * Per-worker fiber pool initialization: should be called per worker so * so that fiber comes from the core on which the worker is running on. @@ -295,8 +313,8 @@ void cilk_fiber_pool_per_worker_init(__cilkrts_worker *w) { CILK_ASSERT(w, NULL != pool->fibers); CILK_ASSERT(w, w->g->fiber_pool.stack_size == pool->stack_size); - fiber_pool_allocate_batch(w, pool, bufsize / BATCH_FRACTION); fiber_pool_stat_init(pool); + fiber_pool_allocate_batch(w, pool, bufsize / BATCH_FRACTION); } /* This does not yet destroy the fiber pool; merely collects diff --git a/runtime/fiber.c b/runtime/fiber.c index acc067ea..ad132d40 100644 --- a/runtime/fiber.c +++ b/runtime/fiber.c @@ -257,6 +257,14 @@ static void fiber_init(struct cilk_fiber *fiber) { // Supported public functions //=============================================================== +char *sysdep_get_stack_start(struct cilk_fiber *fiber) { + size_t align = 64; + char *sp = fiber->stack_high - align; + /* Debugging: make sure stack is accessible. 
*/ + ((volatile char *)sp)[-1]; + return sp; +} + char *sysdep_reset_stack_for_resume(struct cilk_fiber *fiber, __cilkrts_stack_frame *sf) { CILK_ASSERT_G(fiber); diff --git a/runtime/fiber.h b/runtime/fiber.h index f4096d3e..6767d656 100644 --- a/runtime/fiber.h +++ b/runtime/fiber.h @@ -77,6 +77,8 @@ void sysdep_restore_fp_state(__cilkrts_stack_frame *sf) { #endif } +CHEETAH_INTERNAL +char *sysdep_get_stack_start(struct cilk_fiber *fiber); CHEETAH_INTERNAL char *sysdep_reset_stack_for_resume(struct cilk_fiber *fiber, __cilkrts_stack_frame *sf); @@ -86,6 +88,7 @@ void sysdep_longjmp_to_sf(__cilkrts_stack_frame *sf); CHEETAH_INTERNAL void cilk_fiber_pool_global_init(global_state *g); CHEETAH_INTERNAL void cilk_fiber_pool_global_terminate(global_state *g); CHEETAH_INTERNAL void cilk_fiber_pool_global_destroy(global_state *g); +CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_zero_init(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_init(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_terminate(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_destroy(__cilkrts_worker *w); diff --git a/runtime/frame.h b/runtime/frame.h new file mode 100644 index 00000000..3af5877f --- /dev/null +++ b/runtime/frame.h @@ -0,0 +1,135 @@ +#ifndef _CILK_FRAME_H +#define _CILK_FRAME_H + +#include "rts-config.h" + +#include +#include "jmpbuf.h" + +struct __cilkrts_worker; +struct __cilkrts_stack_frame; + +/** + * Every spawning function has a frame descriptor. A spawning function + * is a function that spawns or detaches. Only spawning functions + * are visible to the Cilk runtime. + */ +struct __cilkrts_stack_frame { + // Flags is a bitfield with values defined below. Client code + // initializes flags to 0 before the first Cilk operation. + uint32_t flags; + // The magic number includes the ABI version and a hash of the + // layout of this structure. 
+ uint32_t magic; + + // call_parent points to the __cilkrts_stack_frame of the closest + // ancestor spawning function, including spawn helpers, of this frame. + // It forms a linked list ending at the first stolen frame. + struct __cilkrts_stack_frame *call_parent; + + // The client copies the worker from TLS here when initializing + // the structure. The runtime ensures that the field always points + // to the __cilkrts_worker which currently "owns" the frame. + // + // TODO: Remove this pointer? This pointer only seems to be needed for + // debugging purposes. When the worker structure is genuinely needed, it + // seems to be accessible by calling __cilkrts_get_tls_worker(), which will + // be inlined and optimized to a simple move from TLS. + _Atomic(struct __cilkrts_worker *) worker; + + // Before every spawn and nontrivial sync the client function + // saves its continuation here. + jmpbuf ctx; + + // Optional state for an extension, only maintained if + // __cilkrts_use_extension == true. + void *extension; +}; + +//=========================================================== +// Value defines for the flags field in cilkrts_stack_frame +//=========================================================== + +/* CILK_FRAME_STOLEN is set if the frame has ever been stolen. */ +#define CILK_FRAME_STOLEN 0x001 + +/* CILK_FRAME_UNSYNCHED is set if the frame has been stolen and + is has not yet executed _Cilk_sync. It is technically a misnomer in that a + frame can have this flag set even if all children have returned. */ +#define CILK_FRAME_UNSYNCHED 0x002 + +/* Is this frame detached (spawned)? If so the runtime needs + to undo-detach in the slow path epilogue. */ +#define CILK_FRAME_DETACHED 0x004 + +/* CILK_FRAME_EXCEPTION_PENDING is set if the frame has an exception + to handle after syncing. */ +#define CILK_FRAME_EXCEPTION_PENDING 0x008 + +/* Is this frame excepting, meaning that a stolen continuation threw? 
*/ +#define CILK_FRAME_EXCEPTING 0x010 + +/* Is this the last (oldest) Cilk frame? */ +#define CILK_FRAME_LAST 0x080 + +/* Is this frame handling an exception? */ +// TODO: currently only used when throwing an exception from the continuation +// (i.e. from the personality function). Used in scheduler.c to disable +// asserts that fail if trying to longjmp back to the personality +// function. +#define CILK_FRAME_SYNC_READY 0x200 + +static const uint32_t frame_magic = + (((((((((((((__CILKRTS_ABI_VERSION * 13) + + offsetof(struct __cilkrts_stack_frame, worker)) * + 13) + + offsetof(struct __cilkrts_stack_frame, ctx)) * + 13) + + offsetof(struct __cilkrts_stack_frame, magic)) * + 13) + + offsetof(struct __cilkrts_stack_frame, flags)) * + 13) + + offsetof(struct __cilkrts_stack_frame, call_parent)) * + 13) + + offsetof(struct __cilkrts_stack_frame, extension))); + +#define CHECK_CILK_FRAME_MAGIC(G, F) (frame_magic == (F)->magic) + +//=========================================================== +// Helper functions for the flags field in cilkrts_stack_frame +//=========================================================== + +/* A frame is set to be stolen as long as it has a corresponding Closure */ +static inline void __cilkrts_set_stolen(struct __cilkrts_stack_frame *sf) { + sf->flags |= CILK_FRAME_STOLEN; +} + +/* A frame is set to be unsynced only if it has parallel subcomputation + * underneathe, i.e., only if it has spawned children executing on a different + * worker + */ +static inline void __cilkrts_set_unsynced(struct __cilkrts_stack_frame *sf) { + sf->flags |= CILK_FRAME_UNSYNCHED; +} + +static inline void __cilkrts_set_synced(struct __cilkrts_stack_frame *sf) { + sf->flags &= ~CILK_FRAME_UNSYNCHED; +} + +/* Returns nonzero if the frame has been stolen. + Only used in assertions. */ +static inline int __cilkrts_stolen(struct __cilkrts_stack_frame *sf) { + return (sf->flags & CILK_FRAME_STOLEN); +} + +/* Returns nonzero if the frame is synched. 
Only used in assertions. */ +static inline int __cilkrts_synced(struct __cilkrts_stack_frame *sf) { + return ((sf->flags & CILK_FRAME_UNSYNCHED) == 0); +} + +/* Returns nonzero if the frame has never been stolen. */ +static inline int __cilkrts_not_stolen(struct __cilkrts_stack_frame *sf) { + return ((sf->flags & CILK_FRAME_STOLEN) == 0); +} + +#endif /* _CILK_FRAME_H */ diff --git a/runtime/global.c b/runtime/global.c index e30222d3..394d18c4 100644 --- a/runtime/global.c +++ b/runtime/global.c @@ -3,6 +3,9 @@ #endif #include +#ifdef __FreeBSD__ +#include +#endif #include #include #include @@ -10,10 +13,15 @@ #include "debug.h" #include "global.h" +#include "hypertable.h" #include "init.h" #include "readydeque.h" #include "reducer_impl.h" +#if defined __FreeBSD__ && __FreeBSD__ < 13 +typedef cpuset_t cpu_set_t; +#endif + global_state *default_cilkrts; unsigned cilkg_nproc = 0; @@ -175,11 +183,13 @@ global_state *global_state_init(int argc, char *argv[]) { g->root_closure_initialized = false; atomic_store_explicit(&g->done, 0, memory_order_relaxed); atomic_store_explicit(&g->cilkified, 0, memory_order_relaxed); - atomic_store_explicit(&g->disengaged_deprived, 0, memory_order_relaxed); + atomic_store_explicit(&g->disengaged_sentinel, 0, memory_order_relaxed); g->terminate = false; g->exiting_worker = 0; + g->worker_args = + (struct worker_args *)calloc(active_size, sizeof(struct worker_args)); g->workers = (__cilkrts_worker **)calloc(active_size, sizeof(__cilkrts_worker *)); g->deques = (ReadyDeque *)cilk_aligned_alloc( @@ -193,18 +203,22 @@ global_state *global_state_init(int argc, char *argv[]) { g->id_manager = NULL; + g->hyper_table = hyper_table_get_or_create(0); + return g; } void for_each_worker(global_state *g, void (*fn)(__cilkrts_worker *, void *), void *data) { for (unsigned i = 0; i < g->options.nproc; ++i) - fn(g->workers[i], data); + if (g->workers[i]) + fn(g->workers[i], data); } void for_each_worker_rev(global_state *g, void (*fn)(__cilkrts_worker *, 
void *), void *data) { unsigned i = g->options.nproc; while (i-- > 0) - fn(g->workers[i], data); + if (g->workers[i]) + fn(g->workers[i], data); } diff --git a/runtime/global.h b/runtime/global.h index e7571a89..5f6a5517 100644 --- a/runtime/global.h +++ b/runtime/global.h @@ -43,11 +43,14 @@ struct rts_options { unsigned int force_reduce; /* can be set via env variable CILK_FORCE_REDUCE */ }; +struct worker_args; + struct global_state { /* globally-visible options (read-only after init) */ struct rts_options options; - unsigned int nworkers; /* size of next 3 arrays */ + unsigned int nworkers; /* size of next 4 arrays */ + struct worker_args *worker_args; struct __cilkrts_worker **workers; /* dynamically-allocated array of deques, one per processor */ struct ReadyDeque *deques; @@ -91,11 +94,14 @@ struct global_state { worker_id *worker_to_index; cilk_mutex index_lock; - // Count of number of disengaged and deprived workers. Upper 32 bits count - // the disengaged workers. Lower 32 bits count the deprived workers. These + // Count of number of disengaged and sentinel workers. Upper 32 bits count + // the disengaged workers. Lower 32 bits count the sentinel workers. These // two counts are stored in a single word to make it easier to update both // counts atomically. 
- _Atomic uint64_t disengaged_deprived __attribute__((aligned(CILK_CACHE_LINE))); + _Atomic uint64_t disengaged_sentinel __attribute__((aligned(CILK_CACHE_LINE))); +#define GET_DISENGAGED(D) ((D) >> 32) +#define GET_SENTINEL(D) ((D) & 0xffffffff) +#define DISENGAGED_SENTINEL(A, B) (((uint64_t)(A) << 32) | (uint32_t)(B)) _Atomic uint32_t disengaged_thieves_futex __attribute__((aligned(CILK_CACHE_LINE))); @@ -106,11 +112,20 @@ struct global_state { struct reducer_id_manager *id_manager; /* null while Cilk is running */ + struct hyper_table *hyper_table; + struct global_sched_stats stats; }; -extern global_state *default_cilkrts; +CHEETAH_INTERNAL extern global_state *default_cilkrts; + +struct worker_args { + worker_id id; + global_state *g; +}; +CHEETAH_INTERNAL +__cilkrts_worker *__cilkrts_init_tls_worker(worker_id i, global_state *g); CHEETAH_INTERNAL void set_nworkers(global_state *g, unsigned int nworkers); CHEETAH_INTERNAL void set_force_reduce(global_state *g, unsigned int force_reduce); diff --git a/runtime/hyperobject_base.h b/runtime/hyperobject_base.h new file mode 100644 index 00000000..5b328088 --- /dev/null +++ b/runtime/hyperobject_base.h @@ -0,0 +1,34 @@ +#ifndef _HYPEROBJECT_BASE +#define _HYPEROBJECT_BASE + +#include +#include +#include +#include + +typedef uint32_t hyper_id_t; + +struct hyperobject_base; + +typedef struct hyperobject_base { + __cilk_identity_fn identity_fn; + __cilk_reduce_fn reduce_fn; + size_t view_size; // rounded to CACHE_LINE + hyper_id_t id_num; + int valid; + void *key; + /* 3 words left in cache line */ +} hyperobject_base; + +// This needs to be exported so cilksan can preempt it. +__attribute__((weak)) void *__cilkrts_hyper_alloc(size_t size); +// This needs to be exported so cilksan can preempt it. 
+__attribute__((weak)) void __cilkrts_hyper_dealloc(void *view, size_t size); +CHEETAH_INTERNAL +void cilkrts_hyper_register(hyperobject_base *hyper); +CHEETAH_INTERNAL +void cilkrts_hyper_unregister(hyperobject_base *hyper); +CHEETAH_INTERNAL +void *cilkrts_hyper_lookup(hyperobject_base *key); + +#endif /* _CILK_HYPEROBJECT_BASE */ diff --git a/runtime/hypertable.c b/runtime/hypertable.c new file mode 100644 index 00000000..783d65da --- /dev/null +++ b/runtime/hypertable.c @@ -0,0 +1,976 @@ +/* Open hash table with linear probing, mapping pointers to pointers. + Null keys and values are not allowed. Internally, EMPTY (null) + indicates an empty slot and DELETED a deleted entry. When an entry + is deleted an attempt is made to move another value into the vacated + slot to reduce chain lengths. When the total of chain lengths is too + large the table is rehashed. + + It is an error to delete a key that is not in the table. + + It is an error to insert a key that is already in the table. + (But a deleted key can be re-inserted with a different value.) + + Lookups are attempted without locks; this will not crash but may + give an incorrect result if the table changes during the lookup. + If the table changes, based on a modification count, the lookup + is retried with the lock held. + + In multithreaded environments readers can use a hyper_table_cache, + a one entry cache. (TODO: With lock-free reading this may not + be needed any more.) + + The table includes a count of the number of modifications to signal + that a previous successful lookup has become stale. This is + necessary to avoid the ABA problem. (TODO: Separate insert and + delete counters would allow hits to be invalidated by delete + and misses to be invalidated by insert, potentially halving the + number of cache misses.) + + TODO: Prove that if there are duplicate keys the first key is + paired with the correct value. 
*/ + +#include "hypertable.h" + +#include +#include +#include +#include +#include +#include /* for fls() */ + +#define CACHE_LINE 64 + +#ifndef HYPER_TABLE_ASSERT +#define HYPER_TABLE_ASSERT HYPER_TABLE_DEBUG +#endif + +/* For debugging only, to examine code generation for static functions. */ +#ifndef HYPER_TABLE_CODEGEN +#define HYPER_TABLE_CODEGEN 0 +#endif + +/* This should normally be set for performance. */ +#ifndef LOCK_FREE_LOOKUP +#define LOCK_FREE_LOOKUP 1 +#endif + +/* This should normally be set for performance. */ +#ifndef ENABLE_CACHE +#define ENABLE_CACHE 1 +#endif + +/* Emit a store-store barrier that prevents stores from moving + in either direction. atomic_signal_fence(memory_order_release) + is sufficient but slightly stronger than needed. Override it + on ARM to use a store-store barrier instead. + LLVM docs/Atomics.html explains: + "store-store fences are generally not exposed to IR + because they are extremely difficult to use correctly". */ +#ifdef __aarch64__ +#define MEMBAR_ST_ST { \ + atomic_signal_fence(memory_order_acquire); \ + asm ("dmb ishst" : : : "memory"); \ + } +#else +#define MEMBAR_ST_ST \ + atomic_thread_fence(memory_order_release) +#endif + +#define EMPTY 0 +/* For strict C compliance, because 1 might convert to a valid pointer, + define this to be the address of a file scope variable. */ +#define DELETED 1 + +/* Alignment isn't important without a fast atomic 128 bit write + (available on ARM from v8.4). Tell the compiler about alignment + anyway just in case it can do something with the information. */ +struct bucket { + uintptr_t key; /* EMPTY, DELETED, or a user-provided pointer. */ + void *value; +} __attribute__((aligned(2 * sizeof(void*)))); + +#define LOG2_MIN_BUCKETS 5 +#define LOG2_MAX_BUCKETS 14 /* inclusive */ + +#define BUCKET(TABLE, SIZE) \ + &(TABLE)->buckets[(SIZE) - LOG2_MIN_BUCKETS] + +/* An integer big enough to hold 2^LOG2_MAX_BUCKETS (inclusive), + for internal use only. The API uses size_t. 
Making it signed + allows reserving negative values for invalid indices. An + unsigned integer could hold an extra bit. There are small + differences in code generation for unsigned or 16 bit indices. */ + +#if 0 /* signed implementation */ +typedef int32_t index_t; +#define INVALID_INDEX ((index_t)-1) /* or (index_t)~(index_t)0 */ +#define IS_INVALID(INDEX) ((INDEX) < 0) /* or !!(index_t)~(INDEX) */ +#define IS_VALID(INDEX) ((INDEX) >=0) /* or !(index_t)~(INDEX) */ +#else /* unsigned implementation */ +typedef uint32_t index_t; +#define INVALID_INDEX ((index_t)~(index_t)0) +#define IS_INVALID(INDEX) __builtin_expect(((INDEX) >> (sizeof(index_t) * 8 - 1)), 0) +#define IS_VALID(INDEX) !IS_INVALID(INDEX) +#endif + +#define BUSY(GEN) __builtin_expect((GEN) & 1U, 0) + +#define ALLOC_FAILED(PTR) __builtin_expect(!(PTR), 0) +#define NO_BUCKET(PTR) __builtin_expect(!(PTR), 0) + +struct hyper_table { + /* A count of changes to the table, with the low bit meaning + the table is busy and readers should wait or acquire the + lock. The value increments once at the beginning and once + at the end of each modification. */ + unsigned long _Atomic gen; + + /* Log base 2 of capacity. This field is an index into buckets[] + after subtracting LOG2_MIN_BUCKETS. */ + int _Atomic log_capacity; + + /* Number of values in the table. */ + index_t entries; + + /* A measure of total chain length added since last rehash, used + to decide when another rehash is required. */ + index_t waste; + + /* For statistics. */ + unsigned int rehashes; + + /* The cost of being lock-free most of the time is having to + keep around old storage. Each array element is null or a + pointer to an array of size 2^(index + LOG2_MIN_BUCKETS). */ + struct bucket *_Atomic buckets[LOG2_MAX_BUCKETS + 1 - LOG2_MIN_BUCKETS]; + + /* Number of child caches. Currently unused. */ + unsigned int _Atomic caches; + + /* The lock serializes additions, deletions, and rehashes. 
The only + field that can be modified without holding the lock is caches. + Lookups only take a lock if lock-free lookup detects a race. */ + pthread_mutex_t lock; + +} __attribute__((aligned(CACHE_LINE))); + +static enum hyper_table_error +hyper_table_insert_locked(struct hyper_table *, const void *, void *) + __attribute__((nonnull)); +static struct bucket *hyper_table_lookup_locked(struct hyper_table *, + const void *) + __attribute__((nonnull)); +static void *hyper_table_remove_locked(struct hyper_table *, const void *_p) + __attribute__((nonnull)); + +#if defined(_ISOC11_SOURCE) || __FreeBSD__ >= 10 || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101500 +#define hyper_aligned_alloc(A, S) aligned_alloc(A, S) +#else +static void *hyper_aligned_alloc(size_t alignment, size_t size) +{ + void *ptr; + if (posix_memalign(&ptr, alignment, size) == 0) + return ptr; + return 0; +} +#endif + +static void lock_table(struct hyper_table *table) +{ + int error = pthread_mutex_lock(&table->lock); + assert(!error); +} + +static void unlock_table(struct hyper_table *table) +{ +#if HYPER_TABLE_ASSERT + assert(!BUSY(table->gen)); +#endif + int error = pthread_mutex_unlock(&table->lock); + assert(!error); +} + +#if HYPER_TABLE_ASSERT +__attribute__((noinline)) +static enum hyper_table_error fail(struct hyper_table *table, + enum hyper_table_error code) +{ + fprintf(stdout, "Operation failure code %d\n", (int)code); + hyper_table_dump(stdout, table); + fflush(stdout); + return code; +} +#else +#define fail(table, code) (code) +#endif + +/* Return a hash function where the low bits are hopefully random. + The caller is responsible for reducing the hash to the desired range. */ +static index_t calc_hash(uintptr_t key_in) +{ + uintptr_t key = key_in; + /* TODO: Improve this. Knuth likes the golden ratio for hashing. */ + /* Mac on x86 has addresses like 0x0000602000000210 with lots + of consecutive zero bits between groups of nonzero. 
*/ + if (sizeof key > 4) + key += __builtin_rotateleft64(key, 21); + else + key += __builtin_rotateleft32(key, 21); + if (sizeof key > 4) + return (key * 0x595a5b5c5d5e5f53) >> 30; + else + return (key * 0x5a5a5a5b) >> 10; +} + +struct hyper_table *hyper_table_create(size_t capacity_req) +{ + int start_size = LOG2_MIN_BUCKETS; + if (3 * capacity_req >= (size_t)1U << (LOG2_MAX_BUCKETS + 1)) { + start_size = LOG2_MAX_BUCKETS; + } else if (capacity_req <= ((size_t)3U << (LOG2_MIN_BUCKETS - 1))) { + start_size = LOG2_MIN_BUCKETS; + } else { + /* Multiply by 3/2 for rounding. */ + long adjusted = capacity_req * 3; /* / 2 implied by -2 below */ +#if defined __linux__ || defined __APPLE__ /* No inlined flsl. */ + start_size = 8 * sizeof(long) - 2 - __builtin_clzl(adjusted); +#else + start_size = flsl(adjusted) - 2; +#endif + assert(start_size >= LOG2_MIN_BUCKETS && start_size < LOG2_MAX_BUCKETS); + } + + size_t capacity = (size_t)1 << start_size; + /* This needs to be a multiple of CACHE_LINE or aligned_alloc will fail. */ + size_t bucket_bytes = capacity * sizeof(struct bucket); + struct bucket *buckets = hyper_aligned_alloc(CACHE_LINE, bucket_bytes); + if (ALLOC_FAILED(buckets)) + return 0; + struct hyper_table *table = + hyper_aligned_alloc(CACHE_LINE, sizeof(struct hyper_table)); + if (ALLOC_FAILED(table)) + goto cleanup; + memset(table, 0, sizeof(struct hyper_table)); + if (pthread_mutex_init(&table->lock, 0)) + goto cleanup; + memset(buckets, 0, bucket_bytes); + atomic_store_explicit(&table->log_capacity, start_size, memory_order_relaxed); + /* Integer fields were set to zero by memset above. Also assume memset + nulled pointers, which is not strictly required by the C standard. 
*/ + atomic_store_explicit(BUCKET(table, start_size), buckets, + memory_order_relaxed); + atomic_store_explicit(&table->gen, 2, memory_order_release); + return table; + cleanup: + free(buckets); + free(table); + return 0; +} + +void hyper_table_destroy(struct hyper_table *table) +{ + pthread_mutex_destroy(&table->lock); + for (unsigned i = 0; i < sizeof table->buckets / sizeof table->buckets[0]; ++i) { + struct bucket *b = + atomic_load_explicit(&table->buckets[i], memory_order_relaxed); + atomic_store_explicit(&table->buckets[i], 0, memory_order_relaxed); + free(b); + } + free(table); +} + +/* Called by remove and lookup to find a bucket that holds the given key, + stopping the search when an empty bucket is found. The caller must + handle a null value. */ +static struct bucket * +find_bucket(struct bucket *buckets, int log_capacity, + index_t hash, uintptr_t key) +{ + index_t capacity = (index_t)1 << log_capacity; + index_t mask = capacity - 1U; + hash &= mask; + index_t index = hash; + do { + struct bucket *bucket = &buckets[index]; + /* With reasonable load factors the first bucket will match. */ + if (__builtin_expect(bucket->key == key, 1)) + return bucket; + /* Predicted false for remove and true for lookup. */ + if (bucket->key == EMPTY) + return 0; + index = (index + 1) & mask; + } while (index != hash); + return 0; +} + +/* Set the busy flag in the low bit of table->gen to inform readers + that the table is being modified. Return the old generation number + before setting the flag. */ +static unsigned long mark_busy(struct hyper_table *table) +{ + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); +#if HYPER_TABLE_ASSERT + assert(!BUSY(gen)); +#endif + /* The store below is meant to act like a store with acquire semantics + (which does not exist in isolation). The store-store barrier ensures + that the set of the busy bit is visible before any changes to data. 
*/ + atomic_store_explicit(&table->gen, gen + 1, memory_order_relaxed); + MEMBAR_ST_ST; + return gen; +} + +static void mark_free(struct hyper_table *table, unsigned long old_gen) +{ + /* Readers will load table->gen with acquire semantics. If the value + is unchanged from the start of the read operation then the table + has not been modified. */ + atomic_store_explicit(&table->gen, old_gen + 2, memory_order_release); +} + +/* Find a bucket that is empty or deleted. This function is called to + insert a key that is known not to be in the table. If insert instead + meant modify the function would need to continue past deleted buckets + to look for a matching key. */ +static struct bucket * +find_insert_point(struct bucket *buckets, index_t capacity, + index_t start, index_t *waste) +{ + index_t mask = capacity - 1U; + index_t index = start; + index_t waste0 = *waste; + for (index_t i = 0; i < capacity; ++i) { + struct bucket *bucket = &buckets[index++ & mask]; + uintptr_t key = bucket->key; + /* With reasonable load factors the first bucket will be available. */ + if (__builtin_expect(key == EMPTY || key == DELETED, 1)) { + *waste = waste0 + i; + return bucket; + } + } + return 0; +} + +/* Copy all valid entries to a new bucket list. Assert that the + number of entries copied is the same number thought to be in + the table. Return a measure of wasted space. */ +static void copy(struct bucket *restrict to_ptr, index_t to_size, + const struct bucket *restrict from_ptr, index_t from_size, + index_t expected) +{ + index_t waste = 0; + index_t new_entries = 0; + for (index_t from = 0; from < from_size; ++from) { + const struct bucket *b = &from_ptr[from]; + uintptr_t key = b->key; + if (key == EMPTY || key == DELETED) + continue; + index_t hash = calc_hash(key); + /* In this function find_insert_point should never fail because the + new table is guaranteed to be big enough. 
*/ + *find_insert_point(to_ptr, to_size, hash, &waste) = *b; + ++new_entries; + } + assert(new_entries == expected); +} + +/* Effectively, erase the table and re-insert all entries in an attempt + to reduce chain lengths. */ +static void table_rehash(struct hyper_table *table) +{ + /* The lock is held so loads can use relaxed order. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *old_buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + assert(log_capacity <= LOG2_MAX_BUCKETS); + size_t capacity = (size_t)1 << log_capacity; + + struct bucket *tmp = + hyper_aligned_alloc(CACHE_LINE, capacity * sizeof(struct bucket)); + if (ALLOC_FAILED(tmp)) { + table->waste = 0; /* avoid repeated futile rehash attempts */ + return; + } + memset(tmp, 0, capacity * sizeof(struct bucket)); + copy(tmp, capacity, old_buckets, capacity, table->entries); + unsigned long old_gen = mark_busy(table); + memcpy(old_buckets, tmp, capacity * sizeof(struct bucket)); + table->waste = 0; + ++table->rehashes; + mark_free(table, old_gen); /* includes release fence */ + free(tmp); + return; +} + +/* Return null on failure, otherwise the new bucket list. */ +static struct bucket * +table_grow(struct hyper_table *table) +{ + /* The lock is held so loads can use relaxed order. */ + int old_log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *old_buckets = + atomic_load_explicit(BUCKET(table, old_log_capacity), memory_order_relaxed); + assert(old_log_capacity < LOG2_MAX_BUCKETS); + int new_log_capacity = old_log_capacity + 1; + size_t old_capacity = (size_t)1 << old_log_capacity; + size_t new_capacity = (size_t)1 << new_log_capacity; + + assert(new_log_capacity > old_log_capacity); + + /* Reuse an old array if there is one. This could happen + when shrinking is implemented. 
*/ + struct bucket *new_buckets = + atomic_load_explicit(BUCKET(table, new_log_capacity), memory_order_relaxed); + if (!new_buckets) { + new_buckets = + hyper_aligned_alloc(CACHE_LINE, + new_capacity * sizeof(struct bucket)); + if (ALLOC_FAILED(new_buckets)) + return 0; + memset(new_buckets, 0, new_capacity * sizeof(struct bucket)); + /* Publish the new pointer after the memory is cleared. */ + atomic_store_explicit(BUCKET(table, new_log_capacity), + new_buckets, memory_order_release); + } + + copy(new_buckets, new_capacity, old_buckets, old_capacity, table->entries); + + table->waste = 0; + ++table->rehashes; + + /* First, mark the table busy so no readers come in between the + next two stores. */ + unsigned long old_gen = mark_busy(table); + + /* Force all writes to complete before the bucket pointer goes live. */ + atomic_store_explicit(&table->log_capacity, new_log_capacity, + memory_order_release); + + mark_free(table, old_gen); + + return new_buckets; +} + +enum hyper_table_error +hyper_table_insert(struct hyper_table *table, const void *key, void *value) +{ + if (__builtin_expect(!key, 0) || __builtin_expect(!value, 0)) + return HYPER_NULL; + lock_table(table); + enum hyper_table_error error = hyper_table_insert_locked(table, key, value); + unlock_table(table); + return error; +} + +/* Unlike lookup, this must be called with the lock held. */ +static enum hyper_table_error +hyper_table_insert_locked(struct hyper_table *restrict table, const void *key_p, + void *restrict value) +{ + /* Lock is held, relaxed order is fine. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + index_t capacity = (index_t)1 << log_capacity; + /* Keep the load factor .5 or less if possible. If chain lengths are + growing long, which should be rare, rehash in place. 
*/ + if (log_capacity < LOG2_MAX_BUCKETS && + __builtin_expect(table->entries > capacity / 2, 0)) { + capacity *= 2; + buckets = table_grow(table); + /* Strictly speaking this error is recoverable, but inability to + allocate a new hash table indicates memory is about to run out. + Also, inability to recreate the old hash table is very unlikely. */ + if (ALLOC_FAILED(buckets)) + return fail(table, HYPER_NOMEM); + } else if (__builtin_expect(table->waste * 3UL > capacity, 0)) { + table_rehash(table); /* bucket pointer unchanged */ + } + uintptr_t key = (uintptr_t)key_p; + index_t hash = calc_hash(key); + index_t waste = table->waste; + struct bucket *bucket = find_insert_point(buckets, capacity, hash, &waste); + if (NO_BUCKET(bucket)) + return fail(table, HYPER_FULL); + index_t entries = table->entries; + /* Up to now lookups can proceed in parallel with this insertion, + but filling the bucket is not atomic. */ + unsigned long old_gen = mark_busy(table); + /* These stores (before the release in mark_free) can happen in any order. */ + bucket->key = key; + bucket->value = value; + table->entries = entries + 1; + table->waste = waste; + mark_free(table, old_gen); + return HYPER_OK; +} + +void *hyper_table_remove(struct hyper_table *table, const void *key) +{ + lock_table(table); + void *value = hyper_table_remove_locked(table, key); + unlock_table(table); + return value; +} + +void *hyper_table_remove_locked(struct hyper_table *table, const void *key_p) +{ + /* Lock is held, relaxed order is fine for both loads. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + uintptr_t key = (uintptr_t)key_p; + index_t hash = calc_hash(key); + struct bucket *bucket = find_bucket(buckets, log_capacity, hash, key); +#if HYPER_TABLE_ASSERT + /* In anticipated uses of this table, the entry must exist or outside + bookkeeping has gone wrong. 
*/ + assert(!NO_BUCKET(bucket)); +#endif + if (NO_BUCKET(bucket)) + return 0; + + index_t index = bucket - buckets; + index_t mask = ((index_t)1 << log_capacity) - 1; + index_t this_target = hash & mask; + index_t entries = table->entries; + index_t waste = table->waste; + if (this_target != index && waste > 0) { + --waste; + } + unsigned long old_gen = mark_busy(table); + + table->entries = entries - 1; + + void *value = bucket->value; + bucket->key = DELETED; + bucket->value = 0; + /* While the lock is held do some cleanup in the vicinity of the + deleted entry: + 1. If the next bucket is empty mark this one empty, and also + the previous bucket if that bucket is deleted. + 2. If the next bucket wants to be earlier in the chain, move it up + to the newly vacated slot. */ + index_t prev = (index - 1) & mask; + index_t next = (index + 1) & mask; + uintptr_t next_key = buckets[next].key; + if (next_key == EMPTY) { + buckets[index].key = EMPTY; /* deleted -> empty */ + if (buckets[prev].key == DELETED) + buckets[prev].key = EMPTY; /* deleted -> empty */ + goto done; + } + ++waste; /* a new deleted bucket has been created */ + if (buckets[next].key == DELETED) + goto done; + /* Where does the next bucket want to be? */ + index_t next_target = calc_hash(next_key) & mask; + /* If the next bucket wants to be earlier, advance it into this slot. + A simple equality test is good enough here. If the bucket doesn't + want to be where it is, one place earlier is better. */ + if (next_target != next) { + buckets[index] = buckets[next]; + buckets[next].key = DELETED; /* full -> deleted */ + } + done: + table->waste = waste; + mark_free(table, old_gen); + return value; +} + +void *hyper_table_lookup(struct hyper_table *table, const void *key) +{ +#if LOCK_FREE_LOOKUP + unsigned long gen1 = + atomic_load_explicit(&table->gen, memory_order_acquire); + /* See hyper_table_cache_lookup for comments. 
*/ + if (!BUSY(gen1)) { + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), + memory_order_relaxed); + uintptr_t key_i = (uintptr_t)key; + struct bucket *bucket = + find_bucket(buckets, log_capacity, calc_hash(key_i), key_i); + if (!NO_BUCKET(bucket)) { + void *result = bucket->value; + atomic_thread_fence(memory_order_acquire); + unsigned long gen2 = + atomic_load_explicit(&table->gen, memory_order_relaxed); + if (__builtin_expect(gen1 == gen2, 1)) { + return result; + } + } + } +#endif + + lock_table(table); + struct bucket *bucket = hyper_table_lookup_locked(table, key); + void *value = bucket ? bucket->value : 0; + unlock_table(table); + return value; +} + +static struct bucket * +hyper_table_lookup_locked(struct hyper_table *table, const void *key_p) +{ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + uintptr_t key = (uintptr_t)key_p; + return find_bucket(buckets, log_capacity, calc_hash(key), key); +} + +void hyper_table_iter(struct hyper_table *table, + void (*fn)(void *, const void *, void *), + void *arg) +{ + int error = pthread_mutex_lock(&table->lock); + assert(!error); + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + index_t size = table->entries; + struct bucket tmp[size]; + index_t out = 0; + index_t capacity = (index_t)1 << log_capacity; + for (index_t i = 0; i < capacity; ++i) { + if (buckets[i].key != EMPTY && buckets[i].key != DELETED) + tmp[out++] = buckets[i]; + } + assert(out == table->entries); + pthread_mutex_unlock(&table->lock); + for (index_t i = 0; i < size; ++i) + fn(arg, (const void *)tmp[i].key, tmp[i].value); +} + +#if HYPER_TABLE_DEBUG 
+void hyper_table_dump(FILE *out, const struct hyper_table *table) +{ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_acquire); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_consume); + index_t capacity = (index_t)1 << log_capacity; + fprintf(out, + "Table %p size %lu capacity %lu waste %lu rehash %u gen %lu\n", + table, (unsigned long)table->entries, + (unsigned long)capacity, + (unsigned long)table->waste, + table->rehashes, table->gen); + for (index_t i = 0; i < capacity; ++i) { + if (buckets[i].key == EMPTY) + continue; + if (buckets[i].key == DELETED) { + fprintf(out, "[%5u] = link\n", (unsigned)i); + continue; + } + index_t target = calc_hash((uintptr_t)buckets[i].key) & (capacity - 1); + fprintf(out, "[%5u]: %p -> %p", (unsigned)i, + (void *)buckets[i].key, buckets[i].value); + if (target != i) + fprintf(out, " (target %3lu)", (unsigned long)target); + fputc('\n', out); + } + fflush(out); +} +#endif + +/* Once global_table is non-null its value will not change. + + In order to ensure that readers of the table see the + initialization of the mutex, the value is published + with release semantics and loaded with consume semantics. + Consume order tells the compiler to tell the processor + not to allow any loads based off of [table] to be moved + before [table] is loaded. In practice (1) most processors + do this automatically, (2) the stupid compiler promotes + consume to acquire anyway. + + This ordering has no effect on x86 code generation: atomic + compare and exchange is always a full barrier, and causality + prevents any accesses based off of [global_table] from being + moved before the load. + + TODO: Performance should be tested on ARM. */ +// FIXME: Memory leak of *global_table. 
+static struct hyper_table *_Atomic global_table; + +struct hyper_table *hyper_table_get_or_create(size_t capacity) +{ + struct hyper_table *table = + atomic_load_explicit(&global_table, memory_order_consume); + if (!ALLOC_FAILED(table)) + return table; + table = hyper_table_create(capacity); + if (ALLOC_FAILED(table)) + return 0; + /* If [global_table] is still null, store [table] into [global_table]. + Otherwise, copy [global_table] into [tmp]. */ + struct hyper_table *tmp = 0; + if (__c11_atomic_compare_exchange_strong(&global_table, &tmp, table, + memory_order_release, + memory_order_consume)) + return table; + hyper_table_destroy(table); + return tmp; +} + +/* A simple two entry cache. In order to prevent an ABA problem a + cache lookup reads a word from the parent table to check whether + the table has changed. If a lookup races with a entry creation + or deletion the result is undefined. + + The structure needs to fill a cache line to prevent false sharing. */ +struct hyper_table_cache { + struct hyper_table *parent; +#if ENABLE_CACHE + unsigned long gen; + struct bucket entry[2]; + unsigned int count; +#endif +} __attribute__((aligned(CACHE_LINE))); + +static void hyper_table_cache_invalidate(struct hyper_table_cache *c) +{ +#if ENABLE_CACHE + c->entry[0].key = 0; + c->entry[0].value = 0; + c->entry[1].key = 0; + c->entry[1].value = 0; + c->count = 0; /* any value will do */ + c->gen = 1; /* 1 is never valid because the busy bit is set */ +#endif +} + +struct hyper_table_cache *hyper_table_cache_create(struct hyper_table *parent) +{ + struct hyper_table_cache *c = + hyper_aligned_alloc(__alignof__(struct hyper_table_cache), + sizeof(struct hyper_table_cache)); + if (ALLOC_FAILED(c)) + return 0; + c->parent = parent; + hyper_table_cache_invalidate(c); + atomic_fetch_add_explicit(&parent->caches, 1, memory_order_acquire); + return c; +} + +void hyper_table_cache_destroy(struct hyper_table_cache *c) +{ + struct hyper_table *parent = c->parent; + c->parent = 0; 
+ hyper_table_cache_invalidate(c); + atomic_fetch_sub_explicit(&parent->caches, 1, memory_order_release); + free(c); +} + +static void * +hyper_table_cache_lookup_slow(struct hyper_table_cache *cache, + struct hyper_table *table, + const void *key_p) +{ + lock_table(table); + struct bucket *bucket = hyper_table_lookup_locked(table, key_p); + /* Relaxed order is fine with the lock held. */ + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); + void *value = 0; + if (bucket) { +#if ENABLE_CACHE + unsigned int e = 1U & ++cache->count; + cache->entry[e] = *bucket; + cache->gen = gen; +#endif + value = bucket->value; + } + unlock_table(table); + return value; +} + +void *hyper_table_cache_lookup(struct hyper_table_cache *cache, const void *key_p) +{ + if (__builtin_expect(!key_p, 0)) + return 0; + + /* On memory ordering: + + Table writers guarantee that there are no writes to the table + between a write of table->gen with low bit clear and the next + write to table->gen. Writes to table->gen with low bit clear + have release semantics. + + The first load of table->gen can find the low bit set or clear. + + If the bit is clear, the acquire pairs with the store-release of + the last write to table->gen to ensure the table is consistent. + + If the bit is set, a lock is taken to ensure consistency with + writers. Writers also take the lock. + + Following a load of table->gen with low bit clear, a second + load is issued at the end of the lookup fast path. If it finds + a different value, a lock is taken as above. + + What remains is to ensure that if both loads of table->gen + return the same value then values read in between them are + a consistent view of the table with no writes to it. + + An acquire fence before the second load pairs with the store-release + of the new value of table->gen. If the load of table->gen does not + see the new value, then none of the earlier loads saw stores that + preceded the write to table->gen. 
*/ + + struct hyper_table *table = cache->parent; + + /* The cache hit case can use a relaxed load because it makes + no other accesses to the main table. The cache miss flow + depends on acquire semantics. */ + unsigned long gen1 = + atomic_load_explicit(&table->gen, memory_order_acquire); + +#if ENABLE_CACHE + if (__builtin_expect(cache->gen == gen1, 1)) { + uintptr_t key0 = cache->entry[0].key; + uintptr_t key1 = cache->entry[1].key; + if (key0 == (uintptr_t)key_p) + return cache->entry[0].value; + if (key1 == (uintptr_t)key_p) + return cache->entry[1].value; + } else { + hyper_table_cache_invalidate(cache); + } +#endif + +#if LOCK_FREE_LOOKUP + /* Attempt lock-free lookup first. */ + if (!BUSY(gen1)) { + /* Arguably the load of log_capacity should have memory_order_consume, but + 1: That only matters for a few unsupported DEC ALPHA chips + (where stores issued in order remotely may appear out of order locally). + 2: Consume is promoted to acquire, which has a cost. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), + memory_order_relaxed); + uintptr_t key = (uintptr_t)key_p; + struct bucket *bucket = + find_bucket(buckets, log_capacity, calc_hash(key), key); + if (!NO_BUCKET(bucket)) { +#if ENABLE_CACHE + /* Optimistically save the value in the cache. The cache invalidate + call below will clean up if the value is incorrect. */ + unsigned int e = 1U & ++cache->count; + cache->entry[e] = *bucket; + cache->gen = gen1; +#endif + void *result = bucket->value; + /* See comment above on memory ordering. 
*/ + atomic_thread_fence(memory_order_acquire); + unsigned long gen2 = + atomic_load_explicit(&table->gen, memory_order_relaxed); + if (__builtin_expect(gen1 == gen2, 1)) { + return result; + } + } + } +#endif + + return hyper_table_cache_lookup_slow(cache, table, key_p); +} + +void *hyper_table_cache_remove(struct hyper_table_cache *cache, const void *key) +{ + if (__builtin_expect(!key, 0)) + return 0; + struct hyper_table *table = cache->parent; + lock_table(table); + hyper_table_cache_invalidate(cache); + void *value = hyper_table_remove_locked(table, key); + unlock_table(table); + return value; +} + +enum hyper_table_error +hyper_table_cache_insert(struct hyper_table_cache *cache, const void *key, + void *value) +{ + if (__builtin_expect(!key, 0) || __builtin_expect(!value, 0)) + return HYPER_NULL; + struct hyper_table *table = cache->parent; + lock_table(table); + enum hyper_table_error error = + hyper_table_insert_locked(table, key, value); + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); + unlock_table(table); +#if ENABLE_CACHE + /* Reset the cache to hold only the newly added entry in slot 0, + with slot 1 being the next used. 
*/ + cache->count = 0; + cache->gen = gen; + cache->entry[0].key = (uintptr_t)key; + cache->entry[0].value = value; + cache->entry[1].key = 0; + cache->entry[1].value = 0; +#endif + return error; +} + +#if HYPER_TABLE_CODEGEN +void copy_debug(struct bucket *restrict to_ptr, index_t to_size, + const struct bucket *restrict from_ptr, index_t from_size, + index_t expected) +{ + copy(to_ptr, to_size, from_ptr, from_size, expected); +} + +struct bucket *find_bucket_debug(struct bucket *buckets, int log_capacity, + index_t hash, uintptr_t key) +{ + return find_bucket(buckets, log_capacity, hash, key); +} + +unsigned long mark_busy_debug(struct hyper_table *table) +{ + return mark_busy(table); +} +#endif + +const char *hyper_table_error_string(enum hyper_table_error code) +{ + switch (code) { + case HYPER_OK: return "no error"; + case HYPER_NOT_FOUND: return "key not found"; + case HYPER_NULL: return "null key"; + case HYPER_NOMEM: return "out of memory"; + case HYPER_FULL: return "table full"; + default: return "unknown error"; + } +} + +size_t hyper_table_size(const struct hyper_table *table) +{ + return table->entries; +} + +size_t hyper_table_index(const struct hyper_table *table, const void *key) +{ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + return calc_hash((uintptr_t)key) & (((index_t)1 << log_capacity) - 1); +} + +size_t hyper_table_capacity(const struct hyper_table *table) +{ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + return (index_t)1 << log_capacity; +} diff --git a/runtime/hypertable.h b/runtime/hypertable.h new file mode 100644 index 00000000..d5b379d4 --- /dev/null +++ b/runtime/hypertable.h @@ -0,0 +1,108 @@ +#ifdef HYPER_TABLE_DEBUG +#include +#endif +#include + +#ifdef HYPER_TABLE_HIDDEN +#define HYPER_TABLE_HIDE __attribute__((visibility("hidden"), nothrow)) +#define HYPER_TABLE_OP __attribute__((visibility("hidden"), nothrow, nonnull(1))) +#define 
HYPER_TABLE_CHECK __attribute__((visibility("hidden"), nothrow, warn_unused_result, nonnull(1))) +#define HYPER_TABLE_ALLOC __attribute__((visibility("hidden"), nothrow, malloc)) +#else +#define HYPER_TABLE_HIDE __attribute__((nothrow)) +#define HYPER_TABLE_OP __attribute__((nothrow, nonnull(1))) +#define HYPER_TABLE_CHECK __attribute__((warn_unused_result, nothrow, nonnull(1))) +#define HYPER_TABLE_ALLOC __attribute__((malloc, nothrow)) +#endif + +enum hyper_table_error { + HYPER_OK, + HYPER_NOT_FOUND, + HYPER_FULL, + HYPER_NULL, /* user error: null key */ + HYPER_NOMEM, /* unable to allocate memory */ +}; + +struct hyper_table; + +/* Get the unique global hyperobject table, creating it if it does + not already exist. */ +HYPER_TABLE_HIDE +struct hyper_table *hyper_table_get_or_create(size_t capacity); + +/* Create a new hyperobject table. */ +HYPER_TABLE_ALLOC +struct hyper_table *hyper_table_create(size_t capacity); + +/* Destroy a hyperobject table created by hyper_table_create. */ +HYPER_TABLE_OP +void hyper_table_destroy(struct hyper_table *); + +/* Insert a new entry. The key must not be in the table already. */ +HYPER_TABLE_CHECK +enum hyper_table_error +hyper_table_insert(struct hyper_table *, const void *key, void *value); + +/* Remove a key and return the old value. The key must be in the table. */ +HYPER_TABLE_OP +void *hyper_table_remove(struct hyper_table *, const void *key); + +/* Return the value for a key, or null if it is not present. */ +HYPER_TABLE_OP +void *hyper_table_lookup(struct hyper_table *, const void *key); + +/* Return the number of keys in the table. */ +HYPER_TABLE_OP +size_t hyper_table_size(const struct hyper_table *); + +/* Apply a function to every table entry. */ +HYPER_TABLE_OP +void hyper_table_iter(struct hyper_table *, + void (*fn)(void *, const void *, void *), + void *); + +/* Return the current bucket list length, which will not be less + than hyper_table_size(). This is intended for testing. 
*/ +HYPER_TABLE_OP +size_t hyper_table_capacity(const struct hyper_table *); + +/* Return the index where a key belongs. The value will be less + than hyper_table_capacity(). This is intended for testing. */ +HYPER_TABLE_OP +size_t hyper_table_index(const struct hyper_table *, const void *); + +#if HYPER_TABLE_DEBUG +/* Print a text representation of the table. */ +HYPER_TABLE_HIDE +void hyper_table_dump(FILE *out, const struct hyper_table *table); +#endif + +struct hyper_table_cache; + +/* Create, destroy, insert, lookup, and remove work like the + functions above except they use a cache. */ + +HYPER_TABLE_CHECK +struct hyper_table_cache *hyper_table_cache_create(struct hyper_table *); + +HYPER_TABLE_OP +void hyper_table_cache_destroy(struct hyper_table_cache *); + +HYPER_TABLE_CHECK +enum hyper_table_error +hyper_table_cache_insert(struct hyper_table_cache *, const void *key, + void *value); + +HYPER_TABLE_OP +void *hyper_table_cache_lookup(struct hyper_table_cache *, const void *key); + +HYPER_TABLE_OP +void *hyper_table_cache_remove(struct hyper_table_cache *, const void *key); + +HYPER_TABLE_HIDE +const char *hyper_table_error_string(enum hyper_table_error error) + __attribute__((returns_nonnull)); + +#undef HYPER_TABLE_HIDE +#undef HYPER_TABLE_CHECK +#undef HYPER_TABLE_ALLOC diff --git a/runtime/init.c b/runtime/init.c index a07f3ad7..f76c5e7f 100644 --- a/runtime/init.c +++ b/runtime/init.c @@ -19,9 +19,11 @@ #endif #include +#include "cilk-internal.h" #include "debug.h" #include "fiber.h" #include "global.h" +#include "hypertable.h" #include "init.h" #include "local.h" #include "readydeque.h" @@ -31,31 +33,35 @@ #include "reducer_impl.h" -extern __thread bool is_boss_thread; - -#ifdef __FreeBSD__ +#if defined __FreeBSD__ && __FreeBSD__ < 13 typedef cpuset_t cpu_set_t; #endif -static local_state *worker_local_init(global_state *g) { - local_state *l = (local_state *)calloc(1, sizeof(local_state)); +static local_state *worker_local_init(local_state *l, 
global_state *g) { l->shadow_stack = (__cilkrts_stack_frame **)calloc( g->options.deqdepth, sizeof(struct __cilkrts_stack_frame *)); for (int i = 0; i < JMPBUF_SIZE; i++) { l->rts_ctx[i] = NULL; } + l->hyper_table = + g->hyper_table ? hyper_table_cache_create(g->hyper_table) : NULL; l->fiber_to_free = NULL; + l->ext_fiber_to_free = NULL; l->state = WORKER_IDLE; - l->lock_wait = false; l->provably_good_steal = false; l->rand_next = 0; /* will be reset in scheduler loop */ - l->index_to_worker = - (worker_id *)calloc(g->options.nproc, sizeof(worker_id)); cilk_sched_stats_init(&(l->stats)); return l; } +static void worker_local_destroy(local_state *l, global_state *g) { + if (l->hyper_table) { + hyper_table_cache_destroy(l->hyper_table); + l->hyper_table = NULL; + } +} + static void deques_init(global_state *g) { cilkrts_alert(BOOT, NULL, "(deques_init) Initializing deques"); for (unsigned int i = 0; i < g->options.nproc; i++) { @@ -69,30 +75,49 @@ static void deques_init(global_state *g) { static void workers_init(global_state *g) { cilkrts_alert(BOOT, NULL, "(workers_init) Initializing workers"); for (unsigned int i = 0; i < g->options.nproc; i++) { - cilkrts_alert(BOOT, NULL, "(workers_init) Initializing worker %u", i); - __cilkrts_worker *w = (__cilkrts_worker *)cilk_aligned_alloc( - __alignof__(__cilkrts_worker), sizeof(__cilkrts_worker)); - w->self = i; - w->g = g; - w->l = worker_local_init(g); - - w->ltq_limit = w->l->shadow_stack + g->options.deqdepth; - g->workers[i] = w; - __cilkrts_stack_frame **init = w->l->shadow_stack + 1; - atomic_store_explicit(&w->tail, init, memory_order_relaxed); - atomic_store_explicit(&w->head, init, memory_order_relaxed); - atomic_store_explicit(&w->exc, init, memory_order_relaxed); - w->current_stack_frame = NULL; - w->reducer_map = NULL; - // initialize internal malloc first - cilk_internal_malloc_per_worker_init(w); + if (i == 0) { + // Initialize worker 0, so we always have a worker structure to fall + // back on. 
+ __cilkrts_init_tls_worker(0, g); + } // Initialize index-to-worker map entry for this worker. + g->worker_args[i].id = i; + g->worker_args[i].g = g; g->index_to_worker[i] = i; g->worker_to_index[i] = i; } } +__cilkrts_worker *__cilkrts_init_tls_worker(worker_id i, global_state *g) { + cilkrts_alert(BOOT, NULL, "(workers_init) Initializing worker %u", i); + size_t alignment = 2 * __alignof__(__cilkrts_worker); + void *mem = cilk_aligned_alloc( + alignment, round_size_to_alignment(alignment, sizeof(__cilkrts_worker) + + sizeof(local_state))); + __cilkrts_worker *w = (__cilkrts_worker *)mem; + w->self = i; + w->extension = NULL; + w->ext_stack = NULL; + w->g = g; + w->l = worker_local_init(mem + sizeof(__cilkrts_worker), g); + + w->ltq_limit = w->l->shadow_stack + g->options.deqdepth; + g->workers[i] = w; + __cilkrts_stack_frame **init = w->l->shadow_stack + 1; + atomic_store_explicit(&w->tail, init, memory_order_relaxed); + atomic_store_explicit(&w->head, init, memory_order_relaxed); + atomic_store_explicit(&w->exc, init, memory_order_relaxed); + w->current_stack_frame = NULL; + w->reducer_map = NULL; + // initialize internal malloc first + cilk_internal_malloc_per_worker_init(w); + // zero-initialize the worker's fiber pool. 
+ cilk_fiber_pool_per_worker_zero_init(w); + + return w; +} + #ifdef CPU_SETSIZE static void move_bit(int cpu, cpu_set_t *to, cpu_set_t *from) { if (CPU_ISSET(cpu, from)) { @@ -185,7 +210,7 @@ static void threads_init(global_state *g) { ; for (int w = worker_start; w < n_threads; w++) { int status = pthread_create(&g->threads[w], NULL, scheduler_thread_proc, - g->workers[w]); + &g->worker_args[w]); if (status != 0) cilkrts_bug(NULL, "Cilk: thread creation (%u) failed: %s", w, @@ -225,11 +250,11 @@ global_state *__cilkrts_startup(int argc, char *argv[]) { cilkrts_alert(BOOT, NULL, "(__cilkrts_startup) argc %d", argc); global_state *g = global_state_init(argc, argv); reducers_init(g); - __cilkrts_init_tls_variables(); + /* __cilkrts_init_tls_variables(); */ workers_init(g); deques_init(g); CILK_ASSERT_G(0 == g->exiting_worker); - reducers_import(g, g->workers[g->exiting_worker]); + reducers_import(g, g->workers[0]); // Create the root closure and a fiber to go with it. Use worker 0 to // allocate the closure and fiber. @@ -315,14 +340,14 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { // function arguments and local variables in this function. Get // fresh copies of these arguments from the runtime's global // state. 
- global_state *g = tls_worker->g; + global_state *g = __cilkrts_tls_worker->g; __cilkrts_stack_frame *sf = g->root_closure->frame; CILK_BOSS_START_TIMING(g); #if !BOSS_THIEF - worker_id self = tls_worker->self; + worker_id self = __cilkrts_tls_worker->self; #endif - tls_worker = NULL; + __cilkrts_tls_worker = NULL; #if !BOSS_THIEF // Wake up the worker the boss was impersonating, to let it take @@ -334,8 +359,13 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { wait_until_cilk_done(g); #if BOSS_THIEF - g->workers[0]->reducer_map = g->workers[g->exiting_worker]->reducer_map; - g->workers[g->exiting_worker]->reducer_map = NULL; + __cilkrts_worker **workers = g->workers; + __cilkrts_worker *w0 = workers[0]; + __cilkrts_worker *wexit = workers[g->exiting_worker]; + w0->reducer_map = wexit->reducer_map; + wexit->reducer_map = NULL; + w0->extension = wexit->extension; + wexit->extension = NULL; g->exiting_worker = 0; #endif @@ -357,13 +387,10 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { // Setup runtime structures to start a new Cilkified region. Executed by the // Cilkifying thread in cilkify(). 
-void __cilkrts_internal_invoke_cilkified_root(global_state *g, - __cilkrts_stack_frame *sf) { +void __cilkrts_internal_invoke_cilkified_root(__cilkrts_stack_frame *sf) { + global_state *g = default_cilkrts; + CILK_ASSERT_G(!__cilkrts_get_tls_worker()); - /* CILK_ASSERT_G( */ - /* !atomic_load_explicit(&g->start_thieves, memory_order_acquire)); */ - /* CILK_ASSERT_G( */ - /* !atomic_load_explicit(&g->start_thieves_futex, memory_order_acquire)); */ // Start the workers if necessary if (__builtin_expect(!g->workers_started, false)) { @@ -376,17 +403,29 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, // rts_srand(g->workers[0], (0 + 1) * 162347); g->workers[0]->l->rand_next = 162347; #endif + if (USE_EXTENSION) { + g->root_closure->ext_fiber = + cilk_fiber_allocate(g->workers[0], g->options.stacksize); + } is_boss_thread = true; } // The boss thread will impersonate the last exiting worker until it tries // to become a thief. #if BOSS_THIEF - tls_worker = g->workers[0]; + __cilkrts_tls_worker = g->workers[0]; #else - tls_worker = g->workers[g->exiting_worker]; + __cilkrts_tls_worker = g->workers[g->exiting_worker]; #endif - CILK_START_TIMING(tls_worker, INTERVAL_CILKIFY_ENTER); + if (USE_EXTENSION) { + // Initialize sf->extension, to appease the later call to + // setup_for_execution. + sf->extension = __cilkrts_tls_worker->extension; + // Initialize worker->ext_stack. + __cilkrts_tls_worker->ext_stack = + sysdep_get_stack_start(g->root_closure->ext_fiber); + } + CILK_START_TIMING(__cilkrts_tls_worker, INTERVAL_CILKIFY_ENTER); // Mark the root closure as not initialized g->root_closure_initialized = false; @@ -413,6 +452,8 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, // flags. /* reset_disengaged_var(g); */ + CILK_ASSERT_G(!atomic_load_explicit(&g->cilkified, memory_order_relaxed) && + "OpenCilk runtime already executing a Cilk computation."); set_cilkified(g); // Set g->done = 0, so Cilk workers will continue trying to steal. 
@@ -428,8 +469,9 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, /* request_more_thieves(g, g->nworkers); */ if (__builtin_setjmp(g->boss_ctx) == 0) { - CILK_SWITCH_TIMING(tls_worker, INTERVAL_CILKIFY_ENTER, INTERVAL_SCHED); - do_what_it_says_boss(tls_worker, g->root_closure); + CILK_SWITCH_TIMING(__cilkrts_tls_worker, INTERVAL_CILKIFY_ENTER, + INTERVAL_SCHED); + do_what_it_says_boss(__cilkrts_tls_worker, g->root_closure); } else { // The stack on which // __cilkrts_internal_invoke_cilkified_root() was called may @@ -453,6 +495,7 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // exiting_worker. worker_id self = w->self; g->exiting_worker = self; + ReadyDeque *deques = g->deques; // Mark the computation as done. Also "sleep" the workers: update global // flags so workers who exit the work-stealing loop will return to waiting @@ -476,11 +519,11 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // Cilkified region to start with an empty deque. We go ahead and grab the // deque lock to make sure no other worker has a lingering pointer to the // closure. - deque_lock_self(w); - g->deques[w->self].bottom = (Closure *)NULL; - g->deques[w->self].top = (Closure *)NULL; + deque_lock_self(deques, w); + deques[self].bottom = (Closure *)NULL; + deques[self].top = (Closure *)NULL; WHEN_CILK_DEBUG(g->root_closure->owner_ready_deque = NO_WORKER); - deque_unlock_self(w); + deque_unlock_self(deques, w); // Clear the flags in sf. This routine runs before leave_frame in a Cilk // function, but leave_frame is executed conditionally in Cilk functions @@ -494,13 +537,18 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // We finished the computation on the boss thread. No need to jump to // the runtime in this case; just return normally. 
/* CILK_ASSERT(w, w->l->fiber_to_free == NULL); */ - if (w->l->fiber_to_free) { - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); + local_state *l = w->l; + if (l->fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (l->ext_fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } - w->l->fiber_to_free = NULL; atomic_store_explicit(&g->cilkified, 0, memory_order_release); - w->l->state = WORKER_IDLE; - tls_worker = NULL; + l->state = WORKER_IDLE; + __cilkrts_tls_worker = NULL; // Restore the boss's original rsp, so the boss completes the Cilk // function on its original stack. @@ -537,6 +585,8 @@ static void global_state_deinit(global_state *g) { pthread_cond_destroy(&g->start_root_worker_cond_var); pthread_mutex_destroy(&g->disengaged_lock); pthread_cond_destroy(&g->disengaged_cond_var); + free(g->worker_args); + g->worker_args = NULL; free(g->workers); g->workers = NULL; g->nworkers = 0; @@ -569,6 +619,7 @@ static void worker_terminate(__cilkrts_worker *w, void *data) { if (rm) { cilkred_map_destroy_map(w, rm); } + worker_local_destroy(w->l, w->g); cilk_internal_malloc_per_worker_terminate(w); // internal malloc last } @@ -605,12 +656,11 @@ static void workers_deinit(global_state *g) { while (i-- > 0) { __cilkrts_worker *w = g->workers[i]; g->workers[i] = NULL; + if (!w) + continue; cilk_internal_malloc_per_worker_destroy(w); // internal malloc last free(w->l->shadow_stack); w->l->shadow_stack = NULL; - free(w->l->index_to_worker); - w->l->index_to_worker = NULL; - free(w->l); w->l = NULL; free(w); } @@ -628,6 +678,8 @@ CHEETAH_INTERNAL void __cilkrts_shutdown(global_state *g) { // Deallocate the root closure and its fiber cilk_fiber_deallocate_global(g, g->root_closure->fiber); + if (USE_EXTENSION) + cilk_fiber_deallocate_global(g, g->root_closure->ext_fiber); Closure_destroy_global(g, g->root_closure); // Cleanup the global state diff --git 
a/runtime/init.h b/runtime/init.h index 34c55720..2df4f8b3 100644 --- a/runtime/init.h +++ b/runtime/init.h @@ -3,7 +3,8 @@ #include "cilk-internal.h" -void __cilkrts_internal_invoke_cilkified_root(global_state *g, __cilkrts_stack_frame *sf); +// For invoke, the global state is implied. +void __cilkrts_internal_invoke_cilkified_root(__cilkrts_stack_frame *sf); void __cilkrts_internal_exit_cilkified_root(global_state *g, __cilkrts_stack_frame *sf); // Used by Cilksan to set nworkers to 1 and force reduction diff --git a/runtime/internal-malloc.h b/runtime/internal-malloc.h index 2bde0900..a512ce0a 100644 --- a/runtime/internal-malloc.h +++ b/runtime/internal-malloc.h @@ -18,6 +18,12 @@ enum im_tag { CHEETAH_INTERNAL const char *name_for_im_tag(enum im_tag); +/* Helper routine to round sizes to alignments, for use with cilk_aligned_alloc. + */ +static inline size_t round_size_to_alignment(size_t alignment, size_t size) { + return ((size + alignment - 1) / alignment) * alignment; +} + /* Custom implementation of aligned_alloc. */ static inline void *cilk_aligned_alloc(size_t alignment, size_t size) { #if defined(_ISOC11_SOURCE) diff --git a/runtime/local.h b/runtime/local.h index 0be88c9f..aafd7947 100644 --- a/runtime/local.h +++ b/runtime/local.h @@ -3,20 +3,23 @@ #include +#include "internal-malloc-impl.h" /* for cilk_im_desc */ + +struct hyper_table_cache; + struct local_state { struct __cilkrts_stack_frame **shadow_stack; + struct hyper_table_cache *hyper_table; unsigned short state; /* __cilkrts_worker_state */ - bool lock_wait; bool provably_good_steal; unsigned int rand_next; - // Local copy of the index-to-worker map. 
- worker_id *index_to_worker; jmpbuf rts_ctx; struct cilk_fiber_pool fiber_pool; struct cilk_im_desc im_desc; struct cilk_fiber *fiber_to_free; + struct cilk_fiber *ext_fiber_to_free; struct sched_stats stats; }; diff --git a/runtime/pedigree-internal.h b/runtime/pedigree-internal.h new file mode 100644 index 00000000..bd6a794e --- /dev/null +++ b/runtime/pedigree-internal.h @@ -0,0 +1,73 @@ +#ifndef _PEDIGREE_INTERNAL_H +#define _PEDIGREE_INTERNAL_H + +#include +#include + +#include "cilk-internal.h" + +static const uint64_t DPRNG_PRIME = (uint64_t)(-59); +extern uint64_t *__pedigree_dprng_m_array; +extern uint64_t __pedigree_dprng_seed; + +typedef struct __pedigree_frame { + __cilkrts_pedigree pedigree; // Fields for pedigrees. + int64_t rank; + uint64_t dprng_dotproduct; + int64_t dprng_depth; +} __pedigree_frame; + +typedef struct __pedigree_frame_storage_t { + size_t next_pedigree_frame; + __pedigree_frame* frames; +} __pedigree_frame_storage_t; + + +/////////////////////////////////////////////////////////////////////////// +// Helper methods + +static inline __attribute__((malloc)) __pedigree_frame * +push_pedigree_frame(__cilkrts_worker *w) { + return __cilkrts_push_ext_stack(w, sizeof(__pedigree_frame)); +} + +static inline void pop_pedigree_frame(__cilkrts_worker *w) { + __cilkrts_pop_ext_stack(w, sizeof(__pedigree_frame)); +} + +static inline uint64_t __cilkrts_dprng_swap_halves(uint64_t x) { + return (x >> (4 * sizeof(uint64_t))) | (x << (4 * sizeof(uint64_t))); +} + +static inline uint64_t __cilkrts_dprng_mix(uint64_t x) { + for (int i = 0; i < 4; i++) { + x = x * (2*x+1); + x = __cilkrts_dprng_swap_halves(x); + } + return x; +} + +static inline uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x) { + x = __cilkrts_dprng_mix(x); + return x - (DPRNG_PRIME & -(x >= DPRNG_PRIME)); +} + +static inline uint64_t __cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b) { + uint64_t z = a + b; + if ((z < a) || (z >= DPRNG_PRIME)) { + z -= DPRNG_PRIME; + } + return z; +} + 
+// Helper method to advance the pedigree and dprng states. +static inline __attribute__((always_inline)) __pedigree_frame * +bump_worker_rank(void) { + __pedigree_frame *frame = (__pedigree_frame *)(__cilkrts_get_extension()); + frame->rank++; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + frame->dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); + return frame; +} + +#endif // _PEDIGREE_INTERNAL_H diff --git a/runtime/pedigree_ext.c b/runtime/pedigree_ext.c new file mode 100644 index 00000000..b99d2cd2 --- /dev/null +++ b/runtime/pedigree_ext.c @@ -0,0 +1,49 @@ +#include "pedigree-internal.h" + +// Pedigree-extension code, included in the runtime as part of the bitcode file. + +void __cilkrts_extend_spawn(__cilkrts_worker *w, void **parent_extension, + void **child_extension) { + // Copy the child extension into the parent, and create a new + // __pedigree_frame for the child. + *parent_extension = *child_extension; + + // Get a new pedigree frame for the child extension. + __pedigree_frame *frame = push_pedigree_frame(w); + *child_extension = frame; + + // Initialize the new frame. + __pedigree_frame *parent_frame = (__pedigree_frame *)(*parent_extension); + // Copy the parent's rank into the child frame's pedigree.rank. + frame->pedigree.rank = parent_frame->rank; + // Append the child frame's pedigree onto the linked list. + frame->pedigree.parent = &(parent_frame->pedigree); + // Initialize the child frame's rank to 0. + frame->rank = 0; + + // Increment the dprng_depth in the child frame. + frame->dprng_depth = parent_frame->dprng_depth + 1; + // Update the child frame's dprng_dotproduct. + uint64_t parent_dprng_dotproduct = parent_frame->dprng_dotproduct; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + parent_dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); + + // Update the rank and dprng_dotproduct in the parent frame. 
+ parent_frame->rank++; + parent_frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + parent_dprng_dotproduct, + __pedigree_dprng_m_array[parent_frame->dprng_depth]); +} + +void __cilkrts_extend_return_from_spawn(__cilkrts_worker *w, void **extension) { + // Free the pedigree frame. + pop_pedigree_frame(w); +} + +void __cilkrts_extend_sync(void **extension) { + // Update the rank and dprng_dotproduct. + __pedigree_frame *frame = (__pedigree_frame *)(*extension); + frame->rank++; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + frame->dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); +} diff --git a/runtime/pedigree_globals.c b/runtime/pedigree_globals.c index 5c17960d..a3d31b6c 100644 --- a/runtime/pedigree_globals.c +++ b/runtime/pedigree_globals.c @@ -1,44 +1,5 @@ -#include -#include -#include -#define ENABLE_CILKRTS_PEDIGREE -#include - -__cilkrts_pedigree cilkrts_root_pedigree_node; -uint64_t DPRNG_PRIME = (uint64_t)(-59); -uint64_t* dprng_m_array; -uint64_t dprng_m_X = 0; - -uint64_t __cilkrts_dprng_swap_halves(uint64_t x) { - return (x >> (4 * sizeof(uint64_t))) | (x << (4 * sizeof(uint64_t))); -} - -uint64_t __cilkrts_dprng_mix(uint64_t x) { - for (int i = 0; i < 4; i++) { - x = x * (2*x+1); - x = __cilkrts_dprng_swap_halves(x); - } - return x; -} - -uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x) { - x = __cilkrts_dprng_mix(x); - return x - (DPRNG_PRIME & -(x >= DPRNG_PRIME)); -} - -uint64_t __cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b) { - uint64_t z = a+b; - if ((z < a) || (z >= DPRNG_PRIME)) { - z -= DPRNG_PRIME; - } - return z; -} - -void __cilkrts_init_dprng(void) { - dprng_m_array = (uint64_t*) malloc(sizeof(uint64_t*) * 4096); - for (int i = 0; i < 4096; i++) { - dprng_m_array[i] = __cilkrts_dprng_mix_mod_p(0x8c679c168e6bf733ul + i); - } - dprng_m_X = __cilkrts_dprng_mix_mod_p(0x8c679c168e6bf733ul + 4096); -} +#include "pedigree-internal.h" +// This variable needs to be accessed both from the external pedigree library +// 
and the pedigree-extension code in the core runtime library. +uint64_t *__pedigree_dprng_m_array = NULL; diff --git a/runtime/pedigree_lib.c b/runtime/pedigree_lib.c new file mode 100644 index 00000000..66474964 --- /dev/null +++ b/runtime/pedigree_lib.c @@ -0,0 +1,90 @@ +#include "pedigree-internal.h" + +// External pedigree library code. Linking this code with a Cilk program +// enables pedigrees. + +//////////////////////////////////////////////////////////////////////////////// +// Global variables local to the library. + +uint64_t __pedigree_dprng_seed = 0x8c679c168e6bf733ul; +uint64_t __pedigree_dprng_m_X = 0; +CHEETAH_INTERNAL +__pedigree_frame root_frame = {.pedigree = {.rank = 0, .parent = NULL}, + .rank = 0, + .dprng_depth = 0, + .dprng_dotproduct = 0}; + +//////////////////////////////////////////////////////////////////////////////// +// Initialization and deinitialization + +CHEETAH_INTERNAL +void __cilkrts_deinit_dprng(void) { + if (__pedigree_dprng_m_array) { + free(__pedigree_dprng_m_array); + __pedigree_dprng_m_array = NULL; + } +} + +void __cilkrts_init_dprng(void) { + // TODO: Disallow __cilkrts_init_dprng() from being called in parallel. 
+ if (!__pedigree_dprng_m_array) { + __pedigree_dprng_m_array = + (uint64_t *)malloc(sizeof(uint64_t *) * 4096); + atexit(__cilkrts_deinit_dprng); + } + + for (int i = 0; i < 4096; i++) { + __pedigree_dprng_m_array[i] = + __cilkrts_dprng_mix_mod_p(__pedigree_dprng_seed + i); + } + __pedigree_dprng_m_X = + __cilkrts_dprng_mix_mod_p(__pedigree_dprng_seed + 4096); +} + +CHEETAH_INTERNAL +void __pedigree_init(void) { + root_frame.dprng_dotproduct = __pedigree_dprng_m_X; + + __cilkrts_register_extension(&root_frame); +} + +CHEETAH_INTERNAL +__attribute__((constructor)) void __pedigree_startup(void) { + __cilkrts_init_dprng(); + + if (!__cilkrts_is_initialized()) + __cilkrts_atinit(__pedigree_init); + else + __pedigree_init(); +} + +//////////////////////////////////////////////////////////////////////////////// +// API methods, callable from user code. +// +// These methods are included here so that, if a Cilk program attempts to use +// one of these routines without incorporating this library, the user will get +// sensible-looking linker errors. + +// Helper method to advance the pedigree and dprng states. +void __cilkrts_bump_worker_rank(void) { bump_worker_rank(); } + +// Set the seed for the dprand DPRNG. +void __cilkrts_dprand_set_seed(uint64_t seed) { + __pedigree_dprng_seed = seed; + __cilkrts_init_dprng(); +} + +// Get the current value of the dprand DPRNG. +uint64_t __cilkrts_get_dprand(void) { + __pedigree_frame *frame = bump_worker_rank(); + return __cilkrts_dprng_mix_mod_p(frame->dprng_dotproduct); +} + +// Get the current pedigree, in the form of a pointer to its leaf node. 
+__cilkrts_pedigree __cilkrts_get_pedigree(void) { + __cilkrts_pedigree ret_ped; + __pedigree_frame *frame = (__pedigree_frame *)(__cilkrts_get_extension()); + ret_ped.parent = &(frame->pedigree); + ret_ped.rank = frame->rank; + return ret_ped; +} diff --git a/runtime/personality.c b/runtime/personality.c index 187bd4f3..4ffa03b2 100644 --- a/runtime/personality.c +++ b/runtime/personality.c @@ -45,6 +45,7 @@ _Unwind_Reason_Code __cilk_personality_internal( __cilkrts_worker *w = __cilkrts_get_tls_worker(); __cilkrts_stack_frame *sf = w->current_stack_frame; + ReadyDeque *deques = w->g->deques; if (actions & _UA_SEARCH_PHASE) { // don't do anything out of the ordinary during search phase. @@ -60,8 +61,9 @@ _Unwind_Reason_Code __cilk_personality_internal( sysdep_save_fp_ctrl_state(sf); if (__builtin_setjmp(sf->ctx) == 0) { - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); // ensure that we return here after a cilk_sync. t->parent_rsp = t->orig_rsp; @@ -70,7 +72,7 @@ _Unwind_Reason_Code __cilk_personality_internal( // set closure_exception t->user_exn.exn = (char *)ue_header; - deque_unlock_self(w); + deque_unlock_self(deques, w); // For now, use this flag to indicate that we are setjmping from // the personality function. This will "disable" some asserts in @@ -83,9 +85,9 @@ _Unwind_Reason_Code __cilk_personality_internal( // after longjmping back, the worker may have changed. 
w = __cilkrts_get_tls_worker(); - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); + deque_unlock_self(deques, w); bool in_reraised_cfa = (t->reraise_cfa == (char *)get_cfa(context)); bool skip_leaveframe = ((t->reraise_cfa != NULL) && !in_reraised_cfa); if (in_reraised_cfa) diff --git a/runtime/readydeque.h b/runtime/readydeque.h index ba48eeb5..a4b7e1ce 100644 --- a/runtime/readydeque.h +++ b/runtime/readydeque.h @@ -26,55 +26,45 @@ struct ReadyDeque { * Management of ReadyDeques *********************************************************/ -static inline -void deque_assert_ownership(__cilkrts_worker *const w, worker_id pn) { - CILK_ASSERT(w, w->g->deques[pn].mutex_owner == w->self); +static inline void deque_assert_ownership(ReadyDeque *deques, + __cilkrts_worker *const w, + worker_id pn) { + CILK_ASSERT(w, deques[pn].mutex_owner == w->self); } -static inline -void deque_lock_self(__cilkrts_worker *const w) { - struct local_state *l = w->l; +static inline void deque_lock_self(ReadyDeque *deques, + __cilkrts_worker *const w) { worker_id id = w->self; - global_state *g = w->g; - l->lock_wait = true; - cilk_mutex_lock(&g->deques[id].mutex); - l->lock_wait = false; - g->deques[id].mutex_owner = id; + cilk_mutex_lock(&deques[id].mutex); + deques[id].mutex_owner = id; } -static inline -void deque_unlock_self(__cilkrts_worker *const w) { +static inline void deque_unlock_self(ReadyDeque *deques, + __cilkrts_worker *const w) { worker_id id = w->self; - global_state *g = w->g; - g->deques[id].mutex_owner = NO_WORKER; - cilk_mutex_unlock(&g->deques[id].mutex); + deques[id].mutex_owner = NO_WORKER; + cilk_mutex_unlock(&deques[id].mutex); } -static inline -int deque_trylock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - int ret = cilk_mutex_try(&g->deques[pn].mutex); +static inline int deque_trylock(ReadyDeque *deques, 
__cilkrts_worker *const w, + worker_id pn) { + int ret = cilk_mutex_try(&deques[pn].mutex); if (ret) { - g->deques[pn].mutex_owner = w->self; + deques[pn].mutex_owner = w->self; } return ret; } -static inline -void deque_lock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - struct local_state *l = w->l; - l->lock_wait = true; - cilk_mutex_lock(&g->deques[pn].mutex); - l->lock_wait = false; - g->deques[pn].mutex_owner = w->self; +static inline void deque_lock(ReadyDeque *deques, __cilkrts_worker *const w, + worker_id pn) { + cilk_mutex_lock(&deques[pn].mutex); + deques[pn].mutex_owner = w->self; } -static inline -void deque_unlock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - g->deques[pn].mutex_owner = NO_WORKER; - cilk_mutex_unlock(&w->g->deques[pn].mutex); +static inline void deque_unlock(ReadyDeque *deques, __cilkrts_worker *const w, + worker_id pn) { + deques[pn].mutex_owner = NO_WORKER; + cilk_mutex_unlock(&deques[pn].mutex); } /* @@ -84,44 +74,44 @@ void deque_unlock(__cilkrts_worker *const w, worker_id pn) { * ANGE: the precondition of these functions is that the worker w -> self * must have locked worker pn's deque before entering the function */ -static inline -Closure *deque_xtract_top(__cilkrts_worker *const w, worker_id pn) { +static inline Closure * +deque_xtract_top(ReadyDeque *deques, __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].top; + cl = deques[pn].top; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); - w->g->deques[pn].top = cl->next_ready; + deques[pn].top = cl->next_ready; /* ANGE: if there is only one entry in the deque ... 
*/ - if (cl == w->g->deques[pn].bottom) { + if (cl == deques[pn].bottom) { CILK_ASSERT(w, cl->next_ready == (Closure *)NULL); - w->g->deques[pn].bottom = (Closure *)NULL; + deques[pn].bottom = (Closure *)NULL; } else { CILK_ASSERT(w, cl->next_ready); (cl->next_ready)->prev_ready = (Closure *)NULL; } WHEN_CILK_DEBUG(cl->owner_ready_deque = NO_WORKER); } else { - CILK_ASSERT(w, w->g->deques[pn].bottom == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].bottom == (Closure *)NULL); } return cl; } -static inline -Closure *deque_peek_top(__cilkrts_worker *const w, worker_id pn) { +static inline Closure *deque_peek_top(ReadyDeque *deques, + __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); /* ANGE: return the top but does not unlink it from the rest */ - cl = w->g->deques[pn].top; + cl = deques[pn].top; if (cl) { // If w is stealing, then it may peek the top of the deque of the worker // who is in the midst of exiting a Cilkified region. 
In that case, cl @@ -130,27 +120,28 @@ Closure *deque_peek_top(__cilkrts_worker *const w, worker_id pn) { CILK_ASSERT(w, cl->owner_ready_deque == pn || (w->self != pn && cl == w->g->root_closure)); } else { - CILK_ASSERT(w, w->g->deques[pn].bottom == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].bottom == (Closure *)NULL); } return cl; } -static inline -Closure *deque_xtract_bottom(__cilkrts_worker *const w, worker_id pn) { +static inline Closure *deque_xtract_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, + worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].bottom; + cl = deques[pn].bottom; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); - w->g->deques[pn].bottom = cl->prev_ready; - if (cl == w->g->deques[pn].top) { + deques[pn].bottom = cl->prev_ready; + if (cl == deques[pn].top) { CILK_ASSERT(w, cl->prev_ready == (Closure *)NULL); - w->g->deques[pn].top = (Closure *)NULL; + deques[pn].top = (Closure *)NULL; } else { CILK_ASSERT(w, cl->prev_ready); (cl->prev_ready)->next_ready = (Closure *)NULL; @@ -158,58 +149,60 @@ Closure *deque_xtract_bottom(__cilkrts_worker *const w, worker_id pn) { WHEN_CILK_DEBUG(cl->owner_ready_deque = NO_WORKER); } else { - CILK_ASSERT(w, w->g->deques[pn].top == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].top == (Closure *)NULL); } return cl; } -static inline -Closure *deque_peek_bottom(__cilkrts_worker *const w, worker_id pn) { +static inline Closure * +deque_peek_bottom(ReadyDeque *deques, __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].bottom; + cl = deques[pn].bottom; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); } else { - CILK_ASSERT(w, w->g->deques[pn].top == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].top == 
(Closure *)NULL); } return cl; } -static inline -void deque_assert_is_bottom(__cilkrts_worker *const w, Closure *t) { +static inline void deque_assert_is_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, + Closure *t) { /* ANGE: still need to make sure the worker self has the lock */ - deque_assert_ownership(w, w->self); - CILK_ASSERT(w, t == deque_peek_bottom(w, w->self)); + deque_assert_ownership(deques, w, w->self); + CILK_ASSERT(w, t == deque_peek_bottom(deques, w, w->self)); } /* * ANGE: this allow w -> self to append Closure cl onto worker pn's ready * deque (i.e. make cl the new bottom). */ -static inline -void deque_add_bottom(__cilkrts_worker *const w, Closure *cl, worker_id pn) { +static inline void deque_add_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, Closure *cl, + worker_id pn) { - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); CILK_ASSERT(w, cl->owner_ready_deque == NO_WORKER); - cl->prev_ready = w->g->deques[pn].bottom; + cl->prev_ready = deques[pn].bottom; cl->next_ready = (Closure *)NULL; - w->g->deques[pn].bottom = cl; + deques[pn].bottom = cl; WHEN_CILK_DEBUG(cl->owner_ready_deque = pn); - if (w->g->deques[pn].top) { + if (deques[pn].top) { CILK_ASSERT(w, cl->prev_ready); (cl->prev_ready)->next_ready = cl; } else { - w->g->deques[pn].top = cl; + deques[pn].top = cl; } } diff --git a/runtime/reducer_api.c b/runtime/reducer_api.c new file mode 100644 index 00000000..704ab66c --- /dev/null +++ b/runtime/reducer_api.c @@ -0,0 +1,113 @@ + +/* Begin new reducer interface */ + +#include +#include +#include +#include "rts-config.h" +#include "hyperobject_base.h" +#include "cilk-internal.h" +#include "hypertable.h" +#include "local.h" + +static const size_t HSIZE = 0; // meaning use default + +hyperobject_base * +__cilkrts_add_key(void *leftmost, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + + if (size <= 0) + cilkrts_bug(w, "User error: 
reducer size not positive"); + + size = size + (CILK_CACHE_LINE - 1) & ~(size_t)(CILK_CACHE_LINE - 1); + + /* TODO: Internal malloc (which wants a non-null worker) */ + hyperobject_base *hyper = + cilk_aligned_alloc(CILK_CACHE_LINE, sizeof (hyperobject_base)); + if (!hyper) + cilkrts_bug(w, "unable to allocate hyperobject"); + hyper->identity_fn = id; + hyper->reduce_fn = reduce; + hyper->key = leftmost; + hyper->view_size = size; + hyper->id_num = 0; + cilkrts_hyper_register(hyper); + + if (w && w->l->hyper_table) { + enum hyper_table_error error = + hyper_table_cache_insert(w->l->hyper_table, leftmost, hyper); + if (error != HYPER_OK) { + cilkrts_bug(w, "unable to insert hyperobject in table (%s)", + hyper_table_error_string(error)); + cilkrts_hyper_unregister(hyper); + return 0; + } + return hyper; + } + + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + if (hyper_table_insert(table, leftmost, hyper) != HYPER_OK) { + cilkrts_bug(w, "unable to insert hyperobject in table"); + cilkrts_hyper_unregister(hyper); + return 0; + } + return hyper; +} + +void __cilkrts_drop_key(void *key) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + hyperobject_base *hyper; + if (w && w->l->hyper_table) { + hyper = hyper_table_cache_remove(w->l->hyper_table, key); + } else { + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + hyper = hyper_table_remove(table, key); + } + if (!hyper) + return; + cilkrts_hyper_unregister(hyper); + free(hyper); +} + +hyperobject_base *__cilkrts_hyper_key(void *key) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + if (w && w->l->hyper_table) + return hyper_table_cache_lookup(w->l->hyper_table, key); + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + return hyper_table_lookup(table, key); +} + +/* ABI, declared in cilk_api.h */ +void *__cilkrts_reducer_lookup(void *key) { + hyperobject_base *hyper = __cilkrts_hyper_key(key); + if (hyper) + return cilkrts_hyper_lookup(hyper); + return key; +} + +void 
+__cilkrts_reducer_register(void *key, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void +__cilkrts_reducer_register_32(void *key, uint32_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void +__cilkrts_reducer_register_64(void *key, uint64_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void __cilkrts_reducer_unregister(void *key) { + __cilkrts_drop_key(key); +} diff --git a/runtime/reducer_impl.c b/runtime/reducer_impl.c index 67d69934..41ffc48a 100644 --- a/runtime/reducer_impl.c +++ b/runtime/reducer_impl.c @@ -3,7 +3,7 @@ #define _GNU_SOURCE #endif #include "reducer_impl.h" -#include "cilk/hyperobject_base.h" +#include "hyperobject_base.h" #include "global.h" #include "init.h" #include "internal-malloc.h" @@ -11,6 +11,7 @@ #include "scheduler.h" #include #include +#include #include #include #include @@ -19,7 +20,6 @@ #define USE_INTERNAL_MALLOC 1 -#define REDUCER_LIMIT 1024U #define GLOBAL_REDUCER_LIMIT 100U // ================================================================= @@ -38,9 +38,71 @@ typedef struct reducer_id_manager { /* When Cilk is not running, global holds all the registered hyperobjects so they can be imported into the first worker. Size is GLOBAL_REDUCER_LIMIT, regardless of spa_cap. */ - __cilkrts_hyperobject_base **global; + hyperobject_base **global; } reducer_id_manager; +/* A table of hyperobjects + TODO: Use the bitmap logic from local reducer maps. 
*/ +static struct { + pthread_mutex_t lock; + uint32_t size, count, hint; + hyperobject_base **list; +} global_reducers __attribute__((aligned(32))) + = {PTHREAD_MUTEX_INITIALIZER, 0, 0, 0, 0}; + +void remove_global_reducer(hyperobject_base *hyper) { + int error = pthread_mutex_lock(&global_reducers.lock); + if (error) + cilkrts_bug(0, "mutex lock error"); + uint32_t index = hyper->id_num; + CILK_ASSERT_G(index < global_reducers.size); + CILK_ASSERT_G(global_reducers.list[index] == hyper); + global_reducers.list[index] = 0; + --global_reducers.count; + uint32_t hint = global_reducers.hint; + global_reducers.hint = hint > index ? hint : index; + pthread_mutex_unlock(&global_reducers.lock); +} + +void add_global_reducer(hyperobject_base *hyper) { + int error = pthread_mutex_lock(&global_reducers.lock); + if (error) + cilkrts_bug(0, "mutex lock error"); + hyperobject_base **list = global_reducers.list; + size_t size = global_reducers.size; + size_t count = global_reducers.count; + size_t hint = global_reducers.hint; + uint32_t index = 0; + if (!list) { + list = calloc(32, sizeof(hyperobject_base *)); + size = 32; + index = 0; + for (int i = 0; i < 32; ++i) + list[i] = 0; + } else if (count == size) { + size_t new_size = size * 3 / 2; + CILK_ASSERT_G((uint32_t)new_size == new_size); + list = realloc(list, new_size * sizeof(hyperobject_base *)); + while (++size < new_size) + list[size] = 0; + size = new_size; + index = size; + } else if (!list[hint]) { + index = hint; + } else { + index = size; + while (index-- > 0) + if (!list[index]) + break; + } + hyper->id_num = index; + list[index] = hyper; + global_reducers.list = list; + global_reducers.count = count + 1; + global_reducers.size = size; + global_reducers.hint = (size == index + 1) ? 
0 : index + 1; + pthread_mutex_unlock(&global_reducers.lock); +} static void reducer_id_manager_assert_ownership(reducer_id_manager *m, __cilkrts_worker *const w) { @@ -93,7 +155,7 @@ static void free_reducer_id_manager(reducer_id_manager *m) { m->used = NULL; free(old); } - __cilkrts_hyperobject_base **global = m->global; + hyperobject_base **global = m->global; if (global) { m->global = NULL; free(global); @@ -148,6 +210,14 @@ static void reducer_id_free(__cilkrts_worker *const ws, hyper_id_t id) { reducer_id_manager_unlock(m, ws); } +static void *get_or_init_leftmost(__cilkrts_worker *w, + hyperobject_base *hyper) { + void *left = hyper->key; + if (!left) + cilkrts_bug(w, "User error: hyperobject has no leftmost object"); + return left; +} + // ================================================================= // Init / deinit functions // ================================================================= @@ -158,7 +228,7 @@ void reducers_init(global_state *g) { if (g->id_manager) { return; } else { - g->id_manager = init_reducer_id_manager(REDUCER_LIMIT); + g->id_manager = init_reducer_id_manager(DEFAULT_REDUCER_LIMIT); } } @@ -177,14 +247,13 @@ CHEETAH_INTERNAL void reducers_import(global_state *g, __cilkrts_worker *w) { should be exported when Cilk exits. 
*/ cilkred_map *map = cilkred_map_make_map(w, m->spa_cap); for (hyper_id_t i = 0; i < m->hwm; ++i) { - __cilkrts_hyperobject_base *h = m->global[i]; - if (h) { - map->vinfo[i].key = h; - map->vinfo[i].val = (char *)h + (ptrdiff_t)h->__view_offset; + hyperobject_base *hyper = m->global[i]; + if (hyper) { + map->vinfo[i].hyper = hyper; + map->vinfo[i].view = get_or_init_leftmost(w, hyper); + CILK_ASSERT(w, hyper->valid); + cilkred_map_log_id(w, map, hyper->id_num); } - hyper_id_t id = h->__id_num; - CILK_ASSERT(w, id & HYPER_ID_VALID); - cilkred_map_log_id(w, map, id & ~HYPER_ID_VALID); } w->reducer_map = map; } @@ -209,7 +278,7 @@ static cilkred_map *install_new_reducer_map(__cilkrts_worker *w) { /* remove the reducer from the current reducer map. If the reducer exists in maps other than the current one, the behavior is undefined. */ -void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { +void cilkrts_hyper_unregister(hyperobject_base *hyper) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); // If we don't have a worker, use instead the last exiting worker from the @@ -217,14 +286,13 @@ void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { if (!w) w = default_cilkrts->workers[default_cilkrts->exiting_worker]; - hyper_id_t id = key->__id_num; - cilkrts_alert(REDUCE_ID, w, "Destroy reducer %x at %p", (unsigned)id, key); - if (!__builtin_expect(id & HYPER_ID_VALID, HYPER_ID_VALID)) { - cilkrts_bug(w, "unregistering unregistered hyperobject %p", key); + hyper_id_t id = hyper->id_num; + cilkrts_alert(REDUCE_ID, w, "Destroy reducer %x at %p", (unsigned)id, hyper); + if (__builtin_expect(!hyper->valid, 0)) { + cilkrts_bug(w, "unregistering unregistered hyperobject %p", hyper); return; } - id &= ~HYPER_ID_VALID; - key->__id_num = id; + hyper->id_num = id; if (w) { #define UNSYNCED_REDUCER_MSG \ @@ -243,7 +311,7 @@ void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { reducer_id_free(w, id); } -void 
__cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { +void cilkrts_hyper_register(hyperobject_base *hyper) { // This function registers the specified hyperobject in the current // reducer map and registers the initial value of the hyperobject as the // leftmost view of the reducer. @@ -260,18 +328,13 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { } hyper_id_t id = reducer_id_get(m, w); - key->__id_num = id | HYPER_ID_VALID; + hyper->id_num = id; + hyper->valid = 1; - cilkrts_alert(REDUCE_ID, w, "Create reducer %x at %p", (unsigned)id, key); + cilkrts_alert(REDUCE_ID, w, "Create reducer %x at %p", (unsigned)id, hyper); if (__builtin_expect(!w, 0)) { - if (id >= GLOBAL_REDUCER_LIMIT) { - cilkrts_bug(w, "Global reducer pool exhausted"); - } - if (!m->global) { - m->global = calloc(GLOBAL_REDUCER_LIMIT, sizeof *m->global); - } - m->global[id] = key; + add_global_reducer(hyper); return; } @@ -282,7 +345,7 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { } /* Must not exist. 
*/ - CILK_ASSERT(w, cilkred_map_lookup(h, key) == NULL); + CILK_ASSERT(w, cilkred_map_lookup(h, hyper) == NULL); if (h->merging) cilkrts_bug(w, "User error: hyperobject used by another hyperobject"); @@ -290,27 +353,26 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { CILK_ASSERT(w, w->reducer_map == h); ViewInfo *vinfo = &h->vinfo[id]; - vinfo->key = key; + vinfo->hyper = hyper; // init with left most view - vinfo->val = (char *)key + (ptrdiff_t)key->__view_offset; + vinfo->view = get_or_init_leftmost(w, hyper); cilkred_map_log_id(w, h, id); - static_assert(sizeof(__cilkrts_hyperobject_base) <= 64, + static_assert(sizeof(hyperobject_base) <= 64, "hyperobject base is too large"); } -void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key) { +void *cilkrts_hyper_lookup(hyperobject_base *hyper) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); - hyper_id_t id = key->__id_num; + hyper_id_t id = hyper->id_num; - if (!__builtin_expect(id & HYPER_ID_VALID, HYPER_ID_VALID)) { + if (__builtin_expect(!hyper->valid, 0)) { cilkrts_bug(w, "User error: reference to unregistered hyperobject %p", - key); + hyper); } - id &= ~HYPER_ID_VALID; if (__builtin_expect(!w, 0)) { - return (char *)key + key->__view_offset; + return hyper->key; } /* TODO: If this is the first reference to a reducer created at @@ -330,24 +392,25 @@ void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key) { if (h->merging) cilkrts_bug(w, "User error: hyperobject used by another hyperobject"); - ViewInfo *vinfo = cilkred_map_lookup(h, key); + ViewInfo *vinfo = cilkred_map_lookup(h, hyper); if (vinfo == NULL) { CILK_ASSERT(w, id < h->spa_cap); vinfo = &h->vinfo[id]; - CILK_ASSERT(w, vinfo->key == NULL && vinfo->val == NULL); + CILK_ASSERT(w, vinfo->hyper == NULL && vinfo->view == NULL); - void *val = key->__c_monoid.allocate_fn(key, key->__view_size); - key->__c_monoid.identity_fn(key, val); + void *view = __cilkrts_hyper_alloc(hyper->view_size); + hyper->identity_fn(view); // 
allocate space for the val and initialize it to identity - vinfo->key = key; - vinfo->val = val; + vinfo->hyper = hyper; + vinfo->view = view; cilkred_map_log_id(w, h, id); } - return vinfo->val; + return vinfo->view; } -void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes) { +__attribute__((noinline)) +void *__cilkrts_hyper_alloc(size_t bytes) { if (USE_INTERNAL_MALLOC) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); if (!w) @@ -355,18 +418,20 @@ void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes) { // a Cilkified region w = default_cilkrts->workers[default_cilkrts->exiting_worker]; return cilk_internal_malloc(w, bytes, IM_REDUCER_MAP); - } else - return cilk_aligned_alloc(16, bytes); + } else { + return cilk_aligned_alloc(CILK_CACHE_LINE, bytes); + } } -void __cilkrts_hyper_dealloc(__cilkrts_hyperobject_base *key, void *view) { +__attribute__((noinline)) +void __cilkrts_hyper_dealloc(void *view, size_t bytes) { if (USE_INTERNAL_MALLOC) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); if (!w) // Use instead the worker from the default CilkRTS that last exited // a Cilkified region w = default_cilkrts->workers[default_cilkrts->exiting_worker]; - cilk_internal_free(w, view, key->__view_size, IM_REDUCER_MAP); + cilk_internal_free(w, view, bytes, IM_REDUCER_MAP); } else free(view); } diff --git a/runtime/rts-config.h b/runtime/rts-config.h index 63bc2978..f04daef5 100644 --- a/runtime/rts-config.h +++ b/runtime/rts-config.h @@ -18,7 +18,7 @@ #endif #define __CILKRTS_VERSION 0x0 -#define __CILKRTS_ABI_VERSION 3 +#define __CILKRTS_ABI_VERSION 4 #ifndef CILK_DEBUG #define CILK_DEBUG 1 @@ -38,6 +38,12 @@ #define PROC_SPEED_IN_GHZ 2.2 +#define BUSY_LOOP_SPIN 4096 + +#define ENABLE_THIEF_SLEEP 1 + +#define ENABLE_EXTENSION 1 + #if defined __linux__ #define CILK_PAGE_SIZE 0 /* page size not available at compile time */ #elif defined __APPLE__ @@ -56,9 +62,11 @@ #define DEFAULT_NPROC 0 // 0 for # of cores available #define 
DEFAULT_DEQ_DEPTH 1024 #define DEFAULT_STACK_SIZE 0x100000 // 1 MBytes -#define DEFAULT_FIBER_POOL_CAP 3 // initial per-worker fiber pool capacity +#define DEFAULT_FIBER_POOL_CAP 8 // initial per-worker fiber pool capacity #define DEFAULT_REDUCER_LIMIT 1024 #define DEFAULT_FORCE_REDUCE 0 // do not self steal to force reduce #define MAX_CALLBACKS 32 // Maximum number of init or exit callbacks + +#define HYPER_TABLE_HIDDEN 1 #endif // _CONFIG_H diff --git a/runtime/sched_stats.c b/runtime/sched_stats.c index e5d16e0d..bb8e60c9 100644 --- a/runtime/sched_stats.c +++ b/runtime/sched_stats.c @@ -1,3 +1,4 @@ +#include #include #include @@ -30,35 +31,11 @@ static const char *enum_to_str(enum timing_type t) { } } -static inline double cycles_to_micro_sec(uint64_t cycle) { - return (double)cycle / ((double)PROC_SPEED_IN_GHZ * 1000.0); -} - __attribute__((unused)) static inline double micro_sec_to_sec(double micro_sec) { return micro_sec / 1000000.0; } -static inline uint64_t begin_cycle_count() { - unsigned int low, high; - __asm__ volatile("cpuid\n\t" - "rdtsc\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - : "=r"(high), "=r"(low)::"%rax", "%rbx", "%rcx", "%rdx"); - return ((uint64_t)high << 32) | low; -} - -static inline uint64_t end_cycle_count() { - unsigned int low, high; - __asm__ volatile("rdtscp\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - "cpuid\n\t" - : "=r"(high), "=r"(low)::"%rax", "%rbx", "%rcx", "%rdx"); - return ((uint64_t)high << 32) | low; -} - static inline double nsec_to_sec(uint64_t nsec) { return nsec / 1.0e9; } static inline uint64_t begin_time() { @@ -83,6 +60,8 @@ void cilk_global_sched_stats_init(struct global_sched_stats *s) { s->exit_time = 0; s->steals = 0; s->repos = 0; + s->reeng_rqsts = 0; + s->onesen_rqsts = 0; for (int i = 0; i < NUMBER_OF_STATS; ++i) { s->time[i] = 0.0; s->count[i] = 0; @@ -98,6 +77,8 @@ void cilk_sched_stats_init(struct sched_stats *s) { } s->steals = 0; s->repos = 0; + s->reeng_rqsts = 0; + s->onesen_rqsts = 0; } 
void cilk_start_timing(__cilkrts_worker *w, enum timing_type t) { @@ -182,14 +163,16 @@ static void sched_stats_reset_worker(__cilkrts_worker *w, } w->l->stats.steals = 0; w->l->stats.repos = 0; + w->l->stats.reeng_rqsts = 0; + w->l->stats.onesen_rqsts = 0; } #define COL_DESC "%15s" -#define HDR_DESC "%18s %8s" +#define HDR_DESC "%18s %10s" #define WORKER_HDR_DESC "%10s %3u:" -#define FIELD_DESC "%18.6f %8ld" -#define COUNT_HDR_DESC "%8s" -#define COUNT_DESC "%8ld" +#define FIELD_DESC "%18.6f %10" PRIu64 +#define COUNT_HDR_DESC "%10s" +#define COUNT_DESC "%10" PRIu64 static void sched_stats_print_worker(__cilkrts_worker *w, void *data) { FILE *fp = (FILE *)data; @@ -203,9 +186,13 @@ static void sched_stats_print_worker(__cilkrts_worker *w, void *data) { } w->g->stats.steals += w->l->stats.steals; w->g->stats.repos += w->l->stats.repos; + w->g->stats.reeng_rqsts += w->l->stats.reeng_rqsts; + w->g->stats.onesen_rqsts += w->l->stats.onesen_rqsts; fprintf(stderr, COUNT_DESC, w->l->stats.steals); fprintf(stderr, COUNT_DESC, w->l->stats.repos); + fprintf(stderr, COUNT_DESC, w->l->stats.reeng_rqsts); + fprintf(stderr, COUNT_DESC, w->l->stats.onesen_rqsts); fprintf(fp, "\n"); } @@ -216,6 +203,8 @@ void cilk_sched_stats_print(struct global_state *g) { } g->stats.steals = 0; g->stats.repos = 0; + g->stats.reeng_rqsts = 0; + g->stats.onesen_rqsts = 0; fprintf(stderr, "\nSCHEDULING STATS (SECONDS):\n"); { @@ -232,6 +221,8 @@ void cilk_sched_stats_print(struct global_state *g) { } fprintf(stderr, COUNT_HDR_DESC, "steals"); fprintf(stderr, COUNT_HDR_DESC, "reposses"); + fprintf(stderr, COUNT_HDR_DESC, "reengs"); + fprintf(stderr, COUNT_HDR_DESC, "onesen"); fprintf(stderr, "\n"); for_each_worker(g, &sched_stats_print_worker, stderr); @@ -242,6 +233,8 @@ void cilk_sched_stats_print(struct global_state *g) { } fprintf(stderr, COUNT_DESC, g->stats.steals); fprintf(stderr, COUNT_DESC, g->stats.repos); + fprintf(stderr, COUNT_DESC, g->stats.reeng_rqsts); + fprintf(stderr, COUNT_DESC, 
g->stats.onesen_rqsts); fprintf(stderr, "\n"); for_each_worker(g, &sched_stats_reset_worker, NULL); diff --git a/runtime/sched_stats.h b/runtime/sched_stats.h index a2df087a..bf4cb4ee 100644 --- a/runtime/sched_stats.h +++ b/runtime/sched_stats.h @@ -27,6 +27,8 @@ struct sched_stats { uint64_t steals; uint64_t repos; + uint64_t reeng_rqsts; + uint64_t onesen_rqsts; }; struct global_sched_stats { @@ -38,6 +40,8 @@ struct global_sched_stats { uint64_t boss_end; uint64_t steals; uint64_t repos; + uint64_t reeng_rqsts; + uint64_t onesen_rqsts; double time[NUMBER_OF_STATS]; // Total time measured for all stats uint64_t count[NUMBER_OF_STATS]; }; diff --git a/runtime/scheduler.c b/runtime/scheduler.c index 0695a72b..cc24c4a1 100644 --- a/runtime/scheduler.c +++ b/runtime/scheduler.c @@ -7,6 +7,10 @@ #include #include +#ifdef __APPLE__ +#include +#endif + #include "cilk-internal.h" #include "closure.h" #include "fiber.h" @@ -16,11 +20,14 @@ #include "readydeque.h" #include "scheduler.h" #include "worker_coord.h" +#include "worker_sleep.h" #include "reducer_impl.h" -__thread __cilkrts_worker *tls_worker = NULL; -__thread bool is_boss_thread = false; +bool __cilkrts_use_extension = false; + +__thread __cilkrts_worker *__cilkrts_tls_worker = NULL; +CHEETAH_INTERNAL __thread bool is_boss_thread = false; // ============================================== // Misc. 
helper functions @@ -33,9 +40,12 @@ static void rts_srand(__cilkrts_worker *const w, unsigned int seed) { w->l->rand_next = seed; } -static unsigned int rts_rand(local_state *l) { - l->rand_next = l->rand_next * 1103515245 + 12345; - return (l->rand_next >> 16); +static unsigned int update_rand_state(unsigned int state) { + return state * 1103515245 + 12345; +} + +static unsigned int get_rand(unsigned int state) { + return state >> 16; } static void worker_change_state(__cilkrts_worker *w, @@ -145,6 +155,12 @@ static void setup_for_sync(__cilkrts_worker *w, Closure *t) { w->l->fiber_to_free = t->fiber; t->fiber = t->fiber_child; t->fiber_child = NULL; + + if (USE_EXTENSION) { + w->l->ext_fiber_to_free = t->ext_fiber; + t->ext_fiber = t->ext_fiber_child; + t->ext_fiber_child = NULL; + } } CILK_ASSERT(w, t->fiber); @@ -163,16 +179,16 @@ static void setup_for_sync(__cilkrts_worker *w, Closure *t) { // ============================================== // TLS related functions // ============================================== -static pthread_key_t worker_key; +/* static pthread_key_t worker_key; */ CHEETAH_INTERNAL void __cilkrts_init_tls_variables() { - int status = pthread_key_create(&worker_key, NULL); - USE_UNUSED(status); - CILK_ASSERT_G(status == 0); + /* int status = pthread_key_create(&worker_key, NULL); */ + /* USE_UNUSED(status); */ + /* CILK_ASSERT_G(status == 0); */ } CHEETAH_INTERNAL void __cilkrts_set_tls_worker(__cilkrts_worker *w) { - tls_worker = w; + __cilkrts_tls_worker = w; } // ============================================== @@ -180,10 +196,11 @@ CHEETAH_INTERNAL void __cilkrts_set_tls_worker(__cilkrts_worker *w) { // ============================================== /* Doing an "unconditional steal" to steal back the call parent closure */ -static Closure *setup_call_parent_resumption(__cilkrts_worker *const w, +static Closure *setup_call_parent_resumption(ReadyDeque *deques, + __cilkrts_worker *const w, Closure *t) { - deque_assert_ownership(w, w->self); 
+ deque_assert_ownership(deques, w, w->self); Closure_assert_ownership(w, t); CILK_ASSERT_POINTER_EQUAL(w, w, __cilkrts_get_tls_worker()); @@ -192,6 +209,9 @@ static Closure *setup_call_parent_resumption(__cilkrts_worker *const w, CILK_ASSERT(w, ((intptr_t)t->frame->worker) & 1); CILK_ASSERT_POINTER_EQUAL(w, w->head, w->tail); CILK_ASSERT_POINTER_EQUAL(w, w->current_stack_frame, t->frame); + if (USE_EXTENSION) { + w->extension = t->frame->extension; + } Closure_change_status(w, t, CLOSURE_SUSPENDED, CLOSURE_RUNNING); atomic_store_explicit(&t->frame->worker, w, memory_order_relaxed); @@ -205,9 +225,11 @@ void Cilk_set_return(__cilkrts_worker *const w) { Closure *t; cilkrts_alert(RETURN, w, "(Cilk_set_return)"); + ReadyDeque *deques = w->g->deques; + worker_id self = w->self; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, self); Closure_lock(w, t); CILK_ASSERT(w, t->status == CLOSURE_RUNNING); @@ -222,7 +244,7 @@ void Cilk_set_return(__cilkrts_worker *const w) { CILK_ASSERT(w, t->simulated_stolen == false); Closure *call_parent = t->call_parent; - Closure *t1 = deque_xtract_bottom(w, w->self); + Closure *t1 = deque_xtract_bottom(deques, w, self); USE_UNUSED(t1); CILK_ASSERT(w, t == t1); @@ -234,9 +256,13 @@ void Cilk_set_return(__cilkrts_worker *const w) { Closure_lock(w, call_parent); CILK_ASSERT(w, call_parent->fiber == t->fiber); t->fiber = NULL; + if (USE_EXTENSION) { + CILK_ASSERT(w, call_parent->ext_fiber == t->ext_fiber); + t->ext_fiber = NULL; + } Closure_remove_callee(w, call_parent); - setup_call_parent_resumption(w, call_parent); + setup_call_parent_resumption(deques, w, call_parent); Closure_unlock(w, call_parent); if (t->saved_throwing_fiber) { @@ -244,9 +270,9 @@ void Cilk_set_return(__cilkrts_worker *const w) { t->saved_throwing_fiber = NULL; } Closure_destroy(w, t); - deque_add_bottom(w, call_parent, w->self); + deque_add_bottom(deques, w, call_parent, self); - 
deque_unlock_self(w); + deque_unlock_self(deques, w); } /*** @@ -271,6 +297,10 @@ static Closure *unconditional_steal(__cilkrts_worker *const w, CILK_ASSERT(w, (parent->fiber == NULL) && parent->fiber_child); parent->fiber = parent->fiber_child; parent->fiber_child = NULL; + if (USE_EXTENSION) { + parent->ext_fiber = parent->ext_fiber_child; + parent->ext_fiber_child = NULL; + } Closure_make_ready(parent); return parent; @@ -465,12 +495,19 @@ static Closure *Closure_return(__cilkrts_worker *const w, Closure *child) { // Case where we are not the leftmost stack. CILK_ASSERT(w, parent->fiber_child != child->fiber); cilk_fiber_deallocate_to_pool(w, child->fiber); + if (USE_EXTENSION) { + cilk_fiber_deallocate_to_pool(w, child->ext_fiber); + } } else { // We are leftmost, pass stack/fiber up to parent. // Thus, no stack/fiber to free. parent->fiber_child = child->fiber; + if (USE_EXTENSION) { + parent->ext_fiber_child = child->ext_fiber; + } } child->fiber = NULL; + child->ext_fiber = NULL; Closure_remove_child(w, parent, child); // unlink child from tree // we have deposited our views and unlinked; we can quit now @@ -586,9 +623,10 @@ void Cilk_exception_handler(char *exn) { Closure *t; __cilkrts_worker *w = __cilkrts_get_tls_worker(); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, w->self); CILK_ASSERT(w, t); Closure_lock(w, t); @@ -618,13 +656,13 @@ void Cilk_exception_handler(char *exn) { } Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); sanitizer_unpoison_fiber(t->fiber); longjmp_to_runtime(w); // NOT returning back to user code } else { // not steal, not abort; false alarm Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); return; } @@ -661,8 +699,7 @@ oldest_non_stolen_frame_in_stacklet(__cilkrts_stack_frame *head) { static Closure *setup_call_parent_closure_helper( __cilkrts_worker *const w, 
__cilkrts_worker *const victim_w, - __cilkrts_stack_frame *frame, Closure *oldest) { - + __cilkrts_stack_frame *frame, void *extension, Closure *oldest) { Closure *call_parent, *curr_cl; if (oldest->frame == frame) { @@ -670,9 +707,8 @@ static Closure *setup_call_parent_closure_helper( CILK_ASSERT(w, oldest->fiber); return oldest; } - - call_parent = setup_call_parent_closure_helper(w, victim_w, - frame->call_parent, oldest); + call_parent = setup_call_parent_closure_helper( + w, victim_w, frame->call_parent, extension, oldest); __cilkrts_set_stolen(frame); curr_cl = Closure_create(w); curr_cl->frame = frame; @@ -685,6 +721,11 @@ static Closure *setup_call_parent_closure_helper( memory_order_relaxed); curr_cl->fiber = call_parent->fiber; + if (USE_EXTENSION) { + curr_cl->frame->extension = extension; + curr_cl->ext_fiber = call_parent->ext_fiber; + } + Closure_add_callee(w, call_parent, curr_cl); return curr_cl; @@ -705,8 +746,8 @@ static void setup_closures_in_stacklet(__cilkrts_worker *const w, Closure *call_parent; Closure *oldest_cl = youngest_cl->call_parent; __cilkrts_stack_frame *youngest, *oldest; - youngest = youngest_cl->frame; + void *extension = USE_EXTENSION ? 
youngest->extension : NULL; oldest = oldest_non_stolen_frame_in_stacklet(youngest); CILK_ASSERT(w, youngest == youngest_cl->frame); @@ -722,13 +763,16 @@ static void setup_closures_in_stacklet(__cilkrts_worker *const w, CILK_ASSERT(w, oldest->flags & CILK_FRAME_DETACHED); __cilkrts_set_stolen(oldest); oldest_cl->frame = oldest; + if (USE_EXTENSION) { + oldest_cl->frame->extension = extension; + } } CILK_ASSERT(w, oldest->worker == victim_w); atomic_store_explicit(&oldest_cl->frame->worker, INVALID, memory_order_relaxed); call_parent = setup_call_parent_closure_helper( - w, victim_w, youngest->call_parent, oldest_cl); + w, victim_w, youngest->call_parent, extension, oldest_cl); CILK_ASSERT(w, youngest_cl->fiber != oldest_cl->fiber); CILK_ASSERT(w, youngest->worker == victim_w); @@ -788,13 +832,13 @@ static int do_dekker_on(__cilkrts_worker *const w, * deque to get the parent closure. This is the only time I can * think of, where the ready deque contains more than one frame. ***/ -static Closure *promote_child(__cilkrts_worker *const w, +static Closure *promote_child(ReadyDeque *deques, __cilkrts_worker *const w, __cilkrts_worker *const victim_w, Closure *cl, Closure **res) { worker_id pn = victim_w->self; - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); Closure_assert_ownership(w, cl); CILK_ASSERT(w, cl->status == CLOSURE_RUNNING); @@ -813,7 +857,6 @@ static Closure *promote_child(__cilkrts_worker *const w, __cilkrts_stack_frame **head = atomic_load_explicit(&victim_w->head, memory_order_acquire); __cilkrts_stack_frame *frame_to_steal = *head; - // ANGE: this must be true if we get this far // Note that it can be that H == T here; victim could have done T-- // after the thief passes Dekker; in which case, thief gets the last @@ -861,7 +904,7 @@ static Closure *promote_child(__cilkrts_worker *const w, spawn_parent->call_parent = cl; // suspend cl & remove it from deque - Closure_suspend_victim(w, victim_w, cl); + 
Closure_suspend_victim(deques, w, victim_w, cl); Closure_unlock(w, cl); Closure_lock(w, spawn_parent); @@ -880,7 +923,7 @@ static Closure *promote_child(__cilkrts_worker *const w, /*** * Register this child, which sets up its sibling links. - * We do this here intead of in finish_promote, because we must setup + * We do this here instead of in finish_promote, because we must setup * the sib links for the new child before its pointer escapses. ***/ Closure_add_child(w, spawn_parent, spawn_child); @@ -893,7 +936,7 @@ static Closure *promote_child(__cilkrts_worker *const w, spawn_child->frame = (__cilkrts_stack_frame *)NULL; /* insert the closure on the victim processor's deque */ - deque_add_bottom(w, spawn_child, pn); + deque_add_bottom(deques, w, spawn_child, pn); /* at this point the child can be freely executed */ return spawn_child; @@ -947,13 +990,16 @@ static void finish_promote(__cilkrts_worker *const w, * NOTE: this function assumes that w holds the lock on victim_w's deque * and Closure cl and releases them before returning. 
***/ -static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, +static Closure *extract_top_spawning_closure(ReadyDeque *deques, + __cilkrts_worker *const w, __cilkrts_worker *const victim_w, Closure *cl) { Closure *res = NULL, *child; struct cilk_fiber *parent_fiber = cl->fiber; + struct cilk_fiber *parent_ext_fiber = cl->ext_fiber; + worker_id victim_id = victim_w->self; - deque_assert_ownership(w, victim_w->self); + deque_assert_ownership(deques, w, victim_id); Closure_assert_ownership(w, cl); CILK_ASSERT(w, parent_fiber); @@ -961,7 +1007,7 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, * if dekker passes, promote the child to a full closure, * and steal the parent */ - child = promote_child(w, victim_w, cl, &res); + child = promote_child(deques, w, victim_w, cl, &res); cilkrts_alert(STEAL, w, "(Closure_steal) promote gave cl/res/child = %p/%p/%p", (void *)cl, (void *)res, (void *)child); @@ -971,7 +1017,7 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, // ANGE: in this case, the spawning parent to steal / resume // is simply cl (i.e., there is only one frame in the stacklet), // so we didn't set res in promote_child. - res = deque_xtract_top(w, victim_w->self); + res = deque_xtract_top(deques, w, victim_id); CILK_ASSERT(w, cl == res); } @@ -979,13 +1025,20 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, // only create a new fiber if it's a real steal if (w == victim_w) { res->fiber = NULL; + res->ext_fiber = NULL; } else { res->fiber = cilk_fiber_allocate_from_pool(w); + if (USE_EXTENSION) { + res->ext_fiber = cilk_fiber_allocate_from_pool(w); + } } // make sure we are not hold lock on child Closure_assert_alienation(w, child); child->fiber = parent_fiber; + if (USE_EXTENSION) { + child->ext_fiber = parent_ext_fiber; + } return res; } @@ -994,12 +1047,16 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, * stealing protocol. 
Tries to steal from the victim; returns a * stolen closure, or NULL if none. */ -static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { +static Closure *Closure_steal(__cilkrts_worker **workers, ReadyDeque *deques, + __cilkrts_worker *const w, int victim) { Closure *cl; Closure *res = (Closure *)NULL; __cilkrts_worker *victim_w; - victim_w = w->g->workers[victim]; + victim_w = workers[victim]; + + if (victim_w == NULL) + return NULL; // Fast test for an unsuccessful steal attempt using only read operations. // This fast test seems to improve parallel performance. @@ -1014,15 +1071,15 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { } //----- EVENT_STEAL_ATTEMPT - if (deque_trylock(w, victim) == 0) { + if (deque_trylock(deques, w, victim) == 0) { return NULL; } - cl = deque_peek_top(w, victim); + cl = deque_peek_top(deques, w, victim); if (cl) { if (Closure_trylock(w, cl) == 0) { - deque_unlock(w, victim); + deque_unlock(deques, w, victim); return NULL; } @@ -1037,10 +1094,10 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { cilkrts_alert(STEAL, w, "(Closure_steal) can steal from W%d; cl=%p", victim, (void *)cl); - res = extract_top_spawning_closure(w, victim_w, cl); + res = extract_top_spawning_closure(deques, w, victim_w, cl); // at this point, more steals can happen from the victim. 
- deque_unlock(w, victim); + deque_unlock(deques, w, victim); CILK_ASSERT(w, res->fiber); CILK_ASSERT(w, res->frame->worker == victim_w); @@ -1070,7 +1127,7 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { // MUST unlock the closure before the queue; // see rule D in the file PROTOCOLS Closure_unlock(w, cl); - deque_unlock(w, victim); + deque_unlock(deques, w, victim); break; default: @@ -1082,7 +1139,7 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { Closure_status_to_str(cl->status)); } } else { - deque_unlock(w, victim); + deque_unlock(deques, w, victim); //----- EVENT_STEAL_EMPTY_DEQUE } @@ -1102,7 +1159,9 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { ***/ void promote_own_deque(__cilkrts_worker *w) { - if (deque_trylock(w, w->self) == 0) { + ReadyDeque *deques = w->g->deques; + worker_id self = w->self; + if (deque_trylock(deques, w, self) == 0) { cilkrts_bug( w, "Bug: failed to acquire deque lock when promoting own deque"); return; @@ -1110,12 +1169,12 @@ void promote_own_deque(__cilkrts_worker *w) { bool done = false; while (!done) { - Closure *cl = deque_peek_top(w, w->self); + Closure *cl = deque_peek_top(deques, w, self); CILK_ASSERT(w, cl); CILK_ASSERT(w, cl->status == CLOSURE_RUNNING); if (Closure_trylock(w, cl) == 0) { - deque_unlock(w, w->self); + deque_unlock(deques, w, self); cilkrts_bug( w, "Bug: failed to acquire deque lock when promoting own deque"); @@ -1123,7 +1182,7 @@ void promote_own_deque(__cilkrts_worker *w) { } if (do_dekker_on(w, w, cl)) { // unfortunately this function releases both locks - Closure *res = extract_top_spawning_closure(w, w, cl); + Closure *res = extract_top_spawning_closure(deques, w, w, cl); CILK_ASSERT(w, res); CILK_ASSERT(w, res->fiber == NULL); CILK_ASSERT(w, res->frame->worker == w); @@ -1142,7 +1201,7 @@ void promote_own_deque(__cilkrts_worker *w) { } else { Closure_unlock(w, cl); - deque_unlock(w, w->self); + deque_unlock(deques, w, 
self); done = true; // we can break out; no more frames to promote } } @@ -1181,13 +1240,18 @@ void longjmp_to_user_code(__cilkrts_worker *w, Closure *t) { // This is the first time we run the root closure in this Cilkified // region. The closure has been completely setup at this point by // invoke_cilkified_root(). We just need jump to the user code. - volatile bool *initialized = &w->g->root_closure_initialized; - if (t == w->g->root_closure && *initialized == false) { + global_state *g = w->g; + volatile bool *initialized = &g->root_closure_initialized; + if (t == g->root_closure && *initialized == false) { *initialized = true; } else if (!t->simulated_stolen) { void *new_rsp = sysdep_reset_stack_for_resume(fiber, sf); USE_UNUSED(new_rsp); CILK_ASSERT(w, SP(sf) == new_rsp); + if (USE_EXTENSION) { + w->extension = sf->extension; + w->ext_stack = sysdep_get_stack_start(t->ext_fiber); + } } } CILK_SWITCH_TIMING(w, INTERVAL_SCHED, INTERVAL_WORK); @@ -1218,9 +1282,10 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { int res = SYNC_READY; //----- EVENT_CILK_SYNC + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); /* assert we are really at the top of the stack */ CILK_ASSERT(w, Closure_at_top_of_stack(w)); @@ -1244,12 +1309,18 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { // gotten back to runtime but returning to another ancestor that needs // to sync ... in which case we might have a fiber to free, but it's // never the same fiber that we are on right now. 
- if (w->l->fiber_to_free) { - CILK_ASSERT(w, w->l->fiber_to_free != t->fiber); + local_state *l = w->l; + if (l->fiber_to_free) { + CILK_ASSERT(w, l->fiber_to_free != t->fiber); // we should free this fiber now and we can as long as we are not on // it - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); - w->l->fiber_to_free = NULL; + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (USE_EXTENSION && l->ext_fiber_to_free) { + CILK_ASSERT(w, l->ext_fiber_to_free != t->ext_fiber); + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } if (Closure_has_children(t)) { @@ -1261,17 +1332,21 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { // exception in the continuation was thrown), we still need this // fiber for unwinding. if (t->user_exn.exn == NULL) { - w->l->fiber_to_free = t->fiber; + l->fiber_to_free = t->fiber; } else { t->saved_throwing_fiber = t->fiber; } + if (USE_EXTENSION) { + l->ext_fiber_to_free = t->ext_fiber; + } t->fiber = NULL; + t->ext_fiber = NULL; // place holder for reducer map; the view in tlmm (if any) are // updated by the last strand in Closure t before sync; need to // reduce these when successful provably good steal occurs cilkred_map *reducers = w->reducer_map; w->reducer_map = NULL; - Closure_suspend(w, t); + Closure_suspend(deques, w, t); t->user_rmap = reducers; /* set this after state change to suspended */ res = SYNC_NOT_READY; } else { @@ -1281,7 +1356,7 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { } Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (res == SYNC_READY) { struct closure_exception child_exn = t->child_exn; @@ -1311,8 +1386,11 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { return res; } -static void do_what_it_says(__cilkrts_worker *w, Closure *t) { +static void do_what_it_says(ReadyDeque *deques, __cilkrts_worker *w, + 
Closure *t) { __cilkrts_stack_frame *f; + worker_id self = w->self; + local_state *l = w->l; do { cilkrts_alert(SCHED, w, "(do_what_it_says) closure %p", (void *)t); @@ -1321,7 +1399,8 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { switch (t->status) { case CLOSURE_READY: // ANGE: anything we need to free must have been freed at this point - CILK_ASSERT(w, w->l->fiber_to_free == NULL); + CILK_ASSERT(w, l->fiber_to_free == NULL); + CILK_ASSERT(w, l->ext_fiber_to_free == NULL); cilkrts_alert(SCHED, w, "(do_what_it_says) CLOSURE_READY"); /* just execute it */ @@ -1336,9 +1415,9 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // MUST unlock the closure before locking the queue // (rule A in file PROTOCOLS) - deque_lock_self(w); - deque_add_bottom(w, t, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + deque_add_bottom(deques, w, t, self); + deque_unlock_self(deques, w); /* now execute it */ cilkrts_alert(SCHED, w, "(do_what_it_says) Jump into user code"); @@ -1350,24 +1429,29 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // code"); // longjmp invalidates non-volatile variables __cilkrts_worker *volatile w_save = w; - if (__builtin_setjmp(w->l->rts_ctx) == 0) { + if (__builtin_setjmp(l->rts_ctx) == 0) { worker_change_state(w, WORKER_RUN); longjmp_to_user_code(w, t); } else { w = w_save; + l = w->l; CILK_ASSERT_POINTER_EQUAL(w, w, __cilkrts_get_tls_worker()); sanitizer_finish_switch_fiber(); worker_change_state(w, WORKER_SCHED); // CILK_ASSERT(w, t->fiber == w->l->fiber_to_free); - if (w->l->fiber_to_free) { - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); + if (l->fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (USE_EXTENSION && l->ext_fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } - w->l->fiber_to_free = NULL; // Attempt to get a closure from the bottom of our deque. 
- deque_lock_self(w); - t = deque_xtract_bottom(w, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + t = deque_xtract_bottom(deques, w, self); + deque_unlock_self(deques, w); } break; // ? @@ -1388,7 +1472,7 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { break; } if (t) { - WHEN_SCHED_STATS(w->l->stats.repos++); + WHEN_SCHED_STATS(l->stats.repos++); } } while (t); } @@ -1397,7 +1481,7 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // Cilk computation until it would enter the work-stealing loop. void do_what_it_says_boss(__cilkrts_worker *w, Closure *t) { - do_what_it_says(w, t); + do_what_it_says(w->g->deques, w, t); // At this point, the boss has run out of work to do. Rather than become a // thief itself, the boss wakes up the root worker to become a thief. @@ -1411,234 +1495,6 @@ void do_what_it_says_boss(__cilkrts_worker *w, Closure *t) { #endif } -// Update the index-to-worker map to swap self with the worker at the target -// index. -static void swap_worker_with_target(global_state *g, worker_id self, - worker_id target_index) { - worker_id self_index = g->worker_to_index[self]; - worker_id target_worker = g->index_to_worker[target_index]; - - // Update the index-to-worker map. - g->index_to_worker[self_index] = target_worker; - g->index_to_worker[target_index] = self; - - // Update the worker-to-index map. - g->worker_to_index[target_worker] = self_index; - g->worker_to_index[self] = target_index; -} - -// Called by a thief thread. Causes the thief thread to try to sleep, that is, -// to wait for a signal to resume work-stealing. -static bool try_to_disengage_thief(global_state *g, worker_id self, - uint64_t disengaged_deprived) { - // Try to grab the lock on the index structure. - if (!cilk_mutex_try(&g->index_lock)) { - return false; - } - - // Increment the number of disengaged thieves and decrement number of deprived - // thieves. 
- const uint64_t disengaged_mask = ((uint64_t)-1) << 32; - uint64_t disengaged = disengaged_deprived & disengaged_mask; - uint64_t new_disengaged_deprived = - ((disengaged + (1UL << 32)) & disengaged_mask) | - ((disengaged_deprived - 1) & ~disengaged_mask); - // Try to update the number of disengaged workers. This step synchronizes - // with parallel calls to reengage thieves, calls to reengage thieves, and - // updates to the number of deprived workers. - // First atomically update the number of disengaged workers. - if (atomic_compare_exchange_strong_explicit( - &g->disengaged_deprived, &disengaged_deprived, - new_disengaged_deprived, memory_order_release, - memory_order_acquire)) { - // Update the index-to-worker map. - worker_id last_index = g->nworkers - (new_disengaged_deprived >> 32); - if (g->worker_to_index[self] < last_index) { - swap_worker_with_target(g, self, last_index); - } - // Release the lock on the index structure - cilk_mutex_unlock(&g->index_lock); - - // Disengage this thread. - thief_disengage(g); - - // The thread is now reengaged. Grab the lock on the index structure. - cilk_mutex_lock(&g->index_lock); - - // Decrement the number of disengaged workers. - while (true) { - // Atomically decrement the number of disengaged workers. - uint64_t disengaged_deprived = atomic_load_explicit( - &g->disengaged_deprived, memory_order_acquire); - disengaged = disengaged_deprived & disengaged_mask; - new_disengaged_deprived = ((disengaged - (1UL << 32)) & disengaged_mask) | - ((disengaged_deprived + 1) & ~disengaged_mask); - if (atomic_compare_exchange_strong_explicit( - &g->disengaged_deprived, &disengaged_deprived, - new_disengaged_deprived, memory_order_release, - memory_order_acquire)) { - // Update the index structure. - last_index = g->nworkers - (disengaged_deprived >> 32); - if (g->worker_to_index[self] > last_index) { - swap_worker_with_target(g, self, last_index); - } - - // Release the lock on the index structure. 
- cilk_mutex_unlock(&g->index_lock); - return true; - } - } - } else { - // Release the lock on the index structure. - cilk_mutex_unlock(&g->index_lock); - return false; - } -} - -// Attempt to disengage this thief thread. The __cilkrts_worker parameter is only -// used for debugging. -static bool maybe_disengage_thief(global_state *g, worker_id self, - unsigned int nworkers, __cilkrts_worker *w) { - // Check the number of active and deprived workers, and disengage this worker - // if there are too many deprived workers. - while (true) { - // Check if this deprived thread should sleep. - uint64_t disengaged_deprived = - atomic_load_explicit(&g->disengaged_deprived, memory_order_acquire); - const uint64_t disengaged_mask = ((uint64_t)-1) << 32; - uint32_t disengaged = (uint32_t)(disengaged_deprived >> 32); - uint32_t deprived = (uint32_t)(disengaged_deprived & ~disengaged_mask); - - CILK_ASSERT(w, disengaged < nworkers); - CILK_ASSERT(w, deprived < nworkers); - int32_t active = - (int32_t)nworkers - (int32_t)disengaged - (int32_t)deprived; - CILK_ASSERT(w, active >= 1); - // TODO: Investigate whether it's better to keep the number of deprived - // workers less than the number of active workers. - if (active < (int32_t)deprived) { - // Too many deprived thieves. Try to disengage this worker. If it - // fails, repeat the loop. - if (try_to_disengage_thief(g, self, disengaged_deprived)) { - // The thief was successfully disengaged. It has since been - // taken out of disengage. - return true; - } - } else { - // We have enough active workers to keep this worker out of disengage, - // but this worker was still unable to steal work. Put this thief - // to sleep for a while using the conventional way. - // In testing, a nanosleep(0) takes approximately 50 us. 
- const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = 50000}; - /* const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = - * 25000}; */ - nanosleep(&sleeptime, NULL); - break; - } - } - return false; -} - -// Threshold for number of consective failed steal attempts to declare a -// thief as deprived. Must be a power of 2. -#define DEPRIVED_THRESHOLD 2048 - -// Number of attempted steals the thief should do each time it copies the -// worker state. ATTEMPTS must divide DEPRIVED_THRESHOLD. -#define ATTEMPTS 4 - -static unsigned int go_to_sleep_maybe(global_state *const rts, worker_id self, - unsigned int nworkers, - __cilkrts_worker *const w, - Closure *const t, unsigned int fails) { - - // Threshold for number of consecutive failed steal attempts to try - // disengaging this worker. Must be a multiple of DEPRIVED_THRESHOLD and a - // power of 2. - const unsigned int DISENGAGE_THRESHOLD = 4 * DEPRIVED_THRESHOLD; - // Threshold for number of failed steal attempts to put this thief to sleep - // for an extended amount of time. Must be larger than DISENGAGE_THRESHOLD. - const unsigned int SLEEP_THRESHOLD = 32 * DEPRIVED_THRESHOLD; - - if (t) { - if (fails >= DEPRIVED_THRESHOLD) { - // This thief is no longer deprived. Decrement the number - // of deprived thieves. - atomic_fetch_sub_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - - // Request to reengage at most 2 thieves. - // TODO: Investigate whether it's better to keep the number - // less than the number of active workers. - request_more_thieves(rts, 2); - } - fails = 0; - - } else { - CILK_START_TIMING(w, INTERVAL_SLEEP); - fails += ATTEMPTS; - - // Every DEPRIVED_THRESHOLD consecutive failed steal attempts, - // update the set of deprived workers, and maybe disengage this - // worker if there are too many deprived workers. 
- if (fails % DEPRIVED_THRESHOLD == 0) { - if (fails > (1 << 25)) { - // Prevent the fail count from exceeding this maximum, so we - // don't have to worry about the fail count overflowing. - // - // This maximum bound is chosen based on the maximum sleep - // time when fails > SLEEP_THRESHOLD, which specifies the - // time to sleep in nanoseconds. Because the specification - // to nanosleep() disallows times with more than 1e9 - // nanoseconds, we set the maximum fails value here - // accordinly and, in this case, simply sleep for 1 second. - fails = (1 << 25); - const struct timespec sleeptime = {.tv_sec = 1, .tv_nsec = 0}; - nanosleep(&sleeptime, NULL); - } else if (DEPRIVED_THRESHOLD == fails) { - // This thief is now considered deprived. Increment the - // number of deprived workers. - atomic_fetch_add_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - } else if (fails % DISENGAGE_THRESHOLD == 0) { -#if BOSS_THIEF - if (is_boss_thread) { - // The boss thread should never disengage. Sleep instead. - const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 50000}; - nanosleep(&sleeptime, NULL); - } else -#endif - if (maybe_disengage_thief(rts, self, nworkers, w)) { - // The semaphore for reserving workers may have been - // non-zero due to past successful steals, rather than a - // recent successful steal. Decrement fails so we try - // to disengage this again sooner, in case there is - // still nothing to steal. - fails -= (DISENGAGE_THRESHOLD / 2); - } - - } else if (fails > SLEEP_THRESHOLD) { - // This thief has failed a lot of consecutive steal - // attempts, but it's not disengaged. Sleep for increasing - // lengths of time. - const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 16 * fails}; - nanosleep(&sleeptime, NULL); - } else if (fails % DISENGAGE_THRESHOLD != 0) { - // This thief has failed many consecutive steal attempts, - // but it's not disengaged. Sleep for a short time. 
- const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 50000}; - nanosleep(&sleeptime, NULL); - } - } - CILK_STOP_TIMING(w, INTERVAL_SLEEP); - } - - return fails; -} - void worker_scheduler(__cilkrts_worker *w) { Closure *t = NULL; CILK_ASSERT(w, w == __cilkrts_get_tls_worker()); @@ -1647,11 +1503,12 @@ void worker_scheduler(__cilkrts_worker *w) { worker_change_state(w, WORKER_SCHED); global_state *rts = w->g; worker_id self = w->self; + const bool is_boss = is_boss_thread; // Get this worker's local_state pointer, to avoid rereading it // unnecessarily during the work-stealing loop. This optimization helps // reduce sharing on the worker structure. - local_state *l = w->l; + unsigned int rand_state = w->l->rand_next; // Get the number of workers. We don't currently support changing the // number of workers dynamically during execution of a Cilkified region. @@ -1659,8 +1516,20 @@ void worker_scheduler(__cilkrts_worker *w) { // Initialize count of consecutive failed steal attempts. Effectively, // every worker is active upon entering this routine. unsigned int fails = 0; + unsigned int request_threshold = SENTINEL_THRESHOLD; + // Local history information of the state of the system, for sentinel + // workers to use to determine when to disengage and how many workers to + // reengage. + history_t inefficient_history = 0; + history_t efficient_history = 0; + unsigned int sentinel_count_history[SENTINEL_COUNT_HISTORY] = { 1 }; + unsigned int sentinel_count_history_tail = 0; + unsigned int recent_sentinel_count = SENTINEL_COUNT_HISTORY; + // Get pointers to the local and global copies of the index-to-worker map. 
- worker_id *local_index_to_worker = l->index_to_worker; + worker_id *index_to_worker = rts->index_to_worker; + __cilkrts_worker **workers = rts->workers; + ReadyDeque *deques = rts->deques; while (!atomic_load_explicit(&rts->done, memory_order_acquire)) { /* A worker entering the steal loop must have saved its reducer map into @@ -1672,39 +1541,44 @@ void worker_scheduler(__cilkrts_worker *w) { while (!t && !atomic_load_explicit(&rts->done, memory_order_acquire)) { CILK_START_TIMING(w, INTERVAL_SCHED); CILK_START_TIMING(w, INTERVAL_IDLE); +#if ENABLE_THIEF_SLEEP // Get the set of workers we can steal from and a local copy of the // index-to-worker map. We'll attempt a few steals using these // local copies to minimize memory traffic. - uint64_t disengaged_deprived = atomic_load_explicit( - &rts->disengaged_deprived, memory_order_relaxed); - uint32_t disengaged = (uint32_t)(disengaged_deprived >> 32); + uint64_t disengaged_sentinel = atomic_load_explicit( + &rts->disengaged_sentinel, memory_order_relaxed); + uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel); uint32_t stealable = nworkers - disengaged; - // TODO: Technically, ATTEMPTS should scale with the number of - // workers, to amortize the memcpy, which takes O(P)-time. However, - // in testing, ATTEMPTS = 4 works well even on large worker counts - // (e.g., 96) in a NUMA environment. I suspect the total cost of - // the memcpy is too small for such worker counts to worry about. - memcpy(local_index_to_worker, rts->index_to_worker, - sizeof(worker_id) * stealable); + + if (__builtin_expect(stealable == 1, false)) + // If this worker detects only 1 stealable worker, then it's the + // only worker in the work-stealing loop. + continue; + +#else // ENABLE_THIEF_SLEEP + uint32_t stealable = nworkers; +#endif // ENABLE_THIEF_SLEEP int attempt = ATTEMPTS; do { // Choose a random victim not equal to self. 
worker_id victim = - local_index_to_worker[rts_rand(l) % stealable]; + index_to_worker[get_rand(rand_state) % stealable]; + rand_state = update_rand_state(rand_state); while (victim == self) { - victim = local_index_to_worker[rts_rand(l) % stealable]; + busy_loop_pause(); + victim = index_to_worker[get_rand(rand_state) % stealable]; + rand_state = update_rand_state(rand_state); } // Attempt to steal from that victim. - t = Closure_steal(w, victim); + t = Closure_steal(workers, deques, w, victim); if (!t) { - // Pause inside this busy loop. -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + // Pause inside this busy loop. We perform many pause + // instructions in order to limit how much memory bandwidth + // the thief consumes. + for (int i = 0; i < STEAL_BUSY_PAUSE; ++i) { + busy_loop_pause(); + } } } while (!t && --attempt > 0); @@ -1718,37 +1592,80 @@ void worker_scheduler(__cilkrts_worker *w) { CILK_DROP_TIMING(w, INTERVAL_SCHED); } #endif - fails = go_to_sleep_maybe(rts, self, nworkers, w, t, fails); + fails = go_to_sleep_maybe( + rts, self, nworkers, w, t, fails, &request_threshold, + &inefficient_history, &efficient_history, + sentinel_count_history, &sentinel_count_history_tail, + &recent_sentinel_count); } CILK_START_TIMING(w, INTERVAL_SCHED); // If one Cilkified region stops and another one starts, then a worker // can reach this point with t == NULL and w->g->done == false. Check // that t is not NULL before calling do_what_it_says. if (t) { - // if provably-good steal happens, do_what_it_says will return - // the next closure to execute - do_what_it_says(w, t); +#if ENABLE_THIEF_SLEEP + const unsigned int MIN_FAILS = 2 * ATTEMPTS; + uint64_t start, end; + // Executing do_what_it_says involves some minimum amount of work, + // which can be used to amortize the cost of some failed steal + // attempts. Therefore, avoid measuring the elapsed cycles if we + // haven't failed many steal attempts. 
+ if (fails > MIN_FAILS) { + start = gettime_fast(); + } +#endif // ENABLE_THIEF_SLEEP + do_what_it_says(deques, w, t); +#if ENABLE_THIEF_SLEEP + if (fails > MIN_FAILS) { + end = gettime_fast(); + uint64_t elapsed = end - start; + // Decrement the count of failed steal attempts based on the + // amount of work done. + fails = decrease_fails_by_work(rts, w, fails, elapsed, + &request_threshold); + if (fails < SENTINEL_THRESHOLD) { + inefficient_history = 0; + efficient_history = 0; + } + } else { + fails = 0; + request_threshold = SENTINEL_THRESHOLD; + } +#endif // ENABLE_THIEF_SLEEP t = NULL; + } else if (!is_boss && + atomic_load_explicit(&rts->done, memory_order_acquire)) { + // If it appears the computation is done, busy-wait for a while + // before exiting the work-stealing loop, in case another cilkified + // region is started soon. + unsigned int busy_fail = 0; + while (busy_fail++ < 2 * BUSY_LOOP_SPIN && + atomic_load_explicit(&rts->done, memory_order_acquire)) { + busy_loop_pause(); + } + if (thief_should_wait(rts)) { + break; + } } } - if (fails >= DEPRIVED_THRESHOLD) { - // If this worker was deprived, decrement the number of deprived - // workers, essentially making this worker active. - atomic_fetch_sub_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - } + // Reset the fail count. 
+ reset_fails(rts, fails); + w->l->rand_next = rand_state; + CILK_STOP_TIMING(w, INTERVAL_SCHED); worker_change_state(w, WORKER_IDLE); #if BOSS_THIEF - if (is_boss_thread) { + if (is_boss) { __builtin_longjmp(w->g->boss_ctx, 1); } #endif } void *scheduler_thread_proc(void *arg) { - __cilkrts_worker *w = (__cilkrts_worker *)arg; + struct worker_args *w_arg = (struct worker_args *)arg; + __cilkrts_worker *w = __cilkrts_init_tls_worker(w_arg->id, w_arg->g); + cilkrts_alert(BOOT, w, "scheduler_thread_proc"); __cilkrts_set_tls_worker(w); @@ -1762,6 +1679,7 @@ void *scheduler_thread_proc(void *arg) { // Avoid redundant lookups of these commonly accessed worker fields. const worker_id self = w->self; global_state *rts = w->g; + const unsigned int nworkers = rts->nworkers; // Initialize worker's random-number generator. rts_srand(w, (self + 1) * 162347); @@ -1776,7 +1694,11 @@ void *scheduler_thread_proc(void *arg) { root_worker_wait(rts, self); } else { #endif - thief_wait(rts); + if (thief_should_wait(rts)) { + disengage_worker(rts, nworkers, self); + thief_wait(rts); + reengage_worker(rts, nworkers, self); + } #if !BOSS_THIEF } #endif @@ -1811,15 +1733,10 @@ void *scheduler_thread_proc(void *arg) { signal_uncilkified(rts); #if BOSS_THIEF unsigned int fail = 0; - while (fail++ < 2048 && + while (fail++ < BUSY_LOOP_SPIN && !atomic_load_explicit(&rts->disengaged_thieves_futex, memory_order_acquire)) { -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + busy_loop_pause(); } #endif // BOSS_THIEF } else { @@ -1827,15 +1744,10 @@ void *scheduler_thread_proc(void *arg) { // Busy-wait for a while to amortize the cost of syscalls to put // thief threads to sleep. 
unsigned int fail = 0; - while (fail++ < 2048 && + while (fail++ < BUSY_LOOP_SPIN && !atomic_load_explicit(&rts->disengaged_thieves_futex, memory_order_acquire)) { -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + busy_loop_pause(); } } } while (true); diff --git a/runtime/worker.h b/runtime/worker.h new file mode 100644 index 00000000..cf1f3023 --- /dev/null +++ b/runtime/worker.h @@ -0,0 +1,55 @@ +#ifndef _CILK_WORKER_H +#define _CILK_WORKER_H + +#include "rts-config.h" + +struct __cilkrts_stack_frame; +struct local_state; +struct global_state; + +enum __cilkrts_worker_state { + WORKER_IDLE = 10, + WORKER_SCHED, + WORKER_STEAL, + WORKER_RUN +}; + +struct __cilkrts_worker { + // T, H, and E pointers in the THE protocol. + // T and E are frequently accessed and should be in a hot cache line. + // H could be moved elsewhere because it is only touched when stealing. + _Atomic(struct __cilkrts_stack_frame **) head; + _Atomic(struct __cilkrts_stack_frame **) tail; + _Atomic(struct __cilkrts_stack_frame **) exc; + + // Worker id, a small integer + worker_id self; + + // 4 byte hole on 64 bit systems + + // A slot that points to the currently executing Cilk frame. + struct __cilkrts_stack_frame *current_stack_frame; + + // Map from reducer names to reducer values + cilkred_map *reducer_map; + + // Global state of the runtime system, opaque to the client. + struct global_state *g; + + // Additional per-worker state hidden from the client. + struct local_state *l; + + // Cache line boundary on 64 bit systems with 64 byte cache lines + + // Optional state, only maintained if __cilkrts_use_extension == true. + void *extension; + void *ext_stack; + + // Limit of the Lazy Task Queue, to detect queue overflow (debug only) + struct __cilkrts_stack_frame **ltq_limit; + +} __attribute__((aligned(1024))); // This alignment reduces false sharing + // induced by hardware prefetchers on some + // systems, such as Intel CPUs. 
+ +#endif /* _CILK_WORKER_H */ diff --git a/runtime/worker_coord.h b/runtime/worker_coord.h index 1421d0b8..ebde70e2 100644 --- a/runtime/worker_coord.h +++ b/runtime/worker_coord.h @@ -134,13 +134,22 @@ static inline void worker_clear_start(volatile atomic_bool *start) { // Common internal interface for managing execution of workers. //========================================================= +__attribute__((always_inline)) static void busy_loop_pause() { +#ifdef __SSE__ + __builtin_ia32_pause(); +#endif +#ifdef __aarch64__ + __builtin_arm_yield(); +#endif +} + // Called by a root-worker thread, that is, the worker w where w->self == // g->exiting_worker. Causes the root-worker thread to wait for a signal to // start work-stealing. static inline void root_worker_wait(global_state *g, const uint32_t id) { _Atomic uint32_t *root_worker_p = &g->start_root_worker; /* unsigned int fail = 0; */ -/* while (fail++ < 2048) { */ +/* while (fail++ < BUSY_LOOP_SPIN) { */ /* if (id != atomic_load_explicit(root_worker_p, memory_order_acquire)) { */ /* return; */ /* } */ @@ -244,16 +253,11 @@ static inline void signal_uncilkified(global_state *g) { // region. static inline void wait_while_cilkified(global_state *g) { unsigned int fail = 0; - while (fail++ < 2048) { + while (fail++ < BUSY_LOOP_SPIN) { if (!atomic_load_explicit(&g->cilkified, memory_order_acquire)) { return; } -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + busy_loop_pause(); } #if USE_FUTEX while (atomic_load_explicit(&g->cilkified, memory_order_acquire)) { @@ -296,6 +300,11 @@ static inline void reset_disengaged_var(global_state *g) { static inline void request_more_thieves(global_state *g, uint32_t count) { CILK_ASSERT_G(count > 0); + // Don't allow this routine to increment the futex beyond half the number of + // workers on the system. 
This bounds how many successful steals can + // possibly keep thieves engaged unnecessarily in the future, when there may + // not be as much parallelism. + int32_t max_requests = (int32_t)(g->nworkers / 2); #if USE_FUTEX // This step synchronizes with concurrent calls to request_more_thieves and // concurrent calls to try_to_disengage_thief. @@ -303,12 +312,7 @@ static inline void request_more_thieves(global_state *g, uint32_t count) { uint32_t disengaged_thieves_futex = atomic_load_explicit( &g->disengaged_thieves_futex, memory_order_acquire); - // Don't allow this routine increment the futex beyond half the number - // of workers on the system. This bounds how many successful steals can - // possibly keep thieves engaged unnecessarily in the future, when there - // may not be as much parallelism. - int32_t max_to_wake = - (int32_t)(g->nworkers / 2) - disengaged_thieves_futex; + int32_t max_to_wake = max_requests - disengaged_thieves_futex; if (max_to_wake <= 0) return; uint64_t to_wake = max_to_wake < (int32_t)count ? max_to_wake : count; @@ -331,11 +335,7 @@ static inline void request_more_thieves(global_state *g, uint32_t count) { uint32_t disengaged_thieves_futex = atomic_load_explicit( &g->disengaged_thieves_futex, memory_order_acquire); - // Don't allow this routine increment the futex beyond half the number - // of workers on the system. This bounds how many successful steals can - // possibly keep thieves engaged unnecessarily in the future, when there - // may not be as much parallelism. - int32_t max_to_wake = (int32_t)(g->nworkers / 2) - disengaged_thieves_futex; + int32_t max_to_wake = max_requests - disengaged_thieves_futex; if (max_to_wake <= 0) { pthread_mutex_unlock(&g->disengaged_lock); return; @@ -359,7 +359,7 @@ static inline void thief_disengage_futex(_Atomic uint32_t *futexp) { // designed to handle cases where multiple threads waiting on the futex // were woken up and where there may be spurious wakeups. 
uint32_t val; - while ((val = atomic_load_explicit(futexp, memory_order_relaxed)) > 0) { + while ((val = atomic_load_explicit(futexp, memory_order_acquire)) > 0) { if (atomic_compare_exchange_strong_explicit(futexp, &val, val - 1, memory_order_release, memory_order_acquire)) { @@ -429,6 +429,39 @@ static inline void thief_wait(global_state *g) { thief_disengage(g); } +// Called by a thief thread. Check if the thief should start waiting for the +// start of a cilkified region. If a new cilkified region has been started +// already, update the global state to indicate that this worker is engaged in +// work stealing. +static inline bool thief_should_wait(global_state *g) { + _Atomic uint32_t *futexp = &g->disengaged_thieves_futex; + uint32_t val = atomic_load_explicit(futexp, memory_order_acquire); +#if USE_FUTEX + while (val > 0) { + if (atomic_compare_exchange_strong_explicit(futexp, &val, val - 1, + memory_order_release, + memory_order_acquire)) + return false; + val = atomic_load_explicit(futexp, memory_order_acquire); + } + return true; +#else + if (val == 0) + return true; + + pthread_mutex_t *lock = &g->disengaged_lock; + pthread_mutex_lock(lock); + val = atomic_load_explicit(futexp, memory_order_relaxed); + if (val > 0) { + atomic_store_explicit(futexp, val - 1, memory_order_release); + pthread_mutex_unlock(lock); + return false; + } + pthread_mutex_unlock(lock); + return true; +#endif +} + // Signal the thief threads to start work-stealing (or terminate, if // g->terminate == 1). 
static inline void wake_thieves(global_state *g) { diff --git a/runtime/worker_sleep.h b/runtime/worker_sleep.h new file mode 100644 index 00000000..236c2774 --- /dev/null +++ b/runtime/worker_sleep.h @@ -0,0 +1,592 @@ +#ifndef _WORKER_SLEEP_H +#define _WORKER_SLEEP_H + +#include "cilk-internal.h" +#include "worker_coord.h" + +#if defined(__APPLE__) && defined(__aarch64__) +#define APPLE_ARM64 +#endif + +#ifdef APPLE_ARM64 +#include +#endif // APPLE_ARM64 + +// Nanoseconds that a sentinel worker should sleep if it reaches the disengage +// threshold but does not disengage. +/* #define SLEEP_NSEC 12500 */ +#define NAP_NSEC 25000 +/* #define SLEEP_NSEC 50000 */ +#define SLEEP_NSEC 4 * NAP_NSEC + +// Ratio of active workers over sentinels that the system aims to maintain. +#define AS_RATIO 2 + +// Threshold for number of consecutive failed steal attempts to declare a +// thief as sentinel. Must be a power of 2. +#define SENTINEL_THRESHOLD 256 + +// Number of attempted steals the thief should do each time it copies the +// worker state. ATTEMPTS must divide SENTINEL_THRESHOLD. +#define ATTEMPTS 8 + +// Information for histories of efficient and inefficient worker-count samples +// and for sentinel counts. +typedef uint32_t history_t; +#define HISTORY_LENGTH 32 +#define SENTINEL_COUNT_HISTORY 8 + +// Amount of history that must be efficient/inefficient to reengage/disengage +// workers. +#define HISTORY_THRESHOLD HISTORY_LENGTH / 2 + +// Threshold for number of consecutive failed steal attempts to try disengaging +// this worker. Must be a multiple of SENTINEL_THRESHOLD and a power of 2. +#define DISENGAGE_THRESHOLD HISTORY_THRESHOLD * SENTINEL_THRESHOLD + +// Number of pauses to perform per steal attempt, to ensure failed steal +// attempts don't take too much memory bandwidth away from the workers doing +// work. 
+#define STEAL_BUSY_PAUSE 16 + +static inline __attribute__((always_inline)) uint64_t gettime_fast(void) { +#ifdef APPLE_ARM64 + // __builtin_readcyclecounter triggers "illegal instruction" runtime errors + // on Apple M1s. + return clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW); +#else + return __builtin_readcyclecounter(); +#endif // #if APPLE_ARM64 +} + +typedef struct worker_counts { + int32_t active; + int32_t sentinels; + int32_t disengaged; +} worker_counts; + +// Update the index-to-worker map to swap self with the worker at the target +// index. +static void swap_worker_with_target(global_state *g, worker_id self, + worker_id target_index) { + worker_id *worker_to_index = g->worker_to_index; + worker_id *index_to_worker = g->index_to_worker; + + worker_id self_index = worker_to_index[self]; + worker_id target_worker = index_to_worker[target_index]; + + // Update the index-to-worker map. + index_to_worker[self_index] = target_worker; + index_to_worker[target_index] = self; + + // Update the worker-to-index map. + worker_to_index[target_worker] = self_index; + worker_to_index[self] = target_index; +} + +// Called by a thief thread. Causes the thief thread to try to sleep, that is, +// to wait for a signal to resume work-stealing. +static bool try_to_disengage_thief(global_state *g, worker_id self, + uint64_t disengaged_sentinel) { + // Try to grab the lock on the index structure. + if (!cilk_mutex_try(&g->index_lock)) { + return false; + } + + // Increment the number of disengaged thieves and decrement number of + // sentinels. + uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel); + uint32_t sentinel = GET_SENTINEL(disengaged_sentinel); + uint64_t new_disengaged_sentinel = + DISENGAGED_SENTINEL(disengaged + 1, sentinel - 1); + + unsigned int nworkers = g->nworkers; + worker_id *worker_to_index = g->worker_to_index; + + // Try to update the number of disengaged workers. 
This step synchronizes + // with parallel calls to reengage thieves, calls to reengage thieves, and + // updates to the number of sentinel workers. + // First atomically update the number of disengaged workers. + if (atomic_compare_exchange_strong_explicit( + &g->disengaged_sentinel, &disengaged_sentinel, + new_disengaged_sentinel, memory_order_release, + memory_order_acquire)) { + // Update the index-to-worker map. + worker_id last_index = nworkers - (new_disengaged_sentinel >> 32); + if (worker_to_index[self] < last_index) { + swap_worker_with_target(g, self, last_index); + } + // Release the lock on the index structure + cilk_mutex_unlock(&g->index_lock); + + // Disengage this thread. + thief_disengage(g); + + // The thread is now reengaged. Grab the lock on the index structure. + cilk_mutex_lock(&g->index_lock); + + // Decrement the number of disengaged workers. + while (true) { + // Atomically decrement the number of disengaged workers. + uint64_t disengaged_sentinel = atomic_load_explicit( + &g->disengaged_sentinel, memory_order_relaxed); + uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel); + uint32_t sentinel = GET_SENTINEL(disengaged_sentinel); + new_disengaged_sentinel = + DISENGAGED_SENTINEL(disengaged - 1, sentinel + 1); + if (atomic_compare_exchange_strong_explicit( + &g->disengaged_sentinel, &disengaged_sentinel, + new_disengaged_sentinel, memory_order_release, + memory_order_acquire)) { + // Update the index structure. + last_index = nworkers - GET_DISENGAGED(disengaged_sentinel); + if (worker_to_index[self] > last_index) { + swap_worker_with_target(g, self, last_index); + } + + // Release the lock on the index structure. + cilk_mutex_unlock(&g->index_lock); + return true; + } + busy_loop_pause(); + } + } else { + // Release the lock on the index structure. 
+ cilk_mutex_unlock(&g->index_lock); + return false; + } +} + +// Helper function to parse the given value of disengaged_sentinel to determine +// the number of active, sentinel, and disengaged workers. +__attribute__((const, always_inline)) static inline worker_counts +get_worker_counts(__cilkrts_worker *const w, uint64_t disengaged_sentinel, + unsigned int nworkers) { + uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel); + uint32_t sentinel = GET_SENTINEL(disengaged_sentinel); + CILK_ASSERT(w, disengaged < nworkers); + CILK_ASSERT(w, sentinel <= nworkers); + int32_t active = + (int32_t)nworkers - (int32_t)disengaged - (int32_t)sentinel; + + worker_counts counts = { + .active = active, .sentinels = sentinel, .disengaged = disengaged}; + return counts; +} + +// Check if the given worker counts are inefficient, i.e., if active < +// sentinels. +__attribute__((const, always_inline)) static inline history_t +is_inefficient(worker_counts counts) { + return counts.sentinels > 1 && counts.active >= 1 && + counts.active * AS_RATIO < counts.sentinels * 1; +} + +// Check if the given worker counts are efficient, i.e., if active >= 2 * +// sentinels. +__attribute__((const, always_inline)) static inline history_t +is_efficient(worker_counts counts) { + return (counts.active * 1 >= counts.sentinels * AS_RATIO) || + (counts.sentinels <= 1); +} + +// Convert the elapsed time spent working into a fail count. +__attribute__((const, always_inline)) static inline unsigned int +get_scaled_elapsed(unsigned int elapsed) { +#ifdef APPLE_ARM64 + return ((elapsed * (1 * SENTINEL_THRESHOLD) / (16 * 65536)) / ATTEMPTS) * + ATTEMPTS; +#else + return ((elapsed * (1 * SENTINEL_THRESHOLD) / (2 * 65536)) / ATTEMPTS) * + ATTEMPTS; +#endif // APPLE_ARM64 +} + +// If steal attempts found work, update histories as appropriate and possibly +// reengage workers. 
// Called when this worker's steal attempts found work.  If the worker had
// accumulated enough failed attempts to be counted as a sentinel, make it
// active again, fold the observed worker counts into the caller's
// efficiency/inefficiency histories, and possibly request that disengaged
// thieves be reengaged.  Returns the (possibly capped) fail count the caller
// should continue with.  All of the pointer parameters refer to per-worker
// state owned by the calling thread; only rts->disengaged_sentinel and
// rts->disengaged_thieves_futex are shared.
__attribute__((always_inline)) static inline unsigned int
maybe_reengage_workers(global_state *const rts, worker_id self,
                       unsigned int nworkers, __cilkrts_worker *const w,
                       unsigned int fails,
                       unsigned int *const request_threshold,
                       history_t *const inefficient_history,
                       history_t *const efficient_history,
                       unsigned int *const sentinel_count_history,
                       unsigned int *const sentinel_count_history_tail,
                       unsigned int *const recent_sentinel_count) {
    if (fails >= SENTINEL_THRESHOLD) {
        // This thief is no longer a sentinel.  Decrement the number of
        // sentinels.
        uint64_t disengaged_sentinel = atomic_fetch_sub_explicit(
            &rts->disengaged_sentinel, 1, memory_order_release);
#if !ENABLE_THIEF_SLEEP
        return 0;
#endif
        // Get the current worker counts, with this sentinel now active.
        // fetch_sub returned the value *before* the decrement, hence the -1.
        worker_counts counts =
            get_worker_counts(w, disengaged_sentinel - 1, nworkers);
        CILK_ASSERT(w, counts.active >= 1);

        history_t my_efficient_history = *efficient_history;
        history_t my_inefficient_history = *inefficient_history;
        unsigned int my_sentinel_count = *recent_sentinel_count;
        if (fails >= *request_threshold) {
            // Update the inefficient history.  Histories are HISTORY_LENGTH-bit
            // shift registers; the newest sample enters at the high bit.
            history_t curr_ineff = is_inefficient(counts);
            my_inefficient_history = (my_inefficient_history >> 1) |
                                     (curr_ineff << (HISTORY_LENGTH - 1));

            // Update the efficient history.
            history_t curr_eff = is_efficient(counts);
            my_efficient_history = (my_efficient_history >> 1) |
                                   (curr_eff << (HISTORY_LENGTH - 1));

            // Update the sentinel count.  The +1 counts this worker itself,
            // which was a sentinel when the sample was taken.  The running sum
            // my_sentinel_count is maintained over a circular buffer of
            // SENTINEL_COUNT_HISTORY samples.
            unsigned int current_sentinel_count = counts.sentinels + 1;
            unsigned int tail = *sentinel_count_history_tail;
            my_sentinel_count = my_sentinel_count -
                                sentinel_count_history[tail] +
                                current_sentinel_count;
            *recent_sentinel_count = my_sentinel_count;
            sentinel_count_history[tail] = current_sentinel_count;
            *sentinel_count_history_tail = (tail + 1) % SENTINEL_COUNT_HISTORY;
        }

        // Request to reengage some thieves, depending on whether there are
        // too many active workers compared to sentinel workers.

        // Compute a number of additional workers to request, based on the
        // efficiency history divided by the average recent sentinel count.
        //
        // Dividing by the average recent sentinel count is intended to
        // handle the case where sentinels request more workers in parallel,
        // based on the same independently collected history.
        int32_t request;
        int32_t eff_steps = __builtin_popcount(my_efficient_history);
        int32_t ineff_steps = __builtin_popcount(my_inefficient_history);
        int32_t eff_diff = eff_steps - ineff_steps;
        if (eff_diff < HISTORY_THRESHOLD) {
            request = 0;
            *efficient_history = my_efficient_history;
            *inefficient_history = my_inefficient_history;
        } else {
            unsigned int avg_sentinels =
                my_sentinel_count / SENTINEL_COUNT_HISTORY;
            // NOTE(review): if every entry of sentinel_count_history is 0,
            // avg_sentinels is 0 and the divisions below divide by zero.
            // Presumably the history is seeded so the running sum is at least
            // SENTINEL_COUNT_HISTORY — verify against the initialization code.
            request = eff_diff / avg_sentinels;
            int32_t remainder = eff_diff % avg_sentinels;
            if (remainder)
                // Distribute the remainder across workers by id so that, in
                // aggregate, roughly the right number of extra requests occur.
                request += (self % remainder != 0);
            // Charge the request for more workers against the efficiency
            // history by resetting that history.
            *efficient_history = 0;
            *inefficient_history = 0;
        }
        WHEN_SCHED_STATS(w->l->stats.reeng_rqsts += request);

        // Make sure at least 1 worker is requested if we're about to run
        // out of sentinels.
        if (request == 0 && counts.sentinels == 0 &&
            counts.active < (int32_t)nworkers &&
            !atomic_load_explicit(&rts->disengaged_thieves_futex,
                                  memory_order_relaxed)) {
            request = (counts.active + 3) / 4;
            WHEN_SCHED_STATS(w->l->stats.onesen_rqsts += request);
        }

        if (request > 0) {
            request_more_thieves(rts, request);
        }

        // Set a cap on the fail count.
        if (fails > DISENGAGE_THRESHOLD)
            fails = DISENGAGE_THRESHOLD;

        // Update request threshold so that, in case this worker ends up
        // executing a small task, it still adds samples to its history that
        // are spread out in time.
        *request_threshold = fails + (SENTINEL_THRESHOLD / 1);
    }

    return fails;
}

// Attempt to disengage this thief thread.  Returns true if the thief was
// disengaged (and has since been reengaged), false otherwise.  The
// __cilkrts_worker parameter is only used for debugging.
static bool maybe_disengage_thief(global_state *g, worker_id self,
                                  unsigned int nworkers,
                                  __cilkrts_worker *const w) {
#if !ENABLE_THIEF_SLEEP
    return false;
#endif // !ENABLE_THIEF_SLEEP
    // Check the number of active and sentinel workers, and disengage this
    // worker if there are too many sentinel workers.
    while (true) {
        // Check if this sentinel thread should sleep.
        uint64_t disengaged_sentinel =
            atomic_load_explicit(&g->disengaged_sentinel, memory_order_acquire);

        worker_counts counts = get_worker_counts(w, disengaged_sentinel, nworkers);

        // Make sure that we don't inadvertently disengage the last sentinel.
        if (is_inefficient(counts)) {
            // Too many sentinels.  Try to disengage this worker.  If it fails
            // (e.g., because disengaged_sentinel changed concurrently),
            // repeat the loop.
            if (try_to_disengage_thief(g, self, disengaged_sentinel)) {
                // The thief was successfully disengaged.  It has since been
                // reengaged.
                return true;
            }
        } else {
            break;
        }
        busy_loop_pause();
    }
    return false;
}

// If steal attempts did not find work, update histories as appropriate and
// possibly disengage this worker.
// Called when this worker's steal attempts did NOT find work.  Increments the
// fail count, periodically samples the global worker counts into the caller's
// efficiency/inefficiency histories, and — depending on those histories —
// either disengages this thief, naps via nanosleep, or spins with pause
// instructions.  Returns the updated fail count.
__attribute__((always_inline)) static inline unsigned int
handle_failed_steal_attempts(global_state *const rts, worker_id self,
                             unsigned int nworkers, __cilkrts_worker *const w,
                             unsigned int fails,
                             unsigned int *const request_threshold,
                             history_t *const inefficient_history,
                             history_t *const efficient_history,
                             unsigned int *const sentinel_count_history,
                             unsigned int *const sentinel_count_history_tail,
                             unsigned int *const recent_sentinel_count) {
    // Threshold for number of failed steal attempts to put this thief to sleep
    // for an extended amount of time.  Must be at least SENTINEL_THRESHOLD and
    // a power of 2.
    const unsigned int NAP_THRESHOLD = 8 * SENTINEL_THRESHOLD;
    const unsigned int SLEEP_THRESHOLD = 16 * NAP_THRESHOLD;

    CILK_START_TIMING(w, INTERVAL_SLEEP);
    fails += ATTEMPTS;

    // Every SENTINEL_THRESHOLD consecutive failed steal attempts, update the
    // set of sentinel workers, and maybe disengage this worker if there are too
    // many sentinel workers.
    if (fails % SENTINEL_THRESHOLD == 0) {
        if (fails > (1 << 30)) {
            // Prevent the fail count from exceeding this maximum, so we don't
            // have to worry about the fail count overflowing.
            fails = (1 << 30);
            const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = SLEEP_NSEC};
            nanosleep(&sleeptime, NULL);
        } else {
            // The first time fails reaches SENTINEL_THRESHOLD, this worker
            // becomes a sentinel; count it as such exactly once.
            if (SENTINEL_THRESHOLD == fails)
                atomic_fetch_add_explicit(&rts->disengaged_sentinel, 1,
                                          memory_order_release);
#if BOSS_THIEF
            if (is_boss_thread) {
                if (fails % NAP_THRESHOLD == 0) {
                    // The boss thread should never disengage.  Sleep
                    // instead.
                    const struct timespec sleeptime = {
                        .tv_sec = 0,
                        .tv_nsec =
                            (fails > SLEEP_THRESHOLD) ? SLEEP_NSEC : NAP_NSEC};
                    nanosleep(&sleeptime, NULL);
                }
            } else {
#else
            {
#endif
                // Check the current worker counts.
                uint64_t disengaged_sentinel = atomic_load_explicit(
                    &rts->disengaged_sentinel, memory_order_acquire);
                worker_counts counts =
                    get_worker_counts(w, disengaged_sentinel, nworkers);

                // Update the efficient history.
                history_t curr_eff = is_efficient(counts);
                history_t my_efficient_history = *efficient_history;
                my_efficient_history = (my_efficient_history >> 1) |
                                       (curr_eff << (HISTORY_LENGTH - 1));
                int32_t eff_steps = __builtin_popcount(my_efficient_history);
                *efficient_history = my_efficient_history;

                // Update the sentinel count.  Unlike in the reengage path, no
                // +1 is added here: this worker is already included in
                // counts.sentinels, since it incremented disengaged_sentinel
                // above.
                unsigned int current_sentinel_count = counts.sentinels;
                unsigned int tail = *sentinel_count_history_tail;
                *recent_sentinel_count = *recent_sentinel_count -
                                         sentinel_count_history[tail] +
                                         current_sentinel_count;
                sentinel_count_history[tail] = current_sentinel_count;
                *sentinel_count_history_tail =
                    (tail + 1) % SENTINEL_COUNT_HISTORY;

                // Update the inefficient history.
                history_t curr_ineff = is_inefficient(counts);
                history_t my_inefficient_history = *inefficient_history;
                my_inefficient_history = (my_inefficient_history >> 1) |
                                         (curr_ineff << (HISTORY_LENGTH - 1));
                int32_t ineff_steps =
                    __builtin_popcount(my_inefficient_history);
                *inefficient_history = my_inefficient_history;

                if (ENABLE_THIEF_SLEEP && curr_ineff &&
                    (ineff_steps - eff_steps) > HISTORY_THRESHOLD) {
                    uint64_t start, end;
                    start = gettime_fast();
                    if (maybe_disengage_thief(rts, self, nworkers, w)) {
                        // The semaphore for reserving workers may have been
                        // non-zero due to past successful steals, rather than a
                        // recent successful steal.  Decrement fails so we try
                        // to disengage this again sooner, in case there is
                        // still nothing to steal.
                        end = gettime_fast();
                        // NOTE(review): end - start is uint64_t but
                        // get_scaled_elapsed takes unsigned int; long sleeps
                        // presumably stay within range after scaling — confirm.
                        unsigned int scaled_elapsed =
                            get_scaled_elapsed(end - start);

                        // Update histories: age out samples to cover the time
                        // spent disengaged.
                        if (scaled_elapsed > SENTINEL_THRESHOLD) {
                            uint32_t samples =
                                scaled_elapsed / SENTINEL_THRESHOLD;
                            if (samples >= HISTORY_LENGTH) {
                                *efficient_history = 0;
                                *inefficient_history = 0;
                            } else {
                                *efficient_history >>= samples;
                                *inefficient_history >>= samples;
                            }
                        }

                        // Update fail count
                        if (scaled_elapsed < SENTINEL_THRESHOLD)
                            fails -= scaled_elapsed;
                        else {
                            fails = DISENGAGE_THRESHOLD - SENTINEL_THRESHOLD;
                        }
                        *request_threshold = SENTINEL_THRESHOLD;
                    }
                } else if (fails % NAP_THRESHOLD == 0) {
                    // We have enough active workers to keep this worker out of
                    // disengage, but this worker was still unable to steal
                    // work.  Put this thief to sleep for a while using the
                    // conventional way.  In testing, a nanosleep(0) takes
                    // approximately 50 us.
                    const struct timespec sleeptime = {
                        .tv_sec = 0,
                        .tv_nsec =
                            (fails > SLEEP_THRESHOLD) ? SLEEP_NSEC : NAP_NSEC};
                    nanosleep(&sleeptime, NULL);
                } else {
                    // We perform many pause instructions to reduce the thief's
                    // load on the system in a lightweight manner.
                    for (int i = 0; i < 8 * ATTEMPTS; ++i) {
                        busy_loop_pause();
                    }
                }
            }
        }
    } else {
        // We perform many pause instructions to reduce the thief's load on
        // the system in a lightweight manner.
        for (int i = 0; i < 32 * ATTEMPTS; ++i) {
            busy_loop_pause();
        }
    }
    CILK_STOP_TIMING(w, INTERVAL_SLEEP);
    return fails;
}

// Dispatch on the result of the steal attempts: t non-NULL means work was
// found (maybe reengage other workers); t NULL means all attempts failed
// (maybe sleep or disengage this worker).  Returns the updated fail count.
__attribute__((always_inline)) static unsigned int
go_to_sleep_maybe(global_state *const rts, worker_id self,
                  unsigned int nworkers, __cilkrts_worker *const w,
                  Closure *const t, unsigned int fails,
                  unsigned int *const request_threshold,
                  history_t *const inefficient_history,
                  history_t *const efficient_history,
                  unsigned int *const sentinel_count_history,
                  unsigned int *const sentinel_count_history_tail,
                  unsigned int *const recent_sentinel_count) {
    if (t) {
        return maybe_reengage_workers(
            rts, self, nworkers, w, fails, request_threshold,
            inefficient_history, efficient_history, sentinel_count_history,
            sentinel_count_history_tail, recent_sentinel_count);
    } else {
        return handle_failed_steal_attempts(
            rts, self, nworkers, w, fails, request_threshold,
            inefficient_history, efficient_history, sentinel_count_history,
            sentinel_count_history_tail, recent_sentinel_count);
    }
}

#if ENABLE_THIEF_SLEEP
// Credit time spent doing useful work against the fail count, so a worker
// that just worked for a while does not immediately disengage.  elapsed is in
// the same time base as gettime_fast.  Returns the reduced fail count.
__attribute__((always_inline)) static unsigned int
decrease_fails_by_work(global_state *const rts, __cilkrts_worker *const w,
                       unsigned int fails, uint64_t elapsed,
                       unsigned int *const request_threshold) {
    // NOTE(review): elapsed (uint64_t) is truncated to unsigned int inside
    // get_scaled_elapsed — presumably acceptable for realistic work spans;
    // confirm against the time base.
    uint64_t scaled_elapsed = get_scaled_elapsed(elapsed);

    // Decrease the number of fails based on the work done.
    if (scaled_elapsed > (uint64_t)fails)
        fails = 0;
    else {
        fails -= scaled_elapsed;
    }

    // The fail count must be a multiple of ATTEMPTS for the sleep logic to
    // work.  (get_scaled_elapsed rounds to a multiple of ATTEMPTS.)
    CILK_ASSERT(w, fails % ATTEMPTS == 0);

    // Lower the request threshold by the same amount, with a floor of
    // SENTINEL_THRESHOLD.
    if (scaled_elapsed > (uint64_t)(*request_threshold) - SENTINEL_THRESHOLD)
        *request_threshold = SENTINEL_THRESHOLD;
    else
        *request_threshold -= scaled_elapsed;

    // If this worker is still sentinel, update sentinel-worker count.
    if (fails >= SENTINEL_THRESHOLD)
        atomic_fetch_add_explicit(&rts->disengaged_sentinel, 1,
                                  memory_order_release);
    return fails;
}
#endif // ENABLE_THIEF_SLEEP

// Reset the fail count to 0; if this worker had been counted as a sentinel,
// remove it from the global sentinel count first.  Always returns 0.
__attribute__((always_inline)) static unsigned int
reset_fails(global_state *rts, unsigned int fails) {
    if (fails >= SENTINEL_THRESHOLD) {
        // If this worker was sentinel, decrement the number of sentinel
        // workers, effectively making this worker active.
        atomic_fetch_sub_explicit(&rts->disengaged_sentinel, 1,
                                  memory_order_release);
    }
    return 0;
}

// Mark this worker disengaged: bump the disengaged count (stored in the high
// 32 bits of disengaged_sentinel) and move this worker to the end of the
// index-to-worker map, under the index lock.
__attribute__((always_inline)) static inline void
disengage_worker(global_state *g, unsigned int nworkers, worker_id self) {
    cilk_mutex_lock(&g->index_lock);
    uint64_t disengaged_sentinel = atomic_fetch_add_explicit(
        &g->disengaged_sentinel, (1UL << 32), memory_order_release);
    // Update the index-to-worker map.  We derive last_index from the new value
    // of disengaged_sentinel, because the index is now invalid.
    worker_id last_index = nworkers - ((disengaged_sentinel >> 32) + 1);
    if (g->worker_to_index[self] < last_index) {
        swap_worker_with_target(g, self, last_index);
    }
    // Release the lock on the index structure
    cilk_mutex_unlock(&g->index_lock);
}

// Inverse of disengage_worker: drop the disengaged count and move this worker
// back into the engaged region of the index-to-worker map, under the index
// lock.
__attribute__((always_inline)) static inline void
reengage_worker(global_state *g, unsigned int nworkers, worker_id self) {
    cilk_mutex_lock(&g->index_lock);
    uint64_t disengaged_sentinel = atomic_fetch_sub_explicit(
        &g->disengaged_sentinel, (1UL << 32), memory_order_release);
    // Update the index-to-worker map.  We derive last_index from the old value
    // of disengaged_sentinel, because the index is now valid.
    worker_id last_index = nworkers - (disengaged_sentinel >> 32);
    if (g->worker_to_index[self] > last_index) {
        swap_worker_with_target(g, self, last_index);
    }
    // Release the lock on the index structure
    cilk_mutex_unlock(&g->index_lock);
}

#endif /* _WORKER_SLEEP_H */