Squash-Merge Version 2.1.0

pc2 · Sep 15, 2021 · 8a8ff05 · 8a8ff05
1 parent 0e2bfa5
commit 8a8ff05
Show file tree

Hide file tree

Showing 55 changed files with 3,006 additions and 2,328 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -76,15 +76,6 @@
         "codecvt": "cpp",
         "shared_mutex": "cpp"
     },
-    "C_Cpp.clang_format_style": "{ BasedOnStyle: LLVM, UseTab: Never, IndentWidth: 4, TabWidth: 4, BreakBeforeBraces: Allman, AllowShortIfStatementsOnASingleLine: false, IndentCaseLabels: false, ColumnLimit: 0, AccessModifierOffset: -4 }",
+    "C_Cpp.clang_format_style": "{ BasedOnStyle: LLVM, IndentWidth: 4, TabWidth: 4, ColumnLimit: 100, IndentPPDirectives: BeforeHash }",
     "python.pythonPath": "/usr/bin/python3",
-    "spellright.language": [
-        "English (American)"
-    ],
-    "spellright.documentTypes": [
-        "markdown",
-        "latex",
-        "plaintext",
-        "cpp"
-    ]
 }
diff --git a/Doxyfile b/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "StencilStream"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = v2.0.0
+PROJECT_NUMBER         = v2.1.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -829,7 +829,7 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = StencilStream README.md docs
+INPUT                  = StencilStream StencilStream/monotile StencilStream/tiling README.md docs
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

diff --git a/README.md b/README.md
@@ -45,26 +45,22 @@ Next are some important definitions: The cell type, the value of cells in the gr
 This is everything we need to define the transition function, so let's do it now:
 
 ``` C++
-auto conway = [](stencil::Stencil<Cell, stencil_radius> const &stencil, stencil::StencilInfo const &info)
-{
+auto conway = [](stencil::Stencil<Cell, stencil_radius> const &stencil) {
 ``` 
 
 As you can see, a transition function is just an invocable object. In this case, we have chosen a lambda expression, but more complicated applications may define their transition function as a class with an `operator()` method.
 
-The first argument is the stencil buffer itself and the second argument is a struct with useful information about the current invocation, like the coordinates of the central cell of the stencil buffer. This is the cell we are going to replace.
+The only argument is the stencil buffer itself. It also contains useful information about the current invocation, like the coordinates of the central cell of the stencil buffer. This is the cell we are going to replace.
 
 ``` C++
-    stencil::ID idx = info.center_cell_id;
+    stencil::ID idx = stencil.id;
 
     uint8_t alive_neighbours = 0;
 #pragma unroll
-    for (stencil::index_t c = -stencil_radius; c <= stencil::index_t(stencil_radius); c++)
-    {
+    for (stencil::index_t c = -stencil_radius; c <= stencil::index_t(stencil_radius); c++) {
 #pragma unroll
-        for (stencil::index_t r = -stencil_radius; r <= stencil::index_t(stencil_radius); r++)
-        {
-            if (stencil[stencil::ID(c, r)] && !(c == 0 && r == 0))
-            {
+        for (stencil::index_t r = -stencil_radius; r <= stencil::index_t(stencil_radius); r++) {
+            if (stencil[stencil::ID(c, r)] && !(c == 0 && r == 0)) {
                 alive_neighbours += 1;
             }
         }
@@ -74,12 +70,9 @@ The first argument is the stencil buffer itself and the second argument is a str
 First, we count the living neighbors since their number decides the fate of our cell. The `for`-loops for that are completely unrolled, which means that these evaluations will be carried out in parallel.
 
 ``` C++
-    if (stencil[stencil::ID(0, 0)])
-    {
+    if (stencil[stencil::ID(0, 0)]) {
         return alive_neighbours == 2 || alive_neighbours == 3;
-    }
-    else
-    {
+    } else {
         return alive_neighbours == 3;
     }
 };
@@ -88,15 +81,12 @@ First, we count the living neighbors since their numbers decides the fate of our
 Now we know how many of our neighbors are alive and can therefore return the new cell value according to [the rules of the game](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life#Rules).
 
 ``` C++
-cl::sycl::buffer<Cell, 2> read(stencil::uindex_t width, stencil::uindex_t height)
-{
+cl::sycl::buffer<Cell, 2> read(stencil::uindex_t width, stencil::uindex_t height) {
     cl::sycl::buffer<Cell, 2> input_buffer(cl::sycl::range<2>(width, height));
     auto buffer_ac = input_buffer.get_access<cl::sycl::access::mode::write>();
 
-    for (stencil::uindex_t r = 0; r < height; r++)
-    {
-        for (stencil::uindex_t c = 0; c < width; c++)
-        {
+    for (stencil::uindex_t r = 0; r < height; r++) {
+        for (stencil::uindex_t c = 0; c < width; c++) {
             char Cell;
             std::cin >> Cell;
             assert(Cell == 'X' || Cell == '.');
@@ -107,23 +97,17 @@ cl::sycl::buffer<Cell, 2> read(stencil::uindex_t width, stencil::uindex_t height
     return input_buffer;
 }
 
-void write(cl::sycl::buffer<Cell, 2> output_buffer)
-{
+void write(cl::sycl::buffer<Cell, 2> output_buffer) {
     auto buffer_ac = output_buffer.get_access<cl::sycl::access::mode::read>();
 
     stencil::uindex_t width = output_buffer.get_range()[0];
     stencil::uindex_t height = output_buffer.get_range()[1];
 
-    for (stencil::uindex_t r = 0; r < height; r++)
-    {
-        for (stencil::uindex_t c = 0; c < width; c++)
-        {
-            if (buffer_ac[c][r])
-            {
+    for (stencil::uindex_t r = 0; r < height; r++) {
+        for (stencil::uindex_t c = 0; c < width; c++) {
+            if (buffer_ac[c][r]) {
                 std::cout << "X";
-            }
-            else
-            {
+            } else {
                 std::cout << ".";
             }
         }
@@ -132,15 +116,13 @@ void write(cl::sycl::buffer<Cell, 2> output_buffer)
 }
 ```
 
-The next part is some boilerplate code to read the input from stdin and write the output to stdout. Nothing to spectacular.
+The next part is some boilerplate code to read the input from stdin and write the output to stdout. Nothing too spectacular.
 
 The only thing left is to run the calculations. We do it as follows:
 
 ``` C++
-int main(int argc, char **argv)
-{
-    if (argc != 4)
-    {
+int main(int argc, char **argv) {
+    if (argc != 4) {
         std::cerr << "Usage: " << argv[0] << " <width> <height> <n_generations>" << std::endl;
         return 1;
     }
@@ -156,7 +138,7 @@ int main(int argc, char **argv)
     executor.set_input(grid_buffer);
 ```
 
-After checking and parsing the arguments, we read the input data and initialize the executor. This is the central API facade to control the calculations. In it's simplest form, it only requires cell type, the radius of the stencil and the type of the transition function as template arguments. It has more template arguments, but these are performance parameters. We are looking into them later. The actual constructor arguments are only the initial data, the halo value and an instance of the transition function.
+After checking and parsing the arguments, we read the input data. Then, we pick and initialize an executor. Executors are the user-facing facades of StencilStream and the library offers different executors that are optimized for different scenarios. In this case, we pick the `StencilExecutor`, which is the most universal executor. The static operation and performance parameters are defined as template parameters to an executor. In its simplest form, it only requires our cell type, the radius of the stencil and the type of the transition function.
 
 ``` C++
 #ifdef HARDWARE
@@ -199,15 +181,17 @@ ifdef AOCL_BOARD_PACKAGE_ROOT
 endif
 
 EMU_ARGS = $(ARGS)
-HW_ARGS = $(ARGS) -DHARDWARE -Xshardware 
+HW_ARGS = $(ARGS) -DHARDWARE -Xshardware
+
+RESOURCES = conway.cpp $(wildcard StencilStream/*) Makefile
 
-conway_emu: conway.cpp Makefile
+conway_emu: $(RESOURCES)
 	$(CC) $(EMU_ARGS) conway.cpp -o conway_emu
 
-conway_hw: conway.cpp Makefile
+conway_hw: $(RESOURCES)
 	$(CC) $(HW_ARGS) conway.cpp -o conway_hw
 
-conway_hw.report.tar.gz: conway.cpp Makefile
+conway_hw.report.tar.gz: $(RESOURCES)
 	rm -f conway_hw
 	$(CC) $(HW_ARGS) -fsycl-link conway.cpp -o conway_hw
 	tar -caf conway_hw.report.tar.gz conway_hw.prj/reports

diff --git a/StencilStream/AbstractExecutor.hpp b/StencilStream/AbstractExecutor.hpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2020-2021 Jan-Oliver Opdenhövel, Paderborn Center for Parallel Computing, Paderborn
+ * University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+ * associated documentation files (the “Software”), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge, publish, distribute,
+ * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+ * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#pragma once
+#include "GenericID.hpp"
+#include "Index.hpp"
+#include <CL/sycl.hpp>
+
+namespace stencil {
+/**
+ * \brief Base class for all execution managers.
+ *
+ * Executors are the user-facing facades of StencilStream that orchestrate the computations.
+ * Different executors may use different architectures and strategies to apply transition functions
+ * to cells. Application code that may work with any executor can use this base class to access
+ * them. It has multiple logical attributes that can be configured:
+ *
+ * ### Grid
+ *
+ * The grid is the logical array of cells, set with \ref AbstractExecutor.set_input. A stencil
+ * executor does not work in place and a buffer used to initialize the grid can be used for other
+ * tasks afterwards. The \ref AbstractExecutor.run method alters the state of the grid and the grid
+ * can be copied back to a given buffer using \ref AbstractExecutor.copy_output.
+ *
+ * ### Transition Function
+ *
+ * A stencil executor stores an instance of the transition function since it may require some
+ * configuration and runtime-dynamic parameters too. An instance is required for the initialization,
+ * but it may be replaced at any time with \ref AbstractExecutor.set_trans_func.
+ *
+ * ### Generation Index
+ *
+ * This is the generation index of the current state of the grid. \ref AbstractExecutor.run updates
+ * it, and therefore it can be ignored in most instances. However, it can be reset if a transition
+ * function needs it.
+ *
+ * \tparam T The cell type.
+ * \tparam stencil_radius The radius of the stencil buffer supplied to the transition function.
+ * \tparam TransFunc The type of the transition function.
+ */
+template <typename T, uindex_t stencil_radius, typename TransFunc> class AbstractExecutor {
+  public:
+    /**
+     * \brief Create a new abstract executor.
+     *
+     * The generation index is initialized to zero.
+     *
+     * NOTE(review): This class declares pure virtual methods, but no virtual destructor is
+     * declared in it. Confirm that executors are never destroyed through a pointer to
+     * `AbstractExecutor`.
+     *
+     * \param halo_value The value of cells that are outside the grid.
+     * \param trans_func The instance of the transition function that should be used to calculate
+     * new generations.
+     */
+    AbstractExecutor(T halo_value, TransFunc trans_func)
+        : halo_value(halo_value), trans_func(trans_func), i_generation(0) {}
+
+    /**
+     * \brief Compute the next generations of the grid and store the result internally.
+     *
+     * This will use the transition function to compute the next `n_generations` generations of the
+     * grid and store the new state of the grid internally. The resulting grid state can be
+     * retrieved with \ref AbstractExecutor.copy_output.
+     *
+     * This method is pure virtual and has to be provided by the concrete executor.
+     *
+     * \param n_generations The number of generations to calculate.
+     */
+    virtual void run(uindex_t n_generations) = 0;
+
+    /**
+     * \brief Set the internal state of the grid.
+     *
+     * This will copy the contents of the buffer to an internal representation. The buffer may be
+     * used for other purposes later. It must not reset the generation index. The range of the input
+     * buffer will be used as the new grid range.
+     *
+     * This method is pure virtual and has to be provided by the concrete executor.
+     *
+     * \param input_buffer The source buffer of the new grid state.
+     */
+    virtual void set_input(cl::sycl::buffer<T, 2> input_buffer) = 0;
+
+    /**
+     * \brief Copy the state of the grid to a buffer.
+     *
+     * This will copy the cells of the internal grid representation to the buffer. The range of the
+     * output buffer must be equal to the grid range (retrievable with \ref
+     * AbstractExecutor.get_grid_range).
+     *
+     * This method is pure virtual and has to be provided by the concrete executor.
+     *
+     * \param output_buffer The target buffer.
+     */
+    virtual void copy_output(cl::sycl::buffer<T, 2> output_buffer) = 0;
+
+    /**
+     * \brief Get the range of the internal grid.
+     *
+     * This method is pure virtual and has to be provided by the concrete executor.
+     *
+     * \return The range of the internal grid.
+     */
+    virtual UID get_grid_range() const = 0;
+
+    /**
+     * \brief Get the value of cells outside of the grid.
+     *
+     * NOTE(review): The value is returned as a copy; the top-level `const` on the by-value return
+     * type looks unintended. Confirm before changing it.
+     *
+     * \return A copy of the configured halo value.
+     */
+    T const get_halo_value() const { return halo_value; }
+
+    /**
+     * \brief Set the value of cells outside of the grid.
+     *
+     * \param halo_value The new halo value, which replaces the stored one.
+     */
+    void set_halo_value(T halo_value) { this->halo_value = halo_value; }
+
+    /**
+     * \brief Get the configured transition function instance.
+     *
+     * \return A copy of the stored transition function instance.
+     */
+    TransFunc get_trans_func() const { return trans_func; }
+
+    /**
+     * \brief Set the transition function instance.
+     *
+     * \param trans_func The new transition function instance, which replaces the stored one.
+     */
+    void set_trans_func(TransFunc trans_func) { this->trans_func = trans_func; }
+
+    /**
+     * \brief Get the generation index of the grid.
+     *
+     * \return The currently stored generation index.
+     */
+    uindex_t get_i_generation() const { return i_generation; }
+
+    /**
+     * \brief Set the generation index of the grid.
+     *
+     * \param i_generation The new generation index, which replaces the stored one.
+     */
+    void set_i_generation(uindex_t i_generation) { this->i_generation = i_generation; }
+
+    /**
+     * \brief Increase the generation index of the grid by a certain delta.
+     *
+     * NOTE(review): `delta` is an `index_t` while the generation index is stored as a `uindex_t`.
+     * Confirm how negative deltas are expected to behave.
+     *
+     * \param delta The amount that is added to the stored generation index.
+     */
+    void inc_i_generation(index_t delta) { this->i_generation += delta; }
+
+  private:
+    T halo_value;
+    TransFunc trans_func;
+    uindex_t i_generation;
+};
+} // namespace stencil