diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f6ebb1..bcc03ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,7 +14,7 @@
 #
 
 cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
-project(Dml VERSION 0.1.5 LANGUAGES C CXX)
+project(Dml VERSION 0.1.6 LANGUAGES C CXX)
 
 set(PROJECT_SOVERSION 0)
 
@@ -28,7 +28,6 @@ else()
 endif()
 
 # TODO: Remove all options below
-option(LIB_ACCEL_3_2 "Use libaccel-3.2" OFF)
 option(LOG_HW_INIT "Enables HW initialization log" OFF)
 option(EFFICIENT_WAIT "Enables usage of umonitor/umwait" OFF)
 
@@ -61,14 +60,13 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 # TODO: Remove when option is removed
 if (DML_HW)
     message(STATUS "HW path: ON")
+    message(STATUS "Hardware initialization logging: ${LOG_HW_INIT}")
 endif()
 
 # TODO: Remove
 get_git_revision()
 
 add_subdirectory(sources)
-
-# Testing
 add_subdirectory(examples)
 
 # Install rules
diff --git a/CODEOWNERS b/CODEOWNERS
index a69cec5..74ce726 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -12,7 +12,3 @@
 # express or implied warranties, other than those that are expressly
 # stated in the License.
 #
-
-# Repository Control Files
-.gitignore      anton.rubtsov@intel.com
-CODEOWNERS      anton.rubtsov@intel.com
diff --git a/Doxyfile b/Doxyfile
index 3961778..4e9aeb2 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -53,7 +53,7 @@ PROJECT_NAME           = "Intel DML Library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "v0.1.5-beta"
+PROJECT_NUMBER         = "v0.1.6-beta"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -831,8 +831,9 @@ WARN_LOGFILE           =
 # Note: If this tag is empty the current directory is searched.
 
 INPUT                  = examples \
-                         sources \
                          README.md \
+                         CONTRIBUTING.md \
+                         SECURITY.md \
                          include \
                          doc/LOW_LEVEL_API_GUIDE.md \
                          doc/HIGH_LEVEL_API_GUIDE.md \
@@ -922,9 +923,7 @@ RECURSIVE              = YES
 # Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                = sources/hw-path/include/libaccel_config.h \
-                         include/dml/cpp/middle_layer/\
-                         sources/middle_layer/
+EXCLUDE                = include/dml/detail
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
@@ -961,15 +960,12 @@ EXCLUDE_SYMBOLS        = *_H__ \
                          DML_CORE_CHECK_* \
                          DML_BAD_* \
                          DML_PACKED_STRUCT_DECLARATION* \
-                         TEST_REGISTER* \
                          NULL \
                          DML_FUN \
                          OWN_API \
                          OWN_FUN \
                          OWN_API_INLINE \
-                         OWN_FUN_INLINE \
-                         DML_JOB_API_TEST_REGISTER \
-                         DML_UNIT_TEST_REGISTER
+                         OWN_FUN_INLINE
 
 # The EXAMPLE_PATH tag can be used to specify one or more files or directories
 # that contain example code fragments that are included (see the \include
diff --git a/README.md b/README.md
index c3c001f..700beec 100644
--- a/README.md
+++ b/README.md
@@ -68,11 +68,6 @@ Note:
 cmake -DCMAKE_BUILD_TYPE=Release -DDML_HW=ON <path_to_cmake_folder>
 ```
 
-```shell
-# Enable libaccel-config-3.2 support for hardware path
-cmake -DCMAKE_BUILD_TYPE=Release -DDML_HW=ON -DLIB_ACCEL_3_2=ON <path_to_cmake_folder> 
-```
-
 - To enable `-frecord-gcc-switches` flag, use the DML_RECORD_SWITCHES option as follows:
 
 ```shell
@@ -85,7 +80,7 @@ The resulting library is available in the `<install_dir>/lib` folder.
 ## Documentation
 
 - [Intel DML Reference Manual](./doc/DML_REFERENCE_MANUAL.md)
-- [Security Policy](doc/SECURITY.md) 
+- [Security Policy](./SECURITY.md) 
 
 To generate full documentation from sources with Doxygen, use the following commands:
 
@@ -100,12 +95,12 @@ To open the generated HTML Reference, open the `<dml_library>/doc/html/index.htm
 
 ## How to Contribute
 
-See [Contributing document](CONTRIBUTING.md) for details about contribution process.
+See [Contributing document](./CONTRIBUTING.md) for details about contribution process.
 
 ## License
 
 The library is licensed under the MIT license. Refer to the
-"[LICENSE](LICENSE)" file for the full license text.
+"[LICENSE](./LICENSE)" file for the full license text.
 
 This distribution includes third party software governed by separate license
-terms (see "[THIRD-PARTY-PROGRAMS](THIRD-PARTY-PROGRAMS)").
+terms (see "[THIRD-PARTY-PROGRAMS](./third-party-programs.txt)").
diff --git a/cmake/gnu.cmake b/cmake/gnu.cmake
index 7c9e85e..99c83cd 100644
--- a/cmake/gnu.cmake
+++ b/cmake/gnu.cmake
@@ -26,10 +26,13 @@ list(APPEND DML_SECURITY_DEFINITIONS
     $<$<CONFIG:RELEASE>:_FORTIFY_SOURCE=2>)
 
 list(APPEND DML_QUALITY_OPTIONS
-    $<$<BOOL:${DML_WERROR}>:-Werror>
     -Wall
     -Wextra
     -pedantic
     )
 
-list(APPEND DML_AVX512_OPTIONS -march=skylake-avx512) #  -mavx512dq -mavx512vl -mavx512bw -mclflushopt -mclwb
+list(APPEND DML_CPP_PRIVATE_OPTIONS
+        -fno-exceptions
+        -fno-rtti
+        -fno-threadsafe-statics
+        )
diff --git a/cmake/msvc.cmake b/cmake/msvc.cmake
index 77285cc..e345bf7 100644
--- a/cmake/msvc.cmake
+++ b/cmake/msvc.cmake
@@ -19,9 +19,6 @@ list(APPEND DML_SECURITY_OPTIONS
 
 list(APPEND DML_SECURITY_DEFINITIONS)
 
-list(APPEND DML_QUALITY_OPTIONS
-    $<$<BOOL:DML_WERROR>:/Wx>>
-    #/Wall MSVC warning level can be set only via CMAKE_CXX_FLAGS
-    )
+list(APPEND DML_QUALITY_OPTIONS /W3)
 
-list(APPEND DML_AVX512_OPTIONS /arch:AVX512)
+list(APPEND DML_CPP_PRIVATE_OPTIONS)
diff --git a/doc/DML_REFERENCE_MANUAL.md b/doc/DML_REFERENCE_MANUAL.md
index c82ef1b..73cb4fa 100644
--- a/doc/DML_REFERENCE_MANUAL.md
+++ b/doc/DML_REFERENCE_MANUAL.md
@@ -119,14 +119,14 @@ Hardware path is required to set up environment to utilize Intel DSA accelerator
 
 ### Build Prerequisites
 - Compiler:
-    - Linux* OS: gcc 8.2 or higher
-    - Windows* OS: MSVC 19 or higher
- - Libraries:    
-    - Linux: Universally Unique ID library: `uuid-dev` version 2.35.2 or higher.
+  - Linux* OS: gcc 8.2 or higher
+  - Windows* OS: MSVC 19 or higher
+- Libraries:
+  - Linux: Universally Unique ID library: `uuid-dev` version 2.35.2 or higher.
 - Cross-platform build tool: CMake* version 3.12 or higher
 - Make: GNU 'make' (Linux* OS) or 'nmake' (Windows*)
-- Documentation generator: 
-    - Doxygen 1.8.17 or higher
+- Documentation generator:
+  - Doxygen 1.8.17 or higher
 
 
 
diff --git a/doc/RELEASE_NOTES.md b/doc/RELEASE_NOTES.md
index c9ab7a7..7f5f58c 100644
--- a/doc/RELEASE_NOTES.md
+++ b/doc/RELEASE_NOTES.md
@@ -1,6 +1,31 @@
 Intel® Data Mover Library (Intel® DML) Release Notes
 ===============================================================================
 
+### Intel® DML v0.1.6-beta
+
+**Date: December 2021**
+
+**Note**: This release introduces bug fixes and several minor improvements.
+
+**Features**:
+* Improved validation of incorrect input
+* Added check for adjacent buffers for the DIF Strip operation. Status: `DML_STATUS_DIF_STRIP_ADJACENT_ERROR`
+* Reworked hardware related statuses for C API
+* Added new status to indicate submission failure:
+  * `DML_STATUS_WORK_QUEUES_NOT_AVAILABLE` for C API
+  * `dml::status_code::queue_busy` for C++ API
+* Removed the `LIB_ACCEL_3_2` CMake option. The supported versions of accel-config are now 3.2 and higher
+* The NUMA node id is now detected before each submission, so threads can safely change nodes at any time
+
+**Bug fix**:
+* Fixed the issue where the batch operation did not work for buffers not aligned on a 64-byte boundary
+* Fixed the issue where the current thread's NUMA node id was deduced incorrectly
+* Fixed crashes when there are no available devices for the current thread NUMA node id
+* Removed dependencies on C++ runtime from C API
+
+**Warnings**:
+* As the NUMA node id of the current thread is now deduced correctly, ensure that the accelerators' configuration is compatible. The library does not perform cross-socket submissions. If there is no available device for the current NUMA node id, an error status code is reported.
+
 ### Intel® DML v0.1.5-beta
 
 **Date: November 2021**
diff --git a/examples/dml_job_api/job_wrapper_launchers.c b/examples/dml_job_api/job_wrapper_launchers.c
index 46ab635..3184d12 100644
--- a/examples/dml_job_api/job_wrapper_launchers.c
+++ b/examples/dml_job_api/job_wrapper_launchers.c
@@ -414,7 +414,8 @@ dml_status_t dif_strip_sample_launcher(dml_job_t *const dml_job_ptr)
            DIF_SAMPLE_REFERENCE_ARRAY_SIZE,
            DIF_SAMPLE_BLOCK_SIZE);
 
-    uint8_t source_array[DIF_SAMPLE_REFERENCE_ARRAY_SIZE];
+    // Sized the same as destination_array because of the DIF Strip adjacent-buffer check (DML_STATUS_DIF_STRIP_ADJACENT_ERROR)
+    uint8_t source_array[DIF_SAMPLE_PROTECTED_ARRAY_SIZE];
     uint8_t destination_array[DIF_SAMPLE_PROTECTED_ARRAY_SIZE];
 
     dml_status_t status = dml_dif_insert_8u(source_array,
diff --git a/include/dml/cpp/detail/make_result.hpp b/include/dml/cpp/detail/make_result.hpp
deleted file mode 100644
index 49a8f8b..0000000
--- a/include/dml/cpp/detail/make_result.hpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @date 05/20/2021
- * @brief Contains internal execute implementation
- */
-
-#ifndef DML_DETAIL_MAKE_RESULT_HPP
-#define DML_DETAIL_MAKE_RESULT_HPP
-
-#include <dml/cpp/result.hpp>
-#include <dml/cpp/status_code.hpp>
-#include <dml/cpp/middle_layer/completion_record.hpp>
-#include <dml/cpp/middle_layer/result_views.hpp>
-
-namespace dml::detail
-{
-    /**
-     * @todo
-     */
-    inline auto to_own(ml::execution_status status) noexcept
-    {
-        switch (status)
-        {
-            case ml::execution_status::success:
-                return status_code::ok;
-            case ml::execution_status::false_predicate_success:
-                return status_code::false_predicate;
-            default:
-                // Anything else is considered an error temporarily
-                return status_code::error;
-        }
-    }
-
-    /**
-     * @todo
-     */
-    template <typename result_type>
-    auto make_result(ml::completion_record& record) noexcept
-    {
-        if constexpr (std::is_same_v<result_type, mem_move_result>)
-        {
-            auto view = ml::views::mem_move_result(record);
-
-            return mem_move_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        if constexpr (std::is_same_v<result_type, mem_copy_result>)
-        {
-            auto view = ml::views::mem_move_result(record);
-
-            return mem_copy_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        else if constexpr (std::is_same_v<result_type, fill_result>)
-        {
-            auto view = ml::views::fill_result(record);
-
-            return fill_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        else if constexpr (std::is_same_v<result_type, compare_result>)
-        {
-            auto view = ml::views::compare_result(record);
-
-            return compare_result{ to_own(static_cast<ml::execution_status>(view.status())),
-                                   static_cast<comparison_result>(view.result()),
-                                   view.bytes_completed() };
-        }
-        else if constexpr (std::is_same_v<result_type, create_delta_result>)
-        {
-            auto view = ml::views::create_delta_result(record);
-
-            return create_delta_result{ to_own(static_cast<ml::execution_status>(view.status())),
-                                        static_cast<comparison_result>(view.result()),
-                                        view.bytes_completed(),
-                                        view.delta_record_size() };
-        }
-        else if constexpr (std::is_same_v<result_type, apply_delta_result>)
-        {
-            auto view = ml::views::apply_delta_result(record);
-
-            return apply_delta_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        else if constexpr (std::is_same_v<result_type, dualcast_result>)
-        {
-            auto view = ml::views::dualcast_result(record);
-
-            return dualcast_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        else if constexpr (std::is_same_v<result_type, crc_result>)
-        {
-            auto view = ml::views::crc_result(record);
-
-            return crc_result{ to_own(static_cast<ml::execution_status>(view.status())),
-                               view.crc_value() };
-        }
-        else if constexpr (std::is_same_v<result_type, cache_flush_result>)
-        {
-            auto view = ml::views::cache_flush_result(record);
-
-            return cache_flush_result{ to_own(static_cast<ml::execution_status>(view.status())) };
-        }
-        else if constexpr (std::is_same_v<result_type, batch_result>)
-        {
-            auto view = ml::views::batch_result(record);
-
-            return batch_result{ to_own(static_cast<ml::execution_status>(view.status())),
-                                 view.descriptors_completed() };
-        }
-    }
-
-}  // namespace dml::detail
-
-#endif  //DML_DETAIL_MAKE_RESULT_HPP
diff --git a/include/dml/cpp/middle_layer/awaiter.hpp b/include/dml/cpp/middle_layer/awaiter.hpp
deleted file mode 100644
index ab9431f..0000000
--- a/include/dml/cpp/middle_layer/awaiter.hpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_AWAITER_HPP
-#define DML_AWAITER_HPP
-
-#include <cstdint>
-
-namespace dml::ml
-{
-    /**
-     * @brief Class that allows to defer scope exit to the moment when a certain address is changed
-     */
-    class awaiter final
-    {
-    public:
-        /**
-         * @brief Constructor of the class
-         *
-         * @param address       pointer to memory that should be asynchronously changed
-         * @param initial_value value to compare with
-         * @param period        number of clocks between checks
-         */
-        explicit awaiter(volatile void *address, uint8_t initial_value, uint32_t period = 200) noexcept;
-
-        /**
-         * @brief Destructor that performs actual wait
-         */
-        ~awaiter() noexcept;
-
-    private:
-        volatile uint8_t *address_ptr_;        /**<Pointer to memory that should be asynchronously changed */
-        uint32_t          period_        = 0u; /**<Number of clocks between checks */
-        uint8_t           initial_value_ = 0u; /**<Value to compare with */
-        uint32_t          idle_state_    = 0u; /**<State for CPU wait control */
-    };
-}  // namespace dml::ml
-
-#endif  //DML_AWAITER_HPP
diff --git a/include/dml/cpp/middle_layer/completion_record.hpp b/include/dml/cpp/middle_layer/completion_record.hpp
deleted file mode 100644
index 933a70a..0000000
--- a/include/dml/cpp/middle_layer/completion_record.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @date 05/19/2021
- * @brief Contains definitions of @ref dml::ml::completion_record type
- */
-
-#ifndef DML_ML_COMPLETION_RECORD_HPP
-#define DML_ML_COMPLETION_RECORD_HPP
-
-#include <type_traits>
-
-#include "awaiter.hpp"
-#include "types.hpp"
-
-namespace dml::ml
-{
-    /**
-     * @todo
-     */
-    struct alignas(32u) completion_record
-    {
-        byte_t bytes[32u]; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    inline void wait(volatile completion_record &record) noexcept
-    {
-        awaiter wait_for(static_cast<volatile void *>(&record), 0);
-    }
-
-    /**
-     * @todo
-     */
-    inline bool is_finished(const completion_record &record) noexcept
-    {
-        return 0 != record.bytes[0];
-    }
-
-}  // namespace dml::ml
-
-#endif  //DML_ML_COMPLETION_RECORD_HPP
diff --git a/include/dml/cpp/middle_layer/device.hpp b/include/dml/cpp/middle_layer/device.hpp
deleted file mode 100644
index e440d2f..0000000
--- a/include/dml/cpp/middle_layer/device.hpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_ML_DEVICE_HPP
-#define DML_ML_DEVICE_HPP
-
-#include "completion_record.hpp"
-#include "core.hpp"
-#include "make_descriptor.hpp"
-#include "status.hpp"
-
-namespace dml::ml
-{
-    /**
-     * @todo
-     */
-    class device
-    {
-    public:
-        /**
-         * @todo
-         */
-        device() = default;
-
-        /**
-         * @todo
-         */
-        virtual ~device() = default;
-
-        /**
-         * @todo
-         */
-        virtual submission_status submit(descriptor& dsc, completion_record& record) noexcept = 0;
-    };
-
-    /**
-     * @todo
-     */
-    class software: public device
-    {
-    public:
-        /**
-         * @todo
-         */
-        software() = default;
-
-        /**
-         * @todo
-         */
-        ~software() override = default;
-
-        /**
-         * @todo
-         */
-        submission_status submit(descriptor& dsc, completion_record& record) noexcept override
-        {
-            views::any_descriptor(dsc).completion_record_address() = reinterpret_cast<address_t>(&record);
-            static_cast<void>(core::submit(dsc));
-
-            return submission_status::success;
-        }
-    };
-
-    /**
-     * @todo
-     */
-    class hardware: public device
-    {
-    public:
-        /**
-         * @todo
-         */
-        hardware() = default;
-
-        /**
-         * @todo
-         */
-        ~hardware() override = default;
-
-#ifdef DML_HW
-        /**
-         * @todo
-         */
-        submission_status submit(descriptor& dsc, completion_record& record) noexcept override;
-#else
-        /**
-         * @todo
-         */
-        submission_status submit(descriptor& dsc, completion_record& record) noexcept override
-        {
-            return submission_status::failure;
-        }
-#endif
-    };
-
-}  // namespace dml::ml
-
-#endif  //DML_ML_DEVICE_HPP
diff --git a/include/dml/cpp/middle_layer/make_descriptor.hpp b/include/dml/cpp/middle_layer/make_descriptor.hpp
deleted file mode 100644
index 8b04163..0000000
--- a/include/dml/cpp/middle_layer/make_descriptor.hpp
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_ML_OPERATION_HPP
-#define DML_ML_OPERATION_HPP
-
-#include "descriptor.hpp"
-#include "descriptor_views.hpp"
-#include "options.hpp"
-#include "types.hpp"
-#include "values.hpp"
-
-namespace dml::ml
-{
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_nop_descriptor(const nop_options options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::nop_descriptor(dsc);
-
-        view.operation() = static_cast<operation_t>(operation::nop);
-        view.flags()     = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_drain_descriptor(address_t                      readback_address_1,
-                                                          address_t                      readback_address_2,
-                                                          const drain_options            options,
-                                                          const drain_additional_options additional_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::drain_descriptor(dsc);
-
-        view.operation()                = static_cast<operation_t>(operation::drain);
-        view.readback_address_1()       = readback_address_1;
-        view.readback_address_2()       = readback_address_2;
-        view.flags()                    = static_cast<flags_t>(options);
-        view.operation_specific_flags() = static_cast<operation_specific_flags_t>(additional_options);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_mem_move_descriptor(const byte_t *const    src,
-                                                             byte_t *const          dst,
-                                                             const transfer_size_t  size,
-                                                             const mem_move_options options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::mem_move_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::memory_move);
-        view.source_address()      = reinterpret_cast<address_t>(src);
-        view.destination_address() = reinterpret_cast<address_t>(dst);
-        view.transfer_size()       = size;
-        view.flags()               = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_fill_descriptor(const uint64_t        pattern,
-                                                         byte_t *const         dst,
-                                                         const transfer_size_t size,
-                                                         const fill_options    options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::fill_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::fill);
-        view.pattern()             = pattern;
-        view.destination_address() = reinterpret_cast<address_t>(dst);
-        view.transfer_size()       = size;
-        view.flags()               = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_dualcast_descriptor(const byte_t *const               src,
-                                                             byte_t *const                     dst1,
-                                                             byte_t *const                     dst2,
-                                                             const transfer_size_t             size,
-                                                             const dualcast_options            options,
-                                                             const dualcast_additional_options additional_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::dualcast_descriptor(dsc);
-
-        view.operation()                = static_cast<operation_t>(operation::dualcast);
-        view.source_address()           = reinterpret_cast<address_t>(src);
-        view.destination_1_address()    = reinterpret_cast<address_t>(dst1);
-        view.destination_2_address()    = reinterpret_cast<address_t>(dst2);
-        view.transfer_size()            = size;
-        view.flags()                    = static_cast<flags_t>(options);
-        view.operation_specific_flags() = static_cast<operation_specific_flags_t>(additional_options);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_compare_descriptor(const byte_t *const                   src1,
-                                                            const byte_t *const                   src2,
-                                                            const transfer_size_t                 size,
-                                                            const compare_options                 options,
-                                                            const compare_expected_result_options expected_result) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::compare_descriptor(dsc);
-
-        view.operation()        = static_cast<operation_t>(operation::compare);
-        view.source_1_address() = reinterpret_cast<address_t>(src1);
-        view.source_2_address() = reinterpret_cast<address_t>(src2);
-        view.transfer_size()    = size;
-        view.flags()            = static_cast<flags_t>(options);
-        view.expected_result()  = static_cast<result_t>(expected_result);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_compare_pattern_descriptor(const uint64_t                        pattern,
-                                                                    const byte_t                         *src,
-                                                                    const transfer_size_t                 size,
-                                                                    const compare_pattern_options         options,
-                                                                    const compare_expected_result_options expected_result) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::compare_pattern_descriptor(dsc);
-
-        view.operation()       = static_cast<operation_t>(operation::compare_pattern);
-        view.pattern()         = pattern;
-        view.source_address()  = reinterpret_cast<address_t>(src);
-        view.transfer_size()   = size;
-        view.flags()           = static_cast<flags_t>(options);
-        view.expected_result() = static_cast<result_t>(expected_result);
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_crc_descriptor(const byte_t *const          src,
-                                                        const transfer_size_t        size,
-                                                        const crc_value_t            crc_seed,
-                                                        const crc_options            options,
-                                                        const crc_additional_options additional_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::crc_descriptor(dsc);
-
-        view.operation()                = static_cast<operation_t>(operation::crc);
-        view.source_address()           = reinterpret_cast<address_t>(src);
-        view.transfer_size()            = size;
-        view.flags()                    = static_cast<flags_t>(options);
-        view.operation_specific_flags() = static_cast<operation_specific_flags_t>(additional_options);
-        view.crc_seed()                 = crc_seed;
-
-        return dsc;
-    }
-
-    /**
-     * @todo
-     */
-    [[nodiscard]] inline descriptor make_copy_crc_descriptor(const byte_t *const               src,
-                                                             byte_t *const                     dst,
-                                                             const transfer_size_t             size,
-                                                             const crc_value_t                 crc_seed,
-                                                             const copy_crc_options            options,
-                                                             const copy_crc_additional_options additional_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::copy_crc_descriptor(dsc);
-
-        view.operation()                = static_cast<operation_t>(operation::copy_crc);
-        view.source_address()           = reinterpret_cast<address_t>(src);
-        view.destination_address()      = reinterpret_cast<address_t>(dst);
-        view.transfer_size()            = size;
-        view.flags()                    = static_cast<flags_t>(options);
-        view.operation_specific_flags() = static_cast<operation_specific_flags_t>(additional_options);
-        view.crc_seed()                 = crc_seed;
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the create_delta operation
-     *
-     * A value-initialized descriptor is filled through views::create_delta_descriptor and returned by value.
-     * Only the addresses of the buffers are stored; no memory is read or written here.
-     *
-     * @param src1            stored as source_1_address
-     * @param src2            stored as source_2_address
-     * @param size            stored as transfer_size
-     * @param delta_record    stored as delta_record_address
-     * @param delta_max_size  stored as maximum_delta_record_size
-     * @param options         converted to flags_t and stored as flags
-     * @param expected_result converted to result_t and stored as expected_result_mask
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_create_delta_descriptor(const byte_t *const                 src1,
-                                                                 const byte_t *const                 src2,
-                                                                 const transfer_size_t               size,
-                                                                 byte_t *const                       delta_record,
-                                                                 const transfer_size_t               delta_max_size,
-                                                                 const create_delta_options          options,
-                                                                 const delta_expected_result_options expected_result) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::create_delta_descriptor(dsc);
-
-        view.operation()                 = static_cast<operation_t>(operation::create_delta);
-        view.source_1_address()          = reinterpret_cast<address_t>(src1);
-        view.source_2_address()          = reinterpret_cast<address_t>(src2);
-        view.delta_record_address()      = reinterpret_cast<address_t>(delta_record);
-        view.transfer_size()             = size;
-        view.maximum_delta_record_size() = delta_max_size;
-        view.flags()                     = static_cast<flags_t>(options);
-        view.expected_result_mask()      = static_cast<result_t>(expected_result);
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the apply_delta operation
-     *
-     * A value-initialized descriptor is filled through views::apply_delta_descriptor and returned by value.
-     *
-     * @param delta_record stored as delta_record_address
-     * @param delta_size   stored as delta_record_size
-     * @param dst          stored as destination_address
-     * @param size         stored as transfer_size
-     * @param options      converted to flags_t and stored as flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_apply_delta_descriptor(const byte_t *const       delta_record,
-                                                                const transfer_size_t     delta_size,
-                                                                byte_t *const             dst,
-                                                                const transfer_size_t     size,
-                                                                const apply_delta_options options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::apply_delta_descriptor(dsc);
-
-        view.operation()            = static_cast<operation_t>(operation::apply_delta);
-        view.delta_record_address() = reinterpret_cast<address_t>(delta_record);
-        view.destination_address()  = reinterpret_cast<address_t>(dst);
-        view.transfer_size()        = size;
-        view.delta_record_size()    = delta_size;
-        view.flags()                = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the cache_flush operation
-     *
-     * A value-initialized descriptor is filled through views::cache_flush_descriptor and returned by value.
-     *
-     * @param dst     stored as destination_address
-     * @param size    stored as transfer_size
-     * @param options converted to flags_t and stored as flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_cache_flush_descriptor(byte_t *const             dst,
-                                                                const transfer_size_t     size,
-                                                                const cache_flush_options options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::cache_flush_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::cache_flush);
-        view.destination_address() = reinterpret_cast<address_t>(dst);
-        view.transfer_size()       = size;
-        view.flags()               = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the dif_check operation
-     *
-     * A value-initialized descriptor is filled through views::dif_check_descriptor and returned by value.
-     *
-     * @param src                    stored as source_address
-     * @param transfer_size          stored as transfer_size
-     * @param src_parameters         its ref_tag_seed, app_tag_seed and app_tag_mask are stored as
-     *                               source_ref_tag, source_app_tag and source_app_tag_mask
-     * @param options                converted to flags_t and stored as flags
-     * @param additional_options     converted to dif_flags_t and stored as dif_flags
-     * @param additional_src_options converted to dif_flags_t and stored as source_dif_flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_dif_check_descriptor(const byte_t              *src,
-                                                              transfer_size_t            transfer_size,
-                                                              dif_parameters             src_parameters,
-                                                              dif_check_options          options,
-                                                              dif_additional_options     additional_options,
-                                                              dif_additional_src_options additional_src_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::dif_check_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::dif_check);
-        view.source_address()      = reinterpret_cast<address_t>(src);
-        view.transfer_size()       = transfer_size;
-        view.flags()               = static_cast<flags_t>(options);
-        view.dif_flags()           = static_cast<dif_flags_t>(additional_options);
-        view.source_dif_flags()    = static_cast<dif_flags_t>(additional_src_options);
-        view.source_ref_tag()      = src_parameters.ref_tag_seed;
-        view.source_app_tag()      = src_parameters.app_tag_seed;
-        view.source_app_tag_mask() = src_parameters.app_tag_mask;
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the dif_insert operation
-     *
-     * A value-initialized descriptor is filled through views::dif_insert_descriptor and returned by value.
-     *
-     * @param src                    stored as source_address
-     * @param dst                    stored as destination_address
-     * @param transfer_size          stored as transfer_size
-     * @param dst_parameters         its ref_tag_seed, app_tag_seed and app_tag_mask are stored as
-     *                               destination_ref_tag, destination_app_tag and destination_app_tag_mask
-     * @param options                converted to flags_t and stored as flags
-     * @param additional_options     converted to dif_flags_t and stored as dif_flags
-     * @param additional_dst_options converted to dif_flags_t and stored as destination_dif_flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_dif_insert_descriptor(const byte_t              *src,
-                                                               byte_t                    *dst,
-                                                               transfer_size_t            transfer_size,
-                                                               dif_parameters             dst_parameters,
-                                                               dif_insert_options         options,
-                                                               dif_additional_options     additional_options,
-                                                               dif_additional_dst_options additional_dst_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::dif_insert_descriptor(dsc);
-
-        view.operation()                = static_cast<operation_t>(operation::dif_insert);
-        view.source_address()           = reinterpret_cast<address_t>(src);
-        view.destination_address()      = reinterpret_cast<address_t>(dst);
-        view.transfer_size()            = transfer_size;
-        view.flags()                    = static_cast<flags_t>(options);
-        view.dif_flags()                = static_cast<dif_flags_t>(additional_options);
-        view.destination_dif_flags()    = static_cast<dif_flags_t>(additional_dst_options);
-        view.destination_ref_tag()      = dst_parameters.ref_tag_seed;
-        view.destination_app_tag()      = dst_parameters.app_tag_seed;
-        view.destination_app_tag_mask() = dst_parameters.app_tag_mask;
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the dif_strip operation
-     *
-     * A value-initialized descriptor is filled through views::dif_strip_descriptor and returned by value.
-     *
-     * @param src                    stored as source_address
-     * @param dst                    stored as destination_address
-     * @param transfer_size          stored as transfer_size
-     * @param src_parameters         its ref_tag_seed, app_tag_seed and app_tag_mask are stored as
-     *                               source_ref_tag, source_app_tag and source_app_tag_mask
-     * @param options                converted to flags_t and stored as flags
-     * @param additional_options     converted to dif_flags_t and stored as dif_flags
-     * @param additional_src_options converted to dif_flags_t and stored as source_dif_flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_dif_strip_descriptor(const byte_t              *src,
-                                                              byte_t                    *dst,
-                                                              transfer_size_t            transfer_size,
-                                                              dif_parameters             src_parameters,
-                                                              dif_strip_options          options,
-                                                              dif_additional_options     additional_options,
-                                                              dif_additional_src_options additional_src_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::dif_strip_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::dif_strip);
-        view.source_address()      = reinterpret_cast<address_t>(src);
-        view.destination_address() = reinterpret_cast<address_t>(dst);
-        view.transfer_size()       = transfer_size;
-        view.flags()               = static_cast<flags_t>(options);
-        view.dif_flags()           = static_cast<dif_flags_t>(additional_options);
-        view.source_dif_flags()    = static_cast<dif_flags_t>(additional_src_options);
-        view.source_ref_tag()      = src_parameters.ref_tag_seed;
-        view.source_app_tag()      = src_parameters.app_tag_seed;
-        view.source_app_tag_mask() = src_parameters.app_tag_mask;
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the dif_update operation
-     *
-     * A value-initialized descriptor is filled through views::dif_update_descriptor and returned by value.
-     * Source-side and destination-side DIF fields are filled from separate parameter sets.
-     *
-     * @param src                    stored as source_address
-     * @param dst                    stored as destination_address
-     * @param transfer_size          stored as transfer_size
-     * @param src_parameters         its ref_tag_seed, app_tag_seed and app_tag_mask are stored as
-     *                               source_ref_tag, source_app_tag and source_app_tag_mask
-     * @param dst_parameters         its ref_tag_seed, app_tag_seed and app_tag_mask are stored as
-     *                               destination_ref_tag, destination_app_tag and destination_app_tag_mask
-     * @param options                converted to flags_t and stored as flags
-     * @param additional_options     converted to dif_flags_t and stored as dif_flags
-     * @param additional_src_options converted to dif_flags_t and stored as source_dif_flags
-     * @param additional_dst_options converted to dif_flags_t and stored as destination_dif_flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_dif_update_descriptor(const byte_t              *src,
-                                                               byte_t                    *dst,
-                                                               transfer_size_t            transfer_size,
-                                                               dif_parameters             src_parameters,
-                                                               dif_parameters             dst_parameters,
-                                                               dif_update_options         options,
-                                                               dif_additional_options     additional_options,
-                                                               dif_additional_src_options additional_src_options,
-                                                               dif_additional_dst_options additional_dst_options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::dif_update_descriptor(dsc);
-
-        view.operation()           = static_cast<operation_t>(operation::dif_update);
-        view.source_address()      = reinterpret_cast<address_t>(src);
-        view.destination_address() = reinterpret_cast<address_t>(dst);
-        view.transfer_size()       = transfer_size;
-        view.flags()               = static_cast<flags_t>(options);
-        view.dif_flags()           = static_cast<dif_flags_t>(additional_options);
-
-        view.source_dif_flags()    = static_cast<dif_flags_t>(additional_src_options);
-        view.source_ref_tag()      = src_parameters.ref_tag_seed;
-        view.source_app_tag()      = src_parameters.app_tag_seed;
-        view.source_app_tag_mask() = src_parameters.app_tag_mask;
-
-        view.destination_dif_flags()    = static_cast<dif_flags_t>(additional_dst_options);
-        view.destination_ref_tag()      = dst_parameters.ref_tag_seed;
-        view.destination_app_tag()      = dst_parameters.app_tag_seed;
-        view.destination_app_tag_mask() = dst_parameters.app_tag_mask;
-
-        return dsc;
-    }
-
-    /**
-     * @brief Builds a descriptor for the batch operation
-     *
-     * A value-initialized descriptor is filled through views::batch_descriptor and returned by value.
-     * Only the address of the descriptor list is stored; the listed descriptors are not copied.
-     *
-     * @param src     pointer to the first descriptor of the list, stored as descriptor_list_address
-     * @param length  stored as descriptors_count
-     * @param options converted to flags_t and stored as flags
-     *
-     * @return the filled descriptor
-     */
-    [[nodiscard]] inline descriptor make_batch_descriptor(const descriptor *const src,
-                                                          const transfer_size_t   length,
-                                                          const batch_options     options) noexcept
-    {
-        auto dsc  = descriptor();
-        auto view = views::batch_descriptor(dsc);
-
-        view.operation()               = static_cast<operation_t>(operation::batch);
-        view.descriptor_list_address() = reinterpret_cast<address_t>(src);
-        view.descriptors_count()       = length;
-        view.flags()                   = static_cast<flags_t>(options);
-
-        return dsc;
-    }
-}  // namespace dml::ml
-
-#endif  //DML_ML_OPERATION_HPP
diff --git a/include/dml/cpp/middle_layer/option_types.hpp b/include/dml/cpp/middle_layer/option_types.hpp
deleted file mode 100644
index e58e910..0000000
--- a/include/dml/cpp/middle_layer/option_types.hpp
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_COMMON_OPTION_TYPES_HPP
-#define DML_COMMON_OPTION_TYPES_HPP
-
-#include <cstdint>
-#include <type_traits>
-
-#include "types.hpp"
-#include "values.hpp"
-
-namespace dml::ml
-{
-    /**
-     * @brief Typed wrapper around a raw flag word
-     *
-     * Holds a single value of @p underlying_type_t and offers explicit conversion back to it,
-     * equality comparison and a subset test. Derived classes reach the stored word through
-     * the protected flags_ member.
-     *
-     * @tparam underlying_type_t integral type used to store the flag bits
-     */
-    template <typename underlying_type_t>
-    class options_interface
-    {
-    public:
-        /**
-         * @brief Wraps a raw flag word
-         *
-         * @param flags initial bits; defaults to 0 (no flag set)
-         */
-        explicit constexpr options_interface(underlying_type_t flags = 0u) noexcept: flags_(flags)
-        {
-        }
-
-        /**
-         * @brief Explicit conversion to the raw flag word
-         *
-         * @return the stored bits
-         */
-        constexpr explicit operator underlying_type_t() const noexcept
-        {
-            return flags_;
-        }
-
-        /**
-         * @brief Checks whether both objects store the same bits
-         *
-         * @param rhs object to compare with
-         *
-         * @return true when the stored words are equal
-         */
-        constexpr bool operator==(const options_interface &rhs) const
-        {
-            return flags_ == rhs.flags_;
-        }
-
-        /**
-         * @brief Checks whether the objects store different bits
-         *
-         * Defined as the negation of operator==. The previous form (`rhs != *this`)
-         * called this very operator again and therefore never terminated.
-         *
-         * @param rhs object to compare with
-         *
-         * @return true when the stored words differ
-         */
-        constexpr bool operator!=(const options_interface &rhs) const
-        {
-            return !(*this == rhs);
-        }
-
-        /**
-         * @brief Subset test
-         *
-         * @param rhs bits to look for
-         *
-         * @return true when every bit set in @p rhs is also set in this object
-         */
-        [[nodiscard]] constexpr bool contains(const options_interface &rhs) const noexcept
-        {
-            return (this->flags_ & rhs.flags_) == rhs.flags_;
-        }
-
-    protected:
-        underlying_type_t flags_; /**< Stored flag bits */
-    };
-
-    /**
-     * @brief Flag set stored as flags_t; base class of the per-operation *_options types
-     *
-     * Inherits the options_interface<flags_t> constructors and adds a non-explicit constructor
-     * taking a single flag enumerator, so a flag value converts implicitly. The enumerator is
-     * converted through the underlying type of flag.
-     */
-    class options: public options_interface<flags_t>
-    {
-    public:
-        using options_interface<flags_t>::options_interface;
-
-        constexpr options(flag value): options_interface<flags_t>(static_cast<std::underlying_type_t<flag>>(value))
-        {
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the nop operation
-     *
-     * Reuses the constructors of options; operator| takes and returns nop_options only.
-     */
-    class nop_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return nop_options holding every bit set in either operand
-         */
-        constexpr nop_options operator|(const nop_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return nop_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the drain operation
-     *
-     * Reuses the constructors of options; operator| takes and returns drain_options only.
-     */
-    class drain_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return drain_options holding every bit set in either operand
-         */
-        constexpr drain_options operator|(const drain_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return drain_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the mem_move operation
-     *
-     * Reuses the constructors of options; operator| takes and returns mem_move_options only.
-     */
-    class mem_move_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return mem_move_options holding every bit set in either operand
-         */
-        constexpr mem_move_options operator|(const mem_move_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return mem_move_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the fill operation
-     *
-     * Reuses the constructors of options; operator| takes and returns fill_options only.
-     */
-    class fill_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return fill_options holding every bit set in either operand
-         */
-        constexpr fill_options operator|(const fill_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return fill_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the dualcast operation
-     *
-     * Reuses the constructors of options; operator| takes and returns dualcast_options only.
-     */
-    class dualcast_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dualcast_options holding every bit set in either operand
-         */
-        constexpr dualcast_options operator|(const dualcast_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return dualcast_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the compare operation
-     *
-     * Reuses the constructors of options; operator| takes and returns compare_options only.
-     */
-    class compare_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return compare_options holding every bit set in either operand
-         */
-        constexpr compare_options operator|(const compare_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return compare_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the compare_pattern operation
-     *
-     * Reuses the constructors of options; operator| takes and returns compare_pattern_options only.
-     */
-    class compare_pattern_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return compare_pattern_options holding every bit set in either operand
-         */
-        constexpr compare_pattern_options operator|(const compare_pattern_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return compare_pattern_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the crc operation
-     *
-     * Reuses the constructors of options; operator| takes and returns crc_options only.
-     */
-    class crc_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return crc_options holding every bit set in either operand
-         */
-        constexpr crc_options operator|(const crc_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return crc_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the copy_crc operation
-     *
-     * Reuses the constructors of options; operator| takes and returns copy_crc_options only.
-     */
-    class copy_crc_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return copy_crc_options holding every bit set in either operand
-         */
-        constexpr copy_crc_options operator|(const copy_crc_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return copy_crc_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the create_delta operation
-     *
-     * Reuses the constructors of options; operator| takes and returns create_delta_options only.
-     */
-    class create_delta_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return create_delta_options holding every bit set in either operand
-         */
-        constexpr create_delta_options operator|(const create_delta_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return create_delta_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the apply_delta operation
-     *
-     * Reuses the constructors of options; operator| takes and returns apply_delta_options only.
-     */
-    class apply_delta_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return apply_delta_options holding every bit set in either operand
-         */
-        constexpr apply_delta_options operator|(const apply_delta_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return apply_delta_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the cache_flush operation
-     *
-     * Reuses the constructors of options; operator| takes and returns cache_flush_options only.
-     */
-    class cache_flush_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return cache_flush_options holding every bit set in either operand
-         */
-        constexpr cache_flush_options operator|(const cache_flush_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return cache_flush_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the dif_check operation
-     *
-     * Reuses the constructors of options; operator| takes and returns dif_check_options only.
-     */
-    class dif_check_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_check_options holding every bit set in either operand
-         */
-        constexpr dif_check_options operator|(const dif_check_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return dif_check_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the dif_insert operation
-     *
-     * Reuses the constructors of options; operator| takes and returns dif_insert_options only.
-     */
-    class dif_insert_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_insert_options holding every bit set in either operand
-         */
-        constexpr dif_insert_options operator|(const dif_insert_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return dif_insert_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the dif_strip operation
-     *
-     * Reuses the constructors of options; operator| takes and returns dif_strip_options only.
-     */
-    class dif_strip_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_strip_options holding every bit set in either operand
-         */
-        constexpr dif_strip_options operator|(const dif_strip_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return dif_strip_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the dif_update operation
-     *
-     * Reuses the constructors of options; operator| takes and returns dif_update_options only.
-     */
-    class dif_update_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_update_options holding every bit set in either operand
-         */
-        constexpr dif_update_options operator|(const dif_update_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return dif_update_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set type named for the batch operation
-     *
-     * Reuses the constructors of options; operator| takes and returns batch_options only.
-     */
-    class batch_options: public options
-    {
-    public:
-        using options::options;
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return batch_options holding every bit set in either operand
-         */
-        constexpr batch_options operator|(const batch_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return batch_options(merged);
-        }
-    };
-
-    /**
-     * @brief Flag set stored as operation_specific_flags_t; base of the *_additional_options types
-     *
-     * Adds nothing of its own: it only inherits the options_interface constructors.
-     */
-    class additional_options: public options_interface<operation_specific_flags_t>
-    {
-    public:
-        using options_interface<operation_specific_flags_t>::options_interface;
-    };
-
-    /**
-     * @brief Bit set stored as result_t, named for the expected result of a compare operation
-     *
-     * Reuses the options_interface<result_t> constructors and adds a same-type union operator.
-     */
-    class compare_expected_result_options: public options_interface<result_t>
-    {
-    public:
-        using options_interface<result_t>::options_interface;
-
-        /**
-         * @brief Bitwise union of two bit sets
-         *
-         * @param rhs bit set whose bits are added to a copy of this one
-         *
-         * @return compare_expected_result_options holding every bit set in either operand
-         */
-        constexpr compare_expected_result_options operator|(const compare_expected_result_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return compare_expected_result_options(merged);
-        }
-    };
-
-    /**
-     * @brief Bit set stored as result_t, named for the expected result of a delta operation
-     *
-     * Reuses the options_interface<result_t> constructors and adds a same-type union operator.
-     */
-    class delta_expected_result_options: public options_interface<result_t>
-    {
-    public:
-        using options_interface<result_t>::options_interface;
-
-        /**
-         * @brief Bitwise union of two bit sets
-         *
-         * @param rhs bit set whose bits are added to a copy of this one
-         *
-         * @return delta_expected_result_options holding every bit set in either operand
-         */
-        constexpr delta_expected_result_options operator|(const delta_expected_result_options &rhs) const noexcept
-        {
-            const auto merged = flags_ | rhs.flags_;
-
-            return delta_expected_result_options(merged);
-        }
-    };
-
-    /**
-     * @brief Operation-specific flag set that can be built from drain_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single drain_flag.
-     */
-    class drain_additional_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr drain_additional_options() = default;
-
-        /**
-         * @brief Wraps a single drain_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of drain_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr drain_additional_options(drain_flag value): additional_options(static_cast<std::underlying_type_t<drain_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return drain_additional_options holding every bit set in either operand
-         */
-        constexpr drain_additional_options operator|(const drain_additional_options &rhs) const noexcept
-        {
-            return drain_additional_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Operation-specific flag set that can be built from crc_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single crc_flag.
-     */
-    class crc_additional_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr crc_additional_options() = default;
-
-        /**
-         * @brief Wraps a single crc_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of crc_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr crc_additional_options(crc_flag value): additional_options(static_cast<std::underlying_type_t<crc_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return crc_additional_options holding every bit set in either operand
-         */
-        constexpr crc_additional_options operator|(const crc_additional_options &rhs) const noexcept
-        {
-            return crc_additional_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Operation-specific flag set for copy_crc that can be built from crc_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single crc_flag.
-     */
-    class copy_crc_additional_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr copy_crc_additional_options() = default;
-
-        /**
-         * @brief Wraps a single crc_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of crc_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr copy_crc_additional_options(crc_flag value): additional_options(static_cast<std::underlying_type_t<crc_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return copy_crc_additional_options holding every bit set in either operand
-         */
-        constexpr copy_crc_additional_options operator|(const copy_crc_additional_options &rhs) const noexcept
-        {
-            return copy_crc_additional_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Operation-specific flag set that can be built from dualcast_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single dualcast_flag.
-     */
-    class dualcast_additional_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr dualcast_additional_options() = default;
-
-        /**
-         * @brief Wraps a single dualcast_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of dualcast_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr dualcast_additional_options(dualcast_flag value): additional_options(static_cast<std::underlying_type_t<dualcast_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dualcast_additional_options holding every bit set in either operand
-         */
-        constexpr dualcast_additional_options operator|(const dualcast_additional_options &rhs) const noexcept
-        {
-            return dualcast_additional_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Operation-specific flag set that can be built from dif_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single dif_flag.
-     */
-    class dif_additional_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr dif_additional_options() = default;
-
-        /**
-         * @brief Wraps a single dif_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of dif_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr dif_additional_options(dif_flag value): additional_options(static_cast<std::underlying_type_t<dif_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_additional_options holding every bit set in either operand
-         */
-        constexpr dif_additional_options operator|(const dif_additional_options &rhs) const noexcept
-        {
-            return dif_additional_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Source-side DIF flag set that can be built from dif_src_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single dif_src_flag.
-     */
-    class dif_additional_src_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr dif_additional_src_options() = default;
-
-        /**
-         * @brief Wraps a single dif_src_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of dif_src_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr dif_additional_src_options(dif_src_flag value): additional_options(static_cast<std::underlying_type_t<dif_src_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_additional_src_options holding every bit set in either operand
-         */
-        constexpr dif_additional_src_options operator|(const dif_additional_src_options &rhs) const noexcept
-        {
-            return dif_additional_src_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-    /**
-     * @brief Destination-side DIF flag set that can be built from dif_dst_flag values
-     *
-     * Inherits the additional_options constructors, is default-constructible with no bit set,
-     * and converts implicitly from a single dif_dst_flag.
-     */
-    class dif_additional_dst_options: public additional_options
-    {
-    public:
-        using additional_options::additional_options;
-
-        /**
-         * @brief Creates a flag set with no bit set
-         */
-        constexpr dif_additional_dst_options() = default;
-
-        /**
-         * @brief Wraps a single dif_dst_flag (non-explicit, so the flag converts implicitly)
-         *
-         * The value is converted through the underlying type of dif_dst_flag itself;
-         * the previous code went through the underlying type of the unrelated flag enumeration.
-         *
-         * @param value flag to store
-         */
-        constexpr dif_additional_dst_options(dif_dst_flag value): additional_options(static_cast<std::underlying_type_t<dif_dst_flag>>(value))
-        {
-        }
-
-        /**
-         * @brief Bitwise union of two flag sets
-         *
-         * @param rhs flag set whose bits are added to a copy of this one
-         *
-         * @return dif_additional_dst_options holding every bit set in either operand
-         */
-        constexpr dif_additional_dst_options operator|(const dif_additional_dst_options &rhs) const noexcept
-        {
-            return dif_additional_dst_options(this->flags_ | rhs.flags_);
-        }
-    };
-
-}  // namespace dml::ml
-
-#endif  //DML_COMMON_OPTION_TYPES_HPP
diff --git a/include/dml/cpp/middle_layer/options.hpp b/include/dml/cpp/middle_layer/options.hpp
deleted file mode 100644
index 9bb3ca4..0000000
--- a/include/dml/cpp/middle_layer/options.hpp
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_COMMON_OPTIONS_HPP
-#define DML_COMMON_OPTIONS_HPP
-
-#include "option_types.hpp"
-#include "values.hpp"
-
-namespace dml::ml
-{
-    /**
-     * @brief Named nop_options constants
-     *
-     * `none` is a default-constructed nop_options; `fence` is built from flag::fence.
-     * `type` aliases nop_options.
-     */
-    struct nop_option
-    {
-        using type = nop_options;
-
-        static constexpr type none  = type();
-        static constexpr type fence = flag::fence;
-    };
-
-    /**
-     * @brief Named drain_options constants
-     *
-     * `none` is a default-constructed drain_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases drain_options.
-     */
-    struct drain_option
-    {
-        using type = drain_options;
-
-        static constexpr type none                  = type();
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector = flag::address_2_tc_selector;
-    };
-
-    /**
-     * @brief Named mem_move_options constants
-     *
-     * `none` is a default-constructed mem_move_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases mem_move_options.
-     */
-    struct mem_move_option
-    {
-        using type = mem_move_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @brief Named fill_options constants
-     *
-     * `none` is a default-constructed fill_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases fill_options.
-     */
-    struct fill_option
-    {
-        using type = fill_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @brief Named dualcast_options constants
-     *
-     * `none` is a default-constructed dualcast_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases dualcast_options.
-     */
-    struct dualcast_option
-    {
-        using type = dualcast_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type address_3_tc_selector             = flag::address_3_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @brief Named compare_options constants
-     *
-     * `none` is a default-constructed compare_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases compare_options.
-     */
-    struct compare_option
-    {
-        using type = compare_options;
-
-        static constexpr type none                  = type();
-        static constexpr type fence                 = flag::fence;
-        static constexpr type block_on_fault        = flag::block_on_fault;
-        static constexpr type check_result          = flag::check_result;
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector = flag::address_2_tc_selector;
-    };
-
-    /**
-     * @brief Named compare_pattern_options constants
-     *
-     * `none` is a default-constructed compare_pattern_options; every other constant is built
-     * from the flag enumerator of the same name. `type` aliases compare_pattern_options.
-     */
-    struct compare_pattern_option
-    {
-        using type = compare_pattern_options;
-
-        static constexpr type none                  = type();
-        static constexpr type fence                 = flag::fence;
-        static constexpr type block_on_fault        = flag::block_on_fault;
-        static constexpr type check_result          = flag::check_result;
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector;
-    };
-
-    /**
-     * @brief Named crc_options constants
-     *
-     * `none` is a default-constructed crc_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases crc_options.
-     */
-    struct crc_option
-    {
-        using type = crc_options;
-
-        static constexpr type none                  = type();
-        static constexpr type fence                 = flag::fence;
-        static constexpr type block_on_fault        = flag::block_on_fault;
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector;
-        static constexpr type address_3_tc_selector = flag::address_3_tc_selector;
-    };
-
-    /**
-     * @brief Named copy_crc_options constants
-     *
-     * `none` is a default-constructed copy_crc_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases copy_crc_options.
-     */
-    struct copy_crc_option
-    {
-        using type = copy_crc_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type address_3_tc_selector             = flag::address_3_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @brief Named create_delta_options constants
-     *
-     * `none` is a default-constructed create_delta_options; every other constant is built from
-     * the flag enumerator of the same name. `type` aliases create_delta_options.
-     */
-    struct create_delta_option
-    {
-        using type = create_delta_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type check_result                      = flag::check_result;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type address_3_tc_selector             = flag::address_3_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @todo
-     */
-    struct apply_delta_option
-    {
-        using type = apply_delta_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @todo
-     */
-    struct cache_flush_option
-    {
-        using type = cache_flush_options;
-
-        static constexpr type none                              = type();
-        static constexpr type fence                             = flag::fence;
-        static constexpr type block_on_fault                    = flag::block_on_fault;
-        static constexpr type cache_control                     = flag::cache_control;
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;
-        static constexpr type strict_ordering                   = flag::strict_ordering;
-        static constexpr type destination_readback              = flag::destination_readback;
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector;
-    };
-
-    /**
-     * @todo
-     */
-    struct batch_option
-    {
-        using type = batch_options;
-
-        static constexpr type none                  = type();
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector;
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_check_option
-    {
-        using type = dif_check_options;
-
-        static constexpr type none                  = type(0);                     /**< @todo */
-        static constexpr type fence                 = flag::fence;                 /**< @todo */
-        static constexpr type block_on_fault        = flag::block_on_fault;        /**< @todo */
-        static constexpr type address_1_tc_selector = flag::address_1_tc_selector; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_insert_option
-    {
-        using type = dif_insert_options;
-
-        static constexpr type none                              = type(0);                                 /**< @todo */
-        static constexpr type fence                             = flag::fence;                             /**< @todo */
-        static constexpr type block_on_fault                    = flag::block_on_fault;                    /**< @todo */
-        static constexpr type cache_control                     = flag::cache_control;                     /**< @todo */
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;             /**< @todo */
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;             /**< @todo */
-        static constexpr type strict_ordering                   = flag::strict_ordering;                   /**< @todo */
-        static constexpr type destination_readback              = flag::destination_readback;              /**< @todo */
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_strip_option
-    {
-        using type = dif_strip_options;
-
-        static constexpr type none                              = type(0);                                 /**< @todo */
-        static constexpr type fence                             = flag::fence;                             /**< @todo */
-        static constexpr type block_on_fault                    = flag::block_on_fault;                    /**< @todo */
-        static constexpr type cache_control                     = flag::cache_control;                     /**< @todo */
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;             /**< @todo */
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;             /**< @todo */
-        static constexpr type strict_ordering                   = flag::strict_ordering;                   /**< @todo */
-        static constexpr type destination_readback              = flag::destination_readback;              /**< @todo */
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_update_option
-    {
-        using type = dif_update_options;
-
-        static constexpr type none                              = type(0);                                 /**< @todo */
-        static constexpr type fence                             = flag::fence;                             /**< @todo */
-        static constexpr type block_on_fault                    = flag::block_on_fault;                    /**< @todo */
-        static constexpr type cache_control                     = flag::cache_control;                     /**< @todo */
-        static constexpr type address_1_tc_selector             = flag::address_1_tc_selector;             /**< @todo */
-        static constexpr type address_2_tc_selector             = flag::address_2_tc_selector;             /**< @todo */
-        static constexpr type strict_ordering                   = flag::strict_ordering;                   /**< @todo */
-        static constexpr type destination_readback              = flag::destination_readback;              /**< @todo */
-        static constexpr type destination_steering_tag_selector = flag::destination_steering_tag_selector; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct compare_expected_result_option
-    {
-        static constexpr auto expect_equal     = compare_expected_result_options(0);
-        static constexpr auto expect_not_equal = compare_expected_result_options(1);
-    };
-
-    /**
-     * @todo
-     */
-    struct delta_expected_result_option
-    {
-        static constexpr auto expect_equal     = delta_expected_result_options(1);
-        static constexpr auto expect_not_equal = delta_expected_result_options(2);
-        static constexpr auto expect_overflow  = delta_expected_result_options(4);
-    };
-
-    /**
-     * @todo
-     */
-    struct drain_additional_option
-    {
-        using type = drain_additional_options;
-
-        static constexpr type none                            = type();
-        static constexpr type readback_address_1_valid        = drain_flag::readback_address_1_valid;
-        static constexpr type readback_address_2_valid        = drain_flag::readback_address_2_valid;
-        static constexpr type suppress_tc_a_implicit_readback = drain_flag::suppress_tc_a_implicit_readback;
-        static constexpr type suppress_tc_b_implicit_readback = drain_flag::suppress_tc_b_implicit_readback;
-    };
-
-    /**
-     * @todo
-     */
-    struct crc_additional_option
-    {
-        using type = crc_additional_options;
-
-        static constexpr type none                   = type();
-        static constexpr type read_crc_seed          = crc_flag::read_crc_seed;
-        static constexpr type bypass_reflection      = crc_flag::bypass_crc_inversion_and_reflection;
-        static constexpr type bypass_data_reflection = crc_flag::bypass_data_reflection;
-    };
-
-    /**
-     * @todo
-     */
-    struct copy_crc_additional_option
-    {
-        using type = copy_crc_additional_options;
-
-        static constexpr type none                   = type();
-        static constexpr type read_crc_seed          = crc_flag::read_crc_seed;
-        static constexpr type bypass_reflection      = crc_flag::bypass_crc_inversion_and_reflection;
-        static constexpr type bypass_data_reflection = crc_flag::bypass_data_reflection;
-    };
-
-    /**
-     * @todo
-     */
-    struct dualcast_additional_option
-    {
-        using type = dualcast_additional_options;
-
-        static constexpr type none                                = type();
-        static constexpr type destination_2_steering_tag_selector = dualcast_flag::destination_2_steering_tag_selector;
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_additional_option
-    {
-        using type = dif_additional_options;
-
-        static constexpr type block_size_512    = dif_flag::block_size_512;    /**< @todo */
-        static constexpr type block_size_520    = dif_flag::block_size_520;    /**< @todo */
-        static constexpr type block_size_4096   = dif_flag::block_size_4096;   /**< @todo */
-        static constexpr type block_size_4104   = dif_flag::block_size_4104;   /**< @todo */
-        static constexpr type invert_crc_seed   = dif_flag::invert_crc_seed;   /**< @todo */
-        static constexpr type invert_crc_result = dif_flag::invert_crc_result; /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_additional_src_option
-    {
-        using type = dif_additional_src_options;
-
-        static constexpr type none                      = type(0);                                 /**< @todo */
-        static constexpr type enable_all_f_detect_error = dif_src_flag::enable_all_f_detect_error; /**< @todo */
-        static constexpr type all_f_detect              = dif_src_flag::all_f_detect;              /**< @todo */
-        static constexpr type app_tag_f_detect          = dif_src_flag::app_tag_f_detect;          /**< @todo */
-        static constexpr type app_and_ref_tag_f_detect  = dif_src_flag::app_and_ref_tag_f_detect;  /**< @todo */
-        static constexpr type incrementing_app_tag_type = dif_src_flag::incrementing_app_tag_type; /**< @todo */
-        static constexpr type guard_check_disable       = dif_src_flag::guard_check_disable;       /**< @todo */
-        static constexpr type ref_tag_check_disable     = dif_src_flag::ref_tag_check_disable;     /**< @todo */
-        static constexpr type fixed_ref_tag_type        = dif_src_flag::fixed_ref_tag_type;        /**< @todo */
-    };
-
-    /**
-     * @todo
-     */
-    struct dif_additional_dst_option
-    {
-        using type = dif_additional_dst_options;
-
-        static constexpr type none                      = type(0);                                 /**< @todo */
-        static constexpr type app_tag_pass_through      = dif_dst_flag::app_tag_pass_through;      /**< @todo */
-        static constexpr type incrementing_app_tag_type = dif_dst_flag::incrementing_app_tag_type; /**< @todo */
-        static constexpr type guard_field_pass_through  = dif_dst_flag::guard_field_pass_through;  /**< @todo */
-        static constexpr type ref_tag_pass_through      = dif_dst_flag::ref_tag_pass_through;      /**< @todo */
-        static constexpr type fixed_ref_tag_type        = dif_dst_flag::fixed_ref_tag_type;        /**< @todo */
-    };
-}  // namespace dml::ml
-
-#endif  //DML_COMMON_OPTIONS_HPP
diff --git a/include/dml/cpp/middle_layer/values.hpp b/include/dml/cpp/middle_layer/values.hpp
deleted file mode 100644
index de17930..0000000
--- a/include/dml/cpp/middle_layer/values.hpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_ML_VALUES_HPP
-#define DML_ML_VALUES_HPP
-
-#include "types.hpp"
-
-namespace dml::ml
-{
-    enum class operation : operation_t
-    {
-        nop             = 0x00,
-        batch           = 0x01,
-        drain           = 0x02,
-        memory_move     = 0x03,
-        fill            = 0x04,
-        compare         = 0x05,
-        compare_pattern = 0x06,
-        create_delta    = 0x07,
-        apply_delta     = 0x08,
-        dualcast        = 0x09,
-        crc             = 0x10,
-        copy_crc        = 0x11,
-        dif_check       = 0x12,
-        dif_insert      = 0x13,
-        dif_strip       = 0x14,
-        dif_update      = 0x15,
-        cache_flush     = 0x20
-    };
-
-    enum class flag : flags_t
-    {
-        fence                                   = 0b0000000000000001,
-        block_on_fault                          = 0b0000000000000010,
-        completion_record_address_valid         = 0b0000000000000100,
-        request_completion_record               = 0b0000000000001000,
-        request_completion_interrupt            = 0b0000000000010000,
-        completion_record_steering_tag_selector = 0b0000000000100000,
-        check_result                            = 0b0000000010000000,
-        cache_control                           = 0b0000000100000000,
-        address_1_tc_selector                   = 0b0000001000000000,
-        address_2_tc_selector                   = 0b0000010000000000,
-        address_3_tc_selector                   = 0b0000100000000000,
-        completion_record_tc_selector           = 0b0001000000000000,
-        strict_ordering                         = 0b0010000000000000,
-        destination_readback                    = 0b0100000000000000,
-        destination_steering_tag_selector       = 0b1000000000000000
-    };
-
-    enum class drain_flag : operation_specific_flags_t
-    {
-        readback_address_1_valid        = 0b0001,
-        readback_address_2_valid        = 0b0010,
-        suppress_tc_a_implicit_readback = 0b0100,
-        suppress_tc_b_implicit_readback = 0b1000
-    };
-
-    enum class dualcast_flag : operation_specific_flags_t
-    {
-        destination_2_steering_tag_selector = 0b1
-    };
-
-    enum class crc_flag : operation_specific_flags_t
-    {
-        read_crc_seed                       = 0b001,
-        bypass_crc_inversion_and_reflection = 0b010,
-        bypass_data_reflection              = 0b100,
-    };
-
-    enum class dif_flag : operation_specific_flags_t
-    {
-        block_size_512    = 0b0000,
-        block_size_520    = 0b0001,
-        block_size_4096   = 0b0010,
-        block_size_4104   = 0b0011,
-        invert_crc_seed   = 0b0100,
-        invert_crc_result = 0b1000,
-    };
-
-    enum class dif_src_flag : operation_specific_flags_t
-    {
-        enable_all_f_detect_error = 0b00000001,
-        all_f_detect              = 0b00000010,
-        app_tag_f_detect          = 0b00000100,
-        app_and_ref_tag_f_detect  = 0b00001000,
-        incrementing_app_tag_type = 0b00010000,
-        guard_check_disable       = 0b00100000,
-        ref_tag_check_disable     = 0b01000000,
-        fixed_ref_tag_type        = 0b10000000
-    };
-
-    enum class dif_dst_flag : operation_specific_flags_t
-    {
-        app_tag_pass_through      = 0b00001000,
-        incrementing_app_tag_type = 0b00010000,
-        guard_field_pass_through  = 0b00100000,
-        ref_tag_pass_through      = 0b01000000,
-        fixed_ref_tag_type        = 0b10000000
-    };
-
-    enum class dif_status : dif_status_t
-    {
-        guard_mismatch     = 0x01,
-        app_tag_mismatch   = 0x02,
-        ref_tag_mismatch   = 0x04,
-        all_f_detect_error = 0x08
-    };
-}  // namespace dml::ml
-
-#endif  //DML_ML_VALUES_HPP
diff --git a/include/dml/detail/common/flags.hpp b/include/dml/detail/common/flags.hpp
new file mode 100644
index 0000000..c8b707b
--- /dev/null
+++ b/include/dml/detail/common/flags.hpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_COMMON_FLAGS_HPP
+#define DML_COMMON_FLAGS_HPP
+
+#include <dml/detail/common/types.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <type_traits>
+
+namespace dml::detail
+{
+    enum class flag : flags_t  // Base set of named bit masks; the per-operation *_flag enums in this header copy their values from here.
+    {
+        fence                                   = 0b0000000000000001,  // bit 0
+        block_on_fault                          = 0b0000000000000010,  // bit 1
+        completion_record_address_valid         = 0b0000000000000100,  // bit 2
+        request_completion_record               = 0b0000000000001000,  // bit 3
+        request_completion_interrupt            = 0b0000000000010000,  // bit 4
+        completion_record_steering_tag_selector = 0b0000000000100000,  // bit 5 (bit 6 has no enumerator)
+        check_result                            = 0b0000000010000000,  // bit 7
+        cache_control                           = 0b0000000100000000,  // bit 8
+        address_1_tc_selector                   = 0b0000001000000000,  // bit 9
+        address_2_tc_selector                   = 0b0000010000000000,  // bit 10
+        address_3_tc_selector                   = 0b0000100000000000,  // bit 11
+        completion_record_tc_selector           = 0b0001000000000000,  // bit 12
+        strict_ordering                         = 0b0010000000000000,  // bit 13
+        destination_readback                    = 0b0100000000000000,  // bit 14
+        destination_steering_tag_selector       = 0b1000000000000000   // bit 15
+    };
+
+    enum class nop_flag : std::underlying_type_t<flag>  // Exposes only flag::fence; presumably the flags accepted by a nop operation (inferred from the name) - TODO confirm.
+    {
+        fence = to_underlying(flag::fence)  // assumes to_underlying() yields the enumerator's integer value - TODO confirm against utils/enum.hpp
+    };
+
+    enum class batch_flag : std::underlying_type_t<flag>  // Exposes only flag::address_1_tc_selector; presumably the flags accepted by a batch operation - TODO confirm.
+    {
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector)  // value taken from the base flag enum, not re-declared
+    };
+
+    enum class drain_flag : std::underlying_type_t<flag>  // General-flag subset named after drain; drain's operation-specific bits live in drain_specific_flag (specific_flags.hpp).
+    {  // Only the two address TC selectors are exposed; there is no fence or block_on_fault enumerator here.
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector = to_underlying(flag::address_2_tc_selector)
+    };
+
+    enum class mem_move_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the memory-move operation - TODO confirm.
+    {  // Same eight enumerators as apply_delta_flag and the dif_insert/dif_strip/dif_update flag enums.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class fill_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the fill operation - TODO confirm.
+    {  // Matches mem_move_flag except that address_1_tc_selector is not exposed; same set as cache_flush_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class compare_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the compare operation - TODO confirm.
+    {  // Includes check_result and both address_1/address_2 TC selectors; no cache or destination enumerators.
+        fence                 = to_underlying(flag::fence),
+        block_on_fault        = to_underlying(flag::block_on_fault),
+        check_result          = to_underlying(flag::check_result),
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector = to_underlying(flag::address_2_tc_selector)
+    };
+
+    enum class compare_pattern_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the compare-pattern operation - TODO confirm.
+    {  // Same as compare_flag minus address_2_tc_selector.
+        fence                 = to_underlying(flag::fence),
+        block_on_fault        = to_underlying(flag::block_on_fault),
+        check_result          = to_underlying(flag::check_result),
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector)
+    };
+
+    enum class create_delta_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the create-delta operation - TODO confirm.
+    {  // Widest subset in this header: the nine enumerators of dualcast_flag plus check_result.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        check_result                      = to_underlying(flag::check_result),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        address_3_tc_selector             = to_underlying(flag::address_3_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class apply_delta_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the apply-delta operation - TODO confirm.
+    {  // Unlike create_delta_flag, neither check_result nor address_3_tc_selector is exposed.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class dualcast_flag : std::underlying_type_t<flag>  // General-flag subset named after dualcast; its operation-specific bit lives in dualcast_specific_flag (specific_flags.hpp).
+    {  // Exposes all three address TC selectors; same enumerator set as copy_crc_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        address_3_tc_selector             = to_underlying(flag::address_3_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class crc_flag : std::underlying_type_t<flag>  // General-flag subset named after crc; CRC-specific bits live in crc_specific_flag (specific_flags.hpp).
+    {  // Exposes address_1 and address_3 TC selectors but not address_2.
+        fence                 = to_underlying(flag::fence),
+        block_on_fault        = to_underlying(flag::block_on_fault),
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector),
+        address_3_tc_selector = to_underlying(flag::address_3_tc_selector)
+    };
+
+    enum class copy_crc_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the copy-with-CRC operation - TODO confirm.
+    {  // Same nine enumerators as dualcast_flag; adds cache/destination controls and address_2 on top of crc_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        address_3_tc_selector             = to_underlying(flag::address_3_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class dif_check_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the DIF check operation - TODO confirm.
+    {  // Smallest of the dif_* subsets: no cache, ordering or destination enumerators.
+        fence                 = to_underlying(flag::fence),
+        block_on_fault        = to_underlying(flag::block_on_fault),
+        address_1_tc_selector = to_underlying(flag::address_1_tc_selector)
+    };
+
+    enum class dif_insert_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the DIF insert operation - TODO confirm.
+    {  // Enumerator set is identical to dif_strip_flag and dif_update_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class dif_strip_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the DIF strip operation - TODO confirm.
+    {  // Enumerator set is identical to dif_insert_flag and dif_update_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class dif_update_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the DIF update operation - TODO confirm.
+    {  // Enumerator set is identical to dif_insert_flag and dif_strip_flag.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_1_tc_selector             = to_underlying(flag::address_1_tc_selector),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+
+    enum class cache_flush_flag : std::underlying_type_t<flag>  // Subset of flag, presumably for the cache-flush operation - TODO confirm.
+    {  // Same seven enumerators as fill_flag; address_2_tc_selector is the only TC selector exposed.
+        fence                             = to_underlying(flag::fence),
+        block_on_fault                    = to_underlying(flag::block_on_fault),
+        cache_control                     = to_underlying(flag::cache_control),
+        address_2_tc_selector             = to_underlying(flag::address_2_tc_selector),
+        strict_ordering                   = to_underlying(flag::strict_ordering),
+        destination_readback              = to_underlying(flag::destination_readback),
+        destination_steering_tag_selector = to_underlying(flag::destination_steering_tag_selector)
+    };
+}  // namespace dml::detail
+
+#endif  //DML_COMMON_FLAGS_HPP
diff --git a/include/dml/detail/common/specific_flags.hpp b/include/dml/detail/common/specific_flags.hpp
new file mode 100644
index 0000000..aa13862
--- /dev/null
+++ b/include/dml/detail/common/specific_flags.hpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_COMMON_SPECIFIC_FLAGS_HPP
+#define DML_COMMON_SPECIFIC_FLAGS_HPP
+
+#include <dml/detail/common/types.hpp>
+#include <type_traits>
+
+namespace dml::detail
+{
+    enum class compare_result : result_t  // Two plain result codes stored as result_t (0 and 1, not bit masks).
+    {
+        equal     = 0,  // note: differs from create_delta_result::equal, which is 1
+        not_equal = 1
+    };
+
+    enum class create_delta_result : result_t  // Result codes stored as result_t; each value is a distinct single bit (1, 2, 4).
+    {  // NOTE(review): single-bit values suggest these may be OR-ed into a mask - confirm against callers.
+        equal     = 1,
+        not_equal = 2,
+        overflow  = 4
+    };
+
+    enum class drain_specific_flag : operation_specific_flags_t  // Drain-named bits in the operation-specific flag type; separate from drain_flag in flags.hpp.
+    {
+        readback_address_1_valid        = 0b0001,  // bit 0
+        readback_address_2_valid        = 0b0010,  // bit 1
+        suppress_tc_a_implicit_readback = 0b0100,  // bit 2
+        suppress_tc_b_implicit_readback = 0b1000   // bit 3
+    };
+
+    enum class dualcast_specific_flag : operation_specific_flags_t  // Dualcast-named bit in the operation-specific flag type; separate from dualcast_flag in flags.hpp.
+    {
+        destination_2_steering_tag_selector = 0b1  // bit 0; the only enumerator
+    };
+
+    enum class crc_specific_flag : operation_specific_flags_t  // CRC-named bits in the operation-specific flag type; separate from crc_flag in flags.hpp.
+    {
+        read_crc_seed                       = 0b001,  // bit 0
+        bypass_crc_inversion_and_reflection = 0b010,  // bit 1
+        bypass_data_reflection              = 0b100   // bit 2
+    };
+
+    enum class dif_specific_flag : operation_specific_flags_t  // DIF-named values in the operation-specific flag type.
+    {  // block_size_* are the values 0..3 of the low two bits (not independent bits); the invert_* entries are single bits above them.
+        block_size_512    = 0b0000,  // zero: cannot be detected by AND-ing against a mask
+        block_size_520    = 0b0001,
+        block_size_4096   = 0b0010,
+        block_size_4104   = 0b0011,  // both low bits set; not the OR of two options in meaning
+        invert_crc_seed   = 0b0100,  // bit 2
+        invert_crc_result = 0b1000   // bit 3
+    };
+
+    enum class dif_source_flag : operation_specific_flags_t  // Eight single-bit values covering bits 0..7; presumably the DIF source-side flags - TODO confirm.
+    {  // incrementing_app_tag_type and fixed_ref_tag_type have the same values as in dif_destination_flag.
+        enable_all_f_detect_error = 0b00000001,  // bit 0
+        all_f_detect              = 0b00000010,  // bit 1
+        app_tag_f_detect          = 0b00000100,  // bit 2
+        app_and_ref_tag_f_detect  = 0b00001000,  // bit 3
+        incrementing_app_tag_type = 0b00010000,  // bit 4
+        guard_check_disable       = 0b00100000,  // bit 5
+        ref_tag_check_disable     = 0b01000000,  // bit 6
+        fixed_ref_tag_type        = 0b10000000   // bit 7
+    };
+
+    enum class dif_destination_flag : operation_specific_flags_t  // Five single-bit values covering bits 3..7; presumably the DIF destination-side flags - TODO confirm.
+    {  // Bits 0..2 have no enumerator here.
+        app_tag_pass_through      = 0b00001000,  // bit 3
+        incrementing_app_tag_type = 0b00010000,  // bit 4 (same value as dif_source_flag::incrementing_app_tag_type)
+        guard_field_pass_through  = 0b00100000,  // bit 5
+        ref_tag_pass_through      = 0b01000000,  // bit 6
+        fixed_ref_tag_type        = 0b10000000   // bit 7 (same value as dif_source_flag::fixed_ref_tag_type)
+    };
+}  // namespace dml::detail
+
+#endif  //DML_COMMON_SPECIFIC_FLAGS_HPP
diff --git a/include/dml/cpp/middle_layer/status.hpp b/include/dml/detail/common/status.hpp
similarity index 85%
rename from include/dml/cpp/middle_layer/status.hpp
rename to include/dml/detail/common/status.hpp
index e3d4c8a..84b3332 100644
--- a/include/dml/cpp/middle_layer/status.hpp
+++ b/include/dml/detail/common/status.hpp
@@ -14,39 +14,40 @@
  *
  */
 
-#ifndef DML_ML_STATUS_HPP
-#define DML_ML_STATUS_HPP
+#ifndef DML_DETAIL_COMMON_STATUS_HPP
+#define DML_DETAIL_COMMON_STATUS_HPP
 
-#include <cstdint>
-#include <limits>
+#include <dml/detail/common/types.hpp>
 
-namespace dml::ml
+namespace dml::detail
 {
     enum class validation_status
     {
         success,
-        address_is_null,
-        size_is_null,
-        buffers_overlap,
-        address_is_misaligned,
-        delta_size_is_wrong,
-        delta_input_size_is_wrong,
-        delta_input_size_overflow,
-        delta_record_size_is_wrong,
-        dif_size_is_wrong,
-        dualcast_address_is_wrong,
-        batch_size_is_wrong,
+        null_address,
+        null_size,
+        large_size,
+        overlapping,
+        misalignment,
+        dif_strip_adjacent,
+        wrong_size,
+        wrong_delta_size,
+        wrong_dif_size,
+        wrong_dualcast_address,
+        wrong_batch_size,
         unsupported_operation
     };
 
     enum class submission_status
     {
         success,
+        queue_busy,
         failure
     };
 
-    enum class execution_status : std::uint8_t
+    enum class execution_status : status_t
     {
+        processing                    = 0x00u, /**< Descriptor is still being processed. */
         success                       = 0x01u, /**< Success. */
         false_predicate_success       = 0x02u, /**< Success with false predicate. */
         page_fault_during_processing  = 0x03u, /**< Partial completion due to page fault.  */
@@ -75,9 +76,7 @@ namespace dml::ml
         operation_readback_timeout    = 0x20u, /**< The operation failed due to a hardware error other than a completion timeout or unsuccessful */
         hardware_timeout              = 0x21u, /**< Hardware error (completion timeout or unsuccessful completion status) */
         address_translation_error     = 0x22u, /**< An error occurred during address translation */
-
-        unexpected = std::numeric_limits<std::uint8_t>::max() /**< Unexpected error code */
     };
-}  // namespace dml::ml
+}  // namespace dml::detail
 
-#endif  //DML_ML_STATUS_HPP
+#endif  //DML_DETAIL_COMMON_STATUS_HPP
diff --git a/include/dml/cpp/middle_layer/types.hpp b/include/dml/detail/common/types.hpp
similarity index 70%
rename from include/dml/cpp/middle_layer/types.hpp
rename to include/dml/detail/common/types.hpp
index e654214..8ca7d69 100644
--- a/include/dml/cpp/middle_layer/types.hpp
+++ b/include/dml/detail/common/types.hpp
@@ -14,88 +14,46 @@
  *
  */
 
-#ifndef DML_ML_TYPES_HPP
-#define DML_ML_TYPES_HPP
+#ifndef DML_DETAIL_COMMON_TYPES_HPP
+#define DML_DETAIL_COMMON_TYPES_HPP
 
+#include <cstddef>
 #include <cstdint>
 
-#include "status.hpp"
-
-namespace dml::ml
+namespace dml::detail
 {
-    /**
-     * @todo
-     */
     using byte_t = std::uint8_t;
 
-    /**
-     * @todo
-     */
+    using size_t = std::size_t;
+
+    using transfer_size_t = std::uint32_t;
+
     using operation_t = std::uint8_t;
 
-    /**
-     * @todo
-     */
     using status_t = std::uint8_t;
 
-    /**
-     * @todo
-     */
     using flags_t = std::uint16_t;
 
-    /**
-     * @todo
-     */
     using operation_specific_flags_t = std::uint8_t;
 
-    /**
-     * @todo
-     */
     using completion_interrupt_handle_t = std::uint16_t;
 
-    /**
-     * @todo
-     */
     using transfer_size_t = std::uint32_t;
 
-    /**
-     * @todo
-     */
     using address_t = uint64_t;
 
-    /**
-     * @todo
-     */
     using pattern_t = uint64_t;
 
-    /**
-     * @todo
-     */
     using result_t = uint8_t;
 
-    /**
-     * @todo
-     */
     using crc_value_t = uint32_t;
 
-    /**
-     * @todo
-     */
     using dif_flags_t = uint8_t;
 
-    /**
-     * @todo
-     */
     using dif_status_t = uint8_t;
 
-    /**
-     * @todo
-     */
     using dif_ref_tag_t = uint32_t;
 
-    /**
-     * @todo
-     */
     using dif_app_tag_t = uint16_t;
 
     struct dif_parameters
@@ -104,7 +62,6 @@ namespace dml::ml
         dif_app_tag_t app_tag_mask;
         dif_app_tag_t app_tag_seed;
     };
+}  // namespace dml::detail
 
-}  // namespace dml::ml
-
-#endif  //DML_ML_TYPES_HPP
+#endif  //DML_DETAIL_COMMON_TYPES_HPP
diff --git a/include/dml/detail/common/utils/enum.hpp b/include/dml/detail/common/utils/enum.hpp
new file mode 100644
index 0000000..21057ac
--- /dev/null
+++ b/include/dml/detail/common/utils/enum.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_COMMON_UTILS_ENUM_HPP
+#define DML_COMMON_UTILS_ENUM_HPP
+
+#include <type_traits>
+
+namespace dml::detail
+{
+    template <typename enum_t>
+    [[nodiscard]] constexpr auto to_underlying(const enum_t enum_value) noexcept
+    {
+        return static_cast<std::underlying_type_t<enum_t>>(enum_value);
+    }
+
+    template <typename enum_t>
+    [[nodiscard]] constexpr bool intersects(const std::underlying_type_t<enum_t> lhs, const enum_t rhs) noexcept
+    {
+        return (lhs & to_underlying(rhs)) == to_underlying(rhs);  // true only if every bit of rhs is set in lhs; a zero-valued rhs always matches
+    }
+}  // namespace dml::detail
+
+#endif  //DML_COMMON_UTILS_ENUM_HPP
diff --git a/include/dml/detail/ml/execution_path.hpp b/include/dml/detail/ml/execution_path.hpp
new file mode 100644
index 0000000..29673e6
--- /dev/null
+++ b/include/dml/detail/ml/execution_path.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_ML_EXECUTION_PATH
+#define DML_ML_EXECUTION_PATH
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/detail/ml/result.hpp>
+
+namespace dml::detail::ml::execution_path
+{
+    struct software
+    {
+        [[nodiscard]] static submission_status submit(operation& op, result& res) noexcept;
+    };
+
+    struct hardware
+    {
+        [[nodiscard]] static submission_status submit(operation& op, result& res) noexcept;
+    };
+}  // namespace dml::detail::ml::execution_path
+
+#endif  //DML_ML_EXECUTION_PATH
diff --git a/include/dml/detail/ml/operation.hpp b/include/dml/detail/ml/operation.hpp
new file mode 100644
index 0000000..527c016
--- /dev/null
+++ b/include/dml/detail/ml/operation.hpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_ML_OPERATION_HPP
+#define DML_ML_OPERATION_HPP
+
+#include <dml/detail/common/types.hpp>
+#include <dml/detail/ml/options.hpp>
+
+namespace dml::detail::ml
+{
+    struct alignas(64u) operation
+    {
+        byte_t bytes[64u];
+    };
+
+    [[nodiscard]] operation make_nop_operation(nop_options options) noexcept;
+
+    [[nodiscard]] operation make_drain_operation(address_t              readback_address_1,
+                                                 address_t              readback_address_2,
+                                                 drain_options          options,
+                                                 drain_specific_options specific_options) noexcept;
+
+    [[nodiscard]] operation make_mem_move_operation(const byte_t    *src,
+                                                    byte_t          *dst,
+                                                    transfer_size_t  size,
+                                                    mem_move_options options) noexcept;
+
+    [[nodiscard]] operation make_fill_operation(uint64_t pattern, byte_t *dst, transfer_size_t size, fill_options options) noexcept;
+
+    [[nodiscard]] operation make_dualcast_operation(const byte_t             *src,
+                                                    byte_t                   *dst1,
+                                                    byte_t                   *dst2,
+                                                    transfer_size_t           size,
+                                                    dualcast_options          options,
+                                                    dualcast_specific_options specific_options) noexcept;
+
+    [[nodiscard]] operation make_compare_operation(const byte_t   *src1,
+                                                   const byte_t   *src2,
+                                                   transfer_size_t size,
+                                                   compare_options options,
+                                                   compare_result  expected_result) noexcept;
+
+    [[nodiscard]] operation make_compare_pattern_operation(uint64_t                pattern,
+                                                           const byte_t           *src,
+                                                           transfer_size_t         size,
+                                                           compare_pattern_options options,
+                                                           compare_result          expected_result) noexcept;
+
+    [[nodiscard]] operation make_crc_operation(const byte_t        *src,
+                                               transfer_size_t      size,
+                                               crc_value_t          crc_seed,
+                                               crc_options          options,
+                                               crc_specific_options specific_options) noexcept;
+
+    [[nodiscard]] operation make_copy_crc_operation(const byte_t             *src,
+                                                    byte_t                   *dst,
+                                                    transfer_size_t           size,
+                                                    crc_value_t               crc_seed,
+                                                    copy_crc_options          options,
+                                                    copy_crc_specific_options specific_options) noexcept;
+
+    [[nodiscard]] operation make_create_delta_operation(const byte_t        *src1,
+                                                        const byte_t        *src2,
+                                                        transfer_size_t      size,
+                                                        byte_t              *delta_record,
+                                                        transfer_size_t      delta_max_size,
+                                                        create_delta_options options,
+                                                        create_delta_result  expected_result) noexcept;
+
+    [[nodiscard]] operation make_apply_delta_operation(const byte_t       *delta_record,
+                                                       transfer_size_t     delta_size,
+                                                       byte_t             *dst,
+                                                       transfer_size_t     size,
+                                                       apply_delta_options options) noexcept;
+
+    [[nodiscard]] operation make_cache_flush_operation(byte_t *dst, transfer_size_t size, cache_flush_options options) noexcept;
+
+    [[nodiscard]] operation make_dif_check_operation(const byte_t        *src,
+                                                     transfer_size_t      transfer_size,
+                                                     dif_parameters       src_parameters,
+                                                     dif_check_options    options,
+                                                     dif_specific_options specific_options,
+                                                     dif_source_options   source_options) noexcept;
+
+    [[nodiscard]] operation make_dif_insert_operation(const byte_t           *src,
+                                                      byte_t                 *dst,
+                                                      transfer_size_t         transfer_size,
+                                                      dif_parameters          dst_parameters,
+                                                      dif_insert_options      options,
+                                                      dif_specific_options    specific_options,
+                                                      dif_destination_options destination_options) noexcept;
+
+    [[nodiscard]] operation make_dif_strip_operation(const byte_t        *src,
+                                                     byte_t              *dst,
+                                                     transfer_size_t      transfer_size,
+                                                     dif_parameters       src_parameters,
+                                                     dif_strip_options    options,
+                                                     dif_specific_options specific_options,
+                                                     dif_source_options   source_options) noexcept;
+
+    [[nodiscard]] operation make_dif_update_operation(const byte_t           *src,
+                                                      byte_t                 *dst,
+                                                      transfer_size_t         transfer_size,
+                                                      dif_parameters          src_parameters,
+                                                      dif_parameters          dst_parameters,
+                                                      dif_update_options      options,
+                                                      dif_specific_options    specific_options,
+                                                      dif_source_options      source_options,
+                                                      dif_destination_options destination_options) noexcept;
+
+    [[nodiscard]] operation make_batch_operation(const operation *src, transfer_size_t length, batch_options options) noexcept;
+}  // namespace dml::detail::ml
+
+#endif  //DML_ML_OPERATION_HPP
diff --git a/include/dml/detail/ml/options.hpp b/include/dml/detail/ml/options.hpp
new file mode 100644
index 0000000..5558b6f
--- /dev/null
+++ b/include/dml/detail/ml/options.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_ML_OPTIONS_HPP
+#define DML_ML_OPTIONS_HPP
+
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/types.hpp>
+
+namespace dml::detail::ml
+{
+    template <typename options_t>
+    class options final
+    {
+        using value_t = std::underlying_type_t<options_t>;
+
+    public:
+        constexpr options() noexcept = default;
+
+        template <options_t option>
+        [[nodiscard]] constexpr auto enable() const noexcept
+        {
+            return options(bit_mask_ | to_underlying(option));
+        }
+
+        constexpr explicit operator value_t() const noexcept
+        {
+            return bit_mask_;
+        }
+
+        // TODO: Should be private, but job API batch implementation is clunky
+
+    public:
+        constexpr explicit options(const value_t bit_mask) noexcept: bit_mask_(bit_mask)
+        {
+        }
+
+    private:
+        value_t bit_mask_{};
+    };
+
+    using nop_options = options<nop_flag>;
+
+    using batch_options = options<batch_flag>;
+
+    using drain_options = options<drain_flag>;
+
+    using mem_move_options = options<mem_move_flag>;
+
+    using fill_options = options<fill_flag>;
+
+    using compare_options = options<compare_flag>;
+
+    using compare_pattern_options = options<compare_pattern_flag>;
+
+    using create_delta_options = options<create_delta_flag>;
+
+    using apply_delta_options = options<apply_delta_flag>;
+
+    using dualcast_options = options<dualcast_flag>;
+
+    using crc_options = options<crc_flag>;
+
+    using copy_crc_options = options<copy_crc_flag>;
+
+    using dif_check_options = options<dif_check_flag>;
+
+    using dif_insert_options = options<dif_insert_flag>;
+
+    using dif_strip_options = options<dif_strip_flag>;
+
+    using dif_update_options = options<dif_update_flag>;
+
+    using cache_flush_options = options<cache_flush_flag>;
+
+    using drain_specific_options = options<drain_specific_flag>;
+
+    using dualcast_specific_options = options<dualcast_specific_flag>;
+
+    using crc_specific_options = options<crc_specific_flag>;
+
+    using copy_crc_specific_options = options<crc_specific_flag>;
+
+    using dif_specific_options = options<dif_specific_flag>;
+
+    using dif_source_options = options<dif_source_flag>;
+
+    using dif_destination_options = options<dif_destination_flag>;
+}  // namespace dml::detail::ml
+
+#endif  //DML_ML_OPTIONS_HPP
diff --git a/include/dml/detail/ml/result.hpp b/include/dml/detail/ml/result.hpp
new file mode 100644
index 0000000..463580a
--- /dev/null
+++ b/include/dml/detail/ml/result.hpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_ML_RESULT_HPP
+#define DML_ML_RESULT_HPP
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/types.hpp>
+#include <dml/detail/ml/operation.hpp>
+
+namespace dml::detail::ml
+{
+    struct alignas(32) result
+    {
+        byte_t bytes[32];
+    };
+
+    void bind(operation& op, result& res) noexcept;
+
+    void wait(result& res) noexcept;
+
+    [[nodiscard]] detail::execution_status get_status(result& res) noexcept;
+
+    [[nodiscard]] detail::result_t get_result(result& res) noexcept;
+
+    [[nodiscard]] detail::transfer_size_t get_bytes_completed(result& res) noexcept;
+
+    [[nodiscard]] detail::transfer_size_t get_delta_record_size(result& res) noexcept;
+
+    [[nodiscard]] detail::crc_value_t get_crc_value(result& res) noexcept;  // CRC value uses crc_value_t, matching the crc_seed parameter type
+
+    [[nodiscard]] inline bool is_finished(const volatile result& res) noexcept
+    {
+        return 0 != res.bytes[0];  // non-zero first byte => finished; NOTE(review): presumably the status byte (processing == 0x00) - confirm
+    }
+}  // namespace dml::detail::ml
+
+#endif  //DML_ML_RESULT_HPP
diff --git a/sources/cores/include/core_api.h b/include/dml/detail/ml/validation.hpp
similarity index 58%
rename from sources/cores/include/core_api.h
rename to include/dml/detail/ml/validation.hpp
index 3fa01e8..4fb485c 100644
--- a/sources/cores/include/core_api.h
+++ b/include/dml/detail/ml/validation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020-2021 Intel Corporation.
+ * Copyright 2021 Intel Corporation.
  *
  * This software and the related documents are Intel copyrighted materials,
  * and your use of them is governed by the express license under which they
@@ -14,27 +14,15 @@
  *
  */
 
-/**
- * @file
- * @brief
- * @date 2/10/2020
- *
- * @defgroup core_public_features Public Functions
- * @ingroup core_public
- * @{
- *
- * @brief Public Intel DML core features
- *
- */
-
-#ifndef KERNEL_API_H__
-#define KERNEL_API_H__
+#ifndef DML_ML_VALIDATION
+#define DML_ML_VALIDATION
 
-#include "core_compare.h"
-#include "core_memory.h"
-#include "core_cpu_features.h"
-#include "core_hash_functions.h"
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/ml/operation.hpp>
 
-#endif //KERNEL_API_H__
+namespace dml::detail::ml
+{
+    [[nodiscard]] validation_status validate(operation& op) noexcept;
+}  // namespace dml::detail::ml
 
-/** @} */
+#endif  //DML_ML_VALIDATION
diff --git a/include/dml/dml.h b/include/dml/dml.h
index c2e30cb..c8987e2 100644
--- a/include/dml/dml.h
+++ b/include/dml/dml.h
@@ -94,8 +94,8 @@ dml_status_t dml_get_job_size(dml_path_t dml_path, uint32_t *job_size_ptr);
  * - @ref DML_STATUS_OK
  * - @ref DML_STATUS_PATH_ERROR
  * - @ref DML_STATUS_NULL_POINTER_ERROR
- * - @ref DML_STATUS_DRIVER_NOT_FOUND
- * - @ref DML_STATUS_HARDWARE_CONNECTION_ERROR
+ * - @ref DML_STATUS_LIBACCEL_NOT_FOUND
+ * - @ref DML_STATUS_LIBACCEL_ERROR
  *
  */
 dml_status_t dml_init_job(dml_path_t path, dml_job_t *dml_job_ptr);
@@ -109,7 +109,6 @@ dml_status_t dml_init_job(dml_path_t path, dml_job_t *dml_job_ptr);
  *
  * @return The follow statuses;
  *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_HARDWARE_DISCONNECTION_ERROR
  *
  */
 dml_status_t dml_finalize_job(dml_job_t *dml_job_ptr);
diff --git a/include/dml/dml.hpp b/include/dml/dml.hpp
index 635ca1b..f81ec22 100644
--- a/include/dml/dml.hpp
+++ b/include/dml/dml.hpp
@@ -55,12 +55,12 @@ namespace dml
 {
 }
 
-#include <dml/cpp/data_view.hpp>
-#include <dml/cpp/execute.hpp>
-#include <dml/cpp/execution_interface.hpp>
-#include <dml/cpp/execution_path.hpp>
-#include <dml/cpp/operations.hpp>
-#include <dml/cpp/sequence.hpp>
-#include <dml/cpp/submit.hpp>
+#include <dml/hl/data_view.hpp>
+#include <dml/hl/execute.hpp>
+#include <dml/hl/execution_interface.hpp>
+#include <dml/hl/execution_path.hpp>
+#include <dml/hl/operations.hpp>
+#include <dml/hl/sequence.hpp>
+#include <dml/hl/submit.hpp>
 
 #endif  //DML_DML_HPP
diff --git a/include/dml/dmldefs.h b/include/dml/dmldefs.h
index 7dabaa4..00a68d5 100644
--- a/include/dml/dmldefs.h
+++ b/include/dml/dmldefs.h
@@ -269,8 +269,6 @@ typedef enum
  * @brief All possible return values of the Intel DML Library functions.
  *
  * @note All general statuses are described here.
- * @note All driver errors described in the @ref DRIVER_STATUSES
- * @note All device errors described in the @ref hw_status_t enumeration
  */
 typedef enum
 {
@@ -307,20 +305,12 @@ typedef enum
     DML_STATUS_BATCH_SIZE_ERROR             = 29u,  /**< The desired batch size is bigger than the possible one */
     DML_STATUS_DRAIN_PAGE_FAULT_ERROR       = 30u,  /**< A page fault occured while translating a Readback Addres in a Drain descriptor */
     DML_STATUS_UNKNOWN_CACHE_SIZE_ERROR     = 31u,  /**< Max cache size can't be calculated */
+    DML_STATUS_DIF_STRIP_ADJACENT_ERROR     = 32u,  /**< SRC Address for DIF Strip operation should be greater than (DST Address + SRC Size) */
 
-    // Initialisation Errors
-    DML_STATUS_DRIVER_NOT_FOUND             = (DML_BASE_DRIVER_ERROR + 0u),  /**< Unable to initialize job because hardware driver was not found */
-    DML_STATUS_DRIVER_ERROR                 = (DML_BASE_DRIVER_ERROR + 1u),  /**< Unable to initialize job because hardware driver API is incompatible */
-    DML_STATUS_HARDWARE_CONNECTION_ERROR    = (DML_BASE_DRIVER_ERROR + 2u),  /**< Cannot connect to hardware to complete initialization */
-    DML_STATUS_HARDWARE_DISCONNECTION_ERROR = (DML_BASE_DRIVER_ERROR + 3u),  /**< Cannot disconnect hardware */
-    DML_STATUS_QUEUE_IS_BUSY                = (DML_BASE_DRIVER_ERROR + 4u),  /**< Descriptor can't be submitted into filled work queue*/
-    DML_STATUS_INSTANCE_NOT_FOUND           = (DML_BASE_DRIVER_ERROR + 5u),  /**< Accelerator instance can not be found */
-    DML_STATUS_VERSION_DETECTION_ERROR      = (DML_BASE_DRIVER_ERROR + 6u),  /**< Accelerator version can not be determined */
-    DML_STATUS_DEVICES_NOT_AVAILABLE        = (DML_BASE_DRIVER_ERROR + 7u),  /**< Enabled devices are not found */
-    DML_STATUS_WORK_QUEUES_NOT_AVAILABLE    = (DML_BASE_DRIVER_ERROR + 8u),  /**< Enabled work queues are not found */
-    DML_STATUS_INCORRECT_WORK_QUEUE_ID      = (DML_BASE_DRIVER_ERROR + 9u),  /**< Work Queue ID is wrong  */
-    DML_STATUS_WORK_QUEUE_CONNECTION_ERROR  = (DML_BASE_DRIVER_ERROR + 10u), /**< Work Queue can not be connected */
-    DML_STATUS_PORTAL_CREATION_ERROR        = (DML_BASE_DRIVER_ERROR + 11u), /**< Portal can not be created */
+    // Initialization Errors
+    DML_STATUS_LIBACCEL_NOT_FOUND           = (DML_BASE_DRIVER_ERROR + 0u),  /**< Unable to initialize job because hardware driver was not found */
+    DML_STATUS_LIBACCEL_ERROR               = (DML_BASE_DRIVER_ERROR + 1u),  /**< Unable to initialize job because hardware driver API is incompatible */
+    DML_STATUS_WORK_QUEUES_NOT_AVAILABLE    = (DML_BASE_DRIVER_ERROR + 2u),  /**< Enabled work queues are not found */
 } dml_status_t;
 
 
diff --git a/include/dml/cpp/data_view.hpp b/include/dml/hl/data_view.hpp
similarity index 90%
rename from include/dml/cpp/data_view.hpp
rename to include/dml/hl/data_view.hpp
index d37fddf..38187df 100644
--- a/include/dml/cpp/data_view.hpp
+++ b/include/dml/hl/data_view.hpp
@@ -23,7 +23,7 @@
  */
 
 #include <cstdint>
-#include <dml/cpp/types.hpp>
+#include <dml/hl/types.hpp>
 #include <iterator>
 #include <type_traits>
 
@@ -46,7 +46,9 @@ namespace dml
          * @param[in] data_ptr  Pointer to the data for the view
          * @param[in] size      Byte size of the data for the view
          */
-        data_view(byte_t *const data_ptr, const size_t size) noexcept: data_ptr_(data_ptr), size_(size) { }
+        data_view(byte_t *const data_ptr, const size_t size) noexcept: data_ptr_(data_ptr), size_(size)
+        {
+        }
 
         /**
          * @brief Explicitly deleted conversion constructor from @ref const_data_view
@@ -84,14 +86,20 @@ namespace dml
          *
          * @return Pointer to the viewed data
          */
-        [[nodiscard]] auto data() noexcept { return data_ptr_; }
+        [[nodiscard]] auto data() noexcept
+        {
+            return data_ptr_;
+        }
 
         /**
          * @brief Getter for the size of the viewed data
          *
          * @return Size of the viewed data
          */
-        [[nodiscard]] auto size() const noexcept { return size_; }
+        [[nodiscard]] auto size() const noexcept
+        {
+            return size_;
+        }
 
     private:
         byte_t *const  data_ptr_; /**< Pointer to the viewed data */
@@ -113,7 +121,9 @@ namespace dml
          * @param[in] data_ptr  Pointer to the data for the view
          * @param[in] size      Byte size of the data for the view
          */
-        const_data_view(const byte_t *const data_ptr, const size_t size) noexcept: data_ptr_(data_ptr), size_(size) { }
+        const_data_view(const byte_t *const data_ptr, const size_t size) noexcept: data_ptr_(data_ptr), size_(size)
+        {
+        }
 
         /**
          * @brief Constructor @ref data_view
@@ -122,7 +132,9 @@ namespace dml
          *
          * @param[in] other Instance of @ref data_view
          */
-        const_data_view(data_view other) noexcept: data_ptr_(other.data()), size_(other.size()) { }
+        const_data_view(data_view other) noexcept: data_ptr_(other.data()), size_(other.size())
+        {
+        }
 
         /**
          * @brief Default copy constructor
@@ -153,14 +165,20 @@ namespace dml
          *
          * @return Pointer to the viewed data
          */
-        [[nodiscard]] auto data() const noexcept { return data_ptr_; }
+        [[nodiscard]] auto data() const noexcept
+        {
+            return data_ptr_;
+        }
 
         /**
          * @brief Getter for the size of the viewed data
          *
          * @return Size of the viewed data
          */
-        [[nodiscard]] auto size() const noexcept { return size_; }
+        [[nodiscard]] auto size() const noexcept
+        {
+            return size_;
+        }
 
     private:
         const byte_t *const data_ptr_; /**< Pointer to the viewed immutable data */
@@ -217,12 +235,10 @@ namespace dml
      * @return Constructed @ref data_view
      */
     template <typename iterator_t>
-    inline auto make_view(iterator_t begin,
-                          iterator_t end) noexcept(noexcept(&*begin) &&noexcept(std::distance(begin, end)))
+    inline auto make_view(iterator_t begin, iterator_t end) noexcept(noexcept(&*begin) &&noexcept(std::distance(begin, end)))
     {
         using iterator_category = typename std::iterator_traits<iterator_t>::iterator_category;
-        static_assert(std::is_same_v<std::random_access_iterator_tag, iterator_category>,
-                      "Only random access iterators are supported.");
+        static_assert(std::is_same_v<std::random_access_iterator_tag, iterator_category>, "Only random access iterators are supported.");
         return make_view(&*begin, std::distance(begin, end));
     }
 
diff --git a/include/dml/cpp/detail/buffer.hpp b/include/dml/hl/detail/buffer.hpp
similarity index 90%
rename from include/dml/cpp/detail/buffer.hpp
rename to include/dml/hl/detail/buffer.hpp
index d7c5ed0..07136ce 100644
--- a/include/dml/cpp/detail/buffer.hpp
+++ b/include/dml/hl/detail/buffer.hpp
@@ -22,8 +22,7 @@
 #ifndef DML_DETAIL_BUFFER_HPP
 #define DML_DETAIL_BUFFER_HPP
 
-#include <dml/cpp/types.hpp>
-
+#include <dml/hl/types.hpp>
 #include <memory>
 
 namespace dml::detail
@@ -137,7 +136,10 @@ namespace dml::detail
          *
          * @return Reference to the element
          */
-        [[nodiscard]] auto &get() noexcept { return *aligned_data_; }
+        [[nodiscard]] auto &get() noexcept
+        {
+            return *aligned_data_;
+        }
 
         /**
          * @brief Returns reference to the element (const version)
@@ -146,11 +148,14 @@ namespace dml::detail
          *
          * @return Const reference to the element
          */
-        [[nodiscard]] const auto &get() const noexcept { return *aligned_data_; }
+        [[nodiscard]] const auto &get() const noexcept
+        {
+            return *aligned_data_;
+        }
 
     private:
-        elem_t *    data_{};         /**< Pointer to allocated memory */
-        elem_t *    aligned_data_{}; /**< Pointer to the element */
+        elem_t     *data_{};         /**< Pointer to allocated memory */
+        elem_t     *aligned_data_{}; /**< Pointer to the element */
         own_alloc_t allocator_{};    /**< Allocator instance */
     };
 
@@ -190,8 +195,7 @@ namespace dml::detail
          * @param count     Number of elements
          * @param allocator Instance of allocator
          */
-        buffer_array(size_t count, allocator_t allocator):
-            count_(count), data_(nullptr), aligned_data_(nullptr), allocator_(allocator)
+        buffer_array(size_t count, allocator_t allocator): count_(count), data_(nullptr), aligned_data_(nullptr), allocator_(allocator)
         {
             data_ = own_traits_t::allocate(allocator_, (count_ * memory_size) + alignment);
 
@@ -268,7 +272,10 @@ namespace dml::detail
          *
          * @return Number of elements
          */
-        [[nodiscard]] size_t get_count() const noexcept { return count_; }
+        [[nodiscard]] size_t get_count() const noexcept
+        {
+            return count_;
+        }
 
         /**
          * @brief Returns reference to the element by index
@@ -277,7 +284,10 @@ namespace dml::detail
          *
          * @return Reference to the element
          */
-        [[nodiscard]] auto &get(size_t index) noexcept { return aligned_data_[index]; }
+        [[nodiscard]] auto &get(size_t index) noexcept
+        {
+            return aligned_data_[index];
+        }
 
         /**
          * @brief Returns reference to the element by index (const version)
@@ -286,12 +296,15 @@ namespace dml::detail
          *
          * @return Const reference to the element
          */
-        [[nodiscard]] const auto &get(size_t index) const noexcept { return aligned_data_[index]; }
+        [[nodiscard]] const auto &get(size_t index) const noexcept
+        {
+            return aligned_data_[index];
+        }
 
     private:
         size_t      count_{};        /**< Number of elements in the array */
-        elem_t *    data_{};         /**< Pointer to the allocated memory */
-        elem_t *    aligned_data_{}; /**< Pointer to the array */
+        elem_t     *data_{};         /**< Pointer to the allocated memory */
+        elem_t     *aligned_data_{}; /**< Pointer to the array */
         own_alloc_t allocator_{};    /**< Allocator instance */
     };
 }  // namespace dml::detail
diff --git a/include/dml/cpp/detail/execute.hpp b/include/dml/hl/detail/execute.hpp
similarity index 81%
rename from include/dml/cpp/detail/execute.hpp
rename to include/dml/hl/detail/execute.hpp
index f85b508..3296dc3 100644
--- a/include/dml/cpp/detail/execute.hpp
+++ b/include/dml/hl/detail/execute.hpp
@@ -22,12 +22,12 @@
 #ifndef DML_DETAIL_EXECUTE_HPP
 #define DML_DETAIL_EXECUTE_HPP
 
-#include <dml/cpp/detail/utils.hpp>
-#include <dml/cpp/execution_path.hpp>
-#include <dml/cpp/middle_layer/completion_record.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-#include <dml/cpp/middle_layer/validation.hpp>
-#include <dml/cpp/status_code.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/detail/ml/result.hpp>
+#include <dml/detail/ml/validation.hpp>
+#include <dml/hl/detail/utils.hpp>
+#include <dml/hl/execution_path.hpp>
+#include <dml/hl/status_code.hpp>
 
 #include "make_result.hpp"
 
@@ -56,25 +56,23 @@ namespace dml::detail
 
         auto descriptor = make_operation();
 
-        if (auto status = ml::validate(descriptor); status != ml::validation_status::success)
+        if (auto status = ml::validate(descriptor); status != detail::validation_status::success)
         {
             return typename operation::result_type{ detail::to_own(status) };
         }
 
-        auto record = ml::completion_record();
+        auto record = detail::ml::result();
         // If execution_path::run returns status code
         auto status = execution_path()(descriptor, record);
-        if (status != ml::submission_status::success)
+        if (status != detail::submission_status::success)
         {
             return typename operation::result_type{ status_code::error };
         }
 
-#ifdef DML_HW
         if constexpr (std::is_same_v<execution_path, hardware>)
         {
-            ml::wait(record);
+            detail::ml::wait(record);
         }
-#endif
 
         return make_result<typename operation::result_type>(record);
     }
diff --git a/include/dml/cpp/detail/handler.hpp b/include/dml/hl/detail/handler.hpp
similarity index 90%
rename from include/dml/cpp/detail/handler.hpp
rename to include/dml/hl/detail/handler.hpp
index b5dea06..2479c4c 100644
--- a/include/dml/cpp/detail/handler.hpp
+++ b/include/dml/hl/detail/handler.hpp
@@ -22,7 +22,7 @@
 #ifndef DML_DETAIL_HANDLER_HPP
 #define DML_DETAIL_HANDLER_HPP
 
-#include <dml/cpp/middle_layer/completion_record.hpp>
+#include <dml/detail/ml/result.hpp>
 
 namespace dml
 {
@@ -41,7 +41,7 @@ namespace dml
          * @return Middle Layer result object
          */
         template <typename operation, typename allocator_t>
-        ml::completion_record &get_ml_result(handler<operation, allocator_t> &h) noexcept
+        detail::ml::result &get_ml_result(handler<operation, allocator_t> &h) noexcept
         {
             return h.record_.get();
         }
diff --git a/include/dml/hl/detail/make_result.hpp b/include/dml/hl/detail/make_result.hpp
new file mode 100644
index 0000000..46ef6ae
--- /dev/null
+++ b/include/dml/hl/detail/make_result.hpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+/**
+ * @date 05/20/2021
+ * @brief Contains internal conversion of Middle Layer results to High Level result types
+ */
+
+#ifndef DML_DETAIL_MAKE_RESULT_HPP
+#define DML_DETAIL_MAKE_RESULT_HPP
+
+#include <dml/detail/common/status.hpp>
+#include <dml/hl/result.hpp>
+#include <dml/hl/status_code.hpp>
+
+namespace dml::detail
+{
+    /**
+     * @brief Converts detail::execution_status to status_code (success -> ok, false_predicate_success -> false_predicate, otherwise error)
+     */
+    inline auto to_own(detail::execution_status status) noexcept
+    {
+        switch (status)
+        {
+            case detail::execution_status::success:
+                return status_code::ok;
+            case detail::execution_status::false_predicate_success:
+                return status_code::false_predicate;
+            default:
+                // Anything else is considered an error temporarily
+                return status_code::error;
+        }
+    }
+
+    /**
+     * @brief Builds the High Level result of type result_type from a Middle Layer result (converted status plus operation-specific fields)
+     */
+    template <typename result_type>
+    auto make_result(detail::ml::result& result) noexcept
+    {
+        if constexpr (std::is_same_v<result_type, mem_move_result>)
+        {
+            return mem_move_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, mem_copy_result>)
+        {
+            return mem_copy_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, fill_result>)
+        {
+            return fill_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, dml::compare_result>)
+        {
+            return dml::compare_result{ to_own(detail::ml::get_status(result)),
+                                        static_cast<comparison_result>(detail::ml::get_result(result)),
+                                        detail::ml::get_bytes_completed(result) };
+        }
+        else if constexpr (std::is_same_v<result_type, dml::create_delta_result>)
+        {
+            return dml::create_delta_result{ to_own(detail::ml::get_status(result)),
+                                             static_cast<comparison_result>(detail::ml::get_result(result)),
+                                             detail::ml::get_bytes_completed(result),
+                                             detail::ml::get_delta_record_size(result) };
+        }
+        else if constexpr (std::is_same_v<result_type, apply_delta_result>)
+        {
+            return apply_delta_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, dualcast_result>)
+        {
+            return dualcast_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, crc_result>)
+        {
+            return crc_result{ to_own(detail::ml::get_status(result)), detail::ml::get_crc_value(result) };
+        }
+        else if constexpr (std::is_same_v<result_type, cache_flush_result>)
+        {
+            return cache_flush_result{ to_own(detail::ml::get_status(result)) };
+        }
+        else if constexpr (std::is_same_v<result_type, batch_result>)
+        {
+            return batch_result{ to_own(detail::ml::get_status(result)), detail::ml::get_bytes_completed(result) };
+        }
+    }
+
+}  // namespace dml::detail
+
+#endif  //DML_DETAIL_MAKE_RESULT_HPP
diff --git a/include/dml/cpp/detail/submit.hpp b/include/dml/hl/detail/submit.hpp
similarity index 82%
rename from include/dml/cpp/detail/submit.hpp
rename to include/dml/hl/detail/submit.hpp
index 24b14a4..c32c54c 100644
--- a/include/dml/cpp/detail/submit.hpp
+++ b/include/dml/hl/detail/submit.hpp
@@ -22,10 +22,10 @@
 #ifndef DML_DETAIL_SUBMIT_HPP
 #define DML_DETAIL_SUBMIT_HPP
 
-#include <dml/cpp/detail/handler.hpp>
-#include <dml/cpp/detail/utils.hpp>
-#include <dml/cpp/execution_path.hpp>
-#include <dml/cpp/middle_layer/status.hpp>
+#include <dml/detail/ml/validation.hpp>
+#include <dml/hl/detail/handler.hpp>
+#include <dml/hl/detail/utils.hpp>
+#include <dml/hl/execution_path.hpp>
 
 namespace dml::detail
 {
@@ -59,7 +59,7 @@ namespace dml::detail
 
         auto operation = make_operation();
 
-        auto op_handler = executor.template make_handler<operation_t>(detail::to_own(ml::validate(operation)));
+        auto op_handler = executor.template make_handler<operation_t>(detail::to_own(detail::ml::validate(operation)));
 
         if (!op_handler.valid())
         {
@@ -67,7 +67,6 @@ namespace dml::detail
         }
 
         // If execution_path{} returns status code (hw path)
-#ifdef DML_HW
         if constexpr (std::is_same_v<execution_path, hardware>)
         {
             auto& result = detail::get_ml_result(op_handler);
@@ -77,27 +76,31 @@ namespace dml::detail
                     return execution_path{}(operation, result);
                 });
 
-            if (status != ml::submission_status::success)
+            if (status != detail::submission_status::success)
             {
-                return executor.template make_handler<operation_t>(status_code::error);
+                if(status == detail::submission_status::queue_busy)
+                {
+                    return executor.template make_handler<operation_t>(status_code::queue_busy);
+                }
+                else
+                {
+                    return executor.template make_handler<operation_t>(status_code::error);
+                }
             }
         }
         else
         {
-#endif
             auto& result = detail::get_ml_result(op_handler);
             executor.execute(
                 [operation, &result]() mutable
                 {
                     auto status = execution_path{}(operation, result);
-                    if (status != ml::submission_status::success)
+                    if (status != detail::submission_status::success)
                     {
                         result.bytes[0] = 0xFF;  // Temporary
                     }
                 });
-#ifdef DML_HW
         }
-#endif
 
         return op_handler;
     }
diff --git a/include/dml/cpp/detail/utils.hpp b/include/dml/hl/detail/utils.hpp
similarity index 63%
rename from include/dml/cpp/detail/utils.hpp
rename to include/dml/hl/detail/utils.hpp
index 9cd4106..39c3d07 100644
--- a/include/dml/cpp/detail/utils.hpp
+++ b/include/dml/hl/detail/utils.hpp
@@ -22,8 +22,8 @@
 #ifndef DML_DETAIL_UTILS_HPP
 #define DML_DETAIL_UTILS_HPP
 
-#include <dml/cpp/middle_layer/values.hpp>
-#include <dml/cpp/status_code.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/hl/status_code.hpp>
 
 /**
  * @brief Checks whether two sizes are the same
@@ -39,6 +39,11 @@ namespace dml::detail
      */
     struct always_success
     {
+        /**
+         * @brief Functor which always returns success status
+         *
+         * @return status_code::ok
+         */
         auto operator()() const noexcept
         {
             return status_code::ok;
@@ -46,39 +51,37 @@ namespace dml::detail
     };
 
     /**
-     * @brief Converts Middle Layer's @ref dml::ml::validation_status to @ref dml::status_code
+     * @brief Converts Middle Layer status code to @ref dml::status_code
      *
      * @param status Status for conversion
      *
-     * @return dml::status_code that represents dml::ml::validation_status
+     * @return dml::status_code
      */
-    [[nodiscard]] static constexpr auto to_own(ml::validation_status status) noexcept
+    [[nodiscard]] static constexpr auto to_own(detail::validation_status status) noexcept
     {
         switch (status)
         {
-            case ml::validation_status::success:
+            case detail::validation_status::success:
                 return status_code::ok;
-            case ml::validation_status::address_is_null:
+            case detail::validation_status::null_address:
                 return status_code::nullptr_error;
-            case ml::validation_status::size_is_null:
+            case detail::validation_status::null_size:
                 return status_code::bad_size;
-            case ml::validation_status::delta_size_is_wrong:
+            case detail::validation_status::wrong_size:
                 return status_code::bad_size;
-            case ml::validation_status::delta_input_size_is_wrong:
+            case detail::validation_status::large_size:
                 return status_code::bad_size;
-            case ml::validation_status::delta_input_size_overflow:
-                return status_code::bad_size;
-            case ml::validation_status::buffers_overlap:
+            case detail::validation_status::overlapping:
                 return status_code::buffers_overlapping;
-            case ml::validation_status::address_is_misaligned:
+            case detail::validation_status::misalignment:
                 return status_code::bad_alignment;
-            case ml::validation_status::delta_record_size_is_wrong:
+            case detail::validation_status::wrong_delta_size:
                 return status_code::delta_bad_size;
-            case ml::validation_status::dualcast_address_is_wrong:
+            case detail::validation_status::wrong_dualcast_address:
                 return status_code::dualcast_bad_padding;
-            case ml::validation_status::batch_size_is_wrong:
+            case detail::validation_status::wrong_batch_size:
                 return status_code::bad_length;
-            case ml::validation_status::unsupported_operation:
+            case detail::validation_status::unsupported_operation:
                 return status_code::unsupported_operation;
             default:
                 return status_code::error;
diff --git a/include/dml/cpp/execute.hpp b/include/dml/hl/execute.hpp
similarity index 81%
rename from include/dml/cpp/execute.hpp
rename to include/dml/hl/execute.hpp
index 79c8e56..d549657 100644
--- a/include/dml/cpp/execute.hpp
+++ b/include/dml/hl/execute.hpp
@@ -22,12 +22,12 @@
 #ifndef DML_EXECUTE_HPP
 #define DML_EXECUTE_HPP
 
-#include <dml/cpp/detail/execute.hpp>
-#include <dml/cpp/detail/utils.hpp>
-#include <dml/cpp/execution_path.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-#include <dml/cpp/operations.hpp>
-#include <dml/cpp/sequence.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/hl/detail/execute.hpp>
+#include <dml/hl/detail/utils.hpp>
+#include <dml/hl/execution_path.hpp>
+#include <dml/hl/operations.hpp>
+#include <dml/hl/sequence.hpp>
 
 namespace dml
 {
@@ -63,7 +63,7 @@ namespace dml
         return detail::execute<execution_path, batch_operation>(
             [&]()
             {
-                return ml::make_batch_descriptor(seq.data(), seq.length(), operation.get_options());
+                return detail::ml::make_batch_operation(seq.data(), seq.length(), operation.get_options());
             });
     }
 
@@ -94,7 +94,7 @@ namespace dml
         return detail::execute<execution_path, mem_move_operation>(
             [&]()
             {
-                return ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
+                return detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
             },
             [&]()
             {
@@ -130,7 +130,7 @@ namespace dml
         return detail::execute<execution_path, mem_copy_operation>(
             [&]()
             {
-                return ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
+                return detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
             },
             [&]()
             {
@@ -166,7 +166,7 @@ namespace dml
         return detail::execute<execution_path, fill_operation>(
             [&]()
             {
-                return ml::make_fill_descriptor(pattern, dst_view.data(), dst_view.size(), operation.get_options());
+                return detail::ml::make_fill_operation(pattern, dst_view.data(), dst_view.size(), operation.get_options());
             });
     }
 
@@ -198,12 +198,12 @@ namespace dml
         return detail::execute<execution_path, dualcast_operation>(
             [&]()
             {
-                return ml::make_dualcast_descriptor(src_view.data(),
-                                                    dst1_view.data(),
-                                                    dst2_view.data(),
-                                                    src_view.size(),
-                                                    operation.get_options(),
-                                                    operation.get_additional_options());
+                return detail::ml::make_dualcast_operation(src_view.data(),
+                                                           dst1_view.data(),
+                                                           dst2_view.data(),
+                                                           src_view.size(),
+                                                           operation.get_options(),
+                                                           operation.get_specific_options());
             },
             [&]()
             {
@@ -240,11 +240,11 @@ namespace dml
         return detail::execute<execution_path, compare_operation>(
             [&]()
             {
-                return ml::make_compare_descriptor(src1_view.data(),
-                                                   src2_view.data(),
-                                                   src1_view.size(),
-                                                   operation.get_options(),
-                                                   operation.get_expected_result());
+                return detail::ml::make_compare_operation(src1_view.data(),
+                                                          src2_view.data(),
+                                                          src1_view.size(),
+                                                          operation.get_options(),
+                                                          operation.get_expected_result());
             },
             [&]()
             {
@@ -280,11 +280,11 @@ namespace dml
         return detail::execute<execution_path, compare_pattern_operation>(
             [&]()
             {
-                return ml::make_compare_pattern_descriptor(pattern,
-                                                           src_view.data(),
-                                                           src_view.size(),
-                                                           operation.get_options(),
-                                                           operation.get_expected_result());
+                return detail::ml::make_compare_pattern_operation(pattern,
+                                                                  src_view.data(),
+                                                                  src_view.size(),
+                                                                  operation.get_options(),
+                                                                  operation.get_expected_result());
             });
     }
 
@@ -316,13 +316,13 @@ namespace dml
         return detail::execute<execution_path, create_delta_operation>(
             [&]()
             {
-                return ml::make_create_delta_descriptor(src1_view.data(),
-                                                        src2_view.data(),
-                                                        src1_view.size(),
-                                                        delta_view.data(),
-                                                        delta_view.size(),
-                                                        operation.get_options(),
-                                                        operation.get_expected_result());
+                return detail::ml::make_create_delta_operation(src1_view.data(),
+                                                               src2_view.data(),
+                                                               src1_view.size(),
+                                                               delta_view.data(),
+                                                               delta_view.size(),
+                                                               operation.get_options(),
+                                                               operation.get_expected_result());
             },
             [&]()
             {
@@ -359,11 +359,11 @@ namespace dml
         return detail::execute<execution_path, apply_delta_operation>(
             [&]()
             {
-                return ml::make_apply_delta_descriptor(delta_view.data(),
-                                                       delta_result.delta_record_size,
-                                                       dst_view.data(),
-                                                       dst_view.size(),
-                                                       operation.get_options());
+                return detail::ml::make_apply_delta_operation(delta_view.data(),
+                                                              delta_result.delta_record_size,
+                                                              dst_view.data(),
+                                                              dst_view.size(),
+                                                              operation.get_options());
             },
             [&]()
             {
@@ -402,11 +402,11 @@ namespace dml
         return detail::execute<execution_path, crc_operation>(
             [&]()
             {
-                return ml::make_crc_descriptor(src_view.data(),
-                                               src_view.size(),
-                                               crc_seed,
-                                               operation.get_options(),
-                                               operation.get_additional_options());
+                return detail::ml::make_crc_operation(src_view.data(),
+                                                      src_view.size(),
+                                                      crc_seed,
+                                                      operation.get_options(),
+                                                      operation.get_specific_options());
             });
     }
 
@@ -438,12 +438,12 @@ namespace dml
         return detail::execute<execution_path, copy_crc_operation>(
             [&]()
             {
-                return ml::make_copy_crc_descriptor(src_view.data(),
-                                                    dst_view.data(),
-                                                    src_view.size(),
-                                                    crc_seed,
-                                                    operation.get_options(),
-                                                    operation.get_additional_options());
+                return detail::ml::make_copy_crc_operation(src_view.data(),
+                                                           dst_view.data(),
+                                                           src_view.size(),
+                                                           crc_seed,
+                                                           operation.get_options(),
+                                                           operation.get_specific_options());
             },
             [&]()
             {
@@ -478,7 +478,7 @@ namespace dml
         return detail::execute<execution_path, cache_flush_operation>(
             [&]()
             {
-                return ml::make_cache_flush_descriptor(dst_view.data(), dst_view.size(), operation.get_options());
+                return detail::ml::make_cache_flush_operation(dst_view.data(), dst_view.size(), operation.get_options());
             });
     }
 
diff --git a/include/dml/cpp/execution_interface.hpp b/include/dml/hl/execution_interface.hpp
similarity index 90%
rename from include/dml/cpp/execution_interface.hpp
rename to include/dml/hl/execution_interface.hpp
index b7babd3..48a9a58 100644
--- a/include/dml/cpp/execution_interface.hpp
+++ b/include/dml/hl/execution_interface.hpp
@@ -22,7 +22,7 @@
 #ifndef DML_EXECUTION_INTERFACE_HPP
 #define DML_EXECUTION_INTERFACE_HPP
 
-#include <dml/cpp/handler.hpp>
+#include <dml/hl/handler.hpp>
 
 namespace dml
 {
@@ -65,7 +65,8 @@ namespace dml
          * @param allocator Instance of allocator
          */
         explicit execution_interface(executor_t executor = executor_t(), allocator_t allocator = allocator_t()):
-            executor_(executor), allocator_(allocator)
+            executor_(executor),
+            allocator_(allocator)
         {
         }
 
@@ -78,8 +79,7 @@ namespace dml
          * @return Executor return value (if present)
          */
         template <typename task_t>
-        auto execute(task_t &&task) const
-            noexcept(noexcept(std::declval<executor_t>()(std::forward<task_t>(task))))
+        auto execute(task_t &&task) const noexcept(noexcept(std::declval<executor_t>()(std::forward<task_t>(task))))
         {
             return executor_(std::forward<task_t>(task));
         }
@@ -125,8 +125,8 @@ namespace dml
      * @tparam execution_path Type of execution path
      */
     template <typename execution_path>
-    using default_execution_interface = execution_interface<typename execution_path::default_thread_spawner,
-                                                            typename execution_path::default_allocator>;
+    using default_execution_interface =
+        execution_interface<typename execution_path::default_thread_spawner, typename execution_path::default_allocator>;
 }  // namespace dml
 
 #endif  //DML_EXECUTION_INTERFACE_HPP
diff --git a/include/dml/cpp/execution_path.hpp b/include/dml/hl/execution_path.hpp
similarity index 89%
rename from include/dml/cpp/execution_path.hpp
rename to include/dml/hl/execution_path.hpp
index 9b7503c..8399296 100644
--- a/include/dml/cpp/execution_path.hpp
+++ b/include/dml/hl/execution_path.hpp
@@ -22,8 +22,7 @@
 #ifndef DML_EXECUTION_PATH_HPP
 #define DML_EXECUTION_PATH_HPP
 
-#include <dml/cpp/middle_layer/device.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
+#include <dml/detail/ml/execution_path.hpp>
 #include <thread>
 
 namespace dml
@@ -72,13 +71,12 @@ namespace dml
          * @param op         Instance of Middle Layer operation
          * @param res        Instance of Middle Layer result
          */
-        auto operator()(ml::descriptor &op, ml::completion_record &res) const noexcept
+        auto operator()(detail::ml::operation &op, detail::ml::result &res) const noexcept
         {
-            return ml::software().submit(op, res);
+            return detail::ml::execution_path::software::submit(op, res);
         }
     };
 
-#ifdef DML_HW
     /**
      * @brief Represent hardware execution path
      *
@@ -120,12 +118,11 @@ namespace dml
          *
          * @return @ref status_code::ok if submission was a success, error code otherwise
          */
-        [[nodiscard]] auto operator()(ml::descriptor& dsc, ml::completion_record &record) const noexcept
+        [[nodiscard]] auto operator()(detail::ml::operation &op, detail::ml::result &res) const noexcept
         {
-            return ml::hardware().submit(dsc, record);
+            return detail::ml::execution_path::hardware::submit(op, res);
         }
     };
-#endif
 
     /**
      * @}
diff --git a/include/dml/cpp/handler.hpp b/include/dml/hl/handler.hpp
similarity index 82%
rename from include/dml/cpp/handler.hpp
rename to include/dml/hl/handler.hpp
index 4a2db0b..9c0153c 100644
--- a/include/dml/cpp/handler.hpp
+++ b/include/dml/hl/handler.hpp
@@ -22,9 +22,9 @@
 #ifndef DML_HANDLER_HPP
 #define DML_HANDLER_HPP
 
-#include <dml/cpp/detail/buffer.hpp>
-#include <dml/cpp/detail/handler.hpp>
-#include <dml/cpp/middle_layer/completion_record.hpp>
+#include <dml/detail/ml/result.hpp>
+#include <dml/hl/detail/buffer.hpp>
+#include <dml/hl/detail/handler.hpp>
 
 namespace dml
 {
@@ -41,7 +41,7 @@ namespace dml
         /**
          * @brief Internal buffer type for a result
          */
-        using buffer_type = detail::buffer<ml::completion_record, allocator_t>;
+        using buffer_type = detail::buffer<detail::ml::result, allocator_t>;
 
         /**
          * @brief Actual operation's result type
@@ -59,8 +59,7 @@ namespace dml
          *
          * @param allocator Memory allocator to use
          */
-        explicit handler(allocator_t allocator = allocator_t()) noexcept:
-            record_(allocator, false), status_(status_code::error)
+        explicit handler(allocator_t allocator = allocator_t()) noexcept: record_(allocator, false), status_(status_code::error)
         {
         }
 
@@ -72,7 +71,10 @@ namespace dml
          *
          * @return True if hanlder is valid, false otherwise
          */
-        [[nodiscard]] bool valid() const noexcept { return status_ == status_code::ok; }
+        [[nodiscard]] bool valid() const noexcept
+        {
+            return status_ == status_code::ok;
+        }
 
         /**
          * @brief Get result for a submitted operation
@@ -87,14 +89,14 @@ namespace dml
         {
             if (status_ == status_code::ok)
             {
-                ml::wait(record_.get());
+                detail::ml::wait(record_.get());
 
                 return detail::make_result<result_type>(record_.get());
             }
             else
             {
                 // Aggregate initialization ensures only first element initialized
-                return result_type{status_};
+                return result_type{ status_ };
             }
         }
 
@@ -107,7 +109,7 @@ namespace dml
         {
             if (status_ == status_code::ok)
             {
-                return ml::is_finished(record_.get());
+                return detail::ml::is_finished(record_.get());
             }
             else
             {
@@ -124,12 +126,12 @@ namespace dml
          * @param status    Initial status
          * @param allocator Instance of memory allocator
          */
-        explicit handler(status_code status, allocator_t allocator):
-            record_(allocator, status == status_code::ok), status_(status)
+        explicit handler(status_code status, allocator_t allocator): record_(allocator, status == status_code::ok), status_(status)
         {
         }
 
-        friend ml::completion_record &detail::get_ml_result<>(handler<operation_t, allocator_t> &h) noexcept;
+        template <typename operation_t_, typename allocator_t_>
+        friend detail::ml::result &detail::get_ml_result(handler<operation_t_, allocator_t_> &h) noexcept;
 
     private:
         buffer_type record_; /**< Memory buffer for a result */
diff --git a/include/dml/cpp/operations.hpp b/include/dml/hl/operations.hpp
similarity index 78%
rename from include/dml/cpp/operations.hpp
rename to include/dml/hl/operations.hpp
index 1da4dca..ee0318e 100644
--- a/include/dml/cpp/operations.hpp
+++ b/include/dml/hl/operations.hpp
@@ -22,7 +22,8 @@
 #ifndef DML_OPERATIONS_MEM_MOVE_HPP
 #define DML_OPERATIONS_MEM_MOVE_HPP
 
-#include <dml/cpp/result.hpp>
+#include <dml/detail/ml/options.hpp>
+#include <dml/hl/result.hpp>
 
 namespace dml
 {
@@ -40,7 +41,7 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr mem_move_operation() noexcept: options_(ml::mem_move_option::cache_control)
+        constexpr mem_move_operation() noexcept: options_(detail::ml::mem_move_options().enable<detail::mem_move_flag::cache_control>())
         {
         }
 
@@ -60,7 +61,7 @@ namespace dml
         }
 
     private:
-        ml::mem_move_options options_; /**< @todo */
+        detail::ml::mem_move_options options_; /**< Middle-layer option flags; the default constructor enables cache_control */
     };
 
     /**
@@ -87,7 +88,7 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr mem_copy_operation() noexcept: options_(ml::mem_move_option::cache_control)
+        constexpr mem_copy_operation() noexcept: options_(detail::ml::mem_move_options().enable<detail::mem_move_flag::cache_control>())
         {
         }
 
@@ -107,7 +108,7 @@ namespace dml
         }
 
     private:
-        ml::mem_move_options options_; /**< @todo */
+        detail::ml::mem_move_options options_; /**< Middle-layer option flags; the default constructor enables cache_control */
     };
 
     /**
@@ -132,7 +133,7 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr fill_operation() noexcept: options_(ml::fill_option::cache_control)
+        constexpr fill_operation() noexcept: options_(detail::ml::fill_options().enable<detail::fill_flag::cache_control>())
         {
         }
 
@@ -152,7 +153,7 @@ namespace dml
         }
 
     private:
-        ml::fill_options options_; /**< @todo */
+        detail::ml::fill_options options_; /**< Middle-layer option flags; the default constructor enables cache_control */
     };
 
     /**
@@ -177,7 +178,11 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr dualcast_operation() = default;
+        constexpr dualcast_operation() noexcept:
+            options_(detail::ml::dualcast_options().enable<detail::dualcast_flag::cache_control>()),
+            specific_options_()
+        {
+        }
 
         /**
          * @brief Result type for this operation
@@ -197,14 +202,14 @@ namespace dml
         /**
          * @todo
          */
-        [[nodiscard]] constexpr auto get_additional_options() const noexcept
+        [[nodiscard]] constexpr auto get_specific_options() const noexcept
         {
-            return additional_options_;
+            return specific_options_;
         }
 
     private:
-        ml::dualcast_options            options_;            /**< @todo */
-        ml::dualcast_additional_options additional_options_; /**< @todo */
+        detail::ml::dualcast_options          options_;          /**< Middle-layer option flags; the default constructor enables cache_control */
+        detail::ml::dualcast_specific_options specific_options_; /**< Dualcast-specific options; value-initialized by the default constructor */
     };
 
     /**
@@ -258,7 +263,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_equal() const noexcept
         {
-            return compare_operation(ml::compare_option::check_result, ml::compare_expected_result_option::expect_equal);
+            return compare_operation(options_.enable<detail::compare_flag::check_result>(), detail::compare_result::equal);
         }
 
         /**
@@ -268,7 +273,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_not_equal() const noexcept
         {
-            return compare_operation(ml::compare_option::check_result, ml::compare_expected_result_option::expect_not_equal);
+            return compare_operation(options_.enable<detail::compare_flag::check_result>(), detail::compare_result::not_equal);
         }
 
         /**
@@ -284,7 +289,7 @@ namespace dml
          *
          * @return Expected result
          */
-        [[nodiscard]] ml::compare_expected_result_options get_expected_result() const
+        [[nodiscard]] detail::compare_result get_expected_result() const noexcept
         {
             return expected_;
         }
@@ -293,15 +298,15 @@ namespace dml
         /**
          * @brief Constructs the operation with specified options and expected result
          */
-        constexpr compare_operation(ml::compare_options options, ml::compare_expected_result_options expected) noexcept:
+        constexpr compare_operation(detail::ml::compare_options options, detail::compare_result expected) noexcept:
             options_(options),
             expected_(expected)
         {
         }
 
     private:
-        ml::compare_options                 options_{};  /**< @todo */
-        ml::compare_expected_result_options expected_{}; /**< @todo */
+        detail::ml::compare_options options_{};  /**< Middle-layer option flags; expect_equal()/expect_not_equal() enable check_result */
+        detail::compare_result      expected_{}; /**< Expected comparison outcome, returned by get_expected_result() */
     };
 
     /**
@@ -346,7 +351,7 @@ namespace dml
          *
          * See @ref compare_result
          */
-        using result_type = compare_result;
+        using result_type = dml::compare_result;
 
         /**
          * @brief Returns a new instance of the operation with "equal" expected result
@@ -355,7 +360,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_equal() const noexcept
         {
-            return compare_pattern_operation(ml::compare_pattern_option::check_result, ml::compare_expected_result_option::expect_equal);
+            return compare_pattern_operation(options_.enable<detail::compare_pattern_flag::check_result>(), detail::compare_result::equal);
         }
 
         /**
@@ -365,8 +370,8 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_not_equal() const noexcept
         {
-            return compare_pattern_operation(ml::compare_pattern_option::check_result,
-                                             ml::compare_expected_result_option::expect_not_equal);
+            return compare_pattern_operation(options_.enable<detail::compare_pattern_flag::check_result>(),
+                                             detail::compare_result::not_equal);
         }
 
         /**
@@ -382,7 +387,7 @@ namespace dml
          *
          * @return Expected result
          */
-        [[nodiscard]] ml::compare_expected_result_options get_expected_result() const
+        [[nodiscard]] detail::compare_result get_expected_result() const noexcept
         {
             return expected_;
         }
@@ -391,15 +396,15 @@ namespace dml
         /**
          * @brief Constructs the operation with specified options and expected result
          */
-        constexpr compare_pattern_operation(ml::compare_pattern_options options, ml::compare_expected_result_options expected) noexcept:
+        constexpr compare_pattern_operation(detail::ml::compare_pattern_options options, detail::compare_result expected) noexcept:
             options_(options),
             expected_(expected)
         {
         }
 
     private:
-        ml::compare_pattern_options         options_{};  /**< @todo */
-        ml::compare_expected_result_options expected_{}; /**< @todo */
+        detail::ml::compare_pattern_options options_{};  /**< Middle-layer option flags; expect_equal()/expect_not_equal() enable check_result */
+        detail::compare_result              expected_{}; /**< Expected comparison outcome, returned by get_expected_result() */
     };
 
     /**
@@ -429,14 +434,18 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr create_delta_operation() = default;
+        constexpr create_delta_operation() noexcept:
+            options_(detail::ml::create_delta_options().enable<detail::create_delta_flag::cache_control>()),
+            expected_()
+        {
+        }
 
         /**
          * @brief Result type for this operation
          *
          * See @ref create_delta_result
          */
-        using result_type = create_delta_result;
+        using result_type = dml::create_delta_result;
 
         /**
          * @brief Returns a new instance of the operation with "equal" expected result
@@ -445,7 +454,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_equal() const noexcept
         {
-            return create_delta_operation(ml::create_delta_option::check_result, ml::delta_expected_result_option::expect_equal);
+            return create_delta_operation(options_.enable<detail::create_delta_flag::check_result>(), detail::create_delta_result::equal);
         }
 
         /**
@@ -455,7 +464,8 @@ namespace dml
          */
         [[nodiscard]] constexpr auto expect_not_equal() const noexcept
         {
-            return create_delta_operation(ml::create_delta_option::check_result, ml::delta_expected_result_option::expect_not_equal);
+            return create_delta_operation(options_.enable<detail::create_delta_flag::check_result>(),
+                                          detail::create_delta_result::not_equal);
         }
 
         /**
@@ -471,7 +481,7 @@ namespace dml
          *
          * @return Expected result
          */
-        [[nodiscard]] ml::delta_expected_result_options get_expected_result() const
+        [[nodiscard]] detail::create_delta_result get_expected_result() const noexcept
         {
             return expected_;
         }
@@ -480,15 +490,15 @@ namespace dml
         /**
          * @brief Constructs the operation with specified options and expected result
          */
-        constexpr create_delta_operation(ml::create_delta_options options, ml::delta_expected_result_options expected) noexcept:
+        constexpr create_delta_operation(detail::ml::create_delta_options options, detail::create_delta_result expected) noexcept:
             options_(options),
             expected_(expected)
         {
         }
 
     private:
-        ml::create_delta_options          options_;    /**< @todo */
-        ml::delta_expected_result_options expected_{}; /**< @todo */
+        detail::ml::create_delta_options options_;    /**< Middle-layer option flags; the default constructor enables cache_control */
+        detail::create_delta_result      expected_{}; /**< Expected outcome, returned by get_expected_result() */
     };
 
     /**
@@ -515,7 +525,10 @@ namespace dml
         /**
          * @brief Constructs the operation
          */
-        constexpr apply_delta_operation() = default;
+        constexpr apply_delta_operation() noexcept:
+            options_(detail::ml::apply_delta_options().enable<detail::apply_delta_flag::cache_control>())
+        {
+        }
 
         /**
          * @brief Result type for this operation
@@ -533,7 +546,7 @@ namespace dml
         }
 
     private:
-        ml::apply_delta_options options_; /**< @todo */
+        detail::ml::apply_delta_options options_; /**< Middle-layer option flags; the default constructor enables cache_control */
     };
 
     /**
@@ -581,7 +594,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto bypass_reflection() const noexcept
         {
-            return crc_operation({}, this->additional_options_ | ml::crc_additional_option::bypass_reflection);
+            return crc_operation(options_, specific_options_.enable<detail::crc_specific_flag::bypass_crc_inversion_and_reflection>());
         }
 
         /**
@@ -591,7 +604,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto bypass_data_reflection() const noexcept
         {
-            return crc_operation({}, this->additional_options_ | ml::crc_additional_option::bypass_data_reflection);
+            return crc_operation(options_, specific_options_.enable<detail::crc_specific_flag::bypass_data_reflection>());
         }
 
         /**
@@ -605,24 +618,24 @@ namespace dml
         /**
          * @todo
          */
-        [[nodiscard]] constexpr auto get_additional_options() const noexcept
+        [[nodiscard]] constexpr auto get_specific_options() const noexcept
         {
-            return additional_options_;
+            return specific_options_;
         }
 
     private:
         /**
          * @brief Constructs the operation with specified parameters
          */
-        constexpr crc_operation(ml::crc_options options, ml::crc_additional_options additional_options) noexcept:
+        constexpr crc_operation(detail::ml::crc_options options, detail::ml::crc_specific_options specific_options) noexcept:
             options_(options),
-            additional_options_(additional_options)
+            specific_options_(specific_options)
         {
         }
 
     private:
-        ml::crc_options            options_;            /**< @todo */
-        ml::crc_additional_options additional_options_; /**< @todo */
+        detail::ml::crc_options          options_;          /**< Middle-layer option flags; carried over unchanged by the bypass_* builders */
+        detail::ml::crc_specific_options specific_options_; /**< CRC-specific flags set by bypass_reflection()/bypass_data_reflection() */
     };
 
     /**
@@ -662,7 +675,11 @@ namespace dml
          *
          * Reflection and data reflection are enabled.
          */
-        constexpr copy_crc_operation() noexcept = default;
+        constexpr copy_crc_operation() noexcept:
+            options_(detail::ml::copy_crc_options().enable<detail::copy_crc_flag::cache_control>()),
+            specific_options_()
+        {
+        }
 
         /**
          * @brief Returns a new instance of the operation with bypass_reflection option enabled.
@@ -671,7 +688,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto bypass_reflection() const noexcept
         {
-            return copy_crc_operation({}, this->additional_options_ | ml::copy_crc_additional_option::bypass_reflection);
+            return copy_crc_operation(options_, specific_options_.enable<detail::crc_specific_flag::bypass_crc_inversion_and_reflection>());
         }
 
         /**
@@ -681,7 +698,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto bypass_data_reflection() const noexcept
         {
-            return copy_crc_operation({}, this->additional_options_ | ml::copy_crc_additional_option::bypass_data_reflection);
+            return copy_crc_operation(options_, specific_options_.enable<detail::crc_specific_flag::bypass_data_reflection>());
         }
 
         /**
@@ -695,24 +712,24 @@ namespace dml
         /**
          * @todo
          */
-        [[nodiscard]] constexpr auto get_additional_options() const noexcept
+        [[nodiscard]] constexpr auto get_specific_options() const noexcept
         {
-            return additional_options_;
+            return specific_options_;
         }
 
     private:
         /**
          * @brief Constructs the operation with specified parameters
          */
-        constexpr copy_crc_operation(ml::copy_crc_options options, ml::copy_crc_additional_options additional_options) noexcept:
+        constexpr copy_crc_operation(detail::ml::copy_crc_options options, detail::ml::copy_crc_specific_options specific_options) noexcept:
             options_(options),
-            additional_options_(additional_options)
+            specific_options_(specific_options)
         {
         }
 
     private:
-        ml::copy_crc_options            options_;            /**< @todo */
-        ml::copy_crc_additional_options additional_options_; /**< @todo */
+        detail::ml::copy_crc_options          options_;          /**< Middle-layer option flags; the default constructor enables cache_control */
+        detail::ml::copy_crc_specific_options specific_options_; /**< CRC-specific flags set by bypass_reflection()/bypass_data_reflection() */
     };
 
     /**
@@ -758,7 +775,7 @@ namespace dml
          */
         [[nodiscard]] constexpr auto dont_invalidate_cache() const noexcept
         {
-            return cache_flush_operation(ml::cache_flush_option::cache_control);
+            return cache_flush_operation(options_.enable<detail::cache_flush_flag::cache_control>());
         }
 
         /**
@@ -773,12 +790,12 @@ namespace dml
         /**
          * @brief Constructs the operation with specified parameter
          */
-        constexpr explicit cache_flush_operation(ml::cache_flush_options options) noexcept: options_(options)
+        constexpr explicit cache_flush_operation(detail::ml::cache_flush_options options) noexcept: options_(options)
         {
         }
 
     private:
-        ml::cache_flush_options options_; /**< @todo */
+        detail::ml::cache_flush_options options_; /**< Middle-layer option flags; dont_invalidate_cache() enables cache_control */
     };
 
     /**
@@ -821,7 +838,7 @@ namespace dml
         }
 
     private:
-        ml::batch_options options_; /**< @todo */
+        detail::ml::batch_options options_; /**< @todo */
     };
 
     /**
diff --git a/include/dml/cpp/result.hpp b/include/dml/hl/result.hpp
similarity index 56%
rename from include/dml/cpp/result.hpp
rename to include/dml/hl/result.hpp
index bfed222..5ffdf8c 100644
--- a/include/dml/cpp/result.hpp
+++ b/include/dml/hl/result.hpp
@@ -22,10 +22,9 @@
  * @brief Contains definitions of result types
  */
 
-#include <dml/cpp/status_code.hpp>
-#include <dml/cpp/types.hpp>
-
 #include <cstdint>
+#include <dml/hl/status_code.hpp>
+#include <dml/hl/types.hpp>
 
 namespace dml
 {
@@ -41,7 +40,7 @@ namespace dml
      */
     struct mem_move_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -49,7 +48,7 @@ namespace dml
      */
     struct mem_copy_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -57,7 +56,7 @@ namespace dml
      */
     struct fill_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -65,7 +64,7 @@ namespace dml
      */
     struct dualcast_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -73,9 +72,9 @@ namespace dml
      */
     struct compare_result
     {
-        status_code       status{status_code::error}; /**< Status of operation execution */
-        comparison_result result{};                   /**< Comparison result */
-        size_t            mismatch{};                 /**< First mismatch byte position */
+        status_code       status{ status_code::error }; /**< Status of operation execution */
+        comparison_result result{};                     /**< Comparison result */
+        size_t            mismatch{};                   /**< First mismatch byte position */
     };
 
     /**
@@ -83,10 +82,10 @@ namespace dml
      */
     struct create_delta_result
     {
-        status_code       status{status_code::error}; /**< Status of operation execution */
-        comparison_result result{};                   /**< Comparison result */
-        size_t            bytes_completed{};          /**< Bytes completed, before delta overflowed */
-        size_t            delta_record_size{};        /**< Delta record written size */
+        status_code       status{ status_code::error }; /**< Status of operation execution */
+        comparison_result result{};                     /**< Comparison result */
+        size_t            bytes_completed{};            /**< Bytes completed, before delta overflowed */
+        size_t            delta_record_size{};          /**< Delta record written size */
     };
 
     /**
@@ -94,7 +93,7 @@ namespace dml
      */
     struct apply_delta_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -102,8 +101,8 @@ namespace dml
      */
     struct crc_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
-        uint32_t    crc_value{};                /**< Calculated CRC value */
+        status_code status{ status_code::error }; /**< Status of operation execution */
+        uint32_t    crc_value{};                  /**< Calculated CRC value */
     };
 
     /**
@@ -111,7 +110,7 @@ namespace dml
      */
     struct cache_flush_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
+        status_code status{ status_code::error }; /**< Status of operation execution */
     };
 
     /**
@@ -119,8 +118,8 @@ namespace dml
      */
     struct batch_result
     {
-        status_code status{status_code::error}; /**< Status of operation execution */
-        size_t      operations_completed{};     /**< Number of operation successfully completed */
+        status_code status{ status_code::error }; /**< Status of operation execution */
+        size_t      operations_completed{};       /**< Number of operation successfully completed */
     };
 
     /**
diff --git a/include/dml/cpp/sequence.hpp b/include/dml/hl/sequence.hpp
similarity index 80%
rename from include/dml/cpp/sequence.hpp
rename to include/dml/hl/sequence.hpp
index cf5817a..34e2652 100644
--- a/include/dml/cpp/sequence.hpp
+++ b/include/dml/hl/sequence.hpp
@@ -22,11 +22,14 @@
 #ifndef DML_SEQUENCE_HPP
 #define DML_SEQUENCE_HPP
 
-#include <dml/cpp/detail/buffer.hpp>
-#include <dml/cpp/detail/utils.hpp>
-#include <dml/cpp/handler.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-#include <dml/cpp/operations.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/detail/ml/result.hpp>
+#include <dml/detail/ml/validation.hpp>
+#include <dml/hl/detail/buffer.hpp>
+#include <dml/hl/detail/utils.hpp>
+#include <dml/hl/handler.hpp>
+#include <dml/hl/operations.hpp>
 
 namespace dml
 {
@@ -45,12 +48,12 @@ namespace dml
         /**
          * @brief Type of buffer for Middle Layer operations
          */
-        using op_buffer_t = detail::buffer_array<ml::descriptor, allocator_t>;
+        using op_buffer_t = detail::buffer_array<detail::ml::operation, allocator_t>;
 
         /**
          * @brief Type of buffer for Middle Layer results
          */
-        using res_buffer_t = detail::buffer_array<ml::completion_record, allocator_t>;
+        using res_buffer_t = detail::buffer_array<detail::ml::result, allocator_t>;
 
     public:
         /**
@@ -61,7 +64,7 @@ namespace dml
          */
         explicit sequence(size_t length, allocator_t allocator = allocator_t()):
             operations_(length, allocator),
-            records_(length, allocator),
+            results_(length, allocator),
             current_length_(0u)
         {
         }
@@ -330,26 +333,27 @@ namespace dml
         /**
          * @todo
          */
-        inline status_code add(ml::descriptor operation) noexcept
+        inline status_code add(detail::ml::operation operation) noexcept
         {
             if (current_length_ == operations_.get_count())
             {
                 return status_code::batch_overflow;
             }
 
-            if (auto status = ml::validate(operation); status != ml::validation_status::success)
+            if (auto status = detail::ml::validate(operation); status != detail::validation_status::success)
             {
                 return detail::to_own(status);
             }
 
             operations_.get(current_length_) = operation;
+            detail::ml::bind(operations_.get(current_length_), results_.get(current_length_));
             current_length_++;
             return status_code::ok;
         }
 
     private:
         op_buffer_t  operations_;     /**< Buffer for operations array */
-        res_buffer_t records_;        /**< Buffer for results array */
+        res_buffer_t results_;        /**< Buffer for results array */
         size_t       current_length_; /**< Current number of operation stored in the sequence */
     };
 
@@ -358,7 +362,7 @@ namespace dml
     {
         DML_VALIDATE_SIZE_CONSISTENCY(src_view.size(), dst_view.size());
 
-        return add(ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options()));
+        return add(detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options()));
     }
 
     template <typename allocator_t>
@@ -366,13 +370,13 @@ namespace dml
     {
         DML_VALIDATE_SIZE_CONSISTENCY(src_view.size(), dst_view.size());
 
-        return add(ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options()));
+        return add(detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options()));
     }
 
     template <typename allocator_t>
     inline status_code sequence<allocator_t>::add(fill_operation operation, uint64_t pattern, data_view dst_view)
     {
-        return add(ml::make_fill_descriptor(pattern, dst_view.data(), dst_view.size(), operation.get_options()));
+        return add(detail::ml::make_fill_operation(pattern, dst_view.data(), dst_view.size(), operation.get_options()));
     }
 
     template <typename allocator_t>
@@ -384,12 +388,12 @@ namespace dml
         DML_VALIDATE_SIZE_CONSISTENCY(src_view.size(), dst1_view.size());
         DML_VALIDATE_SIZE_CONSISTENCY(src_view.size(), dst2_view.size());
 
-        return add(ml::make_dualcast_descriptor(src_view.data(),
-                                                dst1_view.data(),
-                                                dst2_view.data(),
-                                                src_view.size(),
-                                                operation.get_options(),
-                                                operation.get_additional_options()));
+        return add(detail::ml::make_dualcast_operation(src_view.data(),
+                                                       dst1_view.data(),
+                                                       dst2_view.data(),
+                                                       src_view.size(),
+                                                       operation.get_options(),
+                                                       operation.get_specific_options()));
     }
 
     template <typename allocator_t>
@@ -397,21 +401,21 @@ namespace dml
     {
         DML_VALIDATE_SIZE_CONSISTENCY(src1_view.size(), src2_view.size());
 
-        return add(ml::make_compare_descriptor(src1_view.data(),
-                                               src2_view.data(),
-                                               src1_view.size(),
-                                               operation.get_options(),
-                                               operation.get_expected_result()));
+        return add(detail::ml::make_compare_operation(src1_view.data(),
+                                                      src2_view.data(),
+                                                      src1_view.size(),
+                                                      operation.get_options(),
+                                                      operation.get_expected_result()));
     }
 
     template <typename allocator_t>
     inline status_code sequence<allocator_t>::add(compare_pattern_operation operation, uint64_t pattern, const_data_view src_view)
     {
-        return add(ml::make_compare_pattern_descriptor(pattern,
-                                                       src_view.data(),
-                                                       src_view.size(),
-                                                       operation.get_options(),
-                                                       operation.get_expected_result()));
+        return add(detail::ml::make_compare_pattern_operation(pattern,
+                                                              src_view.data(),
+                                                              src_view.size(),
+                                                              operation.get_options(),
+                                                              operation.get_expected_result()));
     }
 
     template <typename allocator_t>
@@ -422,13 +426,13 @@ namespace dml
     {
         DML_VALIDATE_SIZE_CONSISTENCY(src1_view.size(), src2_view.size());
 
-        return add(ml::make_create_delta_descriptor(src1_view.data(),
-                                                    src2_view.data(),
-                                                    src1_view.size(),
-                                                    delta_view.data(),
-                                                    delta_view.size(),
-                                                    operation.get_options(),
-                                                    operation.get_expected_result()));
+        return add(detail::ml::make_create_delta_operation(src1_view.data(),
+                                                           src2_view.data(),
+                                                           src1_view.size(),
+                                                           delta_view.data(),
+                                                           delta_view.size(),
+                                                           operation.get_options(),
+                                                           operation.get_expected_result()));
     }
 
     template <typename allocator_t>
@@ -441,21 +445,21 @@ namespace dml
         {
             return status_code::delta_delta_empty;
         }
-        return add(ml::make_apply_delta_descriptor(delta_view.data(),
-                                                   delta_result.delta_record_size,
-                                                   dst_view.data(),
-                                                   dst_view.size(),
-                                                   operation.get_options()));
+        return add(detail::ml::make_apply_delta_operation(delta_view.data(),
+                                                          delta_result.delta_record_size,
+                                                          dst_view.data(),
+                                                          dst_view.size(),
+                                                          operation.get_options()));
     }
 
     template <typename allocator_t>
     inline status_code sequence<allocator_t>::add(crc_operation operation, const_data_view src_view, uint32_t crc_seed)
     {
-        return add(ml::make_crc_descriptor(src_view.data(),
-                                           src_view.size(),
-                                           crc_seed,
-                                           operation.get_options(),
-                                           operation.get_additional_options()));
+        return add(detail::ml::make_crc_operation(src_view.data(),
+                                                  src_view.size(),
+                                                  crc_seed,
+                                                  operation.get_options(),
+                                                  operation.get_specific_options()));
     }
 
     template <typename allocator_t>
@@ -465,18 +469,18 @@ namespace dml
                                                   uint32_t           crc_seed)
     {
         DML_VALIDATE_SIZE_CONSISTENCY(src_view.size(), dst_view.size());
-        return add(ml::make_copy_crc_descriptor(src_view.data(),
-                                                dst_view.data(),
-                                                src_view.size(),
-                                                crc_seed,
-                                                operation.get_options(),
-                                                operation.get_additional_options()));
+        return add(detail::ml::make_copy_crc_operation(src_view.data(),
+                                                       dst_view.data(),
+                                                       src_view.size(),
+                                                       crc_seed,
+                                                       operation.get_options(),
+                                                       operation.get_specific_options()));
     }
 
     template <typename allocator_t>
     inline status_code sequence<allocator_t>::add(cache_flush_operation operation, data_view dst_view)
     {
-        return add(ml::make_cache_flush_descriptor(dst_view.data(), dst_view.size(), operation.get_options()));
+        return add(detail::ml::make_cache_flush_operation(dst_view.data(), dst_view.size(), operation.get_options()));
     }
 }  // namespace dml
 
diff --git a/include/dml/cpp/status_code.hpp b/include/dml/hl/status_code.hpp
similarity index 95%
rename from include/dml/cpp/status_code.hpp
rename to include/dml/hl/status_code.hpp
index 77603d4..0639ec7 100644
--- a/include/dml/cpp/status_code.hpp
+++ b/include/dml/hl/status_code.hpp
@@ -22,7 +22,7 @@
  * @brief Contains definition of status type
  */
 
-#include <dml/cpp/types.hpp>
+#include <dml/hl/types.hpp>
 
 namespace dml
 {
@@ -46,6 +46,7 @@ namespace dml
         batch_overflow,        /**< Batch is full */
         execution_failed,      /**< Unknown execution error */
         unsupported_operation, /**< Unknown execution error */
+        queue_busy,            /**< Enqueue to one or several queues failed */
         error                  /**< Internal library error occurred */
     };
 }  // namespace dml
diff --git a/include/dml/cpp/submit.hpp b/include/dml/hl/submit.hpp
similarity index 88%
rename from include/dml/cpp/submit.hpp
rename to include/dml/hl/submit.hpp
index b624d9b..651a0ce 100644
--- a/include/dml/cpp/submit.hpp
+++ b/include/dml/hl/submit.hpp
@@ -22,13 +22,13 @@
 #ifndef DML_SUBMIT_HPP
 #define DML_SUBMIT_HPP
 
-#include <dml/cpp/detail/handler.hpp>
-#include <dml/cpp/detail/submit.hpp>
-#include <dml/cpp/detail/utils.hpp>
-#include <dml/cpp/execution_interface.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-#include <dml/cpp/operations.hpp>
-#include <dml/cpp/sequence.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/hl/detail/handler.hpp>
+#include <dml/hl/detail/submit.hpp>
+#include <dml/hl/detail/utils.hpp>
+#include <dml/hl/execution_interface.hpp>
+#include <dml/hl/operations.hpp>
+#include <dml/hl/sequence.hpp>
 
 namespace dml
 {
@@ -81,7 +81,7 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_batch_descriptor(seq.data(), seq.length(), operation.get_options());
+                return detail::ml::make_batch_operation(seq.data(), seq.length(), operation.get_options());
             });
     }
 
@@ -128,7 +128,7 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
+                return detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
             },
             [&]()
             {
@@ -180,7 +180,7 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_mem_move_descriptor(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
+                return detail::ml::make_mem_move_operation(src_view.data(), dst_view.data(), src_view.size(), operation.get_options());
             },
             [&]()
             {
@@ -232,7 +232,7 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_fill_descriptor(pattern, dst_view.data(), dst_view.size(), operation.get_options());
+                return detail::ml::make_fill_operation(pattern, dst_view.data(), dst_view.size(), operation.get_options());
             });
     }
 
@@ -281,12 +281,12 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_dualcast_descriptor(src_view.data(),
-                                                    dst1_view.data(),
-                                                    dst2_view.data(),
-                                                    src_view.size(),
-                                                    operation.get_options(),
-                                                    operation.get_additional_options());
+                return detail::ml::make_dualcast_operation(src_view.data(),
+                                                           dst1_view.data(),
+                                                           dst2_view.data(),
+                                                           src_view.size(),
+                                                           operation.get_options(),
+                                                           operation.get_specific_options());
             },
             [&]()
             {
@@ -339,11 +339,11 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_compare_descriptor(src1_view.data(),
-                                                   src2_view.data(),
-                                                   src1_view.size(),
-                                                   operation.get_options(),
-                                                   operation.get_expected_result());
+                return detail::ml::make_compare_operation(src1_view.data(),
+                                                          src2_view.data(),
+                                                          src1_view.size(),
+                                                          operation.get_options(),
+                                                          operation.get_expected_result());
             },
             [&]()
             {
@@ -394,7 +394,7 @@ namespace dml
         return detail::submit<execution_path, compare_pattern_operation>(executor,
                                                                          [&]()
                                                                          {
-                                                                             return ml::make_compare_pattern_descriptor(
+                                                                             return detail::ml::make_compare_pattern_operation(
                                                                                  pattern,
                                                                                  src_view.data(),
                                                                                  src_view.size(),
@@ -448,13 +448,13 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_create_delta_descriptor(src1_view.data(),
-                                                        src2_view.data(),
-                                                        src1_view.size(),
-                                                        delta_view.data(),
-                                                        delta_view.size(),
-                                                        operation.get_options(),
-                                                        operation.get_expected_result());
+                return detail::ml::make_create_delta_operation(src1_view.data(),
+                                                               src2_view.data(),
+                                                               src1_view.size(),
+                                                               delta_view.data(),
+                                                               delta_view.size(),
+                                                               operation.get_options(),
+                                                               operation.get_expected_result());
             },
             [&]()
             {
@@ -508,11 +508,11 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_apply_delta_descriptor(delta_view.data(),
-                                                       delta_result.delta_record_size,
-                                                       dst_view.data(),
-                                                       dst_view.size(),
-                                                       operation.get_options());
+                return detail::ml::make_apply_delta_operation(delta_view.data(),
+                                                              delta_result.delta_record_size,
+                                                              dst_view.data(),
+                                                              dst_view.size(),
+                                                              operation.get_options());
             },
             [&]()
             {
@@ -566,11 +566,11 @@ namespace dml
         return detail::submit<execution_path, crc_operation>(executor,
                                                              [&]()
                                                              {
-                                                                 return ml::make_crc_descriptor(src_view.data(),
-                                                                                                src_view.size(),
-                                                                                                crc_seed,
-                                                                                                operation.get_options(),
-                                                                                                operation.get_additional_options());
+                                                                 return detail::ml::make_crc_operation(src_view.data(),
+                                                                                                       src_view.size(),
+                                                                                                       crc_seed,
+                                                                                                       operation.get_options(),
+                                                                                                       operation.get_specific_options());
                                                              });
     }
 
@@ -619,12 +619,12 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_copy_crc_descriptor(src_view.data(),
-                                                    dst_view.data(),
-                                                    src_view.size(),
-                                                    crc_seed,
-                                                    operation.get_options(),
-                                                    operation.get_additional_options());
+                return detail::ml::make_copy_crc_operation(src_view.data(),
+                                                           dst_view.data(),
+                                                           src_view.size(),
+                                                           crc_seed,
+                                                           operation.get_options(),
+                                                           operation.get_specific_options());
             },
             [&]()
             {
@@ -672,7 +672,7 @@ namespace dml
             executor,
             [&]()
             {
-                return ml::make_cache_flush_descriptor(dst_view.data(), dst_view.size(), operation.get_options());
+                return detail::ml::make_cache_flush_operation(dst_view.data(), dst_view.size(), operation.get_options());
             });
     }
 }  // namespace dml
diff --git a/include/dml/cpp/types.hpp b/include/dml/hl/types.hpp
similarity index 100%
rename from include/dml/cpp/types.hpp
rename to include/dml/hl/types.hpp
diff --git a/sources/CMakeLists.txt b/sources/CMakeLists.txt
index b144762..b794365 100644
--- a/sources/CMakeLists.txt
+++ b/sources/CMakeLists.txt
@@ -13,9 +13,7 @@
 # stated in the License.
 #
 
-# TODO: Move to middle_layer
-add_subdirectory(cores)
-
+add_subdirectory(core)
 add_subdirectory(middle_layer)
 add_subdirectory(c_api)
 add_subdirectory(cpp_api)
diff --git a/sources/c_api/CMakeLists.txt b/sources/c_api/CMakeLists.txt
index 3d0d0e5..6523224 100644
--- a/sources/c_api/CMakeLists.txt
+++ b/sources/c_api/CMakeLists.txt
@@ -14,46 +14,47 @@
 #
 
 add_library(dml STATIC
-    dml.cpp
-    dml_batch.cpp
-    dml_get_library_version.cpp
-
-    $<TARGET_OBJECTS:dml_core>
-    $<TARGET_OBJECTS:dml_middle_layer>
-    )
-
-target_compile_features(dml PUBLIC c_std_11)
-target_compile_features(dml PRIVATE cxx_std_17)
-
-# TODO: target_compile_options(dml PRIVATE ${DML_QUALITY_OPTIONS})
+        dml.cpp
+        dml_batch.cpp
+        dml_get_library_version.cpp
+        )
 
 target_include_directories(dml
-    PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../../include>
-    PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
-    PRIVATE include
-    PRIVATE ../cores/include # TODO: Remove
-    )
+        PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../../include>
+        PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+        PRIVATE include
+        PRIVATE ../include
+        )
+target_sources(dml
+        PRIVATE $<TARGET_OBJECTS:dml_middle_layer>
+        PRIVATE $<TARGET_PROPERTY:dml_middle_layer,INTERFACE_SOURCES>
+        )
+target_compile_features(dml
+        PUBLIC c_std_11
+        PRIVATE cxx_std_17
+        )
+target_compile_options(dml
+        PRIVATE ${DML_QUALITY_OPTIONS}
+        PRIVATE ${DML_CPP_PRIVATE_OPTIONS}
+        )
+
+if(DML_HW)
+    target_link_libraries(dml PRIVATE ${CMAKE_DL_LIBS})
+endif()
 
 # Pass git revision to get_library_version source file
 get_git_revision()
 
 set_property(
-    SOURCE dml_get_library_version.cpp APPEND
-    PROPERTY COMPILE_DEFINITIONS DML_GIT_REVISION="${GIT_REV}")
+        SOURCE dml_get_library_version.cpp APPEND
+        PROPERTY COMPILE_DEFINITIONS DML_GIT_REVISION="${GIT_REV}")
 
 set_target_properties(dml PROPERTIES
-    VERSION ${PROJECT_VERSION}
-    SOVERSION ${PROJECT_SOVERSION})
+        VERSION ${PROJECT_VERSION}
+        SOVERSION ${PROJECT_SOVERSION})
 
 install(TARGETS dml
-    EXPORT ${PROJECT_NAME}Targets
-    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-target_include_directories(dml PRIVATE sw-path/include)
-
-if (DML_HW)
-    target_compile_definitions(dml PRIVATE DML_HW)
-    target_link_libraries(dml PRIVATE ${CMAKE_DL_LIBS})
-endif()
+        EXPORT ${PROJECT_NAME}Targets
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/sources/c_api/dml_batch.cpp b/sources/c_api/dml_batch.cpp
index 9aa6121..db3b5a7 100644
--- a/sources/c_api/dml_batch.cpp
+++ b/sources/c_api/dml_batch.cpp
@@ -14,67 +14,63 @@
  *
  */
 
-#include <dml/cpp/middle_layer/completion_record.hpp>
-#include <dml/cpp/middle_layer/descriptor.hpp>
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-#include <dml/cpp/middle_layer/result_views.hpp>
-#include <dml/cpp/middle_layer/validation.hpp>
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/types.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/detail/ml/options.hpp>
+#include <dml/detail/ml/result.hpp>
+#include <dml/detail/ml/validation.hpp>
 
 #include "dml/dml.h"
 #include "macros.hpp"
 #include "range_check.hpp"
 #include "status.hpp"
+#include "utils.hpp"
 
 namespace dml
 {
+
     constexpr auto get_task_size() noexcept -> uint32_t
     {
-        return sizeof(dml::ml::descriptor) + sizeof(dml::ml::completion_record);
+        return sizeof(dml::detail::ml::operation) + sizeof(dml::detail::ml::result);
     }
 
     class batch
     {
     public:
-        batch(uint8_t *batch_data, uint32_t tasks_count) noexcept: batch_data_(batch_data), tasks_count_(tasks_count)
+        batch(uint8_t *batch_data, uint32_t tasks_count) noexcept: batch_data_(dml::align(batch_data)), tasks_count_(tasks_count)
         {
         }
 
-        template <typename make_callback_t>
-        void add_by_index(uint32_t index, make_callback_t &&make) const noexcept
+        template <typename make_operation>
+        void add_by_index(uint32_t index, make_operation &&make) const noexcept
         {
-            const auto descriptors = reinterpret_cast<dml::ml::descriptor *>(batch_data_);
-
-            descriptors[index] = make();
+            const auto operations = reinterpret_cast<dml::detail::ml::operation *>(batch_data_);
 
-            const auto records = reinterpret_cast<dml::ml::completion_record *>(descriptors + tasks_count_);
+            operations[index] = make();
 
-            auto view = dml::ml::views::any_descriptor(descriptors[index]);
+            const auto results = reinterpret_cast<dml::detail::ml::result *>(operations + tasks_count_);
 
-            view.completion_record_address() = reinterpret_cast<dml::ml::address_t>(records + index);
-            view.flags() |= static_cast<dml::ml::flags_t>(dml::ml::flag::request_completion_record) |
-                            static_cast<dml::ml::flags_t>(dml::ml::flag::completion_record_address_valid);
+            dml::detail::ml::bind(operations[index], results[index]);
         }
 
         [[nodiscard]] auto get_status(uint32_t index) const noexcept
         {
-            const auto descriptors = reinterpret_cast<dml::ml::descriptor *>(batch_data_);
+            const auto operations = reinterpret_cast<dml::detail::ml::operation *>(batch_data_);
 
-            const auto records = reinterpret_cast<dml::ml::completion_record *>(descriptors + tasks_count_);
+            const auto records = reinterpret_cast<dml::detail::ml::result *>(operations + tasks_count_);
 
-            auto view = dml::ml::views::any_result(records[index]);
-
-            return to_own_status(static_cast<dml::ml::execution_status>(view.status()));
+            return to_own_status(dml::detail::ml::get_status(records[index]));
         }
 
         [[nodiscard]] auto get_result(uint32_t index) const noexcept
         {
-            const auto descriptors = reinterpret_cast<dml::ml::descriptor *>(batch_data_);
-
-            const auto records = reinterpret_cast<dml::ml::completion_record *>(descriptors + tasks_count_);
+            const auto operations = reinterpret_cast<dml::detail::ml::operation *>(batch_data_);
 
-            auto view = dml::ml::views::any_result(records[index]);
+            const auto records = reinterpret_cast<dml::detail::ml::result *>(operations + tasks_count_);
 
-            return view.result();
+            return dml::detail::ml::get_result(records[index]);
         }
 
     private:
@@ -92,7 +88,9 @@ extern "C" dml_status_t dml_get_batch_size(const dml_job_t *dml_job_ptr, uint32_
         return DML_STATUS_BATCH_SIZE_ERROR;
     }
 
-    *byte_size_ptr = dml::get_task_size() * task_count;
+    const uint32_t required_alignment = 64u;
+
+    *byte_size_ptr = dml::get_task_size() * task_count + required_alignment;
 
     return DML_STATUS_OK;
 }
@@ -112,7 +110,7 @@ extern "C" dml_status_t dml_batch_set_nop_by_index(dml_job_t *dml_job_ptr, uint3
         .add_by_index(task_index,
                       [&]
                       {
-                          return dml::ml::make_nop_descriptor(dml::ml::nop_options(static_cast<uint16_t>(flags & 0xFFFF)));
+                          return dml::detail::ml::make_nop_operation(dml::detail::ml::nop_options(static_cast<uint16_t>(flags & 0xFFFF)));
                       });
 
     return DML_STATUS_OK;
@@ -140,12 +138,12 @@ extern "C" dml_status_t dml_batch_set_mem_move_by_index(dml_job_t            *dm
         return status;
     }
 
-    auto descriptor = dml::ml::make_mem_move_descriptor(source_ptr,
-                                                        destination_ptr,
-                                                        byte_length,
-                                                        dml::ml::mem_move_options(static_cast<uint16_t>(flags & 0xFFFF)));
+    auto operation = dml::detail::ml::make_mem_move_operation(source_ptr,
+                                                              destination_ptr,
+                                                              byte_length,
+                                                              dml::detail::ml::mem_move_options(static_cast<uint16_t>(flags & 0xFFFF)));
 
-    status = dml::to_own_status(dml::ml::validate(descriptor));
+    status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -155,7 +153,7 @@ extern "C" dml_status_t dml_batch_set_mem_move_by_index(dml_job_t            *dm
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -178,14 +176,15 @@ extern "C" dml_status_t dml_batch_set_dualcast_by_index(dml_job_t            *dm
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_dualcast_descriptor(source_ptr,
-                                                        destination_first_ptr,
-                                                        destination_second_ptr,
-                                                        byte_length,
-                                                        dml::ml::dualcast_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                        dml::ml::dualcast_additional_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
+    auto operation =
+        dml::detail::ml::make_dualcast_operation(source_ptr,
+                                                 destination_first_ptr,
+                                                 destination_second_ptr,
+                                                 byte_length,
+                                                 dml::detail::ml::dualcast_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                 dml::detail::ml::dualcast_specific_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
 
-    auto status = dml::to_own_status(dml::ml::validate(descriptor));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -195,7 +194,7 @@ extern "C" dml_status_t dml_batch_set_dualcast_by_index(dml_job_t            *dm
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -218,12 +217,12 @@ extern "C" dml_status_t dml_batch_set_compare_by_index(dml_job_t            *dml
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_compare_descriptor(source_first_ptr,
-                                                       source_second_ptr,
-                                                       byte_length,
-                                                       dml::ml::compare_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                       dml::ml::compare_expected_result_options(expected_result));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation = dml::detail::ml::make_compare_operation(source_first_ptr,
+                                                             source_second_ptr,
+                                                             byte_length,
+                                                             dml::detail::ml::compare_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                             dml::detail::compare_result(expected_result));
+    auto status    = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -233,7 +232,7 @@ extern "C" dml_status_t dml_batch_set_compare_by_index(dml_job_t            *dml
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -257,12 +256,13 @@ extern "C" dml_status_t dml_batch_set_compare_pattern_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_compare_pattern_descriptor(*reinterpret_cast<uint64_t *>(pattern_ptr),
-                                                               source_ptr,
-                                                               byte_length,
-                                                               dml::ml::compare_pattern_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                               dml::ml::compare_expected_result_options(expected_result));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation =
+        dml::detail::ml::make_compare_pattern_operation(*reinterpret_cast<uint64_t *>(pattern_ptr),
+                                                        source_ptr,
+                                                        byte_length,
+                                                        dml::detail::ml::compare_pattern_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                        dml::detail::compare_result(expected_result));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -272,7 +272,7 @@ extern "C" dml_status_t dml_batch_set_compare_pattern_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -301,12 +301,12 @@ extern "C" dml_status_t dml_batch_set_crc_by_index(dml_job_t            *dml_job
         return status;
     }
 
-    auto descriptor = dml::ml::make_crc_descriptor(source_ptr,
-                                                   byte_length,
-                                                   *crc_seed_ptr,
-                                                   dml::ml::crc_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                   dml::ml::crc_additional_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
-    status          = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation = dml::detail::ml::make_crc_operation(source_ptr,
+                                                         byte_length,
+                                                         *crc_seed_ptr,
+                                                         dml::detail::ml::crc_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                         dml::detail::ml::crc_specific_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
+    status         = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -316,7 +316,7 @@ extern "C" dml_status_t dml_batch_set_crc_by_index(dml_job_t            *dml_job
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -346,13 +346,14 @@ extern "C" dml_status_t dml_batch_set_copy_crc_by_index(dml_job_t            *dm
         return status;
     }
 
-    auto descriptor = dml::ml::make_copy_crc_descriptor(source_ptr,
-                                                        destination_ptr,
-                                                        byte_length,
-                                                        *crc_seed_ptr,
-                                                        dml::ml::copy_crc_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                        dml::ml::copy_crc_additional_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
-    status          = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation =
+        dml::detail::ml::make_copy_crc_operation(source_ptr,
+                                                 destination_ptr,
+                                                 byte_length,
+                                                 *crc_seed_ptr,
+                                                 dml::detail::ml::copy_crc_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                 dml::detail::ml::copy_crc_specific_options(static_cast<uint8_t>((flags >> 16) & 0xFF)));
+    status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -362,7 +363,7 @@ extern "C" dml_status_t dml_batch_set_copy_crc_by_index(dml_job_t            *dm
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -385,11 +386,11 @@ extern "C" dml_status_t dml_batch_set_fill_by_index(dml_job_t            *dml_jo
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_fill_descriptor(*reinterpret_cast<const uint64_t *>(pattern_ptr),
-                                                    destination_ptr,
-                                                    byte_length,
-                                                    dml::ml::fill_options(static_cast<uint16_t>(flags & 0xFFFF)));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation = dml::detail::ml::make_fill_operation(*reinterpret_cast<const uint64_t *>(pattern_ptr),
+                                                          destination_ptr,
+                                                          byte_length,
+                                                          dml::detail::ml::fill_options(static_cast<uint16_t>(flags & 0xFFFF)));
+    auto status    = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -399,7 +400,7 @@ extern "C" dml_status_t dml_batch_set_fill_by_index(dml_job_t            *dml_jo
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -420,10 +421,11 @@ extern "C" dml_status_t dml_batch_set_cache_flush_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_cache_flush_descriptor(destination_ptr,
-                                                           byte_length,
-                                                           dml::ml::cache_flush_options(static_cast<uint16_t>(flags & 0xFFFF)));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation =
+        dml::detail::ml::make_cache_flush_operation(destination_ptr,
+                                                    byte_length,
+                                                    dml::detail::ml::cache_flush_options(static_cast<uint16_t>(flags & 0xFFFF)));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -433,7 +435,7 @@ extern "C" dml_status_t dml_batch_set_cache_flush_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -458,14 +460,15 @@ extern "C" dml_status_t dml_batch_set_delta_create_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_create_delta_descriptor(source_ptr,
-                                                            reference_ptr,
-                                                            compare_length,
-                                                            delta_record_ptr,
-                                                            delta_record_length,
-                                                            dml::ml::create_delta_options(static_cast<uint16_t>(flags & 0xFFFF)),
-                                                            dml::ml::delta_expected_result_options(expected_result));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation =
+        dml::detail::ml::make_create_delta_operation(source_ptr,
+                                                     reference_ptr,
+                                                     compare_length,
+                                                     delta_record_ptr,
+                                                     delta_record_length,
+                                                     dml::detail::ml::create_delta_options(static_cast<uint16_t>(flags & 0xFFFF)),
+                                                     dml::detail::create_delta_result(expected_result));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -475,7 +478,7 @@ extern "C" dml_status_t dml_batch_set_delta_create_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -498,12 +501,13 @@ extern "C" dml_status_t dml_batch_set_delta_apply_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_apply_delta_descriptor(delta_record_ptr,
-                                                           delta_record_length,
-                                                           destination_ptr,
-                                                           destination_length,
-                                                           dml::ml::apply_delta_options(static_cast<uint16_t>(flags & 0xFFFF)));
-    auto status     = dml::to_own_status(dml::ml::validate(descriptor));
+    auto operation =
+        dml::detail::ml::make_apply_delta_operation(delta_record_ptr,
+                                                    delta_record_length,
+                                                    destination_ptr,
+                                                    destination_length,
+                                                    dml::detail::ml::apply_delta_options(static_cast<uint16_t>(flags & 0xFFFF)));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -513,7 +517,7 @@ extern "C" dml_status_t dml_batch_set_delta_apply_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -526,8 +530,6 @@ extern "C" dml_status_t dml_batch_set_dif_check_by_index(dml_job_t
                                                          const dml_dif_config_t *dif_config_ptr,
                                                          dml_operation_flags_t   flags)
 {
-    constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
-
     CHECK_NULL(dml_job_ptr);
     CHECK_NULL(dml_job_ptr->destination_length);
     CHECK_NULL(source_ptr);
@@ -539,16 +541,16 @@ extern "C" dml_status_t dml_batch_set_dif_check_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_dif_check_descriptor(
+    auto operation = dml::detail::ml::make_dif_check_operation(
         source_ptr,
         source_length,
         { dif_config_ptr->source_reference_tag_seed,
           dif_config_ptr->source_application_tag_mask,
           dif_config_ptr->source_application_tag_seed },
-        dml::ml::dif_check_options(static_cast<uint16_t>(flags & 0xFFFF)),
-        dml::ml::dif_additional_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
-        dml::ml::dif_additional_src_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)));
-    auto status = dml::to_own_status(dml::ml::validate(descriptor));
+        dml::detail::ml::dif_check_options(static_cast<uint16_t>(flags & 0xFFFF)),
+        dml::detail::ml::dif_specific_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
+        dml::detail::ml::dif_source_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -558,7 +560,7 @@ extern "C" dml_status_t dml_batch_set_dif_check_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -573,7 +575,7 @@ extern "C" dml_status_t dml_batch_set_dif_update_by_index(dml_job_t
                                                           uint32_t                destination_length,
                                                           dml_operation_flags_t   flags)
 {
-    constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
+    static_cast<void>(destination_length);
 
     CHECK_NULL(dml_job_ptr);
     CHECK_NULL(dml_job_ptr->destination_length);
@@ -587,7 +589,7 @@ extern "C" dml_status_t dml_batch_set_dif_update_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_dif_update_descriptor(
+    auto operation = dml::detail::ml::make_dif_update_operation(
         source_ptr,
         destination_ptr,
         source_length,
@@ -597,11 +599,11 @@ extern "C" dml_status_t dml_batch_set_dif_update_by_index(dml_job_t
         { dif_config_ptr->destination_reference_tag_seed,
           dif_config_ptr->destination_application_tag_mask,
           dif_config_ptr->destination_application_tag_seed },
-        dml::ml::dif_update_options(static_cast<uint16_t>(flags & 0xFFFF)),
-        dml::ml::dif_additional_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
-        dml::ml::dif_additional_src_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)),
-        dml::ml::dif_additional_dst_options(static_cast<uint8_t>((dif_config_ptr->flags >> 8) & 0xFF)));
-    auto status = dml::to_own_status(dml::ml::validate(descriptor));
+        dml::detail::ml::dif_update_options(static_cast<uint16_t>(flags & 0xFFFF)),
+        dml::detail::ml::dif_specific_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
+        dml::detail::ml::dif_source_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)),
+        dml::detail::ml::dif_destination_options(static_cast<uint8_t>((dif_config_ptr->flags >> 8) & 0xFF)));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -611,7 +613,7 @@ extern "C" dml_status_t dml_batch_set_dif_update_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -626,7 +628,7 @@ extern "C" dml_status_t dml_batch_set_dif_insert_by_index(dml_job_t
                                                           uint32_t                destination_length,
                                                           dml_operation_flags_t   flags)
 {
-    constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
+    static_cast<void>(destination_length);
 
     CHECK_NULL(dml_job_ptr);
     CHECK_NULL(dml_job_ptr->destination_length);
@@ -640,17 +642,17 @@ extern "C" dml_status_t dml_batch_set_dif_insert_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_dif_insert_descriptor(
+    auto operation = dml::detail::ml::make_dif_insert_operation(
         source_ptr,
         destination_ptr,
         source_length,
         { dif_config_ptr->destination_reference_tag_seed,
           dif_config_ptr->destination_application_tag_mask,
           dif_config_ptr->destination_application_tag_seed },
-        dml::ml::dif_insert_options(static_cast<uint16_t>(flags & 0xFFFF)),
-        dml::ml::dif_additional_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
-        dml::ml::dif_additional_dst_options(static_cast<uint8_t>((dif_config_ptr->flags >> 8) & 0xFF)));
-    auto status = dml::to_own_status(dml::ml::validate(descriptor));
+        dml::detail::ml::dif_insert_options(static_cast<uint16_t>(flags & 0xFFFF)),
+        dml::detail::ml::dif_specific_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
+        dml::detail::ml::dif_destination_options(static_cast<uint8_t>((dif_config_ptr->flags >> 8) & 0xFF)));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -660,7 +662,7 @@ extern "C" dml_status_t dml_batch_set_dif_insert_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -675,7 +677,7 @@ extern "C" dml_status_t dml_batch_set_dif_strip_by_index(dml_job_t
                                                          uint32_t                destination_length,
                                                          dml_operation_flags_t   flags)
 {
-    constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
+    static_cast<void>(destination_length);
 
     CHECK_NULL(dml_job_ptr);
     CHECK_NULL(dml_job_ptr->destination_length);
@@ -689,18 +691,18 @@ extern "C" dml_status_t dml_batch_set_dif_strip_by_index(dml_job_t
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    auto descriptor = dml::ml::make_dif_strip_descriptor(
+    auto operation = dml::detail::ml::make_dif_strip_operation(
         source_ptr,
         destination_ptr,
         source_length,
         { dif_config_ptr->source_reference_tag_seed,
           dif_config_ptr->source_application_tag_mask,
           dif_config_ptr->source_application_tag_seed },
-        dml::ml::dif_strip_options(static_cast<uint16_t>(flags & 0xFFFF)),
-        dml::ml::dif_additional_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
-        dml::ml::dif_additional_src_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)));
+        dml::detail::ml::dif_strip_options(static_cast<uint16_t>(flags & 0xFFFF)),
+        dml::detail::ml::dif_specific_options(static_cast<uint8_t>(((dif_config_ptr->flags >> 16) & 0xFF) | dif_config_ptr->block_size)),
+        dml::detail::ml::dif_source_options(static_cast<uint8_t>(dif_config_ptr->flags & 0xFF)));
 
-    auto status = dml::to_own_status(dml::ml::validate(descriptor));
+    auto status = dml::to_own_status(dml::detail::ml::validate(operation));
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -710,7 +712,7 @@ extern "C" dml_status_t dml_batch_set_dif_strip_by_index(dml_job_t
         .add_by_index(task_index,
                       [&]
                       {
-                          return descriptor;
+                          return operation;
                       });
 
     return DML_STATUS_OK;
@@ -743,7 +745,7 @@ extern "C" dml_status_t dml_batch_get_status(const dml_job_t *dml_job_ptr, uint3
         return DML_STATUS_BATCH_TASK_INDEX_OVERFLOW;
     }
 
-    *status_ptr = dml::batch(dml_job_ptr->destination_first_ptr, task_count).get_status(task_index);
+    *status_ptr = ::dml::batch(dml_job_ptr->destination_first_ptr, task_count).get_status(task_index);
 
     return DML_STATUS_OK;
 }
diff --git a/sources/c_api/dml_get_library_version.cpp b/sources/c_api/dml_get_library_version.cpp
index 2a917e6..f61e9a2 100644
--- a/sources/c_api/dml_get_library_version.cpp
+++ b/sources/c_api/dml_get_library_version.cpp
@@ -35,7 +35,7 @@
 #define DML_LIBRARY_MINOR_VERSION 1u
 
 /** Minor version of the library*/
-#define DML_LIBRARY_PATCH 5u
+#define DML_LIBRARY_PATCH 6u
 
 /** Supported CPU ISA */
 #define DML_LIBRARY_MINIMAL_CPU_ISA "N/A"
@@ -45,15 +45,9 @@
 
 extern "C" const dml_library_version_t* dml_get_library_version()
 {
-    static const dml_library_version_t library_version = { DML_LIBRARY_NAME,
-                                                           DML_LIBRARY_MINIMAL_CPU_ISA,
-                                                           __DATE__,
-                                                           DML_LIBRARY_VERSION,
-                                                           DML_LIBRARY_MIN_HW_VERSION,
-                                                           DML_GIT_REVISION,
-                                                           DML_LIBRARY_MAJOR_VERSION,
-                                                           DML_LIBRARY_MINOR_VERSION,
-                                                           DML_LIBRARY_PATCH };
+    static const dml_library_version_t library_version = { DML_LIBRARY_NAME,          DML_LIBRARY_MINIMAL_CPU_ISA, __DATE__,
+                                                           DML_LIBRARY_VERSION,       DML_LIBRARY_MIN_HW_VERSION,  DML_GIT_REVISION,
+                                                           DML_LIBRARY_MAJOR_VERSION, DML_LIBRARY_MINOR_VERSION,   DML_LIBRARY_PATCH };
 
     return &library_version;
 }
diff --git a/sources/c_api/include/impl.hpp b/sources/c_api/include/impl.hpp
index ef50f71..dd70938 100644
--- a/sources/c_api/include/impl.hpp
+++ b/sources/c_api/include/impl.hpp
@@ -17,8 +17,8 @@
 #ifndef DML_IMPL_HPP
 #define DML_IMPL_HPP
 
-#include <dml/cpp/middle_layer/device.hpp>
-#include <dml/cpp/middle_layer/validation.hpp>
+#include <dml/detail/ml/execution_path.hpp>
+#include <dml/detail/ml/validation.hpp>
 
 #include "job_view.hpp"
 #include "range_check.hpp"
@@ -29,7 +29,7 @@ namespace dml
 {
     inline dml_status_t wait(job_view job) noexcept
     {
-        ml::wait(job.state().record);
+        detail::ml::wait(job.state().record);
 
         // Extract result
         return write_result(job);
@@ -37,7 +37,7 @@ namespace dml
 
     inline dml_status_t check(job_view job) noexcept
     {
-        if (ml::is_finished(job.state().record))
+        if (detail::ml::is_finished(job.state().record))
         {
             // Extract result
             return write_result(job);
@@ -59,18 +59,18 @@ namespace dml
         write_descriptor(job);
 
         // Middle Layer checks
-        if (auto status = to_own_status(ml::validate(job.state().dsc)); status != DML_STATUS_OK)
+        if (auto status = to_own_status(detail::ml::validate(job.state().dsc)); status != DML_STATUS_OK)
         {
             return status;
         }
 
         if (job.state().path == DML_PATH_HW)
         {
-            return to_own_status(ml::hardware().submit(job.state().dsc, job.state().record));
+            return to_own_status(detail::ml::execution_path::hardware::submit(job.state().dsc, job.state().record));
         }
         else
         {
-            return to_own_status(ml::software().submit(job.state().dsc, job.state().record));
+            return to_own_status(detail::ml::execution_path::software::submit(job.state().dsc, job.state().record));
         }
     }
 
diff --git a/sources/c_api/include/job_view.hpp b/sources/c_api/include/job_view.hpp
index 0fb93bf..43723c6 100644
--- a/sources/c_api/include/job_view.hpp
+++ b/sources/c_api/include/job_view.hpp
@@ -19,16 +19,16 @@
 
 #include <dml/dml.h>
 
-#include "state.hpp"
-
 #include <memory>
 
+#include "state.hpp"
+
 namespace dml
 {
     constexpr auto get_job_size() noexcept
     {
-        constexpr auto alignment = 64u;
-        constexpr auto job_size = sizeof(dml_job_t);
+        constexpr auto alignment  = 64u;
+        constexpr auto job_size   = sizeof(dml_job_t);
         constexpr auto state_size = sizeof(dml::state);
 
         // Enough size for job, state, and to align state to default boundary
@@ -187,4 +187,4 @@ namespace dml
     };
 }  // namespace dml
 
-#endif // DML_JOB_VIEW_HPP
+#endif  // DML_JOB_VIEW_HPP
diff --git a/sources/c_api/include/macros.hpp b/sources/c_api/include/macros.hpp
index 639d043..ef26c19 100644
--- a/sources/c_api/include/macros.hpp
+++ b/sources/c_api/include/macros.hpp
@@ -23,8 +23,8 @@
     if (!(p))         \
     return DML_STATUS_NULL_POINTER_ERROR
 
-#define CHECK_PATH(p)                                                              \
+#define CHECK_PATH(p)                                                     \
     if (DML_PATH_AUTO != (p) && DML_PATH_SW != (p) && DML_PATH_HW != (p)) \
     return DML_STATUS_NULL_POINTER_ERROR
 
-#endif // DML_MACROS_HPP
+#endif  // DML_MACROS_HPP
diff --git a/sources/c_api/include/range_check.hpp b/sources/c_api/include/range_check.hpp
index ee19e26..11a4790 100644
--- a/sources/c_api/include/range_check.hpp
+++ b/sources/c_api/include/range_check.hpp
@@ -85,6 +85,8 @@ namespace dml
 
     inline dml_status_t range_check_crc(const uint8_t* src1, const uint32_t* crc_ptr, const uint32_t src_size) noexcept
     {
+        static_cast<void>(src1);
+        static_cast<void>(src_size);
         if (crc_ptr == nullptr)
         {
             return DML_STATUS_NULL_POINTER_ERROR;
@@ -100,6 +102,9 @@ namespace dml
                                              const uint32_t* crc_ptr,
                                              const uint32_t  src_size) noexcept
     {
+        static_cast<void>(src1);
+        static_cast<void>(dst1);
+        static_cast<void>(src_size);
         if (crc_ptr == nullptr)
         {
             return DML_STATUS_NULL_POINTER_ERROR;
diff --git a/sources/c_api/include/state.hpp b/sources/c_api/include/state.hpp
index 6b4a67c..6ae568d 100644
--- a/sources/c_api/include/state.hpp
+++ b/sources/c_api/include/state.hpp
@@ -18,17 +18,16 @@
 #define DML_STATE_HPP
 
 #include <cstddef>
-#include <dml/cpp/middle_layer/descriptor.hpp>
-#include <dml/cpp/middle_layer/completion_record.hpp>
+#include <dml/detail/common/types.hpp>
 
 namespace dml
 {
     struct state
     {
-        ml::descriptor dsc;
-        ml::completion_record record;
-        dml_path_t path;
+        dml::detail::ml::operation dsc;
+        dml::detail::ml::result    record;
+        dml_path_t                 path;
     };
 }  // namespace dml
 
-#endif // DML_STATE_HPP
+#endif  // DML_STATE_HPP
diff --git a/sources/c_api/include/status.hpp b/sources/c_api/include/status.hpp
index a791432..51e8dc1 100644
--- a/sources/c_api/include/status.hpp
+++ b/sources/c_api/include/status.hpp
@@ -19,123 +19,123 @@
 
 #include <dml/dml.h>
 
-#include <dml/cpp/middle_layer/status.hpp>
+#include <dml/detail/common/status.hpp>
 
 namespace dml
 {
-    inline dml_status_t to_own_status(ml::execution_status status) noexcept
+    inline dml_status_t to_own_status(detail::execution_status status) noexcept
     {
         switch (status)
         {
-            case ml::execution_status::success:
+            case detail::execution_status::success:
                 {
                     return DML_STATUS_OK;
                 }
-            case ml::execution_status::false_predicate_success:
+            case detail::execution_status::false_predicate_success:
                 {
                     return DML_STATUS_FALSE_PREDICATE_OK;
                 }
-            case ml::execution_status::page_fault_during_processing:
+            case detail::execution_status::page_fault_during_processing:
                 {
                     return DML_STATUS_PAGE_FAULT_ERROR;
                 }
-            case ml::execution_status::page_response_error:
+            case detail::execution_status::page_response_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::batch_error:
+            case detail::execution_status::batch_error:
                 {
                     return DML_STATUS_BATCH_ERROR;
                 }
-            case ml::execution_status::batch_page_fault_error:
+            case detail::execution_status::batch_page_fault_error:
                 {
                     return DML_STATUS_PAGE_FAULT_ERROR;
                 }
-            case ml::execution_status::offset_order_error:
+            case detail::execution_status::offset_order_error:
                 {
                     return DML_STATUS_DELTA_ASCENDT_ERROR;
                 }
-            case ml::execution_status::offset_overflow:
+            case detail::execution_status::offset_overflow:
                 {
                     return DML_STATUS_DELTA_OFFSET_ERROR;
                 }
-            case ml::execution_status::dif_control_error:
+            case detail::execution_status::dif_control_error:
                 {
                     return DML_STATUS_DIF_CHECK_ERROR;
                 }
-            case ml::execution_status::operation_error:
+            case detail::execution_status::operation_error:
                 {
                     return DML_STATUS_JOB_OPERATION_ERROR;
                 }
-            case ml::execution_status::flag_error:
+            case detail::execution_status::flag_error:
                 {
                     return DML_STATUS_JOB_FLAGS_ERROR;
                 }
-            case ml::execution_status::non_zero_reserved_field_error:
+            case detail::execution_status::non_zero_reserved_field_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::invalid_transfer_size_error:
+            case detail::execution_status::invalid_transfer_size_error:
                 {
                     return DML_STATUS_JOB_LENGTH_ERROR;
                 }
-            case ml::execution_status::descriptor_count_error:
+            case detail::execution_status::descriptor_count_error:
                 {
                     return DML_STATUS_BATCH_SIZE_ERROR;
                 }
-            case ml::execution_status::delta_size_error:
+            case detail::execution_status::delta_size_error:
                 {
                     return DML_STATUS_DELTA_INPUT_SIZE_ERROR;
                 }
-            case ml::execution_status::buffers_overlap:
+            case detail::execution_status::buffers_overlap:
                 {
                     return DML_STATUS_OVERLAPPING_BUFFER_ERROR;
                 }
-            case ml::execution_status::dualcast_misalign_error:
+            case detail::execution_status::dualcast_misalign_error:
                 {
                     return DML_STATUS_DUALCAST_ALIGN_ERROR;
                 }
-            case ml::execution_status::descriptor_list_align_error:
+            case detail::execution_status::descriptor_list_align_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::invalid_interrupt_handle:
+            case detail::execution_status::invalid_interrupt_handle:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::page_fault_on_translation:
+            case detail::execution_status::page_fault_on_translation:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::completion_record_align_error:
+            case detail::execution_status::completion_record_align_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::misalign_address_error:
+            case detail::execution_status::misalign_address_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::privilege_error:
+            case detail::execution_status::privilege_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::traffic_class_error:
+            case detail::execution_status::traffic_class_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::readback_translation_error:
+            case detail::execution_status::readback_translation_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::operation_readback_timeout:
+            case detail::execution_status::operation_readback_timeout:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::hardware_timeout:
+            case detail::execution_status::hardware_timeout:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
-            case ml::execution_status::address_translation_error:
+            case detail::execution_status::address_translation_error:
                 {
                     return DML_STATUS_INTERNAL_ERROR;
                 }
@@ -146,48 +146,50 @@ namespace dml
         }
     }
 
-    inline dml_status_t to_own_status(ml::submission_status status) noexcept
+    inline dml_status_t to_own_status(detail::submission_status status) noexcept
     {
         switch (status)
         {
-            case ml::submission_status::success:
+            case detail::submission_status::success:
                 return DML_STATUS_OK;
-            case ml::submission_status::failure:
-                return DML_STATUS_INSTANCE_NOT_FOUND;
+            case detail::submission_status::queue_busy:
+                return DML_STATUS_WORK_QUEUE_OVERFLOW_ERROR;
+            case detail::submission_status::failure:
+                return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
             default:
                 return DML_STATUS_INTERNAL_ERROR;
         }
     }
 
-    inline dml_status_t to_own_status(ml::validation_status status) noexcept
+    inline dml_status_t to_own_status(detail::validation_status status) noexcept
     {
         switch (status)
         {
-            case ml::validation_status::success:
+            case detail::validation_status::success:
                 return DML_STATUS_OK;
-            case ml::validation_status::address_is_null:
+            case detail::validation_status::null_address:
                 return DML_STATUS_NULL_POINTER_ERROR;
-            case ml::validation_status::size_is_null:
+            case detail::validation_status::null_size:
                 return DML_STATUS_JOB_LENGTH_ERROR;
-            case ml::validation_status::delta_size_is_wrong:
+            case detail::validation_status::wrong_size:
                 return DML_STATUS_DELTA_INPUT_SIZE_ERROR;
-            case ml::validation_status::buffers_overlap:
+            case detail::validation_status::overlapping:
                 return DML_STATUS_OVERLAPPING_BUFFER_ERROR;
-            case ml::validation_status::address_is_misaligned:
+            case detail::validation_status::misalignment:
                 return DML_STATUS_DELTA_ALIGN_ERROR;
-            case ml::validation_status::delta_input_size_is_wrong:
-                return DML_STATUS_DELTA_ALIGN_ERROR;
-            case ml::validation_status::delta_input_size_overflow:
+            case detail::validation_status::large_size:
                 return DML_STATUS_DELTA_INPUT_SIZE_ERROR;
-            case ml::validation_status::delta_record_size_is_wrong:
+            case detail::validation_status::wrong_delta_size:
                 return DML_STATUS_DELTA_RECORD_SIZE_ERROR;
-            case ml::validation_status::dualcast_address_is_wrong:
+            case detail::validation_status::wrong_dualcast_address:
                 return DML_STATUS_DUALCAST_ALIGN_ERROR;
-            case ml::validation_status::batch_size_is_wrong:
+            case detail::validation_status::wrong_batch_size:
                 return DML_STATUS_BATCH_SIZE_ERROR;
-            case ml::validation_status::dif_size_is_wrong:
+            case detail::validation_status::wrong_dif_size:
                 return DML_STATUS_JOB_LENGTH_ERROR;
-            case ml::validation_status::unsupported_operation:
+            case detail::validation_status::dif_strip_adjacent:
+                return DML_STATUS_DIF_STRIP_ADJACENT_ERROR;
+            case detail::validation_status::unsupported_operation:
                 return DML_STATUS_JOB_OPERATION_ERROR;
             default:
                 return DML_STATUS_INTERNAL_ERROR;
diff --git a/sources/c_api/include/utils.hpp b/sources/c_api/include/utils.hpp
index b942afd..e0fbbb8 100644
--- a/sources/c_api/include/utils.hpp
+++ b/sources/c_api/include/utils.hpp
@@ -33,4 +33,4 @@ namespace dml
     }
 }  // namespace dml
 
-#endif // DML_UTILS_HPP
+#endif  // DML_UTILS_HPP
diff --git a/sources/c_api/include/write_descriptor.hpp b/sources/c_api/include/write_descriptor.hpp
index a634531..b4ce4f7 100644
--- a/sources/c_api/include/write_descriptor.hpp
+++ b/sources/c_api/include/write_descriptor.hpp
@@ -17,7 +17,9 @@
 #ifndef DML_MAKE_DESCRIPTOR_HPP
 #define DML_MAKE_DESCRIPTOR_HPP
 
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
+#include <dml/detail/ml/operation.hpp>
+
+#include "utils.hpp"
 
 namespace dml
 {
@@ -26,157 +28,157 @@ namespace dml
         switch (job.operation())
         {
             case DML_OP_NOP:
-                job.state().dsc = ml::make_nop_descriptor(ml::nop_options(job.flags()));
+                job.state().dsc = detail::ml::make_nop_operation(detail::ml::nop_options(job.flags()));
                 break;
             case DML_OP_BATCH:
-                job.state().dsc =
-                    ml::make_batch_descriptor(reinterpret_cast<const ml::descriptor*>(job.destination_first()),
-                                              job.destination_length() / (sizeof(dml::ml::descriptor) + sizeof(dml::ml::completion_record)),
-                                              ml::batch_options(job.flags()));
+                job.state().dsc = detail::ml::make_batch_operation(
+                    reinterpret_cast<const dml::detail::ml::operation*>(dml::align(job.destination_first())),
+                    job.destination_length() / (sizeof(dml::detail::ml::operation) + sizeof(dml::detail::ml::result)),
+                    detail::ml::batch_options(job.flags()));
                 break;
             case DML_OP_DRAIN:
-                job.state().dsc = ml::make_drain_descriptor(reinterpret_cast<ml::address_t>(job.destination_first()),
-                                                            reinterpret_cast<ml::address_t>(job.destination_second()),
-                                                            ml::drain_options(job.flags()),
-                                                            ml::drain_additional_options(job.specific_flags()));
+                job.state().dsc = detail::ml::make_drain_operation(reinterpret_cast<detail::address_t>(job.destination_first()),
+                                                                   reinterpret_cast<detail::address_t>(job.destination_second()),
+                                                                   detail::ml::drain_options(job.flags()),
+                                                                   detail::ml::drain_specific_options(job.specific_flags()));
                 break;
             case DML_OP_MEM_MOVE:
-                job.state().dsc = ml::make_mem_move_descriptor(job.source_first(),
-                                                               job.destination_first(),
-                                                               job.source_length(),
-                                                               ml::mem_move_options(job.flags()));
+                job.state().dsc = detail::ml::make_mem_move_operation(job.source_first(),
+                                                                      job.destination_first(),
+                                                                      job.source_length(),
+                                                                      detail::ml::mem_move_options(job.flags()));
                 break;
             case DML_OP_FILL:
-                job.state().dsc = ml::make_fill_descriptor(job.pattern(),
-                                                           job.destination_first(),
-                                                           job.destination_length(),
-                                                           ml::fill_options(job.flags()));
+                job.state().dsc = detail::ml::make_fill_operation(job.pattern(),
+                                                                  job.destination_first(),
+                                                                  job.destination_length(),
+                                                                  detail::ml::fill_options(job.flags()));
                 break;
             case DML_OP_DUALCAST:
-                job.state().dsc = ml::make_dualcast_descriptor(job.source_first(),
-                                                               job.destination_first(),
-                                                               job.destination_second(),
-                                                               job.source_length(),
-                                                               ml::dualcast_options(job.flags()),
-                                                               ml::dualcast_additional_options(job.specific_flags()));
+                job.state().dsc = detail::ml::make_dualcast_operation(job.source_first(),
+                                                                      job.destination_first(),
+                                                                      job.destination_second(),
+                                                                      job.source_length(),
+                                                                      detail::ml::dualcast_options(job.flags()),
+                                                                      detail::ml::dualcast_specific_options(job.specific_flags()));
                 break;
             case DML_OP_COMPARE:
-                job.state().dsc = ml::make_compare_descriptor(job.source_first(),
-                                                              job.source_second(),
-                                                              job.source_length(),
-                                                              ml::compare_options(job.flags()),
-                                                              ml::compare_expected_result_options(job.expected_result()));
+                job.state().dsc = detail::ml::make_compare_operation(job.source_first(),
+                                                                     job.source_second(),
+                                                                     job.source_length(),
+                                                                     detail::ml::compare_options(job.flags()),
+                                                                     detail::compare_result(job.expected_result()));
                 break;
             case DML_OP_COMPARE_PATTERN:
-                job.state().dsc = ml::make_compare_pattern_descriptor(job.pattern(),
-                                                                      job.source_first(),
-                                                                      job.source_length(),
-                                                                      ml::compare_pattern_options(job.flags()),
-                                                                      ml::compare_expected_result_options(job.expected_result()));
+                job.state().dsc = detail::ml::make_compare_pattern_operation(job.pattern(),
+                                                                             job.source_first(),
+                                                                             job.source_length(),
+                                                                             detail::ml::compare_pattern_options(job.flags()),
+                                                                             detail::compare_result(job.expected_result()));
                 break;
             case DML_OP_CRC:
                 {
-                    uint32_t crc_seed = 0;
-                    auto crc_options = ml::crc_additional_options(job.specific_flags());
+                    uint32_t crc_seed    = 0;
+                    auto     crc_options = detail::ml::crc_specific_options(job.specific_flags());
 
-                    if (crc_options.contains(ml::crc_additional_option::read_crc_seed))
+                    if (intersects(job.specific_flags(), detail::crc_specific_flag::read_crc_seed))
                     {
                         crc_seed = *job.crc_ptr();
                     }
 
                     // Erase read_crc_seed flag from enabled options
-                    uint8_t mask = 0xFF ^ static_cast<uint8_t>(ml::crc_additional_option::read_crc_seed);
-                    crc_options = ml::crc_additional_options(static_cast<uint8_t>(crc_options) & mask);
+                    uint8_t mask = 0xFF ^ to_underlying(detail::crc_specific_flag::read_crc_seed);
+                    crc_options  = detail::ml::crc_specific_options(static_cast<uint8_t>(crc_options) & mask);
 
-                    job.state().dsc = ml::make_crc_descriptor(job.source_first(),
-                                                              job.source_length(),
-                                                              crc_seed,
-                                                              ml::crc_options(job.flags()),
-                                                              crc_options);
+                    job.state().dsc = detail::ml::make_crc_operation(job.source_first(),
+                                                                     job.source_length(),
+                                                                     crc_seed,
+                                                                     detail::ml::crc_options(job.flags()),
+                                                                     crc_options);
                     break;
                 }
             case DML_OP_COPY_CRC:
                 {
-                    uint32_t crc_seed = 0;
-                    auto crc_options = ml::copy_crc_additional_options(job.specific_flags());
+                    uint32_t crc_seed    = 0;
+                    auto     crc_options = detail::ml::copy_crc_specific_options(job.specific_flags());
 
-                    if (crc_options.contains(ml::crc_additional_option::read_crc_seed))
+                    if (intersects(job.specific_flags(), detail::crc_specific_flag::read_crc_seed))
                     {
                         crc_seed = *job.crc_ptr();
                     }
 
                     // Erase read_crc_seed flag from enabled options
-                    uint8_t mask = 0xFF ^ static_cast<uint8_t>(ml::crc_additional_option::read_crc_seed);
-                    crc_options = ml::copy_crc_additional_options(static_cast<uint8_t>(crc_options) & mask);
+                    uint8_t mask = 0xFF ^ to_underlying(detail::crc_specific_flag::read_crc_seed);
+                    crc_options  = detail::ml::copy_crc_specific_options(static_cast<uint8_t>(crc_options) & mask);
 
-                    job.state().dsc = ml::make_copy_crc_descriptor(job.source_first(),
-                                                                   job.destination_first(),
-                                                                   job.source_length(),
-                                                                   crc_seed,
-                                                                   ml::copy_crc_options(job.flags()),
-                                                                   crc_options);
+                    job.state().dsc = detail::ml::make_copy_crc_operation(job.source_first(),
+                                                                          job.destination_first(),
+                                                                          job.source_length(),
+                                                                          crc_seed,
+                                                                          detail::ml::copy_crc_options(job.flags()),
+                                                                          crc_options);
                 }
                 break;
             case DML_OP_DELTA_CREATE:
-                job.state().dsc = ml::make_create_delta_descriptor(job.source_first(),
-                                                                   job.source_second(),
-                                                                   job.source_length(),
-                                                                   job.destination_first(),
-                                                                   job.destination_length(),
-                                                                   ml::create_delta_options(job.flags()),
-                                                                   ml::delta_expected_result_options(job.expected_result()));
+                job.state().dsc = detail::ml::make_create_delta_operation(job.source_first(),
+                                                                          job.source_second(),
+                                                                          job.source_length(),
+                                                                          job.destination_first(),
+                                                                          job.destination_length(),
+                                                                          detail::ml::create_delta_options(job.flags()),
+                                                                          detail::create_delta_result(job.expected_result()));
                 break;
             case DML_OP_DELTA_APPLY:
-                job.state().dsc = ml::make_apply_delta_descriptor(job.source_first(),
-                                                                  job.source_length(),
-                                                                  job.destination_first(),
-                                                                  job.destination_length(),
-                                                                  ml::apply_delta_options(job.flags()));
+                job.state().dsc = detail::ml::make_apply_delta_operation(job.source_first(),
+                                                                         job.source_length(),
+                                                                         job.destination_first(),
+                                                                         job.destination_length(),
+                                                                         detail::ml::apply_delta_options(job.flags()));
                 break;
             case DML_OP_DIF_CHECK:
-                job.state().dsc = ml::make_dif_check_descriptor(job.source_first(),
-                                                                job.source_length(),
-                                                                { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
-                                                                ml::dif_check_options(job.flags()),
-                                                                ml::dif_additional_options(job.dif_flags()),
-                                                                ml::dif_additional_src_options(job.dif_src_flags()));
+                job.state().dsc = detail::ml::make_dif_check_operation(job.source_first(),
+                                                                       job.source_length(),
+                                                                       { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
+                                                                       detail::ml::dif_check_options(job.flags()),
+                                                                       detail::ml::dif_specific_options(job.dif_flags()),
+                                                                       detail::ml::dif_source_options(job.dif_src_flags()));
                 break;
             case DML_OP_DIF_INSERT:
-                job.state().dsc = ml::make_dif_insert_descriptor(job.source_first(),
-                                                                 job.destination_first(),
-                                                                 job.source_length(),
-                                                                 { job.dst_ref_tag(), job.dst_app_tag_mask(), job.dst_app_tag() },
-                                                                 ml::dif_insert_options(job.flags()),
-                                                                 ml::dif_additional_options(job.dif_flags()),
-                                                                 ml::dif_additional_dst_options(job.dif_dst_flags()));
+                job.state().dsc = detail::ml::make_dif_insert_operation(job.source_first(),
+                                                                        job.destination_first(),
+                                                                        job.source_length(),
+                                                                        { job.dst_ref_tag(), job.dst_app_tag_mask(), job.dst_app_tag() },
+                                                                        detail::ml::dif_insert_options(job.flags()),
+                                                                        detail::ml::dif_specific_options(job.dif_flags()),
+                                                                        detail::ml::dif_destination_options(job.dif_dst_flags()));
                 break;
             case DML_OP_DIF_STRIP:
-                job.state().dsc = ml::make_dif_strip_descriptor(job.source_first(),
-                                                                job.destination_first(),
-                                                                job.source_length(),
-                                                                { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
-                                                                ml::dif_strip_options(job.flags()),
-                                                                ml::dif_additional_options(job.dif_flags()),
-                                                                ml::dif_additional_src_options(job.dif_src_flags()));
+                job.state().dsc = detail::ml::make_dif_strip_operation(job.source_first(),
+                                                                       job.destination_first(),
+                                                                       job.source_length(),
+                                                                       { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
+                                                                       detail::ml::dif_strip_options(job.flags()),
+                                                                       detail::ml::dif_specific_options(job.dif_flags()),
+                                                                       detail::ml::dif_source_options(job.dif_src_flags()));
                 break;
             case DML_OP_DIF_UPDATE:
-                job.state().dsc = ml::make_dif_update_descriptor(job.source_first(),
-                                                                 job.destination_first(),
-                                                                 job.source_length(),
-                                                                 { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
-                                                                 { job.dst_ref_tag(), job.dst_app_tag_mask(), job.dst_app_tag() },
-                                                                 ml::dif_update_options(job.flags()),
-                                                                 ml::dif_additional_options(job.dif_flags()),
-                                                                 ml::dif_additional_src_options(job.dif_src_flags()),
-                                                                 ml::dif_additional_dst_options(job.dif_dst_flags()));
+                job.state().dsc = detail::ml::make_dif_update_operation(job.source_first(),
+                                                                        job.destination_first(),
+                                                                        job.source_length(),
+                                                                        { job.src_ref_tag(), job.src_app_tag_mask(), job.src_app_tag() },
+                                                                        { job.dst_ref_tag(), job.dst_app_tag_mask(), job.dst_app_tag() },
+                                                                        detail::ml::dif_update_options(job.flags()),
+                                                                        detail::ml::dif_specific_options(job.dif_flags()),
+                                                                        detail::ml::dif_source_options(job.dif_src_flags()),
+                                                                        detail::ml::dif_destination_options(job.dif_dst_flags()));
                 break;
             case DML_OP_CACHE_FLUSH:
-                job.state().dsc = ml::make_cache_flush_descriptor(job.destination_first(),
-                                                                  job.destination_length(),
-                                                                  ml::cache_flush_options(job.flags()));
+                job.state().dsc = detail::ml::make_cache_flush_operation(job.destination_first(),
+                                                                         job.destination_length(),
+                                                                         detail::ml::cache_flush_options(job.flags()));
                 break;
             default:
-                job.state().dsc = ml::descriptor{};
+                job.state().dsc = dml::detail::ml::operation{};
         }
     }
 }  // namespace dml
diff --git a/sources/c_api/include/write_result.hpp b/sources/c_api/include/write_result.hpp
index c389174..9c63faf 100644
--- a/sources/c_api/include/write_result.hpp
+++ b/sources/c_api/include/write_result.hpp
@@ -17,8 +17,6 @@
 #ifndef DML_WRITE_RESULT_HPP
 #define DML_WRITE_RESULT_HPP
 
-#include <dml/cpp/middle_layer/make_descriptor.hpp>
-
 #include "status.hpp"
 
 namespace dml
@@ -98,130 +96,100 @@ namespace dml
 
     inline dml_status_t write_result_nop(job_view job) noexcept
     {
-        auto result_view = ml::views::nop_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_batch(job_view job) noexcept
     {
-        auto result_view = ml::views::batch_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_drain(job_view job) noexcept
     {
-        auto result_view = ml::views::drain_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_mem_move(job_view job) noexcept
     {
-        auto result_view = ml::views::mem_move_result(job.state().record);
+        job.set_result(dml::detail::ml::get_result(job.state().record));  // publish the record's result field on the job
 
-        job.set_result(result_view.result());
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));  // translate the record status to dml_status_t
     }
 
     inline dml_status_t write_result_fill(job_view job) noexcept
     {
-        auto result_view = ml::views::fill_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_dualcast(job_view job) noexcept
     {
-        auto result_view = ml::views::fill_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_crc(job_view job) noexcept
     {
-        auto result_view = ml::views::crc_result(job.state().record);
-
-        job.set_crc(result_view.crc_value());
+        job.set_crc(dml::detail::ml::get_crc_value(job.state().record));
 
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_compare(job_view job) noexcept
     {
-        auto result_view = ml::views::compare_result(job.state().record);
+        job.set_offset(dml::detail::ml::get_bytes_completed(job.state().record));
+        job.set_result(dml::detail::ml::get_result(job.state().record));
 
-        job.set_offset(result_view.bytes_completed());
-        job.set_result(result_view.result());
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_create_delta(job_view job) noexcept
     {
-        auto result_view = ml::views::create_delta_result(job.state().record);
-
         //job.set_offset();
-        job.set_destination_length(result_view.delta_record_size());
-        job.set_result(result_view.result());
+        job.set_destination_length(dml::detail::ml::get_delta_record_size(job.state().record));
+        job.set_result(dml::detail::ml::get_result(job.state().record));
         job.set_offset(*reinterpret_cast<uint16_t *>(job.destination_first()));
 
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_apply_delta(job_view job) noexcept
     {
-        auto result_view = ml::views::apply_delta_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_dif_check(job_view job) noexcept
     {
-        auto result_view = ml::views::dif_check_result(job.state().record);
-
-        job.set_offset(result_view.bytes_completed());
-        job.set_result(result_view.dif_status());
+        job.set_offset(dml::detail::ml::get_bytes_completed(job.state().record));
+        job.set_result(dml::detail::ml::get_result(job.state().record));
 
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_dif_insert(job_view job) noexcept
     {
-        auto result_view = ml::views::dif_insert_result(job.state().record);
+        job.set_offset(dml::detail::ml::get_bytes_completed(job.state().record));
 
-        job.set_offset(result_view.bytes_completed());
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_dif_strip(job_view job) noexcept
     {
-        auto result_view = ml::views::dif_strip_result(job.state().record);
-
-        job.set_offset(result_view.bytes_completed());
-        job.set_result(result_view.dif_status());
+        job.set_offset(dml::detail::ml::get_bytes_completed(job.state().record));
+        job.set_result(dml::detail::ml::get_result(job.state().record));
 
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_dif_update(job_view job) noexcept
     {
-        auto result_view = ml::views::dif_update_result(job.state().record);
+        job.set_offset(dml::detail::ml::get_bytes_completed(job.state().record));
+        job.set_result(dml::detail::ml::get_result(job.state().record));
 
-        job.set_offset(result_view.bytes_completed());
-        job.set_result(result_view.dif_status());
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 
     inline dml_status_t write_result_cache_flush(job_view job) noexcept
     {
-        auto result_view = ml::views::cache_flush_result(job.state().record);
-
-        return to_own_status(static_cast<ml::execution_status>(result_view.status()));
+        return to_own_status(dml::detail::ml::get_status(job.state().record));
     }
 }  // namespace dml
 
diff --git a/sources/core/CMakeLists.txt b/sources/core/CMakeLists.txt
new file mode 100644
index 0000000..ac16653
--- /dev/null
+++ b/sources/core/CMakeLists.txt
@@ -0,0 +1,84 @@
+#
+# Copyright 2020-2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# dml_core gathers the operation implementations and device back ends.
+add_library(dml_core OBJECT
+        # Implementation files
+        src/software_device.cpp
+        src/hardware_device.cpp
+        src/nop.cpp
+        src/batch.cpp
+        src/drain.cpp
+        src/mem_move.cpp
+        src/fill.cpp
+        src/compare.cpp
+        src/compare_pattern.cpp
+        src/create_delta.cpp
+        src/apply_delta.cpp
+        src/dualcast.cpp
+        src/crc.cpp
+        src/copy_crc.cpp
+        src/dif_check.cpp
+        src/dif_insert.cpp
+        src/dif_strip.cpp
+        src/dif_update.cpp
+        src/cache_flush.cpp
+        src/kernels.hpp
+        src/validation.cpp
+
+        # Headers under the PUBLIC include directory
+        include/core/operations.hpp
+        include/core/descriptor_views.hpp
+        include/core/completion_record_views.hpp
+        include/core/validation.hpp
+        include/core/device.hpp
+        include/core/types.hpp
+        )
+
+target_link_libraries(dml_core
+        PRIVATE dml_hw_dispatcher dml_sw_dispatcher dml_dif_impl
+        )
+target_include_directories(dml_core
+        PUBLIC include ../../include
+        )
+
+# Forward the helper targets' objects and interface sources to consumers.
+target_sources(dml_core
+        PUBLIC
+        $<TARGET_OBJECTS:dml_sw_dispatcher>
+        $<TARGET_PROPERTY:dml_sw_dispatcher,INTERFACE_SOURCES>
+        $<TARGET_OBJECTS:dml_hw_dispatcher>
+        $<TARGET_PROPERTY:dml_hw_dispatcher,INTERFACE_SOURCES>
+        $<TARGET_OBJECTS:dml_dif_impl>
+        $<TARGET_PROPERTY:dml_dif_impl,INTERFACE_SOURCES>
+        )
+
+target_compile_features(dml_core PUBLIC cxx_std_17)
+target_compile_options(dml_core
+        PRIVATE ${DML_QUALITY_OPTIONS} ${DML_CPP_PRIVATE_OPTIONS}
+        )
+target_compile_definitions(dml_core
+        PRIVATE $<TARGET_PROPERTY:dml_hw_dispatcher,INTERFACE_COMPILE_DEFINITIONS>
+        )
+
+# Expose the DML_HW build switch to the core sources as a macro.
+if (DML_HW)
+    target_compile_definitions(dml_core PRIVATE DML_HW)
+endif()
+
+# NOTE(review): presumably these define the helper targets used above.
+add_subdirectory(src/sw_dispatcher)
+add_subdirectory(src/hw_dispatcher)
+add_subdirectory(src/dif_impl)
diff --git a/include/dml/cpp/middle_layer/result_views.hpp b/sources/core/include/core/completion_record_views.hpp
similarity index 64%
rename from include/dml/cpp/middle_layer/result_views.hpp
rename to sources/core/include/core/completion_record_views.hpp
index 91da013..d1237e9 100644
--- a/include/dml/cpp/middle_layer/result_views.hpp
+++ b/sources/core/include/core/completion_record_views.hpp
@@ -1,33 +1,27 @@
 /*
-* Copyright 2021 Intel Corporation.
-*
-* This software and the related documents are Intel copyrighted materials,
-* and your use of them is governed by the express license under which they
-* were provided to you ("License"). Unless the License provides otherwise,
-* you may not use, modify, copy, publish, distribute, disclose or transmit
-* this software or the related documents without Intel's prior written
-* permission.
-*
-* This software and the related documents are provided as is, with no
-* express or implied warranties, other than those that are expressly
-* stated in the License.
-*
-*/
-
-/**
- * @date 05/19/2021
- * @brief Contains definitions of @ref dml::ml::completion_record type
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
  */
 
-#ifndef DML_ML_RESULT_VIEWS_HPP
-#define DML_ML_RESULT_VIEWS_HPP
+#ifndef DML_CORE_COMPLETION_RECORD_VIEWS_HPP
+#define DML_CORE_COMPLETION_RECORD_VIEWS_HPP
 
-#include "completion_record.hpp"
-#include "types.hpp"
+#include <core/types.hpp>
 
-namespace dml::ml::views
+namespace dml::core
 {
-    class any_result
+    class any_completion_record
     {
     private:
         struct offsets
@@ -39,7 +33,7 @@ namespace dml::ml::views
         };
 
     public:
-        explicit any_result(completion_record& record): record_(record)
+        explicit any_completion_record(completion_record& record): record_(record)
         {
         }
 
@@ -67,21 +61,21 @@ namespace dml::ml::views
         completion_record& record_;
     };
 
-    class nop_result: public any_result
+    class nop_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
-        using any_result::bytes_completed;
-        using any_result::fault_address;
+        using any_completion_record::result;
+        using any_completion_record::bytes_completed;
+        using any_completion_record::fault_address;
     };
 
-    class batch_result: public any_result
+    class batch_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] transfer_size_t& descriptors_completed() const noexcept
         {
@@ -89,48 +83,48 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class drain_result: public any_result
+    class drain_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
-        using any_result::bytes_completed;
-        using any_result::fault_address;
+        using any_completion_record::result;
+        using any_completion_record::bytes_completed;
+        using any_completion_record::fault_address;
     };
 
-    class mem_move_result: public any_result
+    class mem_move_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
     };
 
-    class fill_result: public any_result
+    class fill_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class compare_result: public any_result
+    class compare_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
     };
 
-    class compare_pattern_result: public any_result
+    class compare_pattern_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
     };
 
-    class create_delta_result: public any_result
+    class create_delta_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -139,7 +133,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] transfer_size_t& delta_record_size() const noexcept
         {
@@ -147,25 +141,25 @@ namespace dml::ml::views
         }
     };
 
-    class apply_delta_result: public any_result
+    class apply_delta_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class dualcast_result: public any_result
+    class dualcast_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class crc_result: public any_result
+    class crc_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -174,7 +168,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] crc_value_t& crc_value() const noexcept
         {
@@ -182,10 +176,10 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class dif_check_result: public any_result
+    class dif_check_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -196,7 +190,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] dif_status_t& dif_status() const noexcept
         {
@@ -219,10 +213,10 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class dif_insert_result: public any_result
+    class dif_insert_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -233,7 +227,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] dif_ref_tag_t& destination_ref_tag() const noexcept
         {
@@ -251,10 +245,10 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class dif_strip_result: public any_result
+    class dif_strip_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -265,7 +259,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] dif_status_t& dif_status() const noexcept
         {
@@ -288,10 +282,10 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class dif_update_result: public any_result
+    class dif_update_completion_record: public any_completion_record
     {
     private:
         struct offsets
@@ -305,7 +299,7 @@ namespace dml::ml::views
         };
 
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
         [[nodiscard]] dif_status_t& dif_status() const noexcept
         {
@@ -343,17 +337,17 @@ namespace dml::ml::views
         }
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
 
-    class cache_flush_result: public any_result
+    class cache_flush_completion_record: public any_completion_record
     {
     public:
-        using any_result::any_result;
+        using any_completion_record::any_completion_record;
 
     private:
-        using any_result::result;
+        using any_completion_record::result;
     };
-}  // namespace dml::ml::views
+}  // namespace dml::core
 
-#endif  //DML_ML_RESULT_VIEWS_HPP
+#endif  //DML_CORE_COMPLETION_RECORD_VIEWS_HPP
diff --git a/include/dml/cpp/middle_layer/descriptor_views.hpp b/sources/core/include/core/descriptor_views.hpp
similarity index 98%
rename from include/dml/cpp/middle_layer/descriptor_views.hpp
rename to sources/core/include/core/descriptor_views.hpp
index 3047e5a..3d78cc6 100644
--- a/include/dml/cpp/middle_layer/descriptor_views.hpp
+++ b/sources/core/include/core/descriptor_views.hpp
@@ -14,13 +14,12 @@
  *
  */
 
-#ifndef DML_ML_DESCRIPTOR_VIEWS_HPP
-#define DML_ML_DESCRIPTOR_VIEWS_HPP
+#ifndef DML_CORE_DESCRIPTOR_VIEW_HPP
+#define DML_CORE_DESCRIPTOR_VIEW_HPP
 
-#include "descriptor.hpp"
-#include "types.hpp"
+#include <core/types.hpp>
 
-namespace dml::ml::views
+namespace dml::core
 {
     class any_descriptor
     {
@@ -554,6 +553,6 @@ namespace dml::ml::views
     private:
         using any_descriptor::source_address;
     };
-}  // namespace dml::ml::views
+}  // namespace dml::core
 
-#endif  //DML_ML_DESCRIPTOR_VIEWS_HPP
+#endif  //DML_CORE_DESCRIPTOR_VIEW_HPP
diff --git a/sources/core/include/core/device.hpp b/sources/core/include/core/device.hpp
new file mode 100644
index 0000000..21593f6
--- /dev/null
+++ b/sources/core/include/core/device.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_EXECUTION_DEVICE_HPP
+#define DML_CORE_EXECUTION_DEVICE_HPP
+
+#include <core/types.hpp>
+#include <dml/detail/common/status.hpp>
+
+namespace dml::core
+{
+    // Both device types expose the same submit() shape: a descriptor plus the completion record passed with it.
+    class software_device
+    {
+    public:
+        [[nodiscard]] dml::detail::submission_status submit(descriptor& dsc, completion_record& record) noexcept;
+    };
+    class hardware_device
+    {
+    public:
+        [[nodiscard]] dml::detail::submission_status submit(descriptor& dsc, completion_record& record) noexcept;
+    };
+}  // namespace dml::core
+
+#endif  //DML_CORE_EXECUTION_DEVICE_HPP
diff --git a/sources/core/include/core/operations.hpp b/sources/core/include/core/operations.hpp
new file mode 100644
index 0000000..fbfd6ac
--- /dev/null
+++ b/sources/core/include/core/operations.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_OPERATIONS_HPP
+#define DML_CORE_OPERATIONS_HPP
+
+#include <dml/detail/common/types.hpp>
+#include <type_traits>
+
+namespace dml::core
+{
+    enum class operation : dml::detail::operation_t  // spelled via dml::detail: no reliance on <core/types.hpp>
+    {
+        nop             = 0x00,
+        batch           = 0x01,
+        drain           = 0x02,
+        memory_move     = 0x03,
+        fill            = 0x04,
+        compare         = 0x05,
+        compare_pattern = 0x06,
+        create_delta    = 0x07,
+        apply_delta     = 0x08,
+        dualcast        = 0x09,
+        crc             = 0x10,
+        copy_crc        = 0x11,
+        dif_check       = 0x12,
+        dif_insert      = 0x13,
+        dif_strip       = 0x14,
+        dif_update      = 0x15,
+        cache_flush     = 0x20
+    };
+}  // namespace dml::core
+
+#endif  //DML_CORE_OPERATIONS_HPP
diff --git a/sources/core/include/core/types.hpp b/sources/core/include/core/types.hpp
new file mode 100644
index 0000000..9ff00d2
--- /dev/null
+++ b/sources/core/include/core/types.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_TYPES_HPP
+#define DML_CORE_TYPES_HPP
+
+#include <dml/detail/common/types.hpp>
+
+namespace dml::core
+{
+    using byte_t = dml::detail::byte_t;
+
+    // Storage for one descriptor: 64 byte_t elements, zero-initialized by default, aligned to 64.
+    struct alignas(64u) descriptor
+    {
+        byte_t bytes[64u]{};
+    };
+
+    // Storage for one completion record: 32 byte_t elements, zero-initialized by default, aligned to 32.
+    struct alignas(32u) completion_record
+    {
+        byte_t bytes[32u]{};
+    };
+
+    using status_t = dml::detail::status_t;
+
+    using transfer_size_t = dml::detail::transfer_size_t;
+
+    using operation_t = dml::detail::operation_t;
+
+    using flags_t = dml::detail::flags_t;
+
+    using operation_specific_flags_t = dml::detail::operation_specific_flags_t;
+
+    using completion_interrupt_handle_t = std::uint16_t;
+
+    using address_t = std::uint64_t;
+
+    using pattern_t = dml::detail::pattern_t;
+
+    using result_t = dml::detail::result_t;
+
+    using crc_value_t = dml::detail::crc_value_t;
+
+    using dif_flags_t = dml::detail::dif_flags_t;
+
+    using dif_status_t = dml::detail::dif_status_t;
+
+    using dif_ref_tag_t = dml::detail::dif_ref_tag_t;
+
+    using dif_app_tag_t = dml::detail::dif_app_tag_t;
+}  // namespace dml::core
+
+#endif  //DML_CORE_TYPES_HPP
diff --git a/sources/core/include/core/validation.hpp b/sources/core/include/core/validation.hpp
new file mode 100644
index 0000000..8705848
--- /dev/null
+++ b/sources/core/include/core/validation.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_VALIDATION_HPP
+#define DML_CORE_VALIDATION_HPP
+
+#include <core/types.hpp>
+#include <dml/detail/common/status.hpp>
+
+namespace dml::core
+{
+    [[nodiscard]] dml::detail::validation_status validate(descriptor &dsc) noexcept;  // declaration only; the definition is not in this header
+}  // namespace dml::core
+
+#endif  //DML_CORE_VALIDATION_HPP
diff --git a/sources/core/src/apply_delta.cpp b/sources/core/src/apply_delta.cpp
new file mode 100644
index 0000000..e21d06f
--- /dev/null
+++ b/sources/core/src/apply_delta.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Hands the descriptor's delta record, destination and delta size to dispatch::apply_delta, then reports success.
+    void apply_delta(apply_delta_descriptor dsc, apply_delta_completion_record record) noexcept
+    {
+        const auto target       = reinterpret_cast<byte_t *>(dsc.destination_address());
+        const auto delta_begin  = reinterpret_cast<byte_t *>(dsc.delta_record_address());
+        const auto delta_length = dsc.delta_record_size();
+        dispatch::apply_delta(delta_begin, target, delta_length);
+
+        _mm_mfence();  // fence precedes the status store
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/batch.cpp b/sources/core/src/batch.cpp
new file mode 100644
index 0000000..e48cf81
--- /dev/null
+++ b/sources/core/src/batch.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/completion_record_views.hpp>
+#include <core/descriptor_views.hpp>
+#include <core/operations.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Executes the descriptors of a batch in order on the software kernels. Processing stops at the first
+    // descriptor whose operation has no case below or whose completion record does not report success.
+    void batch(batch_descriptor dsc, batch_completion_record record) noexcept
+    {
+        const auto operations        = reinterpret_cast<descriptor *>(dsc.descriptor_list_address());
+        const auto descriptors_count = dsc.descriptors_count();
+
+        auto status = dml::detail::execution_status::processing;
+        auto index  = size_t(0);
+
+        while (index < descriptors_count && status == dml::detail::execution_status::processing)
+        {
+            auto &current_dsc    = operations[index];
+            auto &current_record = *reinterpret_cast<completion_record *>(any_descriptor(current_dsc).completion_record_address());
+
+            // Cleared when no kernel ran, so a status already present in the record is never mistaken for a result.
+            auto dispatched = true;
+            switch (operation(any_descriptor(current_dsc).operation()))
+            {
+                case operation::nop:
+                    kernels::nop(nop_descriptor(current_dsc), nop_completion_record(current_record));
+                    break;
+                case operation::memory_move:
+                    kernels::mem_move(mem_move_descriptor(current_dsc), mem_move_completion_record(current_record));
+                    break;
+                case operation::fill:
+                    kernels::fill(fill_descriptor(current_dsc), fill_completion_record(current_record));
+                    break;
+                case operation::compare:
+                    kernels::compare(compare_descriptor(current_dsc), compare_completion_record(current_record));
+                    break;
+                case operation::compare_pattern:
+                    kernels::compare_pattern(compare_pattern_descriptor(current_dsc), compare_pattern_completion_record(current_record));
+                    break;
+                case operation::create_delta:
+                    kernels::create_delta(create_delta_descriptor(current_dsc), create_delta_completion_record(current_record));
+                    break;
+                case operation::apply_delta:
+                    kernels::apply_delta(apply_delta_descriptor(current_dsc), apply_delta_completion_record(current_record));
+                    break;
+                case operation::dualcast:
+                    kernels::dualcast(dualcast_descriptor(current_dsc), dualcast_completion_record(current_record));
+                    break;
+                case operation::crc:
+                    kernels::crc(crc_descriptor(current_dsc), crc_completion_record(current_record));
+                    break;
+                case operation::copy_crc:
+                    kernels::copy_crc(copy_crc_descriptor(current_dsc), crc_completion_record(current_record));
+                    break;
+                case operation::dif_check:
+                    kernels::dif_check(dif_check_descriptor(current_dsc), dif_check_completion_record(current_record));
+                    break;
+                case operation::dif_insert:
+                    kernels::dif_insert(dif_insert_descriptor(current_dsc), dif_insert_completion_record(current_record));
+                    break;
+                case operation::dif_strip:
+                    kernels::dif_strip(dif_strip_descriptor(current_dsc), dif_strip_completion_record(current_record));
+                    break;
+                case operation::dif_update:
+                    kernels::dif_update(dif_update_descriptor(current_dsc), dif_update_completion_record(current_record));
+                    break;
+                case operation::cache_flush:
+                    kernels::cache_flush(cache_flush_descriptor(current_dsc), cache_flush_completion_record(current_record));
+                    break;
+                default:
+                    dispatched = false;
+            }
+
+            if (dispatched && any_completion_record(current_record).status() == to_underlying(dml::detail::execution_status::success))
+            {
+                ++index;
+            }
+            else
+            {
+                status = dml::detail::execution_status::batch_error;
+            }
+        }
+
+        if (index == descriptors_count)
+        {
+            status = dml::detail::execution_status::success;
+        }
+        record.descriptors_completed() = static_cast<transfer_size_t>(index);
+        _mm_mfence();  // fence precedes the status store
+        record.status() = to_underlying(status);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/cache_flush.cpp b/sources/core/src/cache_flush.cpp
new file mode 100644
index 0000000..4df5737
--- /dev/null
+++ b/sources/core/src/cache_flush.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Calls dispatch::cache_flush when the cache_control flag is set, dispatch::cache_write_back otherwise.
+    void cache_flush(cache_flush_descriptor dsc, cache_flush_completion_record record) noexcept
+    {
+        const auto target          = reinterpret_cast<byte_t *>(dsc.destination_address());
+        const auto length          = dsc.transfer_size();
+        const auto write_back_only = !intersects(dsc.flags(), dml::detail::cache_flush_flag::cache_control);
+        if (write_back_only)
+            dispatch::cache_write_back(target, length);
+        else
+            dispatch::cache_flush(target, length);
+
+        _mm_mfence();  // fence precedes the status store
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/compare.cpp b/sources/core/src/compare.cpp
new file mode 100644
index 0000000..bd21148
--- /dev/null
+++ b/sources/core/src/compare.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+#include <tuple>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Stores the outcome of dispatch::compare in the record. With the check_result flag set, a result that
+    // differs from the descriptor's expected result is reported as false_predicate_success instead of success.
+    void compare(compare_descriptor dsc, compare_completion_record record) noexcept
+    {
+        const auto lhs        = reinterpret_cast<byte_t *>(dsc.source_1_address());
+        const auto rhs        = reinterpret_cast<byte_t *>(dsc.source_2_address());
+        const auto length     = dsc.transfer_size();
+        const auto expected   = dsc.expected_result();
+        const auto must_match = intersects(dsc.flags(), dml::detail::compare_flag::check_result);
+
+        std::tie(record.bytes_completed(), record.result()) = dispatch::compare(lhs, rhs, length);
+        _mm_mfence();
+        const auto predicate_ok = !must_match || expected == record.result();
+        record.status() = to_underlying(predicate_ok ? dml::detail::execution_status::success
+                                                     : dml::detail::execution_status::false_predicate_success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/compare_pattern.cpp b/sources/core/src/compare_pattern.cpp
new file mode 100644
index 0000000..fd456a4
--- /dev/null
+++ b/sources/core/src/compare_pattern.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+#include <tuple>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Stores the outcome of dispatch::compare_pattern in the record. With the check_result flag set, a result that
+    // differs from the descriptor's expected result is reported as false_predicate_success instead of success.
+    void compare_pattern(compare_pattern_descriptor dsc, compare_pattern_completion_record record) noexcept
+    {
+        const auto reference  = dsc.pattern();
+        const auto data       = reinterpret_cast<byte_t *>(dsc.source_address());
+        const auto length     = dsc.transfer_size();
+        const auto expected   = dsc.expected_result();
+        const auto must_match = intersects(dsc.flags(), dml::detail::compare_flag::check_result);
+
+        std::tie(record.bytes_completed(), record.result()) = dispatch::compare_pattern(reference, data, length);
+        _mm_mfence();
+        const auto predicate_ok = !must_match || expected == record.result();
+        record.status() = to_underlying(predicate_ok ? dml::detail::execution_status::success
+                                                     : dml::detail::execution_status::false_predicate_success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/copy_crc.cpp b/sources/core/src/copy_crc.cpp
new file mode 100644
index 0000000..b120251
--- /dev/null
+++ b/sources/core/src/copy_crc.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Copies source to destination via dispatch::mem_move, computes a CRC over the source from the descriptor's seed, stores it in the record.
+    void copy_crc(copy_crc_descriptor dsc, crc_completion_record record) noexcept
+    {
+        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
+        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
+        const auto transfer_size = dsc.transfer_size();
+        const auto crc_seed      = dsc.crc_seed();
+        const auto bypass_reflection =
+            intersects(dsc.operation_specific_flags(), dml::detail::crc_specific_flag::bypass_crc_inversion_and_reflection);
+        const auto bypass_data_reflection =
+            intersects(dsc.operation_specific_flags(), dml::detail::crc_specific_flag::bypass_data_reflection);
+
+        dispatch::mem_move(src, dst, transfer_size);
+
+        auto reverse = [](uint32_t value)  // reverses the bit order of a 32-bit value
+        {
+            value = (value & 0x55555555u) << 1u | (value & 0xAAAAAAAAu) >> 1u;
+            value = (value & 0x33333333u) << 2u | (value & 0xCCCCCCCCu) >> 2u;
+            value = (value & 0x0F0F0F0Fu) << 4u | (value & 0xF0F0F0F0u) >> 4u;
+            value = (value & 0x00FF00FFu) << 8u | (value & 0xFF00FF00u) >> 8u;
+            value = (value & 0x0000FFFFu) << 16u | (value & 0xFFFF0000u) >> 16u;
+            return value;
+        };
+
+        auto crc_value = crc_seed;
+
+        // Unless bypass_crc_inversion_and_reflection is set, invert the seed and reverse its bit order
+        if (!bypass_reflection)
+        {
+            crc_value = ~(crc_value);
+            crc_value = reverse(crc_value);
+        }
+
+        // crc_reflected() is used unless bypass_data_reflection is set, plain crc() otherwise
+        crc_value =
+            !bypass_data_reflection ? dispatch::crc_reflected(src, transfer_size, crc_value) : dispatch::crc(src, transfer_size, crc_value);
+
+        // Unless bypass_crc_inversion_and_reflection is set, reverse the bit order of the CRC and invert it
+        if (!bypass_reflection)
+        {
+            crc_value = reverse(crc_value);
+            crc_value = ~(crc_value);
+        }
+
+        record.crc_value() = crc_value;
+
+        _mm_mfence();
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/crc.cpp b/sources/core/src/crc.cpp
new file mode 100644
index 0000000..95d3f06
--- /dev/null
+++ b/sources/core/src/crc.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/specific_flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * Software CRC kernel. Computes a CRC over transfer_size bytes at the source address,
+     * starting from the descriptor seed, and stores it in the completion record. Seed and
+     * result are inverted and bit-reversed unless bypass_crc_inversion_and_reflection is set;
+     * bypass_data_reflection selects dispatch::crc instead of dispatch::crc_reflected.
+     */
+    void crc(crc_descriptor dsc, crc_completion_record record) noexcept
+    {
+        // Mirrors all 32 bits by swapping neighbouring groups of 1, 2, 4, 8 and 16 bits
+        const auto mirror_bits = [](uint32_t bits)
+        {
+            bits = (bits & 0xAAAAAAAAu) >> 1u | (bits & 0x55555555u) << 1u;
+            bits = (bits & 0xCCCCCCCCu) >> 2u | (bits & 0x33333333u) << 2u;
+            bits = (bits & 0xF0F0F0F0u) >> 4u | (bits & 0x0F0F0F0Fu) << 4u;
+            bits = (bits & 0xFF00FF00u) >> 8u | (bits & 0x00FF00FFu) << 8u;
+            return (bits & 0xFFFF0000u) >> 16u | (bits & 0x0000FFFFu) << 16u;
+        };
+
+        const auto data     = reinterpret_cast<byte_t *>(dsc.source_address());
+        const auto length   = dsc.transfer_size();
+        const auto op_flags = dsc.operation_specific_flags();
+        const auto raw_crc  = intersects(op_flags, dml::detail::crc_specific_flag::bypass_crc_inversion_and_reflection);
+        const auto raw_data = intersects(op_flags, dml::detail::crc_specific_flag::bypass_data_reflection);
+
+        auto crc_value = dsc.crc_seed();
+        // Pre-process the seed: invert first, then mirror
+        if (!raw_crc)
+        {
+            crc_value = ~(crc_value);
+            crc_value = mirror_bits(crc_value);
+        }
+
+        crc_value = raw_data ? dispatch::crc(data, length, crc_value) : dispatch::crc_reflected(data, length, crc_value);
+
+        // Post-process the result in the opposite order: mirror first, then invert
+        if (!raw_crc)
+        {
+            crc_value = mirror_bits(crc_value);
+            crc_value = ~(crc_value);
+        }
+
+        record.crc_value() = crc_value;
+
+        _mm_mfence();
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/create_delta.cpp b/sources/core/src/create_delta.cpp
new file mode 100644
index 0000000..d42c2ce
--- /dev/null
+++ b/sources/core/src/create_delta.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+#include <tuple>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Software create-delta kernel: runs dispatch::create_delta, stores size and result, publishes status.
+    void create_delta(create_delta_descriptor dsc, create_delta_completion_record record) noexcept
+    {
+        const auto first          = reinterpret_cast<byte_t *>(dsc.source_1_address());
+        const auto second         = reinterpret_cast<byte_t *>(dsc.source_2_address());
+        const auto delta_out      = reinterpret_cast<byte_t *>(dsc.delta_record_address());
+        const auto delta_capacity = dsc.maximum_delta_record_size();
+        const auto length         = dsc.transfer_size();
+        const auto expected_mask  = dsc.expected_result_mask();
+        const auto must_check     = intersects(dsc.flags(), dml::detail::create_delta_flag::check_result);
+
+        std::tie(record.delta_record_size(), record.result()) =
+            dispatch::create_delta(first, second, length, delta_out, delta_capacity);
+
+        _mm_mfence();
+        const bool unexpected = must_check && !((expected_mask >> 1) == record.result());
+        record.status() = to_underlying(unexpected ? dml::detail::execution_status::false_predicate_success
+                                                   : dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/dif_check.cpp b/sources/core/src/dif_check.cpp
new file mode 100644
index 0000000..f21be4b
--- /dev/null
+++ b/sources/core/src/dif_check.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml_dif.h>
+
+#include <cstring>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "../../../../include/dml/dmldefs.h"
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * Software DIF check kernel. Fills a zero-initialized dml_job_t from the descriptor,
+     * runs dml_legacy_dif_check on it and copies result and offset into the completion
+     * record. Any status other than DML_STATUS_OK is reported as dif_control_error.
+     */
+    void dif_check(dif_check_descriptor dsc, dif_check_completion_record record) noexcept
+    {
+        const auto dif_flags = dsc.dif_flags();
+
+        dml_job_t legacy_job;
+        std::memset(&legacy_job, 0, sizeof(legacy_job));
+
+        legacy_job.operation        = DML_OP_DIF_CHECK;
+        legacy_job.flags            = dsc.flags();
+        legacy_job.source_first_ptr = reinterpret_cast<byte_t *>(dsc.source_address());
+        legacy_job.source_length    = dsc.transfer_size();
+
+        auto &dif                       = legacy_job.dif_config;
+        dif.source_reference_tag_seed   = dsc.source_ref_tag();
+        dif.source_application_tag_seed = dsc.source_app_tag();
+        dif.source_application_tag_mask = dsc.source_app_tag_mask();
+        // The two lowest DIF flag bits are used as the block size selector
+        dif.block_size = static_cast<dml_dif_block_size_t>(dif_flags & 0b11);
+        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
+        dif.flags = (static_cast<uint64_t>(dif_flags) << 16) | dsc.source_dif_flags();
+
+        const auto legacy_status = dml_legacy_dif_check(&legacy_job);
+
+        record.dif_status()      = legacy_job.result;
+        record.bytes_completed() = legacy_job.offset;
+        // TODO: Tags should be written
+
+        _mm_mfence();
+        const auto outcome = (legacy_status != DML_STATUS_OK) ? dml::detail::execution_status::dif_control_error
+                                                              : dml::detail::execution_status::success;
+        record.status() = to_underlying(outcome);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/dif_impl/CMakeLists.txt b/sources/core/src/dif_impl/CMakeLists.txt
new file mode 100644
index 0000000..1ff8040
--- /dev/null
+++ b/sources/core/src/dif_impl/CMakeLists.txt
@@ -0,0 +1,34 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# Object library with the software DIF check/insert/strip/update routines
+add_library(dml_dif_impl OBJECT dml_dif.h dml_dif.c)
+
+target_compile_features(dml_dif_impl
+        PUBLIC c_std_11
+        )
+target_compile_options(dml_dif_impl
+        PRIVATE ${DML_QUALITY_OPTIONS}
+        )
+# Consumers pick up dml_dif.h from this directory; the public dml/ headers are
+# only needed to compile dml_dif.c itself.
+target_include_directories(dml_dif_impl
+        PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
+        PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../../../include"
+        )
+# dml_dif.c includes dml_kernels.h, presumably provided by dml_sw_dispatcher
+target_link_libraries(dml_dif_impl
+        PRIVATE dml_sw_dispatcher
+        )
diff --git a/sources/middle_layer/sw_path_legacy/dif.c b/sources/core/src/dif_impl/dml_dif.c
similarity index 82%
rename from sources/middle_layer/sw_path_legacy/dif.c
rename to sources/core/src/dif_impl/dml_dif.c
index df2da1a..7b6a238 100644
--- a/sources/middle_layer/sw_path_legacy/dif.c
+++ b/sources/core/src/dif_impl/dml_dif.c
@@ -14,9 +14,11 @@
  *
  */
 
-#include "dif.h"
+#include "dml_dif.h"
 
-#include <core_api.h>
+#include <dml/dmldefs.h>
+#include <dml_kernels.h>
+#include <stddef.h>
 
 #define OWN_DIF_CRC_POLYNOMIAL 0x8BB7u /**< CRC16 T10 polynomial */
 
@@ -80,17 +82,6 @@ static const uint32_t own_dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
 #define OWN_F_APPLICATION_TAG_DETECTED(dif_ptr, flags) \
     (DML_DIF_FLAG_SRC_F_DETECT_APP_TAG & dif_flags && ((dif_ptr)->application_tag == DML_MAX_16U))
 
-static inline uint32_t bit_reverse_32u(uint32_t value)
-{
-    value = (value & 0x55555555u) << 1u | (value & 0xAAAAAAAAu) >> 1u;
-    value = (value & 0x33333333u) << 2u | (value & 0xCCCCCCCCu) >> 2u;
-    value = (value & 0x0F0F0F0Fu) << 4u | (value & 0xF0F0F0F0u) >> 4u;
-    value = (value & 0x00FF00FFu) << 8u | (value & 0xFF00FF00u) >> 8u;
-    value = (value & 0x0000FFFFu) << 16u | (value & 0xFFFF0000u) >> 16u;
-
-    return value;
-}
-
 static inline uint16_t reverse_bytes_16u(uint16_t value)
 {
     union
@@ -113,20 +104,49 @@ static inline uint32_t reverse_bytes_32u(uint32_t value)
     {
         uint32_t value;
         uint8_t  bytes[4];
-    } received_value, reverced_value;
+    } received_value, reversed_value;
 
     received_value.value = value;
 
-    reverced_value.bytes[0] = received_value.bytes[3];
-    reverced_value.bytes[1] = received_value.bytes[2];
-    reverced_value.bytes[2] = received_value.bytes[1];
-    reverced_value.bytes[3] = received_value.bytes[0];
+    reversed_value.bytes[0] = received_value.bytes[3];
+    reversed_value.bytes[1] = received_value.bytes[2];
+    reversed_value.bytes[2] = received_value.bytes[1];
+    reversed_value.bytes[3] = received_value.bytes[0];
+
+    return reversed_value.value;
+}
+
+// Folds one data byte into a running 16-bit CRC, most significant bit first.
+static inline uint16_t calculate_crc_16u(uint16_t crc_value, const uint8_t data, const uint16_t polynomial)
+{
+    const size_t   bits_per_byte = 8;
+    const size_t   crc_width     = sizeof(crc_value) * bits_per_byte;
+    const uint16_t top_bit       = 1 << (crc_width - 1);
+
+    crc_value ^= (data << (crc_width - bits_per_byte));  // byte lands in the upper 8 bits
+    for (size_t step = bits_per_byte; step > 0u; --step)
+    {
+        crc_value = (crc_value << 1) ^ ((crc_value & top_bit) ? polynomial : 0);
+    }
+
+    return crc_value;
+}
+
+// Applies calculate_crc_16u to transfer_size consecutive bytes of src, continuing from crc_value.
+static uint16_t crc_16u(const uint8_t *src, const uint32_t transfer_size, uint16_t crc_value, const uint16_t polynomial)
+{
+    for (uint32_t left = transfer_size; left > 0u; --left, ++src)
+    {
+        crc_value = calculate_crc_16u(crc_value, *src, polynomial);
+    }
 
-    return reverced_value.value;
+    return crc_value;
 }
 
-dml_status_t dml_legacy_dif_check(dml_job_t *dml_job_ptr)
+int dml_legacy_dif_check(void *dml_job_ptr_)
 {
+    dml_job_t *dml_job_ptr = (dml_job_t *)dml_job_ptr_;
+
     // General constants
     const uint32_t dif_flags   = dml_job_ptr->dif_config.flags;
     const uint32_t block_size  = own_dif_block_sizes[dml_job_ptr->dif_config.block_size];
@@ -173,8 +193,8 @@ dml_status_t dml_legacy_dif_check(dml_job_t *dml_job_ptr)
             if (check_guard)
             {
                 uint16_t crc = crc_seed;
-                dmlc_calculate_crc_16u(source_ptr, block_size, &crc, OWN_DIF_CRC_POLYNOMIAL);
-                crc = reverse_bytes_16u((invert_crc_result) ? ~crc : crc);
+                crc          = crc_16u(source_ptr, block_size, crc, OWN_DIF_CRC_POLYNOMIAL);
+                crc          = reverse_bytes_16u((invert_crc_result) ? ~crc : crc);
 
                 if (crc != dif_ptr->guard_tag)
                 {
@@ -216,8 +236,10 @@ dml_status_t dml_legacy_dif_check(dml_job_t *dml_job_ptr)
     return DML_STATUS_OK;
 }
 
-dml_status_t dml_legacy_dif_insert(dml_job_t *dml_job_ptr)
+int dml_legacy_dif_insert(void *dml_job_ptr_)
 {
+    dml_job_t *dml_job_ptr = (dml_job_t *)dml_job_ptr_;
+
     const uint32_t dif_flags   = dml_job_ptr->dif_config.flags;
     const uint32_t block_size  = own_dif_block_sizes[dml_job_ptr->dif_config.block_size];
     const uint32_t block_count = dml_job_ptr->source_length / block_size;
@@ -245,10 +267,10 @@ dml_status_t dml_legacy_dif_insert(dml_job_t *dml_job_ptr)
         uint16_t         crc     = crc_seed;
 
         // Copy
-        dmlc_copy_8u(source_ptr, destination_ptr, block_size);
+        dml_ref_mem_move(source_ptr, destination_ptr, block_size);
 
         // Calculate CRC
-        dmlc_calculate_crc_16u(destination_ptr, block_size, &crc, OWN_DIF_CRC_POLYNOMIAL);
+        crc = crc_16u(destination_ptr, block_size, crc, OWN_DIF_CRC_POLYNOMIAL);
 
         // Write data integrity field
         dif_ptr->application_tag = reverse_bytes_16u(application_tag & application_tag_mask);
@@ -265,8 +287,10 @@ dml_status_t dml_legacy_dif_insert(dml_job_t *dml_job_ptr)
     return DML_STATUS_OK;
 }
 
-dml_status_t dml_legacy_dif_strip(dml_job_t *dml_job_ptr)
+int dml_legacy_dif_strip(void *dml_job_ptr_)
 {
+    dml_job_t *dml_job_ptr = (dml_job_t *)dml_job_ptr_;
+
     // General constants
     const uint32_t block_size  = own_dif_block_sizes[dml_job_ptr->dif_config.block_size];
     const uint32_t source_step = block_size + sizeof(own_dif_t);
@@ -277,7 +301,7 @@ dml_status_t dml_legacy_dif_strip(dml_job_t *dml_job_ptr)
     uint8_t       *destination_ptr = dml_job_ptr->destination_first_ptr;
 
     // Check source data
-    dml_status_t status = dml_legacy_dif_check(dml_job_ptr);
+    int status = dml_legacy_dif_check(dml_job_ptr);
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -286,7 +310,7 @@ dml_status_t dml_legacy_dif_strip(dml_job_t *dml_job_ptr)
     // Process data
     for (uint32_t block = 0; block < block_count; block++)
     {
-        dmlc_copy_8u(source_ptr, destination_ptr, block_size);
+        dml_ref_mem_move(source_ptr, destination_ptr, block_size);
 
         source_ptr += source_step;
         destination_ptr += block_size;
@@ -295,8 +319,10 @@ dml_status_t dml_legacy_dif_strip(dml_job_t *dml_job_ptr)
     return DML_STATUS_OK;
 }
 
-dml_status_t dml_legacy_dif_update(dml_job_t * dml_job_ptr)
+int dml_legacy_dif_update(void *dml_job_ptr_)
 {
+    dml_job_t *dml_job_ptr = (dml_job_t *)dml_job_ptr_;
+
     // General constants
     const uint32_t dif_flags   = dml_job_ptr->dif_config.flags;
     const uint32_t block_size  = own_dif_block_sizes[dml_job_ptr->dif_config.block_size];
@@ -324,7 +350,7 @@ dml_status_t dml_legacy_dif_update(dml_job_t * dml_job_ptr)
     uint32_t       reference_tag   = dml_job_ptr->dif_config.destination_reference_tag_seed;
 
     // Check Source
-    const dml_status_t status = dml_legacy_dif_check(dml_job_ptr);
+    const int status = dml_legacy_dif_check(dml_job_ptr);
     if (status != DML_STATUS_OK)
     {
         return status;
@@ -335,13 +361,13 @@ dml_status_t dml_legacy_dif_update(dml_job_t * dml_job_ptr)
     {
         own_dif_t *const destination_dif_ptr = (own_dif_t *)(destination_ptr + block_size);
 
-        dmlc_copy_8u(source_ptr, destination_ptr, step);
+        dml_ref_mem_move(source_ptr, destination_ptr, step);
 
         // Update DIF
         if (calculate_crc)
         {
-            uint16_t crc = crc_seed;
-            dmlc_calculate_crc_16u(destination_ptr, block_size, &crc, OWN_DIF_CRC_POLYNOMIAL);
+            uint16_t crc                   = crc_seed;
+            crc                            = crc_16u(destination_ptr, block_size, crc, OWN_DIF_CRC_POLYNOMIAL);
             destination_dif_ptr->guard_tag = reverse_bytes_16u((invert_crc_result) ? ~crc : crc);
         }
 
diff --git a/sources/middle_layer/sw_path_legacy/dif.h b/sources/core/src/dif_impl/dml_dif.h
similarity index 75%
rename from sources/middle_layer/sw_path_legacy/dif.h
rename to sources/core/src/dif_impl/dml_dif.h
index 7fae4c1..cd8a2c9 100644
--- a/sources/middle_layer/sw_path_legacy/dif.h
+++ b/sources/core/src/dif_impl/dml_dif.h
@@ -17,21 +17,24 @@
 #ifndef DML_ML_SW_PATH_LEGACY_DIF_H
 #define DML_ML_SW_PATH_LEGACY_DIF_H
 
-#include <dml/dmldefs.h>
+#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-dml_status_t dml_legacy_dif_check(dml_job_t* dml_job_ptr);
+typedef void* job_t;
 
-dml_status_t dml_legacy_dif_insert(dml_job_t* dml_job_ptr);
+int dml_legacy_dif_check(job_t dml_job_ptr);
 
-dml_status_t dml_legacy_dif_strip(dml_job_t* dml_job_ptr);
+int dml_legacy_dif_insert(job_t dml_job_ptr);
 
-dml_status_t dml_legacy_dif_update(dml_job_t* dml_job_ptr);
+int dml_legacy_dif_strip(job_t dml_job_ptr);
+
+int dml_legacy_dif_update(job_t dml_job_ptr);
 
 #ifdef __cplusplus
 }
 #endif
+
 #endif  // DML_ML_SW_PATH_LEGACY_DIF_H
diff --git a/sources/core/src/dif_insert.cpp b/sources/core/src/dif_insert.cpp
new file mode 100644
index 0000000..5a48484
--- /dev/null
+++ b/sources/core/src/dif_insert.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml_dif.h>
+
+#include <cstring>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "../../../../include/dml/dmldefs.h"
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * Software DIF insert kernel. Fills a zero-initialized dml_job_t from the descriptor,
+     * runs dml_legacy_dif_insert on it and copies the job offset into the completion
+     * record. Any status other than DML_STATUS_OK is reported as dif_control_error.
+     */
+    void dif_insert(dif_insert_descriptor dsc, dif_insert_completion_record record) noexcept
+    {
+        const auto dif_flags = dsc.dif_flags();
+
+        dml_job_t legacy_job;
+        std::memset(&legacy_job, 0, sizeof(legacy_job));
+
+        legacy_job.operation             = DML_OP_DIF_INSERT;
+        legacy_job.flags                 = dsc.flags();
+        legacy_job.source_first_ptr      = reinterpret_cast<byte_t *>(dsc.source_address());
+        legacy_job.destination_first_ptr = reinterpret_cast<byte_t *>(dsc.destination_address());
+        legacy_job.source_length         = dsc.transfer_size();
+
+        auto &dif                            = legacy_job.dif_config;
+        dif.destination_reference_tag_seed   = dsc.destination_ref_tag();
+        dif.destination_application_tag_seed = dsc.destination_app_tag();
+        dif.destination_application_tag_mask = dsc.destination_app_tag_mask();
+        // The two lowest DIF flag bits are used as the block size selector
+        dif.block_size = static_cast<dml_dif_block_size_t>(dif_flags & 0b11);
+        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
+        dif.flags = (static_cast<uint64_t>(dif_flags) << 16) | (static_cast<uint64_t>(dsc.destination_dif_flags()) << 8);
+
+        const auto legacy_status = dml_legacy_dif_insert(&legacy_job);
+
+        record.bytes_completed() = legacy_job.offset;
+        // TODO: Tags should be written
+
+        // Fence sits between the record write above and the status store below
+        _mm_mfence();
+        const auto outcome = (legacy_status != DML_STATUS_OK) ? dml::detail::execution_status::dif_control_error
+                                                              : dml::detail::execution_status::success;
+        record.status() = to_underlying(outcome);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/dif_strip.cpp b/sources/core/src/dif_strip.cpp
new file mode 100644
index 0000000..8461a76
--- /dev/null
+++ b/sources/core/src/dif_strip.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml_dif.h>
+
+#include <cstring>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "../../../../include/dml/dmldefs.h"
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * Software DIF strip kernel. Fills a zero-initialized dml_job_t from the descriptor,
+     * runs dml_legacy_dif_strip on it and copies result and offset into the completion
+     * record. Any status other than DML_STATUS_OK is reported as dif_control_error.
+     */
+    void dif_strip(dif_strip_descriptor dsc, dif_strip_completion_record record) noexcept
+    {
+        const auto dif_flags = dsc.dif_flags();
+
+        dml_job_t legacy_job;
+        std::memset(&legacy_job, 0, sizeof(legacy_job));
+
+        legacy_job.operation             = DML_OP_DIF_STRIP;
+        legacy_job.flags                 = dsc.flags();
+        legacy_job.source_first_ptr      = reinterpret_cast<byte_t *>(dsc.source_address());
+        legacy_job.destination_first_ptr = reinterpret_cast<byte_t *>(dsc.destination_address());
+        legacy_job.source_length         = dsc.transfer_size();
+
+        auto &dif                       = legacy_job.dif_config;
+        dif.source_reference_tag_seed   = dsc.source_ref_tag();
+        dif.source_application_tag_seed = dsc.source_app_tag();
+        dif.source_application_tag_mask = dsc.source_app_tag_mask();
+        // The two lowest DIF flag bits are used as the block size selector
+        dif.block_size = static_cast<dml_dif_block_size_t>(dif_flags & 0b11);
+        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
+        dif.flags = (static_cast<uint64_t>(dif_flags) << 16) | dsc.source_dif_flags();
+
+        const auto legacy_status = dml_legacy_dif_strip(&legacy_job);
+
+        record.dif_status()      = legacy_job.result;
+        record.bytes_completed() = legacy_job.offset;
+        // TODO: Tags should be written
+
+        // Fence sits between the record writes above and the status store below
+        _mm_mfence();
+        const auto outcome = (legacy_status != DML_STATUS_OK) ? dml::detail::execution_status::dif_control_error
+                                                              : dml::detail::execution_status::success;
+        record.status() = to_underlying(outcome);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/dif_update.cpp b/sources/core/src/dif_update.cpp
new file mode 100644
index 0000000..4faa051
--- /dev/null
+++ b/sources/core/src/dif_update.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml_dif.h>
+
+#include <cstring>
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "../../../../include/dml/dmldefs.h"
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * Software DIF update kernel. Fills a zero-initialized dml_job_t with both the source
+     * and the destination DIF settings of the descriptor, runs dml_legacy_dif_update on it
+     * and copies result and offset into the completion record. Any status other than
+     * DML_STATUS_OK is reported as dif_control_error.
+     */
+    void dif_update(dif_update_descriptor dsc, dif_update_completion_record record) noexcept
+    {
+        const auto dif_flags = dsc.dif_flags();
+
+        dml_job_t legacy_job;
+        std::memset(&legacy_job, 0, sizeof(legacy_job));
+
+        legacy_job.operation             = DML_OP_DIF_UPDATE;
+        legacy_job.flags                 = static_cast<flags_t>(dsc.flags());
+        legacy_job.source_first_ptr      = reinterpret_cast<byte_t *>(dsc.source_address());
+        legacy_job.destination_first_ptr = reinterpret_cast<byte_t *>(dsc.destination_address());
+        legacy_job.source_length         = dsc.transfer_size();
+
+        auto &dif                            = legacy_job.dif_config;
+        dif.source_reference_tag_seed        = dsc.source_ref_tag();
+        dif.source_application_tag_seed      = dsc.source_app_tag();
+        dif.source_application_tag_mask      = dsc.source_app_tag_mask();
+        dif.destination_reference_tag_seed   = dsc.destination_ref_tag();
+        dif.destination_application_tag_seed = dsc.destination_app_tag();
+        dif.destination_application_tag_mask = dsc.destination_app_tag_mask();
+
+        // The two lowest DIF flag bits are used as the block size selector
+        dif.block_size = static_cast<dml_dif_block_size_t>(dif_flags & 0b11);
+
+        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
+        dif.flags = (static_cast<uint64_t>(dif_flags) << 16) | (static_cast<uint64_t>(dsc.destination_dif_flags()) << 8) |
+                    dsc.source_dif_flags();
+
+        const auto legacy_status = dml_legacy_dif_update(&legacy_job);
+
+        record.dif_status()      = legacy_job.result;
+        record.bytes_completed() = legacy_job.offset;
+        // TODO: Tags should be written
+
+        // Fence sits between the record writes above and the status store below
+        _mm_mfence();
+        const auto outcome = (legacy_status != DML_STATUS_OK) ? dml::detail::execution_status::dif_control_error
+                                                              : dml::detail::execution_status::success;
+        record.status() = to_underlying(outcome);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/drain.cpp b/sources/core/src/drain.cpp
new file mode 100644
index 0000000..f1a13a1
--- /dev/null
+++ b/sources/core/src/drain.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Software drain kernel: the descriptor is ignored and success is reported right away.
+    void drain(drain_descriptor, drain_completion_record record) noexcept
+    {
+        record.status() = static_cast<status_t>(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/dualcast.cpp b/sources/core/src/dualcast.cpp
new file mode 100644
index 0000000..c9e7024
--- /dev/null
+++ b/sources/core/src/dualcast.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    // Software dualcast kernel: hands the source and both destinations to dispatch::dualcast.
+    void dualcast(dualcast_descriptor dsc, dualcast_completion_record record) noexcept
+    {
+        const auto from      = reinterpret_cast<byte_t *>(dsc.source_address());
+        const auto first_to  = reinterpret_cast<byte_t *>(dsc.destination_1_address());
+        const auto second_to = reinterpret_cast<byte_t *>(dsc.destination_2_address());
+        const auto length    = dsc.transfer_size();
+
+        dispatch::dualcast(from, first_to, second_to, length);
+        _mm_mfence();
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/fill.cpp b/sources/core/src/fill.cpp
new file mode 100644
index 0000000..9d79786
--- /dev/null
+++ b/sources/core/src/fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+#include <optimization_dispatcher.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    /**
+     * @brief Software kernel for the fill operation.
+     *
+     * Reads the pattern, destination address and transfer size from the descriptor, forwards
+     * them to dispatch::fill, issues a memory fence and then reports success in the
+     * completion record.
+     *
+     * @param dsc    fill descriptor view
+     * @param record completion record view that receives the status
+     */
+    void fill(fill_descriptor dsc, fill_completion_record record) noexcept
+    {
+        const auto  fill_pattern = dsc.pattern();
+        auto *const destination  = reinterpret_cast<byte_t *>(dsc.destination_address());
+        const auto  length       = dsc.transfer_size();
+
+        dispatch::fill(fill_pattern, destination, length);
+
+        // Fence before the status store so the kernel's memory traffic is ordered ahead of it
+        _mm_mfence();
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/hardware_device.cpp b/sources/core/src/hardware_device.cpp
new file mode 100644
index 0000000..4ecba38
--- /dev/null
+++ b/sources/core/src/hardware_device.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/descriptor_views.hpp>
+#include <core/operations.hpp>
+#include <dml/detail/common/flags.hpp>
+#include <dml/detail/common/status.hpp>
+
+#include "core/device.hpp"
+#include "hw_dispatcher/hw_dispatcher.hpp"
+#include "hw_dispatcher/numa.hpp"
+
+namespace dml::core
+{
+#ifdef DML_HW
+    /**
+     * @brief Finalizes a descriptor for hardware execution and submits it to one device.
+     *
+     * Sets the completion_record_address_valid and request_completion_record flags, adds
+     * block_on_fault for every operation except batch, drain and nop, stores the address of
+     * @p record in the descriptor and zeroes the first byte of @p record before submission.
+     *
+     * @param device device that receives the descriptor
+     * @param dsc    descriptor to patch and submit
+     * @param record completion record referenced by the descriptor
+     *
+     * @return submission_status::success when the device reports DML_STATUS_OK,
+     *         submission_status::failure otherwise
+     */
+    static inline auto enqueue(const dispatcher::hw_device &device, descriptor &dsc, completion_record &record) noexcept
+    {
+        auto view = any_descriptor(dsc);
+
+        view.flags() |= static_cast<flags_t>(dml::detail::flag::completion_record_address_valid) |
+                        static_cast<flags_t>(dml::detail::flag::request_completion_record);
+
+        // Use BlockOnFault on hardware, until page fault handling is implemented in software side
+        const bool skip_block_on_fault = view.operation() == static_cast<operation_t>(operation::batch) ||
+                                         view.operation() == static_cast<operation_t>(operation::drain) ||
+                                         view.operation() == static_cast<operation_t>(operation::nop);
+
+        if (!skip_block_on_fault)
+        {
+            view.flags() |= static_cast<flags_t>(dml::detail::flag::block_on_fault);
+        }
+
+        view.completion_record_address() = reinterpret_cast<address_t>(&record);
+
+        // Reset the first byte of the record before the device can write to it
+        // NOTE(review): presumably the status byte - confirm against the record layout
+        record.bytes[0] = 0;
+
+        if (DML_STATUS_OK == device.enqueue_descriptor(reinterpret_cast<const dsahw_descriptor_t *>(&dsc)))
+        {
+            return dml::detail::submission_status::success;
+        }
+
+        return dml::detail::submission_status::failure;
+    }
+#endif
+
+    /**
+     * @brief Submits a descriptor to a hardware device located on the caller's NUMA node.
+     *
+     * Devices are visited round-robin, starting from a per-thread position that advances on
+     * every visit. Devices whose NUMA id differs from the caller's are skipped. The first
+     * device that accepts the descriptor ends the search.
+     *
+     * @param dsc               descriptor to submit
+     * @param completion_record completion record associated with the descriptor
+     *
+     * @return submission_status::success    when a device accepted the descriptor;
+     *         submission_status::queue_busy when hardware is supported but no visited device
+     *                                       accepted it (including when none is on this node);
+     *         submission_status::failure    when built without DML_HW or hardware is not supported
+     */
+    dml::detail::submission_status hardware_device::submit(descriptor &dsc, completion_record &completion_record) noexcept
+    {
+#ifdef DML_HW
+        auto &dispatcher = dispatcher::hw_dispatcher::get_instance();
+
+        if (dispatcher.is_hw_support())
+        {
+            // Per-thread rotation position, kept between calls
+            static thread_local auto current_device_idx = 0u;
+
+            const auto device_count = std::distance(dispatcher.begin(), dispatcher.end());
+
+            // Queried once per submission instead of once per visited device
+            const auto local_numa_id = util::get_numa_id();
+
+            for (auto tried_devices = 0u; tried_devices < device_count; ++tried_devices)
+            {
+                auto &current_device = *(dispatcher.begin() + current_device_idx);
+                current_device_idx   = (current_device_idx + 1) % device_count;
+
+                if (local_numa_id != current_device.numa_id())
+                {
+                    continue;
+                }
+
+                const auto status = enqueue(current_device, dsc, completion_record);
+
+                if (status == detail::submission_status::success)
+                {
+                    return status;
+                }
+            }
+
+            return detail::submission_status::queue_busy;
+        }
+#else
+        static_cast<void>(dsc);
+        static_cast<void>(completion_record);
+#endif
+
+        return dml::detail::submission_status::failure;
+    }
+}  // namespace dml::core
diff --git a/sources/core/src/hw_dispatcher/CMakeLists.txt b/sources/core/src/hw_dispatcher/CMakeLists.txt
new file mode 100644
index 0000000..b406202
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/CMakeLists.txt
@@ -0,0 +1,51 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# Object library holding the hardware dispatcher sources.
+add_library(dml_hw_dispatcher OBJECT
+        # C++ dispatcher implementation
+        hw_device.cpp
+        hw_device.hpp
+        hw_dispatcher.cpp
+        hw_dispatcher.hpp
+        hw_queue.cpp
+        hw_queue.hpp
+        numa.cpp
+        numa.hpp
+
+        # C loader for the accelerator configuration library
+        hw_configuration_driver.c
+
+        # Legacy C headers
+        legacy_headers/hardware_configuration_driver.h
+        legacy_headers/hardware_api.h
+        legacy_headers/hardware_completion_records_api.h
+        legacy_headers/hardware_limits.h
+        legacy_headers/hardware_descriptors_api.h
+        legacy_headers/own_dsa_accel_constants.h
+        legacy_headers/hardware_definitions.h
+        legacy_headers/libaccel_config.h
+        )
+
+target_compile_features(dml_hw_dispatcher
+        PRIVATE cxx_std_17 c_std_11
+        )
+
+# Project-wide private options apply to the C++ sources only
+target_compile_options(dml_hw_dispatcher
+        PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${DML_CPP_PRIVATE_OPTIONS}>
+        )
+
+# Relative include paths are resolved against the current source directory; spell it out
+target_include_directories(dml_hw_dispatcher
+        PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../../include"
+        )
+
+# DML_HW enables the hardware path; LOG_HW_INIT is only defined when the option is ON
+if (DML_HW)
+    target_compile_definitions(dml_hw_dispatcher
+            PRIVATE DML_HW
+            PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
+            )
+endif ()
diff --git a/sources/core/src/hw_dispatcher/hw_configuration_driver.c b/sources/core/src/hw_dispatcher/hw_configuration_driver.c
new file mode 100644
index 0000000..6bd3abd
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_configuration_driver.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <fcntl.h>
+
+#include "legacy_headers/hardware_configuration_driver.h"
+
+#if defined(linux)
+
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+/**
+ * @brief Absolute path of the accelerator configuration library that is passed to dlopen
+ */
+const static char *accelerator_configuration_driver_name = "/usr/lib64/libaccel-config.so.1";
+
+/*
+ * Function pointer types used to call the symbols resolved from the accelerator
+ * configuration library. The wrappers in this file cast entries of functions_table
+ * to these types before calling them.
+ */
+
+typedef int (*accfg_new_ptr)(struct accfg_ctx **ctx);
+
+typedef struct accfg_device *(*accfg_device_get_first_ptr)(struct accfg_ctx *ctx);
+
+typedef const char *(*accfg_device_get_devname_ptr)(struct accfg_device *device);
+
+typedef struct accfg_device *(*accfg_device_get_next_ptr)(struct accfg_device *device);
+
+typedef struct accfg_wq *(*accfg_wq_get_first_ptr)(struct accfg_device *device);
+
+typedef struct accfg_wq *(*accfg_wq_get_next_ptr)(struct accfg_wq *wq);
+
+typedef enum accfg_wq_state (*accfg_wq_get_state_ptr)(struct accfg_wq *wq);
+
+typedef unsigned int (*accfg_device_get_version_ptr)(struct accfg_device *device);
+
+typedef const char * (*accfg_wq_get_devname_ptr)(struct accfg_wq *wq);
+
+typedef enum accfg_device_state (*accfg_device_get_state_ptr)(struct accfg_device *device);
+
+typedef struct accfg_ctx *(*accfg_unref_ptr)(struct accfg_ctx *ctx);
+
+typedef enum accfg_wq_mode (*accfg_wq_get_mode_ptr)(struct accfg_wq *wq);
+
+typedef unsigned long (*accfg_device_get_gen_cap_ptr)(struct accfg_device *device);
+
+/* Shared by the traffic_class_a and traffic_class_b wrappers */
+typedef int (*accfg_group_get_traffic_class_ptr)(struct accfg_group *group);
+
+typedef struct accfg_group *(*accfg_group_get_first_ptr)(struct accfg_device *device);
+
+typedef struct accfg_group *(*accfg_group_get_next_ptr)(struct accfg_group *group);
+
+typedef struct accfg_group *(*accfg_wq_get_group_ptr)(struct accfg_wq *wq);
+
+typedef int (*accfg_wq_get_group_id_ptr)(struct accfg_wq *wq);
+
+typedef int (*accfg_group_get_id_ptr)(struct accfg_group *group);
+
+typedef int (*accfg_wq_get_user_dev_path_ptr)(struct accfg_wq *wq, char *buf, size_t size);
+
+typedef int (*accfg_wq_get_priority_ptr)(struct accfg_wq *wq);
+
+/**
+ * @brief Table with functions required from accelerator configuration library
+ */
+/*
+ * NOTE: the wrappers below address entries by numeric index, so the order of this
+ * table must not change without updating every wrapper. The function pointers start
+ * as NULL and are filled in by own_load_configuration_functions().
+ */
+static dsa_desc_t functions_table[] = { { NULL, "accfg_new" },                       /* [0]  */
+                                        { NULL, "accfg_device_get_first" },          /* [1]  */
+                                        { NULL, "accfg_device_get_devname" },        /* [2]  */
+                                        { NULL, "accfg_device_get_next" },           /* [3]  */
+                                        { NULL, "accfg_wq_get_first" },              /* [4]  */
+                                        { NULL, "accfg_wq_get_next" },               /* [5]  */
+                                        { NULL, "accfg_wq_get_state" },              /* [6]  */
+                                        { NULL, "accfg_wq_get_mode" },               /* [7]  */
+                                        { NULL, "accfg_device_get_version" },        /* [8]  */
+                                        { NULL, "accfg_wq_get_devname" },            /* [9]  */
+                                        { NULL, "accfg_device_get_state" },          /* [10] */
+                                        { NULL, "accfg_unref" },                     /* [11] */
+                                        { NULL, "accfg_device_get_gen_cap" },        /* [12] */
+                                        { NULL, "accfg_device_get_numa_node" },      /* [13] */
+                                        { NULL, "accfg_wq_get_priority" },           /* [14] */
+                                        { NULL, "accfg_group_get_first" },           /* [15] */
+                                        { NULL, "accfg_group_get_next" },            /* [16] */
+                                        { NULL, "accfg_group_get_traffic_class_a" }, /* [17] */
+                                        { NULL, "accfg_group_get_traffic_class_b" }, /* [18] */
+                                        { NULL, "accfg_wq_get_group" },              /* [19] */
+                                        { NULL, "accfg_wq_get_group_id" },           /* [20] */
+                                        { NULL, "accfg_group_get_id" },              /* [21] */
+                                        { NULL, "accfg_wq_get_user_dev_path" },      /* [22] */
+                                        // Terminate list/init
+                                        { NULL, NULL } };
+
+/* Opens the accelerator configuration library and returns its handle through the out-parameter */
+static inline dsahw_status_t own_load_accelerator_configuration_driver(void **driver_instance_pptr);
+
+/* Resolves every symbol named in functions_table from the given library handle */
+static inline bool own_load_configuration_functions(void *driver_instance_ptr);
+
+#endif
+
+/**
+ * @brief Loads the accelerator configuration library and resolves the functions it must export.
+ *
+ * On any failure the library handle is closed (if it was opened) and
+ * driver_ptr->driver_instance_ptr is left as NULL.
+ *
+ * @param driver_ptr driver state that receives the library handle
+ *
+ * @return DML_STATUS_OK only when the library was opened AND every required symbol was resolved;
+ *         the loader status when opening failed;
+ *         DML_STATUS_LIBACCEL_NOT_FOUND when a required symbol is missing or when the
+ *         `linux` macro is not defined.
+ */
+dsahw_status_t DML_HW_API(initialize_accelerator_driver)(hw_driver_t *driver_ptr)
+{
+#if defined(linux)
+    driver_ptr->driver_instance_ptr = NULL;
+
+    // Load the shared library
+    dsahw_status_t status = own_load_accelerator_configuration_driver(&driver_ptr->driver_instance_ptr);
+
+    if (DML_STATUS_OK == status && driver_ptr->driver_instance_ptr &&
+        own_load_configuration_functions(driver_ptr->driver_instance_ptr))
+    {
+        return DML_STATUS_OK;
+    }
+
+    // Something failed: release the library so the driver is never left half-initialized
+    if (driver_ptr->driver_instance_ptr)
+    {
+        dlclose(driver_ptr->driver_instance_ptr);
+    }
+
+    driver_ptr->driver_instance_ptr = NULL;
+
+    // A successful open followed by a failed symbol lookup must not be reported as success,
+    // otherwise callers would go on to call unresolved (NULL) function pointers
+    return (DML_STATUS_OK != status) ? status : DML_STATUS_LIBACCEL_NOT_FOUND;
+#else
+    return DML_STATUS_LIBACCEL_NOT_FOUND;
+#endif
+}
+
+/**
+ * @brief Closes the accelerator configuration library, if it is open, and clears the handle.
+ *
+ * Does nothing when the `linux` macro is not defined.
+ *
+ * @param driver_ptr driver state holding the library handle
+ */
+void DML_HW_API(finalize_accelerator_driver)(hw_driver_t *driver_ptr)
+{
+#if defined(linux)
+    void *const library_handle = driver_ptr->driver_instance_ptr;
+
+    driver_ptr->driver_instance_ptr = NULL;
+
+    if (library_handle)
+    {
+        dlclose(library_handle);
+    }
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_new` symbol (functions_table[0]).
+ *
+ * @return result of the loaded function; DML_STATUS_LIBACCEL_NOT_FOUND when `linux` is not defined
+ */
+int32_t DML_HW_API(driver_new_context)(struct accfg_ctx **ctx)
+{
+#if defined(linux)
+    const accfg_new_ptr create_context = (accfg_new_ptr)functions_table[0].function;
+
+    return create_context(ctx);
+#else
+    return DML_STATUS_LIBACCEL_NOT_FOUND;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_first` symbol (functions_table[1]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_device *DML_HW_API(context_get_first_device)(struct accfg_ctx *ctx)
+{
+#if defined(linux)
+    const accfg_device_get_first_ptr get_first_device = (accfg_device_get_first_ptr)functions_table[1].function;
+
+    return get_first_device(ctx);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_devname` symbol (functions_table[2]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+const char *DML_HW_API(device_get_name)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_device_get_devname_ptr get_device_name = (accfg_device_get_devname_ptr)functions_table[2].function;
+
+    return get_device_name(device);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_next` symbol (functions_table[3]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_device *DML_HW_API(device_get_next)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_device_get_next_ptr get_next_device = (accfg_device_get_next_ptr)functions_table[3].function;
+
+    return get_next_device(device);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_first` symbol (functions_table[4]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_wq *DML_HW_API(get_first_work_queue)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_wq_get_first_ptr get_first_wq = (accfg_wq_get_first_ptr)functions_table[4].function;
+
+    return get_first_wq(device);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_next` symbol (functions_table[5]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_wq *DML_HW_API(work_queue_get_next)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_next_ptr get_next_wq = (accfg_wq_get_next_ptr)functions_table[5].function;
+
+    return get_next_wq(wq);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_state` symbol (functions_table[6]).
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+enum accfg_wq_state DML_HW_API(work_queue_get_state)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_state_ptr get_wq_state = (accfg_wq_get_state_ptr)functions_table[6].function;
+
+    return get_wq_state(wq);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_mode` symbol (functions_table[7]).
+ *
+ * @return result of the loaded function; 2 when `linux` is not defined
+ */
+enum accfg_wq_mode DML_HW_API(work_queue_get_mode)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_mode_ptr get_wq_mode = (accfg_wq_get_mode_ptr)functions_table[7].function;
+
+    return get_wq_mode(wq);
+#else
+    return 2;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_version` symbol (functions_table[8]).
+ *
+ * @return result of the loaded function; -1 (converted to uint32_t) when `linux` is not defined
+ */
+uint32_t DML_HW_API(device_get_version)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_device_get_version_ptr get_device_version = (accfg_device_get_version_ptr)functions_table[8].function;
+
+    return get_device_version(device);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_devname` symbol (functions_table[9]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+const char * DML_HW_API(work_queue_get_device_name)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_devname_ptr get_wq_name = (accfg_wq_get_devname_ptr)functions_table[9].function;
+
+    return get_wq_name(wq);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_state` symbol (functions_table[10]).
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+enum accfg_device_state DML_HW_API(device_get_state)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_device_get_state_ptr get_device_state = (accfg_device_get_state_ptr)functions_table[10].function;
+
+    return get_device_state(device);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_unref` symbol (functions_table[11]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_ctx *DML_HW_API(context_close)(struct accfg_ctx *ctx)
+{
+#if defined(linux)
+    const accfg_unref_ptr release_context = (accfg_unref_ptr)functions_table[11].function;
+
+    return release_context(ctx);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_gen_cap` symbol (functions_table[12]).
+ *
+ * @return result of the loaded function; 0 when `linux` is not defined
+ */
+uint64_t DML_HW_API(device_get_gen_cap_register)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_device_get_gen_cap_ptr get_gen_cap = (accfg_device_get_gen_cap_ptr)functions_table[12].function;
+
+    return get_gen_cap(device);
+#else
+    return 0;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_device_get_numa_node` symbol (functions_table[13]).
+ *
+ * The library function returns `int`, so it is called through a pointer of that exact type.
+ * Calling it through the `unsigned long`-returning gen_cap pointer type is undefined behaviour
+ * and leaves the upper half of the result unspecified for negative node ids.
+ *
+ * @return NUMA node id converted to uint64_t (a negative id becomes UINT64_MAX, the same value
+ *         the fallback branch returns); -1 converted to uint64_t when `linux` is not defined
+ */
+uint64_t DML_HW_API(device_get_numa_node)(struct accfg_device *device)
+{
+#if defined(linux)
+    // Signature of accfg_device_get_numa_node as declared by libaccel-config
+    typedef int (*accfg_device_get_numa_node_ptr)(struct accfg_device *device);
+
+    const int numa_node = ((accfg_device_get_numa_node_ptr)functions_table[13].function)(device);
+
+    return (uint64_t)numa_node;
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_priority` symbol (functions_table[14]).
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+int32_t DML_HW_API(work_queue_get_priority)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_priority_ptr get_wq_priority = (accfg_wq_get_priority_ptr)functions_table[14].function;
+
+    return get_wq_priority(wq);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_group_get_first` symbol (functions_table[15]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_group *DML_HW_API(group_get_first)(struct accfg_device *device)
+{
+#if defined(linux)
+    const accfg_group_get_first_ptr get_first_group = (accfg_group_get_first_ptr)functions_table[15].function;
+
+    return get_first_group(device);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_group_get_next` symbol (functions_table[16]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_group *DML_HW_API(group_get_next)(struct accfg_group *group)
+{
+#if defined(linux)
+    const accfg_group_get_next_ptr get_next_group = (accfg_group_get_next_ptr)functions_table[16].function;
+
+    return get_next_group(group);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_group_get_traffic_class_a` symbol (functions_table[17]).
+ *
+ * @return result of the loaded function; 0 when `linux` is not defined
+ */
+int DML_HW_API(group_get_traffic_class_a)(struct accfg_group *group)
+{
+#if defined(linux)
+    const accfg_group_get_traffic_class_ptr get_traffic_class_a = (accfg_group_get_traffic_class_ptr)functions_table[17].function;
+
+    return get_traffic_class_a(group);
+#else
+    return 0;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_group_get_traffic_class_b` symbol (functions_table[18]).
+ *
+ * @return result of the loaded function; 0 when `linux` is not defined
+ */
+int DML_HW_API(group_get_traffic_class_b)(struct accfg_group *group)
+{
+#if defined(linux)
+    const accfg_group_get_traffic_class_ptr get_traffic_class_b = (accfg_group_get_traffic_class_ptr)functions_table[18].function;
+
+    return get_traffic_class_b(group);
+#else
+    return 0;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_group` symbol (functions_table[19]).
+ *
+ * @return result of the loaded function; NULL when `linux` is not defined
+ */
+struct accfg_group *DML_HW_API(work_queue_get_group)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_group_ptr get_wq_group = (accfg_wq_get_group_ptr)functions_table[19].function;
+
+    return get_wq_group(wq);
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_group_id` symbol (functions_table[20]).
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+int DML_HW_API(work_queue_get_group_id)(struct accfg_wq *wq)
+{
+#if defined(linux)
+    const accfg_wq_get_group_id_ptr get_wq_group_id = (accfg_wq_get_group_id_ptr)functions_table[20].function;
+
+    return get_wq_group_id(wq);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_group_get_id` symbol (functions_table[21]).
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+int DML_HW_API(group_get_id)(struct accfg_group *group)
+{
+#if defined(linux)
+    const accfg_group_get_id_ptr get_group_id = (accfg_group_get_id_ptr)functions_table[21].function;
+
+    return get_group_id(group);
+#else
+    return -1;
+#endif
+}
+
+/**
+ * @brief Forwards to the loaded `accfg_wq_get_user_dev_path` symbol (functions_table[22]).
+ *
+ * @param wq   work queue handle, passed through unchanged
+ * @param buf  caller-provided buffer, passed through unchanged
+ * @param size size of @p buf, passed through unchanged
+ *
+ * @return result of the loaded function; -1 when `linux` is not defined
+ */
+int DML_HW_API(work_queue_get_device_path)(struct accfg_wq *wq, char *buf, size_t size)
+{
+#if defined(linux)
+    const accfg_wq_get_user_dev_path_ptr get_user_dev_path = (accfg_wq_get_user_dev_path_ptr)functions_table[22].function;
+
+    return get_user_dev_path(wq, buf, size);
+#else
+    return -1;
+#endif
+}
+
+#if defined(linux)
+
+/* ------ Internal functions implementation ------ */
+
+/**
+ * @brief Resolves every symbol listed in functions_table from the given library handle.
+ *
+ * Walks the table until the entry whose function_name is NULL and stores each resolved
+ * address in the entry. Stops at the first symbol that cannot be resolved.
+ *
+ * @param driver_instance_ptr handle of the opened library, passed to dlsym
+ *
+ * @return true when every symbol was resolved, false as soon as dlerror reports a
+ *         message or dlsym returns NULL
+ */
+bool own_load_configuration_functions(void *driver_instance_ptr)
+{
+    // Clear error log
+    (void)dlerror();
+
+    for (dsa_desc_t *entry = functions_table; entry->function_name; ++entry)
+    {
+        entry->function = (library_function)dlsym(driver_instance_ptr, entry->function_name);
+
+        const char *lookup_error = dlerror();
+
+        if (lookup_error || !entry->function)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * @brief Opens the accelerator configuration library.
+ *
+ * The absolute path in accelerator_configuration_driver_name is tried first. When that fails,
+ * the bare library soname is tried so the dynamic loader can locate the library through its
+ * normal search path (distributions that do not install into /usr/lib64).
+ *
+ * @param driver_instance_pptr receives the library handle on success; left untouched on failure
+ *
+ * @return DML_STATUS_OK when the library was opened, DML_STATUS_LIBACCEL_NOT_FOUND otherwise
+ */
+dsahw_status_t own_load_accelerator_configuration_driver(void **driver_instance_pptr)
+{
+    // Resolved by the dynamic loader search path instead of a fixed directory
+    static const char *const fallback_driver_name = "libaccel-config.so.1";
+
+    DIAG("loading driver: %s\n", accelerator_configuration_driver_name);
+    // Try to load the user interface library for IAX/DSA kernel driver
+    void *driver_instance_ptr = dlopen(accelerator_configuration_driver_name, RTLD_LAZY);
+
+    if (!driver_instance_ptr)
+    {
+        DIAG("loading driver: %s\n", fallback_driver_name);
+        driver_instance_ptr = dlopen(fallback_driver_name, RTLD_LAZY);
+    }
+
+    if (!driver_instance_ptr)
+    {
+        // This is needed for error handle. We need to call dlerror
+        // for emptying error message. Otherwise we will receive error
+        // message during loading symbols from another library
+        dlerror();
+
+        return DML_STATUS_LIBACCEL_NOT_FOUND;
+    }
+
+    // A successful dlopen after a failed one may leave the earlier message pending; drop it
+    (void)dlerror();
+
+    *driver_instance_pptr = driver_instance_ptr;
+
+    return DML_STATUS_OK;
+}
+
+#endif
diff --git a/sources/core/src/hw_dispatcher/hw_device.cpp b/sources/core/src/hw_dispatcher/hw_device.cpp
new file mode 100644
index 0000000..f503e64
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_device.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifdef DML_HW
+
+#include "hw_device.hpp"
+
+#include <algorithm>
+#include <cstring>
+
+#include "legacy_headers/hardware_configuration_driver.h"
+#include "legacy_headers/own_dsa_accel_constants.h"
+
+/**
+ * @brief Searches a device name string for a 4-byte pattern.
+ *
+ * At every position that still has more than @p name_size characters before the terminator,
+ * four bytes are loaded, OR-ed with CHAR_MSK and compared with @p name.
+ *
+ * @param src_ptr   null-terminated string to search; nullptr yields false
+ * @param name      pattern to compare against, in the same byte order as the loaded bytes
+ * @param name_size number of characters that must follow the current position
+ *                  NOTE(review): four bytes are read per position, so this presumably
+ *                  must be at least 3 - confirm against DEVICE_NAME_LENGTH
+ *
+ * @return true when a position matches, false otherwise (including strings shorter than
+ *         @p name_size characters)
+ */
+static inline bool own_search_device_name(const char *src_ptr, const uint32_t name, const uint32_t name_size) noexcept
+{
+    if (nullptr == src_ptr)
+    {
+        return false;
+    }
+
+    // The scan below peeks name_size characters ahead; make sure the string is at least
+    // that long so the first peek cannot read past the terminator
+    for (uint32_t symbol_idx = 0u; symbol_idx < name_size; symbol_idx++)
+    {
+        if ('\0' == src_ptr[symbol_idx])
+        {
+            return false;
+        }
+    }
+
+    for (size_t symbol_idx = 0u; '\0' != src_ptr[symbol_idx + name_size]; symbol_idx++)
+    {
+        // memcpy instead of a pointer cast: the position is not guaranteed to be 4-byte aligned
+        uint32_t candidate = 0u;
+        std::memcpy(&candidate, src_ptr + symbol_idx, sizeof(candidate));
+
+        // Convert the first 3 bytes to lower case and make the 4th 0xff
+        if (name == (candidate | CHAR_MSK))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+namespace dml::core::dispatcher
+{
+
+    /**
+     * @brief Copies the device capability fields into the gen_cap member of a hardware context.
+     *
+     * Every value comes from the matching accessor of this device.
+     *
+     * @param hw_context_ptr context to fill; must not be null (it is dereferenced unconditionally)
+     */
+    void hw_device::fill_hw_context(dsahw_context_t *const hw_context_ptr) const noexcept
+    {
+        auto &capabilities = hw_context_ptr->gen_cap;
+
+        // Feature flags
+        capabilities.block_on_fault_support       = block_on_fault_support();
+        capabilities.overlapping_copy_support     = overlapping_copy_support();
+        capabilities.memory_cache_control_support = memory_cache_control_support();
+        capabilities.flush_cache_control_support  = flush_cache_control_support();
+        capabilities.destination_readback_support = destination_readback_support();
+        capabilities.descriptor_readback_support  = descriptor_readback_support();
+
+        // Limits and sizes
+        capabilities.max_transfer_size = max_transfer_size();
+        capabilities.max_batch_size    = max_batch_size();
+        capabilities.message_size      = message_size();
+
+        capabilities.configuration_support = configuration_support();
+    }
+
+    /**
+     * @brief Submits a descriptor to one of this device's work queues, round-robin.
+     *
+     * The search starts at the queue after the one the calling thread used last and wraps
+     * around, so every queue of this device is tried at most once per call.
+     *
+     * The rotation position is thread-local and shared by all hw_device objects, so it is
+     * clamped to this device's queue count before use. This keeps the index inside
+     * [begin(), end()) even after a submission to a device with more queues.
+     *
+     * @param desc_ptr descriptor to submit
+     *
+     * @return DML_STATUS_OK when a queue accepted the descriptor,
+     *         DML_STATUS_WORK_QUEUE_OVERFLOW_ERROR when no queue accepted it or the device
+     *         has no queues
+     */
+    auto hw_device::enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t
+    {
+        using index_t = queues_container_t::difference_type;
+
+        const index_t n_queues = std::distance(this->begin(), this->end());
+
+        if (n_queues <= 0)
+        {
+            return DML_STATUS_WORK_QUEUE_OVERFLOW_ERROR;
+        }
+
+        // Index of the queue to try first on the next call from this thread
+        static thread_local index_t next_wq_idx = 0;
+
+        // Wrap around, and discard a position left behind by a device with more queues
+        const index_t first_idx = (next_wq_idx < n_queues) ? next_wq_idx : 0;
+
+        for (index_t attempt = 0; attempt < n_queues; ++attempt)
+        {
+            const index_t idx   = (first_idx + attempt) % n_queues;
+            auto         &queue = *(this->begin() + idx);
+
+            if (DML_STATUS_OK == queue.enqueue_descriptor(desc_ptr))
+            {
+                next_wq_idx = idx + 1;
+                return DML_STATUS_OK;
+            }
+        }
+
+        return DML_STATUS_WORK_QUEUE_OVERFLOW_ERROR;
+    }
+
+    /*
+     * GENCAP field accessors.
+     *
+     * Each accessor applies one GC_* macro to the cached gen_cap_register_ value and returns
+     * the result. The macros are defined outside this file; the bit positions they extract
+     * are not visible here.
+     */
+
+    // Result of GC_BLOCK_ON_FAULT on the cached register
+    auto hw_device::block_on_fault_support() const noexcept -> uint8_t
+    {
+        return GC_BLOCK_ON_FAULT(gen_cap_register_);
+    }
+
+    // Result of GC_OVERLAPPING on the cached register
+    auto hw_device::overlapping_copy_support() const noexcept -> uint8_t
+    {
+        return GC_OVERLAPPING(gen_cap_register_);
+    }
+
+    // Result of GC_CACHE_WRITE on the cached register
+    auto hw_device::memory_cache_control_support() const noexcept -> uint8_t
+    {
+        return GC_CACHE_WRITE(gen_cap_register_);
+    }
+
+    // Result of GC_CACHE_FLUSH on the cached register
+    auto hw_device::flush_cache_control_support() const noexcept -> uint8_t
+    {
+        return GC_CACHE_FLUSH(gen_cap_register_);
+    }
+
+    // Result of GC_DST_READBACK on the cached register
+    auto hw_device::destination_readback_support() const noexcept -> uint8_t
+    {
+        return GC_DST_READBACK(gen_cap_register_);
+    }
+
+    // Result of GC_DRAIN_READBACK on the cached register
+    auto hw_device::descriptor_readback_support() const noexcept -> uint8_t
+    {
+        return GC_DRAIN_READBACK(gen_cap_register_);
+    }
+
+    // Result of GC_MAX_TRANSFER_SIZE on the cached register
+    auto hw_device::max_transfer_size() const noexcept -> uint32_t
+    {
+        return GC_MAX_TRANSFER_SIZE(gen_cap_register_);
+    }
+
+    // Result of GC_MAX_BATCH_SIZE on the cached register
+    auto hw_device::max_batch_size() const noexcept -> uint32_t
+    {
+        return GC_MAX_BATCH_SIZE(gen_cap_register_);
+    }
+
+    // Result of GC_INTERRUPT_STORAGE on the cached register
+    auto hw_device::message_size() const noexcept -> uint16_t
+    {
+        return GC_INTERRUPT_STORAGE(gen_cap_register_);
+    }
+
+    // Result of GC_CONF_SUPPORT on the cached register
+    auto hw_device::configuration_support() const noexcept -> uint8_t
+    {
+        return GC_CONF_SUPPORT(gen_cap_register_);
+    }
+
+    /**
+     * @brief Initializes this object from an accel-config device handle.
+     *
+     * Steps:
+     *  1. Accept the device only if its name matches DSA_DEVICE_ID, its major version is 1
+     *     and its state is ACCFG_DEVICE_ENABLED.
+     *  2. Cache the GENCAP register and NUMA node id.
+     *  3. Initialize one hw_queue per work queue that reports DML_STATUS_OK, up to the
+     *     capacity of the queue container, and order the queues by ascending priority().
+     *
+     * @param device_descriptor_ptr accel-config device handle (struct accfg_device *)
+     *
+     * @return DML_STATUS_OK when at least one work queue was initialized,
+     *         DML_STATUS_WORK_QUEUES_NOT_AVAILABLE otherwise (unsupported or disabled device,
+     *         no usable queue, or `linux` not defined)
+     */
+    auto hw_device::initialize_new_device(descriptor_t *device_descriptor_ptr) noexcept -> dsahw_status_t
+    {
+#if defined(linux)
+        // Device initialization stage
+        auto       *device_ptr    = reinterpret_cast<accfg_device *>(device_descriptor_ptr);
+        const auto *name_ptr      = dsa_device_get_name(device_ptr);
+        const bool  is_dsa_device = own_search_device_name(name_ptr, DSA_DEVICE_ID, DEVICE_NAME_LENGTH);
+
+        // High bits hold the major version, low byte the minor version
+        const auto device_version = dsa_device_get_version(device_ptr);
+
+        version_major_ = device_version >> 8u;
+        version_minor_ = device_version & 0xFF;
+
+        DIAG("%5s: ", name_ptr);
+        if (!is_dsa_device || version_major_ != 1)
+        {
+            DIAGA("UNSUPPORTED\n");
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+
+        if (ACCFG_DEVICE_ENABLED != dsa_device_get_state(device_ptr))
+        {
+            DIAGA("DISABLED\n");
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+        DIAGA("\n");
+
+        gen_cap_register_ = dsa_device_get_gen_cap_register(device_ptr);
+        numa_node_id_     = dsa_device_get_numa_node(device_ptr);
+
+        DIAG("%5s: version: %d.%d\n", name_ptr, version_major_, version_minor_);
+        DIAG("%5s: numa:    %lu\n", name_ptr, numa_node_id_);
+        DIAG("%5s: GENCAP: 0x%016lX\n", name_ptr, gen_cap_register_);
+        DIAG("%5s: GENCAP: block on fault support:                      %d\n",          name_ptr, block_on_fault_support());
+        DIAG("%5s: GENCAP: overlapping copy support:                    %d\n",          name_ptr, overlapping_copy_support());
+        DIAG("%5s: GENCAP: cache control support (memory):              %d\n",          name_ptr, memory_cache_control_support());
+        DIAG("%5s: GENCAP: cache control support (cache flush):         %d\n",          name_ptr, flush_cache_control_support());
+        DIAG("%5s: GENCAP: maximum supported transfer size:             %u\n",          name_ptr, max_transfer_size());
+        DIAG("%5s: GENCAP: maximum supported batch size:                %u\n",          name_ptr, max_batch_size());
+
+        // Working queues initialization stage
+        const auto by_priority = [](const hw_queue &a, const hw_queue &b) -> bool
+        {
+            return a.priority() < b.priority();
+        };
+
+        auto *wq_ptr = dsa_get_first_work_queue(device_ptr);
+        auto  wq_it  = working_queues_.begin();
+
+        DIAG("%5s: getting device WQs\n", name_ptr);
+        // Stop when the container is full: a device reporting more queues than the
+        // container holds must not write past its end
+        while (nullptr != wq_ptr && wq_it != working_queues_.end())
+        {
+            if (DML_STATUS_OK == wq_it->initialize_new_queue(wq_ptr))
+            {
+                wq_it++;
+
+                std::push_heap(working_queues_.begin(), wq_it, by_priority);
+            }
+
+            wq_ptr = dsa_work_queue_get_next(wq_ptr);
+        }
+
+        // Check number of working queues
+        queue_count_ = std::distance(working_queues_.begin(), wq_it);
+
+        if (queue_count_ > 1)
+        {
+            // Turns the heap built above into a range sorted by ascending priority()
+            std::sort_heap(working_queues_.begin(), wq_it, by_priority);
+        }
+
+        if (queue_count_ == 0)
+        {
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+
+        return DML_STATUS_OK;
+#else
+        return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+#endif
+    }
+
+    // Number of work queues that were successfully initialized for this device
+    auto hw_device::size() const noexcept -> size_t
+    {
+        return queue_count_;
+    }
+
+    // NUMA node id cached by initialize_new_device
+    auto hw_device::numa_id() const noexcept -> uint64_t
+    {
+        return numa_node_id_;
+    }
+
+    // Iterator to the first initialized work queue
+    auto hw_device::begin() const noexcept -> queues_container_t::const_iterator
+    {
+        return working_queues_.cbegin();
+    }
+
+    // Iterator one past the last initialized work queue (not the end of the underlying array)
+    auto hw_device::end() const noexcept -> queues_container_t::const_iterator
+    {
+        return working_queues_.cbegin() + queue_count_;
+    }
+
+}  // namespace dml::core::dispatcher
+
+#endif
diff --git a/sources/core/src/hw_dispatcher/hw_device.hpp b/sources/core/src/hw_dispatcher/hw_device.hpp
new file mode 100644
index 0000000..81722d3
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_device.hpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
+#define DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
+
+#include <array>
+
+#include "dml/dmldefs.h"
+#include "hw_queue.hpp"
+
+#ifdef DML_HW
+#include "legacy_headers/hardware_definitions.h"
+#include "legacy_headers/own_dsa_accel_constants.h"
+
+namespace dml::core::dispatcher
+{
+
+    /**
+     * @brief One hardware device together with the work queues that were successfully initialized for it.
+     *
+     * Queues live in a fixed-size array; only the first queue_count_ entries are valid and
+     * exposed through begin()/end().
+     */
+    class hw_device final
+    {
+        /** Capacity of the work queue container */
+        static constexpr uint32_t max_working_queues = MAX_WORK_QUEUE_COUNT;
+
+        using queues_container_t = std::array<hw_queue, max_working_queues>;
+
+    public:
+        /** Opaque device handle type accepted by initialize_new_device */
+        using descriptor_t = void;
+
+        hw_device() noexcept = default;
+
+        /** Copies the device capability fields into hw_context_ptr->gen_cap */
+        void fill_hw_context(dsahw_context_t *hw_context_ptr) const noexcept;
+
+        /**
+         * Tries to submit the descriptor to the work queues of this device.
+         * Returns DML_STATUS_OK on the first queue that accepts it and
+         * DML_STATUS_WORK_QUEUE_OVERFLOW_ERROR when none does.
+         */
+        [[nodiscard]] auto enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t;
+
+        /**
+         * Initializes the device and its work queues from an accel-config device handle.
+         * Returns DML_STATUS_OK when at least one queue is usable and
+         * DML_STATUS_WORK_QUEUES_NOT_AVAILABLE otherwise.
+         */
+        [[nodiscard]] auto initialize_new_device(descriptor_t *device_descriptor_ptr) noexcept -> dsahw_status_t;
+
+        /** Number of initialized work queues */
+        [[nodiscard]] auto size() const noexcept -> size_t;
+
+        /** NUMA node id of the device */
+        [[nodiscard]] auto numa_id() const noexcept -> uint64_t;
+
+        /** Iterator to the first initialized work queue */
+        [[nodiscard]] auto begin() const noexcept -> queues_container_t::const_iterator;
+
+        /** Iterator one past the last initialized work queue */
+        [[nodiscard]] auto end() const noexcept -> queues_container_t::const_iterator;
+
+    protected:
+        /* Accessors for individual fields of the cached GENCAP register */
+
+        auto block_on_fault_support() const noexcept -> uint8_t;
+
+        auto overlapping_copy_support() const noexcept -> uint8_t;
+
+        auto memory_cache_control_support() const noexcept -> uint8_t;
+
+        auto flush_cache_control_support() const noexcept -> uint8_t;
+
+        auto destination_readback_support() const noexcept -> uint8_t;
+
+        auto descriptor_readback_support() const noexcept -> uint8_t;
+
+        auto max_transfer_size() const noexcept -> uint32_t;
+
+        auto max_batch_size() const noexcept -> uint32_t;
+
+        auto message_size() const noexcept -> uint16_t;
+
+        auto configuration_support() const noexcept -> uint8_t;
+
+    private:
+        queues_container_t working_queues_   = {}; /**< Set of available HW working queues */
+        uint32_t           queue_count_      = 0u; /**< Number of working queues that are available */
+        uint64_t           gen_cap_register_ = 0u; /**< GENCAP register content */
+        uint64_t           numa_node_id_     = 0u; /**< NUMA node id of the device */
+        uint32_t           version_major_    = 0u; /**< Major version of discovered device */
+        uint32_t           version_minor_    = 0u; /**< Minor version of discovered device */
+    };
+
+}  // namespace dml::core::dispatcher
+
+#endif
+#endif  //DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
diff --git a/sources/core/src/hw_dispatcher/hw_dispatcher.cpp b/sources/core/src/hw_dispatcher/hw_dispatcher.cpp
new file mode 100644
index 0000000..5b2e521
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_dispatcher.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "hw_dispatcher.hpp"
+
+#if defined(DML_HW) && defined(linux)
+
+#include "legacy_headers/libaccel_config.h"
+
+#endif
+
+// Returns err_code from the enclosing function when expr is true. TODO: remove this macro entirely
+#define DML_HWSTS_RET(expr, err_code) \
+    {                                 \
+        if (expr)                     \
+        {                             \
+            return (err_code);        \
+        }                             \
+    }
+
+namespace dml::core::dispatcher
+{
+    static hw_dispatcher instance{};  // the single object returned by hw_dispatcher::get_instance()
+
+    hw_dispatcher::hw_dispatcher() noexcept
+    {
+#ifdef DML_HW
+        hw_init_status_ = hw_dispatcher::initialize_hw();  // kept so get_hw_init_status() can report it
+        hw_support_     = hw_init_status_ == DML_STATUS_OK;  // HW counts as supported only if initialization succeeded
+#else
+        hw_support_ = false;  // built without DML_HW: hardware is reported as unsupported
+#endif
+    }
+
+#ifdef DML_HW
+
+    /** @brief Loads the driver, opens a context and fills devices_; @return DML_STATUS_OK if any device was initialized */
+    auto hw_dispatcher::initialize_hw() noexcept -> dsahw_status_t
+    {
+        accfg_ctx *ctx_ptr = nullptr;
+        DIAG("DML version %s\n", "TODO");
+        DIAG("Struct size: %lu B\n", sizeof(device_container_t));
+
+        dsahw_status_t status = dsa_initialize_accelerator_driver(&hw_driver_);
+        DML_HWSTS_RET(status != DML_STATUS_OK, status);
+
+        DIAG("creating context\n");
+        int32_t context_creation_status = dsa_driver_new_context(&ctx_ptr);
+        DML_HWSTS_RET(0 != context_creation_status, DML_STATUS_LIBACCEL_ERROR);
+
+        // Walk the driver's device list starting from the first device of the new context
+        DIAG("enumerating devices\n");
+        auto *dev_tmp_ptr = dsa_context_get_first_device(ctx_ptr);
+        auto  device_it   = devices_.begin();
+
+        // Stop once devices_ is full: dereferencing devices_.end() would be undefined behavior
+        while (nullptr != dev_tmp_ptr && devices_.end() != device_it)
+        {
+            // A slot is consumed only on success; after a failure the same slot is offered to the next device
+            if (DML_STATUS_OK == device_it->initialize_new_device(dev_tmp_ptr))
+            {
+                device_it++;
+            }
+            dev_tmp_ptr = dsa_device_get_next(dev_tmp_ptr);
+        }
+
+        device_count_ = static_cast<size_t>(std::distance(devices_.begin(), device_it));
+
+        if (0u == device_count_)
+        {
+            dsa_context_close(ctx_ptr);  // the context is not stored on this path, so the destructor cannot close it
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+
+        hw_context_.set_driver_context_ptr(ctx_ptr);
+        return DML_STATUS_OK;
+    }
+#endif
+
+    /** @brief Closes the stored driver context, if any, then finalizes the accelerator driver */
+    hw_dispatcher::~hw_dispatcher() noexcept
+    {
+#ifdef DML_HW
+        // A null pointer here means initialize_hw() never stored a context
+        if (auto *const driver_context_ptr = hw_context_.get_driver_context_ptr(); nullptr != driver_context_ptr)
+        {
+            dsa_context_close(driver_context_ptr);
+        }
+
+        // The driver is finalized whether or not a context was open
+        dsa_finalize_accelerator_driver(&hw_driver_);
+
+        // Clear the stored pointer so it no longer refers to the closed context
+        hw_context_.set_driver_context_ptr(nullptr);
+#endif
+    }
+
+    auto hw_dispatcher::get_instance() noexcept -> hw_dispatcher &
+    {
+        return instance;  // the file-scope hw_dispatcher object defined at the top of this namespace block
+    }
+
+    auto hw_dispatcher::is_hw_support() const noexcept -> bool
+    {
+        return hw_support_;  // assigned once by the constructor
+    }
+
+#ifdef DML_HW
+
+    /** @brief Copies the driver context pointer and the first device's properties into hw_context_ptr */
+    void hw_dispatcher::fill_hw_context(dsahw_context_t *const hw_context_ptr) noexcept
+    {
+#if defined(linux)
+        // Device properties come from slot 0 only: all devices across the platform are expected
+        // to share one configuration. Slot 0 is read regardless of the value of device_count_.
+        auto &reference_device = devices_.front();
+        hw_context_ptr->dsa_context_ptr = hw_context_.get_driver_context_ptr();
+        reference_device.fill_hw_context(hw_context_ptr);
+#endif
+    }
+
+    auto hw_dispatcher::get_hw_init_status() const noexcept -> dsahw_status_t
+    {
+        return hw_init_status_;  // result of initialize_hw() as recorded by the constructor
+    }
+
+#ifdef DML_HW
+
+    auto hw_dispatcher::begin() const noexcept -> device_container_t::const_iterator
+    {
+        return devices_.cbegin();  // slot 0 of the fixed-size device array
+    }
+
+    auto hw_dispatcher::end() const noexcept -> device_container_t::const_iterator
+    {
+        return devices_.cbegin() + device_count_;  // one past the last slot initialize_hw() filled, not the array's end
+    }
+
+    void hw_dispatcher::hw_context::set_driver_context_ptr(accfg_ctx *driver_context_ptr) noexcept
+    {
+        driver_context_ptr_ = driver_context_ptr;  // plain store; the previously held context is not closed here
+    }
+
+    [[nodiscard]] auto hw_dispatcher::hw_context::get_driver_context_ptr() noexcept -> accfg_ctx *
+    {
+        return driver_context_ptr_;  // nullptr until set_driver_context_ptr() stores a context
+    }
+
+#endif
+
+#endif
+}  // namespace dml::core::dispatcher
diff --git a/sources/core/src/hw_dispatcher/hw_dispatcher.hpp b/sources/core/src/hw_dispatcher/hw_dispatcher.hpp
new file mode 100644
index 0000000..4dd611b
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_dispatcher.hpp
@@ -0,0 +1,96 @@
+/*
+ *
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
+#define DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
+
+#include <array>
+#include <cstdint>
+
+#include "dml/dmldefs.h"
+#include "hw_device.hpp"
+
+#ifdef DML_HW
+#include "legacy_headers/hardware_configuration_driver.h"
+#include "legacy_headers/hardware_definitions.h"
+#include "legacy_headers/own_dsa_accel_constants.h"
+#endif
+
+namespace dml::core::dispatcher
+{
+
+    class hw_dispatcher final  // Discovers accelerator devices at construction and exposes them for iteration
+    {
+#ifdef DML_HW
+
+        static constexpr uint32_t max_devices = MAX_DEVICE_COUNT; /**< Capacity of the device array */
+
+        using device_container_t = std::array<hw_device, max_devices>; /**< Fixed-size storage for discovered devices */
+
+        class hw_context final  // Holds the driver context pointer owned by the dispatcher
+        {
+        public:
+            void set_driver_context_ptr(accfg_ctx *driver_context_ptr) noexcept; /**< Stores the pointer as-is */
+
+            [[nodiscard]] auto get_driver_context_ptr() noexcept -> accfg_ctx *; /**< Returns the stored pointer, possibly nullptr */
+
+        private:
+            accfg_ctx *driver_context_ptr_ = nullptr; /**< DSA driver context */
+        };
+
+#endif
+
+    public:
+        static auto get_instance() noexcept -> hw_dispatcher &; /**< Access to the dispatcher object defined in hw_dispatcher.cpp */
+
+        [[nodiscard]] auto is_hw_support() const noexcept -> bool; /**< True only when built with DML_HW and initialization succeeded */
+
+#ifdef DML_HW
+
+        [[nodiscard]] auto get_hw_init_status() const noexcept -> dsahw_status_t; /**< Status returned by initialize_hw() */
+
+        void fill_hw_context(dsahw_context_t *hw_context_ptr) noexcept; /**< Fills the context from the stored driver context and device 0 */
+
+        [[nodiscard]] auto begin() const noexcept -> device_container_t::const_iterator; /**< First device slot */
+
+        [[nodiscard]] auto end() const noexcept -> device_container_t::const_iterator; /**< One past the last initialized device */
+
+#endif
+
+        ~hw_dispatcher() noexcept; /**< Closes the driver context and finalizes the driver when built with DML_HW */
+
+        hw_dispatcher() noexcept; /**< Runs hardware initialization when built with DML_HW */
+
+    protected:
+#ifdef DML_HW
+        auto initialize_hw() noexcept -> dsahw_status_t; /**< Loads the driver, creates a context and enumerates devices */
+
+    private:
+        hw_context         hw_context_;        /**< Driver context holder */
+        hw_driver_t        hw_driver_{};       /**< Handle of the loaded configuration driver */
+        device_container_t devices_{};         /**< Device slots; only the first device_count_ are initialized */
+        size_t             device_count_ = 0;  /**< Number of successfully initialized devices */
+#endif
+
+        bool hw_support_; /**< Set by the constructor; has no default member initializer */
+#ifdef DML_HW
+        dsahw_status_t hw_init_status_; /**< Set by the constructor from initialize_hw() */
+#endif
+    };
+
+}  // namespace dml::core::dispatcher
+#endif  //DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
diff --git a/sources/core/src/hw_dispatcher/hw_queue.cpp b/sources/core/src/hw_dispatcher/hw_queue.cpp
new file mode 100644
index 0000000..caa960e
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_queue.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifdef DML_HW
+
+#include <fcntl.h>
+
+#if defined(linux)
+
+#include <sys/mman.h>
+
+#endif
+
+#include "hw_queue.hpp"
+#include "legacy_headers/hardware_configuration_driver.h"
+#include "legacy_headers/own_dsa_accel_constants.h"
+// Returns err_code from the enclosing function when expr is true
+#define DML_HWSTS_RET(expr, err_code) \
+    {                                 \
+        if (expr)                     \
+        {                             \
+            return (err_code);        \
+        }                             \
+    }
+
+namespace dml::core::dispatcher
+{
+    /** @brief Move constructor: takes over other's portal mapping and queue attributes */
+    hw_queue::hw_queue(hw_queue &&other) noexcept
+        : version_(other.version_), priority_(other.priority_),
+          memory_type_(other.memory_type_),  // previously left at its default, losing the queue's memory type
+          portal_mask_(other.portal_mask_), portal_ptr_(other.portal_ptr_),
+          portal_offset_(0u)  // the slot counter starts from zero in the new owner
+    {
+        // other must not unmap the portal in its destructor now that this object owns it
+        other.portal_ptr_ = nullptr;
+    }
+    /** @brief Move assignment: copies other's attributes and exchanges portal ownership with it */
+    auto hw_queue::operator=(hw_queue &&other) noexcept -> hw_queue &
+    {
+        void *const previous_portal_ptr = portal_ptr_;  // mapping held before the assignment (may be nullptr)
+        version_          = other.version_;
+        priority_         = other.priority_;
+        memory_type_      = other.memory_type_;  // previously not transferred
+        portal_mask_      = other.portal_mask_;
+        portal_ptr_       = other.portal_ptr_;
+        other.portal_ptr_ = previous_portal_ptr;  // other's destructor unmaps it; self-assignment keeps the portal
+        portal_offset_    = 0;
+        return *this;
+    }
+
+    /** @brief Unmaps the portal page this queue still owns (Linux builds only) */
+    hw_queue::~hw_queue()
+    {
+#if defined(linux)
+        if (nullptr == portal_ptr_)
+        {
+            return;  // no portal was ever set, or it was handed over by a move
+        }
+        munmap(portal_ptr_, 0x1000u);  // same length that initialize_new_queue() passes to mmap
+        portal_ptr_ = nullptr;
+#endif
+    }
+
+    void hw_queue::set_portal_ptr(void *value_ptr) noexcept
+    {
+        portal_ptr_    = value_ptr;  // unmodified address; the destructor passes it to munmap
+        portal_mask_   = reinterpret_cast<uintptr_t>(value_ptr) & (~OWN_PAGE_MASK);  // address bits outside OWN_PAGE_MASK
+        portal_offset_ = reinterpret_cast<uintptr_t>(value_ptr) & OWN_PAGE_MASK;  // address bits inside OWN_PAGE_MASK
+    }
+
+    auto hw_queue::get_portal_ptr() const noexcept -> void *
+    {
+        const uint64_t slot_index  = portal_offset_.fetch_add(1u);  // atomic post-increment of the shared counter
+        const uint64_t slot_offset = (slot_index << 6) & OWN_PAGE_MASK;  // 64-byte stride, wrapped by the page mask
+        return reinterpret_cast<void *>(slot_offset | portal_mask_);
+    }
+
+    /** @brief Submits desc_ptr through the next portal address; the returned status is the ZF value captured after submission */
+    auto hw_queue::enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t
+    {
+#if defined(linux)
+        uint8_t retry = 0u;
+        void *current_place_ptr = get_portal_ptr();
+        asm volatile("sfence\t\n"
+                     ".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02\t\n"
+                     "setz %0\t\n"
+                     : "=r"(retry)
+                     : "a"(current_place_ptr), "d"(desc_ptr)
+                     : "memory", "cc");  // the descriptor is read through memory, so pending stores must not be moved past the asm
+        return static_cast<dsahw_status_t>(retry);
+#else
+        return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+#endif
+    }
+
+    /** @brief Accepts only an enabled, shared-mode work queue; maps its portal page and records priority and memory type */
+    auto hw_queue::initialize_new_queue(void *wq_descriptor_ptr) noexcept -> dsahw_status_t
+    {
+#if defined(linux)
+        auto *work_queue_ptr        = reinterpret_cast<accfg_wq *>(wq_descriptor_ptr);
+        char path[64];
+#ifdef LOG_HW_INIT
+        auto work_queue_dev_name    = dsa_work_queue_get_device_name(work_queue_ptr);
+#endif
+
+        if (ACCFG_WQ_ENABLED != dsa_work_queue_get_state(work_queue_ptr))
+        {
+            DIAG("     %7s: DISABLED\n", work_queue_dev_name);
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+
+        if (ACCFG_WQ_SHARED != dsa_work_queue_get_mode(work_queue_ptr))
+        {
+            DIAG("     %7s: UNSUPPORTED\n", work_queue_dev_name);
+            return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+        }
+
+        DIAG("     %7s:\n", work_queue_dev_name);
+        auto status = dsa_work_queue_get_device_path(work_queue_ptr, path, 64 - 1);
+        DML_HWSTS_RET((0 > status), DML_STATUS_LIBACCEL_ERROR);
+
+        DIAG("     %7s: opening descriptor %s", work_queue_dev_name, path);
+        auto fd = open(path, O_RDWR);
+        if (0 > fd)  // open() signals failure with a negative value; 0 is a valid descriptor
+        {
+            DIAGA(", access denied\n");
+            return DML_STATUS_LIBACCEL_ERROR;
+        }
+
+        // Map portal for enqcmd; the descriptor is closed right after mmap, whatever its result
+        auto *region_ptr = mmap(nullptr, 0x1000u, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0u);
+        close(fd);
+        if(MAP_FAILED == region_ptr)
+        {
+            DIAGA(", limited MSI-X mapping failed\n");
+            return DML_STATUS_LIBACCEL_ERROR;
+        }
+        DIAGA("\n");
+
+        auto *group_ptr = dsa_work_queue_get_group(work_queue_ptr);
+        if (group_ptr == nullptr) {
+            munmap(region_ptr, 0x1000u);  // the portal is not stored on this path, so release the mapping here
+            return DML_STATUS_LIBACCEL_ERROR;
+        }
+
+        priority_    = dsa_work_queue_get_priority(work_queue_ptr);
+        memory_type_ = dsa_group_get_traffic_class_b(group_ptr) ? supported_memory_type::durable
+                                                                : supported_memory_type::non_durable;
+
+#if 0
+    DIAG("     %7s: size:        %d\n", work_queue_dev_name, accfg_wq_get_size(work_queue_ptr));
+    DIAG("     %7s: threshold:   %d\n", work_queue_dev_name, accfg_wq_get_threshold(work_queue_ptr));
+    DIAG("     %7s: priority:    %d\n", work_queue_dev_name, priority_);
+    DIAG("     %7s: group:       %d\n", work_queue_dev_name, group_id);
+    DIAG("     %7s: memtype:     %d\n", work_queue_dev_name, memory_type_);
+
+    for(struct accfg_engine *engine = accfg_engine_get_first(device_ptr);
+            engine != NULL; engine = accfg_engine_get_next(engine))
+    {
+        if(accfg_engine_get_group_id(engine) == group_id)
+            DIAG("            %s\n", accfg_engine_get_devname(engine));
+    }
+#else
+        DIAG("     %7s: priority:    %d\n", work_queue_dev_name, priority_);
+        DIAG("     %7s: memtype:     %d\n", work_queue_dev_name, static_cast<int>(memory_type_));
+#endif
+
+        hw_queue::set_portal_ptr(region_ptr);
+        return DML_STATUS_OK;
+#else
+        return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
+#endif
+    }
+
+    auto hw_queue::priority() const noexcept -> int32_t
+    {
+        return priority_;  // value read from the driver by initialize_new_queue(); 0 by default
+    }
+
+    auto hw_queue::memory_type() const noexcept -> hw_queue::supported_memory_type
+    {
+        return memory_type_;  // non_durable by default; assigned by initialize_new_queue()
+    }
+
+}  // namespace dml::core::dispatcher
+
+#endif
diff --git a/sources/core/src/hw_dispatcher/hw_queue.hpp b/sources/core/src/hw_dispatcher/hw_queue.hpp
new file mode 100644
index 0000000..5a5dd02
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/hw_queue.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
+#define DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
+
+#include <atomic>
+
+#include "dml/dmldefs.h"
+
+#ifdef DML_HW
+
+#include "legacy_headers/hardware_definitions.h"
+
+namespace dml::core::dispatcher
+{
+
+    class hw_queue  // One hardware work queue: owns the mapped portal page and submits descriptors through it
+    {
+    public:
+        enum class supported_memory_type  // Memory type served by the queue's group
+        {
+            durable,     /**< Chosen when the group's traffic class B is non-zero */
+            non_durable  /**< Chosen otherwise; also the default */
+        };
+
+        using descriptor_t = void; /**< Opaque work queue handle; cast to accfg_wq * by initialize_new_queue() */
+
+        hw_queue() noexcept = default; /**< Creates a queue with no portal mapped */
+
+        hw_queue(const hw_queue &) noexcept = delete; /**< Not copyable: the destructor unmaps the portal */
+
+        auto operator=(const hw_queue &other) noexcept -> hw_queue & = delete; /**< Not copy-assignable */
+
+        hw_queue(hw_queue &&other) noexcept; /**< Takes over other's portal; other.portal_ptr_ becomes nullptr */
+
+        auto operator=(hw_queue &&other) noexcept -> hw_queue &; /**< Move assignment of the queue state */
+
+        auto initialize_new_queue(descriptor_t *wq_descriptor_ptr) noexcept -> dsahw_status_t; /**< Validates the queue and maps its portal */
+
+        [[nodiscard]] auto get_portal_ptr() const noexcept -> void *; /**< Next submission address; advances the slot counter */
+
+        [[nodiscard]] auto enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t; /**< Submits one descriptor */
+
+        [[nodiscard]] auto priority() const noexcept -> int32_t; /**< Stored queue priority */
+
+        [[nodiscard]] auto memory_type() const noexcept -> supported_memory_type; /**< Stored memory type */
+
+        void set_portal_ptr(void *portal_ptr) noexcept; /**< Stores the portal address and derives mask and offset from it */
+
+        ~hw_queue() noexcept; /**< Unmaps the portal if one is still owned */
+
+    private:
+        uint32_t                       version_       = 0u; /**< Copied on move; not assigned elsewhere in hw_queue.cpp */
+        int32_t                        priority_      = 0u; /**< Priority reported by the driver */
+        supported_memory_type          memory_type_   = supported_memory_type::non_durable; /**< Memory type of the queue's group */
+        uint64_t                       portal_mask_   = 0u; /**< Portal address bits outside OWN_PAGE_MASK */
+        mutable void                  *portal_ptr_    = nullptr; /**< Mapped portal address; nullptr when nothing is owned */
+        mutable std::atomic<uintptr_t> portal_offset_ = 0u; /**< Counter selecting the next 64-byte slot (mod page size) */
+    };
+
+}  // namespace dml::core::dispatcher
+#endif
+
+#endif  //DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
diff --git a/sources/hw-path/include/hardware_api.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_api.h
similarity index 88%
rename from sources/hw-path/include/hardware_api.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_api.h
index bc3c67c..9977170 100644
--- a/sources/hw-path/include/hardware_api.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_api.h
@@ -22,11 +22,10 @@
  * @{
  * @brief Contains general hardware function declarations
  */
-#include "hardware_limits.h"
+#include "hardware_completion_records_api.h"
 #include "hardware_definitions.h"
 #include "hardware_descriptors_api.h"
-#include "hardware_completion_records_api.h"
-
+#include "hardware_limits.h"
 
 #ifndef DML_OWN_HW_API_H__
 #define DML_OWN_HW_API_H__
@@ -43,7 +42,7 @@ extern "C" {
  * @return Follow statuses:
  *      - @todo Add statuses
  */
-dml_status_t DML_HW_API(get_context)(dsahw_context_t ** hw_context_ptr);
+dml_status_t DML_HW_API(get_context)(dsahw_context_t **hw_context_ptr);
 
 /**
  * @brief Calls an operation, which is implemented with DSA hardware
@@ -55,9 +54,7 @@ dml_status_t DML_HW_API(get_context)(dsahw_context_t ** hw_context_ptr);
  * @return @ref dml_status_t in according to specified DSA operation in @ref dml_job_t
  *
  */
-dml_status_t DML_HW_API(submit)(const dsahw_context_t *hw_state_ptr,
-                                const dsahw_descriptor_t *descriptor_ptr,
-                                dml_operation_flags_t flags);
+dml_status_t DML_HW_API(submit)(const dsahw_context_t *hw_state_ptr, const dsahw_descriptor_t *descriptor_ptr, dml_operation_flags_t flags);
 
 /**
  * @brief Closes connection with hardware
@@ -66,12 +63,10 @@ dml_status_t DML_HW_API(submit)(const dsahw_context_t *hw_state_ptr,
  *
  * @return The one of the follow statuses:
  *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_HARDWARE_DISCONNECTION_ERROR.
  *
  */
 dml_status_t DML_HW_API(finalize)(dsahw_context_t *hw_context_ptr);
 
-
 /**
  * @brief Returns value of OverlappingCopySupport from GENCAP
  *
@@ -82,11 +77,10 @@ dml_status_t DML_HW_API(finalize)(dsahw_context_t *hw_context_ptr);
  */
 int DML_HW_API(get_overlapping_copy_support)(dsahw_context_t *hw_context_ptr);
 
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif //DML_OWN_HW_API_H__
+#endif  //DML_OWN_HW_API_H__
 
 /** @} */
diff --git a/sources/hw-path/include/hardware_completion_records_api.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_completion_records_api.h
similarity index 87%
rename from sources/hw-path/include/hardware_completion_records_api.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_completion_records_api.h
index c2447a8..3a41113 100644
--- a/sources/hw-path/include/hardware_completion_records_api.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_completion_records_api.h
@@ -24,7 +24,6 @@
  */
 #include "hardware_definitions.h"
 
-
 #ifndef DML_HW_COMPLETION_RECORDS_API_H__
 #define DML_HW_COMPLETION_RECORDS_API_H__
 
@@ -32,7 +31,6 @@
 extern "C" {
 #endif
 
-
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
  * and fills the @ref dml_job_t structure in accordance with the @ref DML_OP_MEM_MOVE operation
@@ -50,9 +48,8 @@ extern "C" {
  *
  */
 dsahw_status_t DML_HW_API(get_mem_move_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                               dml_meta_result_t *result_ptr,
-                                               uint32_t *elements_processed_ptr);
-
+                                               dml_meta_result_t               *result_ptr,
+                                               uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -67,9 +64,7 @@ dsahw_status_t DML_HW_API(get_mem_move_result)(const dsahw_completion_record_t *
  *      - @ref DML_STATUS_PAGE_FAULT_ERROR;
  *
  */
-dsahw_status_t DML_HW_API(get_fill_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                           uint32_t *elements_processed_ptr);
-
+dsahw_status_t DML_HW_API(get_fill_result)(const dsahw_completion_record_t *completion_record_ptr, uint32_t *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -88,9 +83,8 @@ dsahw_status_t DML_HW_API(get_fill_result)(const dsahw_completion_record_t *comp
  *
  */
 dsahw_status_t DML_HW_API(get_compare_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                              dml_meta_result_t *result_ptr,
-                                              uint32_t *elements_processed_ptr);
-
+                                              dml_meta_result_t               *result_ptr,
+                                              uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -113,11 +107,10 @@ dsahw_status_t DML_HW_API(get_compare_result)(const dsahw_completion_record_t *c
  *
  */
 dsahw_status_t DML_HW_API(get_delta_create_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                   const uint8_t *destination_ptr,
-                                                   uint32_t *delta_record_length_ptr,
-                                                   dml_meta_result_t *result_ptr,
-                                                   uint32_t *elements_processed_ptr);
-
+                                                   const uint8_t                   *destination_ptr,
+                                                   uint32_t                        *delta_record_length_ptr,
+                                                   dml_meta_result_t               *result_ptr,
+                                                   uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -135,9 +128,7 @@ dsahw_status_t DML_HW_API(get_delta_create_result)(const dsahw_completion_record
  *      - @ref DML_STATUS_OVERLAPPING_BUFFER_ERROR.
  *
  */
-dsahw_status_t DML_HW_API(get_delta_apply_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                  uint32_t *elements_processed_ptr);
-
+dsahw_status_t DML_HW_API(get_delta_apply_result)(const dsahw_completion_record_t *completion_record_ptr, uint32_t *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -154,9 +145,7 @@ dsahw_status_t DML_HW_API(get_delta_apply_result)(const dsahw_completion_record_
  *      - @ref DML_STATUS_OVERLAPPING_BUFFER_ERROR
  *
  */
-dsahw_status_t DML_HW_API(get_dualcast_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                               uint32_t *elements_processed_ptr);
-
+dsahw_status_t DML_HW_API(get_dualcast_result)(const dsahw_completion_record_t *completion_record_ptr, uint32_t *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -174,9 +163,8 @@ dsahw_status_t DML_HW_API(get_dualcast_result)(const dsahw_completion_record_t *
  *
  */
 dsahw_status_t DML_HW_API(get_crc_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                          uint32_t *crc_result_ptr,
-                                          uint32_t *elements_processed_ptr);
-
+                                          uint32_t                        *crc_result_ptr,
+                                          uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -195,9 +183,8 @@ dsahw_status_t DML_HW_API(get_crc_result)(const dsahw_completion_record_t *compl
  *
  */
 dsahw_status_t DML_HW_API(get_crc_copy_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                               uint32_t *crc_result_ptr,
-                                               uint32_t *elements_processed_ptr);
-
+                                               uint32_t                        *crc_result_ptr,
+                                               uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -212,9 +199,7 @@ dsahw_status_t DML_HW_API(get_crc_copy_result)(const dsahw_completion_record_t *
  *      - @ref DML_STATUS_PAGE_FAULT_ERROR;
  *
  */
-dsahw_status_t DML_HW_API(get_cache_flush_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                  uint32_t *elements_processed_ptr);
-
+dsahw_status_t DML_HW_API(get_cache_flush_result)(const dsahw_completion_record_t *completion_record_ptr, uint32_t *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -233,9 +218,9 @@ dsahw_status_t DML_HW_API(get_cache_flush_result)(const dsahw_completion_record_
  *
  */
 dsahw_status_t DML_HW_API(get_check_dif_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                dml_meta_result_t *result_ptr,
-                                                dml_dif_config_t *dif_config_ptr,
-                                                uint32_t *elements_processed_ptr);
+                                                dml_meta_result_t               *result_ptr,
+                                                dml_dif_config_t                *dif_config_ptr,
+                                                uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -254,9 +239,8 @@ dsahw_status_t DML_HW_API(get_check_dif_result)(const dsahw_completion_record_t
  *
  */
 dsahw_status_t DML_HW_API(get_insert_dif_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                 dml_dif_config_t *dif_config_ptr,
-                                                 uint32_t *elements_processed_ptr);
-
+                                                 dml_dif_config_t                *dif_config_ptr,
+                                                 uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -275,9 +259,8 @@ dsahw_status_t DML_HW_API(get_insert_dif_result)(const dsahw_completion_record_t
  *
  */
 dsahw_status_t DML_HW_API(get_strip_dif_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                dml_dif_config_t *dif_config_ptr,
-                                                uint32_t *elements_processed_ptr);
-
+                                                dml_dif_config_t                *dif_config_ptr,
+                                                uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -297,10 +280,9 @@ dsahw_status_t DML_HW_API(get_strip_dif_result)(const dsahw_completion_record_t
  *
  */
 dsahw_status_t DML_HW_API(get_update_dif_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                                 dml_dif_config_t *dif_config_ptr,
-                                                 dml_meta_result_t *result_ptr,
-                                                 uint32_t *elements_processed_ptr);
-
+                                                 dml_dif_config_t                *dif_config_ptr,
+                                                 dml_meta_result_t               *result_ptr,
+                                                 uint32_t                        *elements_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -315,7 +297,6 @@ dsahw_status_t DML_HW_API(get_update_dif_result)(const dsahw_completion_record_t
  */
 dsahw_status_t DML_HW_API(get_nop_result)(const dsahw_completion_record_t *completion_record_ptr);
 
-
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
  * and fills the @ref dml_job_t structure in accordance with the @ref DML_OP_BATCH operation
@@ -329,9 +310,7 @@ dsahw_status_t DML_HW_API(get_nop_result)(const dsahw_completion_record_t *compl
  *      - @ref DML_STATUS_BATCH_ERROR;
  *
  */
-dsahw_status_t DML_HW_API(get_batch_result)(const dsahw_completion_record_t *completion_record_ptr,
-                                            uint32_t *descriptors_processed_ptr);
-
+dsahw_status_t DML_HW_API(get_batch_result)(const dsahw_completion_record_t *completion_record_ptr, uint32_t *descriptors_processed_ptr);
 
 /**
  * @brief Extracts operation results from @ref dsahw_completion_record_t
@@ -346,11 +325,10 @@ dsahw_status_t DML_HW_API(get_batch_result)(const dsahw_completion_record_t *com
  */
 dsahw_status_t DML_HW_API(get_drain_result)(const dsahw_completion_record_t *completion_record_ptr);
 
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif //DML_HW_COMPLETION_RECORDS_API_H__
+#endif  //DML_HW_COMPLETION_RECORDS_API_H__
 
 /** @} */
diff --git a/sources/hw-path/include/hardware_configuration_driver.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_configuration_driver.h
similarity index 60%
rename from sources/hw-path/include/hardware_configuration_driver.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_configuration_driver.h
index 2861910..c7f05c9 100644
--- a/sources/hw-path/include/hardware_configuration_driver.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_configuration_driver.h
@@ -1,18 +1,18 @@
 /*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
+* Copyright 2021 Intel Corporation.
+*
+* This software and the related documents are Intel copyrighted materials,
+* and your use of them is governed by the express license under which they
+* were provided to you ("License"). Unless the License provides otherwise,
+* you may not use, modify, copy, publish, distribute, disclose or transmit
+* this software or the related documents without Intel's prior written
+* permission.
+*
+* This software and the related documents are provided as is, with no
+* express or implied warranties, other than those that are expressly
+* stated in the License.
+*
+*/
 
 #ifndef DML_SOURCES_HW_PATH_INCLUDE_HW_CONFIGURATION_DRIVER_H_
 #define DML_SOURCES_HW_PATH_INCLUDE_HW_CONFIGURATION_DRIVER_H_
@@ -30,33 +30,33 @@ extern "C" {
 #endif
 
 /**
- * @brief Type of function that should be loaded from accelerator configuration driver
- */
+* @brief Type of function that should be loaded from accelerator configuration driver
+*/
 typedef int (*library_function)();
 
 /**
- * @brief Structure that maps function implementation to its name
- */
+* @brief Structure that maps function implementation to its name
+*/
 typedef struct {
-    library_function function;          /**< Function address */
-    const char       *function_name;    /**< Function name */
+   library_function function;          /**< Function address */
+   const char       *function_name;    /**< Function name */
 
 } dsa_desc_t;
 
 /**
- * @brief Structure represents configuration driver used for access to accelerator instances and their properties
- */
+* @brief Structure represents configuration driver used for access to accelerator instances and their properties
+*/
 typedef struct {
-    void *driver_instance_ptr; /**< Pointer to a loaded driver */
+   void *driver_instance_ptr; /**< Pointer to a loaded driver */
 } hw_driver_t;
 
 /**
- * @brief Initializes driver functions
- *
- * @note Should be called only once
- *
- * @return status of initialization
- */
+* @brief Initializes driver functions
+*
+* @note Should be called only once
+*
+* @return status of initialization
+*/
 dsahw_status_t DML_HW_API(initialize_accelerator_driver)(hw_driver_t *driver_ptr);
 
 void DML_HW_API(finalize_accelerator_driver)(hw_driver_t *driver_ptr);
@@ -71,7 +71,7 @@ const char *DML_HW_API(device_get_name)(struct accfg_device *device);
 
 struct accfg_device *DML_HW_API(device_get_next)(struct accfg_device *device);
 
-uint32_t DML_HW_API(device_get_major_version)(struct accfg_device *device);
+uint32_t DML_HW_API(device_get_version)(struct accfg_device *device);
 
 enum accfg_device_state DML_HW_API(device_get_state)(struct accfg_device *device);
 
@@ -83,7 +83,7 @@ struct accfg_wq *DML_HW_API(get_first_work_queue)(struct accfg_device *device);
 
 struct accfg_wq *DML_HW_API(work_queue_get_next)(struct accfg_wq *wq);
 
-int32_t DML_HW_API(work_queue_get_minor_version)(struct accfg_wq *wq);
+const char * DML_HW_API(work_queue_get_device_name)(struct accfg_wq *wq);
 
 int32_t DML_HW_API(work_queue_get_priority)(struct accfg_wq *wq);
 
@@ -111,4 +111,4 @@ int DML_HW_API(work_queue_get_device_path)(struct accfg_wq *wq, char *buf, size_
 }
 #endif
 
-#endif //DML_SOURCES_HW_PATH_INCLUDE_HW_CONFIGURATION_DRIVER_H_
+#endif //DML_SOURCES_HW_PATH_INCLUDE_HW_CONFIGURATION_DRIVER_H_
\ No newline at end of file
diff --git a/sources/hw-path/include/hardware_definitions.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_definitions.h
similarity index 66%
rename from sources/hw-path/include/hardware_definitions.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_definitions.h
index 12f95fe..71334b7 100644
--- a/sources/hw-path/include/hardware_definitions.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_definitions.h
@@ -30,17 +30,24 @@
 #ifndef DSA_HW_DSA_DEFINES_H__
 #define DSA_HW_DSA_DEFINES_H__
 
+#ifdef LOG_HW_INIT
+#include <stdio.h>
+#define DIAGA(...) do { printf(__VA_ARGS__); fflush(stdout); } while (0)              /**< Diagnostic printer for appending to line; do/while(0) keeps print + flush a single statement */
+#define DIAG(...)  do { printf("dml-diag: " __VA_ARGS__); fflush(stdout); } while (0) /**< Diagnostic printer; format must be a string literal so the "dml-diag: " prefix concatenates */
+#else
+#define DIAGA(...)                                                      /**< Diagnostic printer for appending to line (expands to nothing without LOG_HW_INIT) */
+#define DIAG(...)                                                       /**< Diagnostic printer (expands to nothing without LOG_HW_INIT) */
+#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
 /* ------ Definitions ------ */
 
-#if defined( _WIN32 ) || defined ( _WIN64 )
-#define DML_HW_STDCALL  __stdcall
-#define DML_HW_CDECL    __cdecl
+#if defined(_WIN32) || defined(_WIN64)
+#define DML_HW_STDCALL __stdcall
+#define DML_HW_CDECL   __cdecl
 #else
 #define DML_HW_STDCALL
 #define DML_HW_CDECL
@@ -48,74 +55,67 @@ extern "C" {
 
 /* ------ Macros ------ */
 
-#if !defined( DML_HW_API )
+#if !defined(DML_HW_API)
 #define DML_HW_API(name) DML_HW_STDCALL dsa_##name /**< Declaration macros to manipulate function name */
 #endif
 
-
 /* ------ Statuses ------ */
 
 typedef dml_status_t dsahw_status_t; /**< Redefinition of @ref dml_status_t for core functions */
 
-
 /* ------ State ------ */
 
 #if defined(__GNUC__)
-    /**
+/**
      * @brief Packs a structure byte by byte
      */
-    #define DML_HW_BYTE_PACKED_STRUCTURE_BEGIN \
-        typedef struct __attribute__ ((__packed__))
+#define DML_HW_BYTE_PACKED_STRUCTURE_BEGIN typedef struct __attribute__((__packed__))
 
-    /**
+/**
      * @brief Pops a previous structure pack property
      */
-    #define DML_HW_BYTE_PACKED_STRUCTURE_END
-#elif(_MSC_VER)
-    /**
+#define DML_HW_BYTE_PACKED_STRUCTURE_END
+#elif (_MSC_VER)
+/**
      * @brief Packs a structure byte by byte
      */
-    #define DML_HW_BYTE_PACKED_STRUCTURE_BEGIN \
-        __pragma(pack(push, 1)) \
-        typedef struct
+#define DML_HW_BYTE_PACKED_STRUCTURE_BEGIN __pragma(pack(push, 1)) typedef struct
 
-    /**
+/**
     * @brief Pops a previous structure pack property
     */
-    #define DML_HW_BYTE_PACKED_STRUCTURE_END \
-        __pragma(pack(pop))
+#define DML_HW_BYTE_PACKED_STRUCTURE_END   __pragma(pack(pop))
 #else
-    #error Compiler not supported
+#error Compiler not supported
 #endif
 
-
 /* ################# DSA DESCRIPTOR  ################# */
 
-#define DSA_HW_DESCRIPTOR_SIZE  (64u)         /**< DSA hardware descriptor byte size */
+#define DSA_HW_DESCRIPTOR_SIZE (64u) /**< DSA hardware descriptor byte size */
 
 /**
  * @brief Defines a common type of the DSA hardware descriptor
  */
 DML_HW_BYTE_PACKED_STRUCTURE_BEGIN
 {
-    uint8_t bytes[DSA_HW_DESCRIPTOR_SIZE];  /**< Allocation memory for an abstract DSA descriptor*/
-} dsahw_descriptor_t;
+    uint8_t bytes[DSA_HW_DESCRIPTOR_SIZE]; /**< Raw storage for an abstract DSA descriptor */
+}
+dsahw_descriptor_t;
 DML_HW_BYTE_PACKED_STRUCTURE_END
 
-
 /* ################# DSA COMPLETION RECORD  ################# */
 
-#define DSA_HW_COMPLETION_RECORD_SIZE  (32u) /**< DSA hardware completion record byte size */
-
+#define DSA_HW_COMPLETION_RECORD_SIZE (32u) /**< DSA hardware completion record byte size */
 
 /**
  * @brief Defines an abstract type of the DSA hardware completion record
  */
 DML_HW_BYTE_PACKED_STRUCTURE_BEGIN
 {
-    uint8_t status;                                 /**< DSA completion status field */
-    uint8_t bytes[DSA_HW_COMPLETION_RECORD_SIZE - 1u];  /**< Allocation memory for others fields*/
-} dsahw_completion_record_t;
+    uint8_t status;                                    /**< DSA completion status field */
+    uint8_t bytes[DSA_HW_COMPLETION_RECORD_SIZE - 1u]; /**< Raw storage for the remaining fields of the record */
+}
+dsahw_completion_record_t;
 DML_HW_BYTE_PACKED_STRUCTURE_END
 
 /**
@@ -135,12 +135,11 @@ typedef uint8_t portal_t; /**< Define portal type */
  */
 typedef struct
 {
-    portal_t *portals_ptr;     /**< Pointer to memory, which is mapped as DSA Portals */
-    uint32_t current_portal;   /**< Current available portal to enqueue a descriptor   */
-    uint32_t portal_count;     /**< Maximal count of portals in the portal table       */
+    portal_t *portals_ptr;    /**< Pointer to memory, which is mapped as DSA Portals */
+    uint32_t  current_portal; /**< Current available portal to enqueue a descriptor   */
+    uint32_t  portal_count;   /**< Maximal count of portals in the portal table       */
 } own_hw_portal_information_t;
 
-
 /**
  * @brief Contain information about TC-A and TC-B portals
  */
@@ -150,7 +149,6 @@ typedef struct
     own_hw_portal_information_t tc_b_portals; /**< WQs working with TC-B */
 } own_hw_portal_table_t;
 
-
 /**
  * @brief Contains information from General Capabilities Register (GENCAP)
  */
@@ -164,28 +162,25 @@ typedef struct
     uint8_t  destination_readback_support; /**< The Destination Readback flag in descriptors is supported/unsupported           */
     uint8_t  flush_cache_control_support;  /**< Cache control for cache flush operations is supported/unsupported               */
     uint8_t  overlapping_copy_support;     /**< Memory overlapping supported/unsupported for the memory move operation          */
-    uint8_t  interrupt_handle_request;     /**< Used to determine the interrupt handle to use in descriptors                    */
     uint8_t  block_on_fault_support;       /**< Block of fault supported/unsupported                                            */
     uint8_t  configuration_support;        /**< Group configuration and WQ configuration are read-write/read-only               */
-    uint8_t  max_descriptors;              /**< The maximum number of descriptors that can be in progress in each engine        */
 } own_hw_gen_cap_t;
 
-
 /**
  * @brief Contains specific information about Hardware Path
  */
-typedef struct {
-    own_dml_structure_id_t guard;         /**< Structure guard                         */
-    own_hw_portal_table_t  portal_table;  /**< Contains information about open portals */
-    own_hw_gen_cap_t       gen_cap;       /**< General Capabilities Register fields    */
+typedef struct
+{
+    own_dml_structure_id_t guard;        /**< Structure guard                         */
+    own_hw_portal_table_t  portal_table; /**< Contains information about open portals */
+    own_hw_gen_cap_t       gen_cap;      /**< General Capabilities Register fields    */
 #if defined(linux)
-    struct accfg_ctx *dsa_context_ptr;   /**< @todo */
+    struct accfg_ctx *dsa_context_ptr; /**< @todo */
 #endif
 } dsahw_context_t;
 
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif // DSA_HW_DSA_DEFINES_H__
+#endif  // DSA_HW_DSA_DEFINES_H__
diff --git a/sources/hw-path/include/hardware_descriptors_api.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_descriptors_api.h
similarity index 78%
rename from sources/hw-path/include/hardware_descriptors_api.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_descriptors_api.h
index a0ef2b5..77e5f3a 100644
--- a/sources/hw-path/include/hardware_descriptors_api.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_descriptors_api.h
@@ -24,7 +24,6 @@
  */
 #include "hardware_definitions.h"
 
-
 #ifndef DSA_HARDWARE_DESCRIPTORS_API_H__
 #define DSA_HARDWARE_DESCRIPTORS_API_H__
 
@@ -32,7 +31,6 @@
 extern "C" {
 #endif
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_MEM_MOVE operation
  *
@@ -46,14 +44,13 @@ extern "C" {
  * @return The following statuses:
  * @todo add return statuses
  */
-dsahw_status_t DML_HW_API(init_mem_move_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                    const uint8_t *source_ptr,
-                                                    uint32_t source_length,
-                                                    uint8_t *destination_ptr,
-                                                    dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_mem_move_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                    const uint8_t             *source_ptr,
+                                                    uint32_t                   source_length,
+                                                    uint8_t                   *destination_ptr,
+                                                    dml_operation_flags_t      flags,
                                                     dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_FILL operation
  *
@@ -68,14 +65,13 @@ dsahw_status_t DML_HW_API(init_mem_move_descriptor)(dsahw_descriptor_t *descript
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_fill_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                uint64_t *pattern_ptr,
-                                                uint8_t *destination_ptr,
-                                                uint32_t destination_length,
-                                                dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_fill_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                uint64_t                  *pattern_ptr,
+                                                uint8_t                   *destination_ptr,
+                                                uint32_t                   destination_length,
+                                                dml_operation_flags_t      flags,
                                                 dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_COMPARE operation
  *
@@ -91,15 +87,14 @@ dsahw_status_t DML_HW_API(init_fill_descriptor)(dsahw_descriptor_t *descriptor_p
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_compare_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                   const uint8_t *source_first_ptr,
-                                                   const uint8_t *source_second_ptr,
-                                                   uint32_t source_length,
-                                                   dml_meta_result_t expected_result,
-                                                   dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_compare_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                   const uint8_t             *source_first_ptr,
+                                                   const uint8_t             *source_second_ptr,
+                                                   uint32_t                   source_length,
+                                                   dml_meta_result_t          expected_result,
+                                                   dml_operation_flags_t      flags,
                                                    dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_COMPARE_PATTERN operation
  *
@@ -115,15 +110,14 @@ dsahw_status_t DML_HW_API(init_compare_descriptor)(dsahw_descriptor_t *descripto
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_compare_pattern_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                           const uint8_t *source_ptr,
-                                                           uint32_t source_length,
-                                                           const uint64_t *pattern_ptr,
-                                                           dml_meta_result_t expected_result,
-                                                           dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_compare_pattern_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                           const uint8_t             *source_ptr,
+                                                           uint32_t                   source_length,
+                                                           const uint64_t            *pattern_ptr,
+                                                           dml_meta_result_t          expected_result,
+                                                           dml_operation_flags_t      flags,
                                                            dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DELTA_CREATE operation
  *
@@ -141,17 +135,16 @@ dsahw_status_t DML_HW_API(init_compare_pattern_descriptor)(dsahw_descriptor_t *d
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_delta_create_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                        uint8_t *source_first_ptr,
-                                                        uint8_t *source_second_ptr,
-                                                        uint32_t source_length,
-                                                        uint8_t *delta_record_ptr,
-                                                        uint32_t max_delta_record_length,
-                                                        dml_meta_result_t expected_result,
-                                                        dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_delta_create_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                        uint8_t                   *source_first_ptr,
+                                                        uint8_t                   *source_second_ptr,
+                                                        uint32_t                   source_length,
+                                                        uint8_t                   *delta_record_ptr,
+                                                        uint32_t                   max_delta_record_length,
+                                                        dml_meta_result_t          expected_result,
+                                                        dml_operation_flags_t      flags,
                                                         dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DELTA_APPLY operation
  *
@@ -167,15 +160,14 @@ dsahw_status_t DML_HW_API(init_delta_create_descriptor)(dsahw_descriptor_t *desc
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_delta_apply_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                       uint8_t *delta_record_ptr,
-                                                       uint32_t delta_record_length,
-                                                       uint8_t *destination_ptr,
-                                                       uint32_t destination_length,
-                                                       dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_delta_apply_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                       uint8_t                   *delta_record_ptr,
+                                                       uint32_t                   delta_record_length,
+                                                       uint8_t                   *destination_ptr,
+                                                       uint32_t                   destination_length,
+                                                       dml_operation_flags_t      flags,
                                                        dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DUALCAST operation
  *
@@ -191,15 +183,14 @@ dsahw_status_t DML_HW_API(init_delta_apply_descriptor)(dsahw_descriptor_t *descr
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_dualcast_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                    const uint8_t *source_ptr,
-                                                    uint32_t source_length,
-                                                    uint8_t *destination_first_ptr,
-                                                    uint8_t *destination_second_ptr,
-                                                    dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_dualcast_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                    const uint8_t             *source_ptr,
+                                                    uint32_t                   source_length,
+                                                    uint8_t                   *destination_first_ptr,
+                                                    uint8_t                   *destination_second_ptr,
+                                                    dml_operation_flags_t      flags,
                                                     dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for @ref DML_OP_CRC operation
  *
@@ -214,14 +205,13 @@ dsahw_status_t DML_HW_API(init_dualcast_descriptor)(dsahw_descriptor_t *descript
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_crc_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                               uint8_t *source_ptr,
-                                               uint32_t source_length,
-                                               uint32_t *crc_checksum_ptr,
-                                               dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_crc_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                               uint8_t                   *source_ptr,
+                                               uint32_t                   source_length,
+                                               uint32_t                  *crc_checksum_ptr,
+                                               dml_operation_flags_t      flags,
                                                dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for @ref DML_OP_COPY_CRC operation
  *
@@ -237,15 +227,14 @@ dsahw_status_t DML_HW_API(init_crc_descriptor)(dsahw_descriptor_t *descriptor_pt
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_crc_copy_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                    uint8_t *source_ptr,
-                                                    uint32_t source_length,
-                                                    uint32_t *crc_checksum_ptr,
-                                                    uint8_t *destination_ptr,
-                                                    dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_crc_copy_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                    uint8_t                   *source_ptr,
+                                                    uint32_t                   source_length,
+                                                    uint32_t                  *crc_checksum_ptr,
+                                                    uint8_t                   *destination_ptr,
+                                                    dml_operation_flags_t      flags,
                                                     dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for @ref DML_OP_CACHE_FLUSH operation
  *
@@ -259,13 +248,12 @@ dsahw_status_t DML_HW_API(init_crc_copy_descriptor)(dsahw_descriptor_t *descript
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_cache_flush_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                       uint8_t *memory_region_ptr,
-                                                       uint32_t memory_region_length,
-                                                       dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_cache_flush_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                       uint8_t                   *memory_region_ptr,
+                                                       uint32_t                   memory_region_length,
+                                                       dml_operation_flags_t      flags,
                                                        dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DIF_CHECK operation
  *
@@ -281,14 +269,13 @@ dsahw_status_t DML_HW_API(init_cache_flush_descriptor)(dsahw_descriptor_t *descr
  *      - @ref DML_STATUS_JOB_LENGTH_ERROR;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_check_dif_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                     uint8_t *source_ptr,
-                                                     uint32_t source_length,
-                                                     const dml_dif_config_t *dif_config_ptr,
-                                                     dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_check_dif_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                     uint8_t                   *source_ptr,
+                                                     uint32_t                   source_length,
+                                                     const dml_dif_config_t    *dif_config_ptr,
+                                                     dml_operation_flags_t      flags,
                                                      dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DIF_INSERT operation
  *
@@ -304,15 +291,14 @@ dsahw_status_t DML_HW_API(init_check_dif_descriptor)(dsahw_descriptor_t *descrip
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_JOB_LENGTH_ERROR;
  */
-dsahw_status_t DML_HW_API(init_insert_dif_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                      uint8_t *source_ptr,
-                                                      uint32_t source_length,
-                                                      const dml_dif_config_t *dif_config_ptr,
-                                                      uint8_t *destination_ptr,
-                                                      dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_insert_dif_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                      uint8_t                   *source_ptr,
+                                                      uint32_t                   source_length,
+                                                      const dml_dif_config_t    *dif_config_ptr,
+                                                      uint8_t                   *destination_ptr,
+                                                      dml_operation_flags_t      flags,
                                                       dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DIF_STRIP operation
  *
@@ -328,15 +314,14 @@ dsahw_status_t DML_HW_API(init_insert_dif_descriptor)(dsahw_descriptor_t *descri
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_JOB_LENGTH_ERROR;
  */
-dsahw_status_t DML_HW_API(init_strip_dif_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                     uint8_t *source_ptr,
-                                                     uint32_t source_length,
-                                                     const dml_dif_config_t *dif_config_ptr,
-                                                     uint8_t *destination_ptr,
-                                                     dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_strip_dif_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                     uint8_t                   *source_ptr,
+                                                     uint32_t                   source_length,
+                                                     const dml_dif_config_t    *dif_config_ptr,
+                                                     uint8_t                   *destination_ptr,
+                                                     dml_operation_flags_t      flags,
                                                      dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DIF_UPDATE operation
  *
@@ -352,15 +337,14 @@ dsahw_status_t DML_HW_API(init_strip_dif_descriptor)(dsahw_descriptor_t *descrip
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_JOB_LENGTH_ERROR;
  */
-dsahw_status_t DML_HW_API(init_update_dif_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                      uint8_t *source_ptr,
-                                                      uint32_t source_length,
-                                                      const dml_dif_config_t *dif_config_ptr,
-                                                      uint8_t *destination_ptr,
-                                                      dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_update_dif_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                      uint8_t                   *source_ptr,
+                                                      uint32_t                   source_length,
+                                                      const dml_dif_config_t    *dif_config_ptr,
+                                                      uint8_t                   *destination_ptr,
+                                                      dml_operation_flags_t      flags,
                                                       dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_NOP operation
  *
@@ -372,11 +356,10 @@ dsahw_status_t DML_HW_API(init_update_dif_descriptor)(dsahw_descriptor_t *descri
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_nop_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                               dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_nop_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                               dml_operation_flags_t      flags,
                                                dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_BATCH operation
  *
@@ -390,13 +373,12 @@ dsahw_status_t DML_HW_API(init_nop_descriptor)(dsahw_descriptor_t *descriptor_pt
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_batch_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                 const dsahw_descriptor_t *internal_descriptors_ptr,
-                                                 uint32_t internal_descriptors_count,
-                                                 dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_batch_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                 const dsahw_descriptor_t  *internal_descriptors_ptr,
+                                                 uint32_t                   internal_descriptors_count,
+                                                 dml_operation_flags_t      flags,
                                                  dsahw_completion_record_t *result_ptr);
 
-
 /**
  * @brief Fills a descriptor for the @ref DML_OP_DRAIN operation
  *
@@ -410,16 +392,16 @@ dsahw_status_t DML_HW_API(init_batch_descriptor)(dsahw_descriptor_t *descriptor_
  *      - @ref DML_STATUS_OK;
  *      - @ref DML_STATUS_NULL_POINTER_ERROR.
  */
-dsahw_status_t DML_HW_API(init_drain_descriptor)(dsahw_descriptor_t *descriptor_ptr,
-                                                 uint64_t *readback_address1_ptr,
-                                                 uint64_t *readback_address2_ptr,
-                                                 dml_operation_flags_t flags,
+dsahw_status_t DML_HW_API(init_drain_descriptor)(dsahw_descriptor_t        *descriptor_ptr,
+                                                 uint64_t                  *readback_address1_ptr,
+                                                 uint64_t                  *readback_address2_ptr,
+                                                 dml_operation_flags_t      flags,
                                                  dsahw_completion_record_t *result_ptr);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif //DSA_HARDWARE_DESCRIPTORS_API_H__
+#endif  //DSA_HARDWARE_DESCRIPTORS_API_H__
 
 /** @} */
diff --git a/sources/hw-path/include/hardware_limits.h b/sources/core/src/hw_dispatcher/legacy_headers/hardware_limits.h
similarity index 97%
rename from sources/hw-path/include/hardware_limits.h
rename to sources/core/src/hw_dispatcher/legacy_headers/hardware_limits.h
index 78121ad..87e75a9 100644
--- a/sources/hw-path/include/hardware_limits.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/hardware_limits.h
@@ -15,22 +15,19 @@
  */
 
 /**
- * 
+ *
  * @brief Contains public hardware limits
  *
  */
 #include "hardware_definitions.h"
 
-
 #ifndef DML_HW_DSA_LIMITS_H__
 #define DML_HW_DSA_LIMITS_H__
 
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
 /**
  * @brief Maximum number of the descriptors/completion records that can be launched in the single batch
  */
@@ -56,9 +53,8 @@ extern "C" {
  */
 #define DSA_HW_MAX_RESULT_ALIGNMENT (32u)
 
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif // DML_HW_DSA_LIMITS_H__
+#endif  // DML_HW_DSA_LIMITS_H__
diff --git a/sources/hw-path/include/libaccel_config.h b/sources/core/src/hw_dispatcher/legacy_headers/libaccel_config.h
similarity index 65%
rename from sources/hw-path/include/libaccel_config.h
rename to sources/core/src/hw_dispatcher/legacy_headers/libaccel_config.h
index 747251e..3a1181b 100644
--- a/sources/hw-path/include/libaccel_config.h
+++ b/sources/core/src/hw_dispatcher/legacy_headers/libaccel_config.h
@@ -14,9 +14,8 @@
  *
  */
 
-/*
- * SPDX-License-Identifier: LGPL-2.0
- */
+// SPDX-License-Identifier: LGPL-2.1
+/* Copyright(c) 2019 Intel Corporation. All rights reserved. */
 
 #ifndef _LIBACCFG_H_
 #define _LIBACCFG_H_
@@ -45,73 +44,92 @@ extern "C" {
 #define UUID_ZERO "00000000-0000-0000-0000-000000000000"
 
 /* no need to save device state */
+enum accfg_device_type {
+    ACCFG_DEVICE_DSA = 0,
+    ACCFG_DEVICE_IAX = 1,
+    ACCFG_DEVICE_TYPE_UNKNOWN = -1,
+};
+
 enum accfg_device_state {
-	ACCFG_DEVICE_DISABLED = 0,
-	ACCFG_DEVICE_ENABLED = 1,
-	ACCFG_DEVICE_UNKNOWN = -1,
+    ACCFG_DEVICE_DISABLED = 0,
+    ACCFG_DEVICE_ENABLED = 1,
+    ACCFG_DEVICE_UNKNOWN = -1,
 };
 
 enum accfg_wq_mode {
-	ACCFG_WQ_SHARED = 0,
-	ACCFG_WQ_DEDICATED,
-	ACCFG_WQ_MODE_UNKNOWN,
+    ACCFG_WQ_SHARED = 0,
+    ACCFG_WQ_DEDICATED,
+    ACCFG_WQ_MODE_UNKNOWN,
 };
 
 enum accfg_wq_state {
-	ACCFG_WQ_DISABLED = 0,
-	ACCFG_WQ_ENABLED = 1,
-	ACCFG_WQ_QUIESCING = 2,
-	ACCFG_WQ_UNKNOWN = -1,
+    ACCFG_WQ_DISABLED,
+    ACCFG_WQ_ENABLED,
+    ACCFG_WQ_QUIESCING,
+    ACCFG_WQ_LOCKED,
+    ACCFG_WQ_UNKNOWN = -1,
 };
 
 enum accfg_wq_type {
-	ACCFG_WQT_NONE = 0,
-	ACCFG_WQT_KERNEL,
-	ACCFG_WQT_USER,
-	ACCFG_WQT_MDEV,
+    ACCFG_WQT_NONE = 0,
+    ACCFG_WQT_KERNEL,
+    ACCFG_WQT_USER,
+    ACCFG_WQT_MDEV,
 };
 
 enum accfg_control_flag {
-	ACCFG_DEVICE_DISABLE = 0,
-	ACCFG_DEVICE_ENABLE,
-	ACCFG_WQ_ENABLE,
-	ACCFG_WQ_DISABLE,
+    ACCFG_DEVICE_DISABLE = 0,
+    ACCFG_DEVICE_ENABLE,
+    ACCFG_WQ_ENABLE,
+    ACCFG_WQ_DISABLE,
+};
+
+enum accfg_mdev_type {
+    ACCFG_MDEV_TYPE_1_DWQ,
+    ACCFG_MDEV_TYPE_1_SWQ,
+    ACCFG_MDEV_TYPE_UNKNOWN,
 };
 
 /* no need to save device error */
 struct accfg_error {
-        uint64_t val[4];
+    uint64_t val[4];
+};
+
+struct accfg_op_cap {
+    uint64_t bits[4];
 };
 
 /* parameters read from sysfs of accfg driver */
 struct dev_parameters {
-	unsigned int token_limit;
+    unsigned int token_limit;
 };
 
 extern char *accfg_basenames[];
+extern char *accfg_mdev_basenames[];
 
 struct group_parameters {
-	unsigned int tokens_reserved;
-	unsigned int tokens_allowed;
-	unsigned int use_token_limit;
-	int traffic_class_a;
-	int traffic_class_b;
+    unsigned int tokens_reserved;
+    unsigned int tokens_allowed;
+    unsigned int use_token_limit;
+    int traffic_class_a;
+    int traffic_class_b;
 };
 
 struct wq_parameters {
-	int group_id;
-	unsigned int wq_size;
-	unsigned int threshold;
-	unsigned int priority;
-	int block_on_fault;
-	const char *mode;
-	const char *type;
-	const char *name;
-	const char *uuid_str;
+    int group_id;
+    unsigned int wq_size;
+    unsigned int threshold;
+    unsigned int priority;
+    int block_on_fault;
+    unsigned int max_batch_size;
+    uint64_t max_transfer_size;
+    const char *mode;
+    const char *type;
+    const char *name;
 };
 
 struct engine_parameters {
-	int group_id;
+    int group_id;
 };
 
 struct accfg_ctx;
@@ -133,11 +151,11 @@ int accfg_new(struct accfg_ctx **ctx);
 
 /* override default log routine */
 void accfg_set_log_fn(struct accfg_ctx *ctx,
-void (*log_fn) (struct accfg_ctx * ctx,
-	       int priority, const char *file,
-	       int line, const char *fn,
-	       const char *format,
-	       va_list args));
+                      void (*log_fn)(struct accfg_ctx *ctx,
+                                     int priority, const char *file,
+                                     int line, const char *fn,
+                                     const char *format,
+                                     va_list args));
 
 /* libaccfg function for device */
 struct accfg_device;
@@ -149,17 +167,19 @@ int accfg_device_disable(struct accfg_device *device, bool force);
 struct accfg_device *accfg_device_get_first(struct accfg_ctx *ctx);
 struct accfg_device *accfg_device_get_next(struct accfg_device *device);
 #define accfg_device_foreach(ctx, device) \
-        for (device = accfg_device_get_first(ctx); \
-             device != NULL; \
-             device = accfg_device_get_next(device))
+	for (device = accfg_device_get_first(ctx); \
+	     device != NULL; \
+	     device = accfg_device_get_next(device))
 struct accfg_ctx *accfg_device_get_ctx(struct accfg_device *);
 const char *accfg_device_get_devname(struct accfg_device *device);
 int accfg_device_type_validate(const char *dev_name);
+enum accfg_device_type accfg_device_get_type(struct accfg_device *device);
+char *accfg_device_get_type_str(struct accfg_device *device);
 int accfg_device_get_id(struct accfg_device *device);
 struct accfg_device *accfg_ctx_device_get_by_id(struct accfg_ctx *ctx,
-		int id);
+                                                   int id);
 struct accfg_device *accfg_ctx_device_get_by_name(struct accfg_ctx *ctx,
-		const char *dev_name);
+                                                     const char *dev_name);
 unsigned int accfg_device_get_max_groups(struct accfg_device *device);
 unsigned int accfg_device_get_max_work_queues(struct accfg_device *device);
 unsigned int accfg_device_get_max_engines(struct accfg_device *device);
@@ -167,36 +187,55 @@ unsigned int accfg_device_get_max_work_queues_size(struct accfg_device *device);
 int accfg_device_get_numa_node(struct accfg_device *device);
 unsigned int accfg_device_get_ims_size(struct accfg_device *device);
 unsigned int accfg_device_get_max_batch_size(struct accfg_device *device);
-unsigned long accfg_device_get_max_transfer_size(struct accfg_device *device);
-unsigned long accfg_device_get_op_cap(struct accfg_device *device);
-unsigned long accfg_device_get_gen_cap(struct accfg_device *device);
+uint64_t accfg_device_get_max_transfer_size(struct accfg_device *device);
+int accfg_device_get_op_cap(struct accfg_device *device,
+                                                struct accfg_op_cap *op_cap);
+uint64_t accfg_device_get_gen_cap(struct accfg_device *device);
 unsigned int accfg_device_get_configurable(struct accfg_device *device);
 bool accfg_device_get_pasid_enabled(struct accfg_device  *device);
+bool accfg_device_get_mdev_enabled(struct accfg_device *device);
 int accfg_device_get_errors(struct accfg_device *device, struct accfg_error *error);
 enum accfg_device_state accfg_device_get_state(struct accfg_device *device);
 unsigned int accfg_device_get_max_tokens(struct accfg_device *device);
 unsigned int accfg_device_get_max_batch_size(struct accfg_device *device);
 unsigned int accfg_device_get_token_limit(struct accfg_device *device);
 unsigned int accfg_device_get_cdev_major(struct accfg_device *device);
+unsigned int accfg_device_get_version(struct accfg_device *device);
 int accfg_device_get_clients(struct accfg_device *device);
 int accfg_device_set_token_limit(struct accfg_device *dev, int val);
 int accfg_device_is_active(struct accfg_device *device);
+int accfg_device_get_cmd_status(struct accfg_device *device);
+const char *accfg_device_get_cmd_status_str(struct accfg_device *device);
+
+struct accfg_device_mdev;
+struct accfg_device_mdev *accfg_device_first_mdev(struct accfg_device *device);
+struct accfg_device_mdev *accfg_device_next_mdev(struct accfg_device_mdev *mdev);
+void accfg_mdev_get_uuid(struct accfg_device_mdev *mdev, uuid_t uuid);
+enum accfg_mdev_type accfg_mdev_get_type(struct accfg_device_mdev *mdev);
+int accfg_create_mdev(struct accfg_device *device, enum accfg_mdev_type type,
+                                            uuid_t uuid);
+int accfg_remove_mdev(struct accfg_device *device, uuid_t uuid);
+
+#define accfg_device_mdev_foreach(device, mdev) \
+	for (mdev = accfg_device_first_mdev(device); \
+		mdev != NULL; \
+		mdev = accfg_device_next_mdev(mdev))
 
 /* libaccfg function for group */
 struct accfg_group;
 struct accfg_group *accfg_group_get_first(struct accfg_device *device);
 struct accfg_group *accfg_group_get_next(struct accfg_group *group);
 #define accfg_group_foreach(device, group) \
-        for (group = accfg_group_get_first(device); \
-             group != NULL; \
-             group = accfg_group_get_next(group))
+	for (group = accfg_group_get_first(device); \
+	     group != NULL; \
+	     group = accfg_group_get_next(group))
 int accfg_group_get_id(struct accfg_group *group);
 struct accfg_group *accfg_device_group_get_by_id(struct accfg_device *device,
-						int id);
+                                                  int id);
 int accfg_group_get_device_id(struct accfg_group *group);
 const char *accfg_group_get_devname(struct accfg_group *group);
-unsigned long accfg_group_get_size(struct accfg_group *group);
-unsigned long accfg_group_get_available_size(struct accfg_group *group);
+uint64_t accfg_group_get_size(struct accfg_group *group);
+uint64_t accfg_group_get_available_size(struct accfg_group *group);
 struct accfg_device *accfg_group_get_device(struct accfg_group *group);
 struct accfg_ctx *accfg_group_get_ctx(struct accfg_group *group);
 int accfg_group_get_tokens_reserved(struct accfg_group *group);
@@ -214,29 +253,22 @@ int accfg_group_set_traffic_class_b(struct accfg_group *group, int val);
 struct accfg_wq;
 struct accfg_wq *accfg_wq_get_first(struct accfg_device *device);
 struct accfg_wq *accfg_wq_get_next(struct accfg_wq *wq);
-uuid_t *accfg_wq_first_uuid(struct accfg_wq *wq);
-uuid_t *accfg_wq_next_uuid(struct accfg_wq *wq);
 
 
 #define accfg_wq_foreach(device, wq) \
-        for (wq = accfg_wq_get_first(device); \
-             wq != NULL; \
-             wq = accfg_wq_get_next(wq))
-
-#define accfg_wq_uuid_foreach(wq, uuid) \
-        for (uuid = accfg_wq_first_uuid(wq); \
-             uuid != NULL; \
-             uuid = accfg_wq_next_uuid(wq))
+	for (wq = accfg_wq_get_first(device); \
+	     wq != NULL; \
+	     wq = accfg_wq_get_next(wq))
 
 struct accfg_ctx *accfg_wq_get_ctx(struct accfg_wq *wq);
 struct accfg_device *accfg_wq_get_device(struct accfg_wq *wq);
 struct accfg_group *accfg_wq_get_group(struct accfg_wq *wq);
 int accfg_wq_get_id(struct accfg_wq *wq);
 struct accfg_wq *accfg_device_wq_get_by_id(struct accfg_device *device,
-					int id);
+                                               int id);
 const char *accfg_wq_get_devname(struct accfg_wq *wq);
 enum accfg_wq_mode accfg_wq_get_mode(struct accfg_wq *wq);
-unsigned long accfg_wq_get_size(struct accfg_wq *wq);
+uint64_t accfg_wq_get_size(struct accfg_wq *wq);
 int accfg_wq_get_group_id(struct accfg_wq *wq);
 int accfg_wq_get_priority(struct accfg_wq *wq);
 unsigned int accfg_wq_get_priv(struct accfg_wq *wq);
@@ -245,6 +277,8 @@ enum accfg_wq_state accfg_wq_get_state(struct accfg_wq *wq);
 int accfg_wq_get_cdev_minor(struct accfg_wq *wq);
 const char *accfg_wq_get_type_name(struct accfg_wq *wq);
 enum accfg_wq_type accfg_wq_get_type(struct accfg_wq *wq);
+unsigned int accfg_wq_get_max_batch_size(struct accfg_wq *wq);
+uint64_t accfg_wq_get_max_transfer_size(struct accfg_wq *wq);
 int accfg_wq_get_threshold(struct accfg_wq *wq);
 int accfg_wq_get_clients(struct accfg_wq *wq);
 int accfg_wq_is_enabled(struct accfg_wq *wq);
@@ -253,35 +287,36 @@ int accfg_wq_set_priority(struct accfg_wq *wq, int val);
 int accfg_wq_set_group_id(struct accfg_wq *wq, int val);
 int accfg_wq_set_threshold(struct accfg_wq *wq, int val);
 int accfg_wq_set_block_on_fault(struct accfg_wq *wq, int val);
-int accfg_wq_set_str_mode(struct accfg_wq *wq, const char* val);
+int accfg_wq_set_max_batch_size(struct accfg_wq *wq, int val);
+int accfg_wq_set_max_transfer_size(struct accfg_wq *wq, uint64_t val);
+int accfg_wq_set_str_mode(struct accfg_wq *wq, const char *val);
 int accfg_wq_set_mode(struct accfg_wq *wq, enum accfg_wq_mode mode);
-int accfg_wq_set_str_type(struct accfg_wq *wq, const char* val);
+int accfg_wq_set_str_type(struct accfg_wq *wq, const char *val);
 int accfg_wq_set_str_name(struct accfg_wq *wq, const char *val);
 int accfg_wq_enable(struct accfg_wq *wq);
 int accfg_wq_disable(struct accfg_wq *wq, bool force);
 int accfg_wq_priority_boundary(struct accfg_wq *wq);
 int accfg_wq_size_boundary(struct accfg_device *device, int wq_num);
-int accfg_wq_create_mdev(struct accfg_wq *wq, uuid_t uuid);
-int accfg_wq_remove_mdev(struct accfg_wq *wq, uuid_t uuid);
+int accfg_wq_get_user_dev_path(struct accfg_wq *wq, char *buf, size_t size);
 
 /* libaccfg function for engine */
 struct accfg_engine;
 struct accfg_engine *accfg_engine_get_first(struct accfg_device *device);
 struct accfg_engine *accfg_engine_get_next(struct accfg_engine *engine);
 #define accfg_engine_foreach(device, engine) \
-        for (engine = accfg_engine_get_first(device); \
-             engine != NULL; \
-             engine = accfg_engine_get_next(engine))
+	for (engine = accfg_engine_get_first(device); \
+	     engine != NULL; \
+	     engine = accfg_engine_get_next(engine))
 struct accfg_ctx *accfg_engine_get_ctx(struct accfg_engine *engine);
 struct accfg_device *accfg_engine_get_device(struct accfg_engine *engine);
 struct accfg_group *accfg_engine_get_group(struct accfg_engine *engine);
 int accfg_engine_get_group_id(struct accfg_engine *engine);
 int accfg_engine_get_id(struct accfg_engine *engine);
 struct accfg_engine *accfg_device_engine_get_by_id(struct accfg_device *device,
-						int id);
+                                                   int id);
 const char *accfg_engine_get_devname(struct accfg_engine *engine);
 int accfg_engine_set_group_id(struct accfg_engine *engine, int val);
 #ifdef __cplusplus
 }				/* extern "C" */
 #endif
-#endif
+#endif
\ No newline at end of file
diff --git a/sources/core/src/hw_dispatcher/legacy_headers/own_dsa_accel_constants.h b/sources/core/src/hw_dispatcher/legacy_headers/own_dsa_accel_constants.h
new file mode 100644
index 0000000..1c037f0
--- /dev/null
+++ b/sources/core/src/hw_dispatcher/legacy_headers/own_dsa_accel_constants.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2020-2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+/**
+ * @brief Contains constants that are used to connect with the hardware
+ * @date 3/23/2020
+ *
+ */
+
+#include <stdint.h>
+
+#ifndef DML_DSA_ACCEL_CONFIG_H__
+#define DML_DSA_ACCEL_CONFIG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DSA_DEVICE_ID ((uint32_t)(((uint32_t)0xFF << 24u) | ((uint32_t)('a') << 16u) | ((uint32_t)('s') << 8u) | (uint32_t)('d')))
+
+#define CHAR_MSK             0xFF202020
+#define MAX_DEVICE_COUNT     8u
+#define MAX_WORK_QUEUE_COUNT 8u
+#define OWN_PAGE_MASK        0x0FFFllu /**< Defines page mask for portal incrementing */
+
+// General Capabilities Register unwrappers
+#define GC_BLOCK_ON_FAULT(GENCAP)           (((GENCAP))    &0x01)       /**< GENCAP bit 0      - block on fault support                    */
+#define GC_OVERLAPPING(GENCAP)              (((GENCAP)>>1) &0x01)       /**< GENCAP bit 1      - overlapping copy support                  */
+#define GC_CACHE_WRITE(GENCAP)              (((GENCAP)>>2) &0x01)       /**< GENCAP bit 2      - cache control support (memory)            */
+#define GC_CACHE_FLUSH(GENCAP)              (((GENCAP)>>3) &0x01)       /**< GENCAP bit 3      - cache control support (cache flush)       */
+#define GC_COM_CAP(GENCAP)                  (((GENCAP)>>4) &0x01)       /**< GENCAP bit 4      - command capabilities support              */
+#define GC_DST_READBACK(GENCAP)             (((GENCAP)>>8) &0x01)       /**< GENCAP bit 8      - destination readback support              */
+#define GC_DRAIN_READBACK(GENCAP)           (((GENCAP)>>9) &0x01)       /**< GENCAP bit 9      - drain descriptor readback address support */
+#define GC_MAX_TRANSFER_SIZE(GENCAP)  (1u << (((GENCAP)>>16)&0x1F))     /**< GENCAP 20-16 bits - maximum supported transfer size           */
+#define GC_MAX_BATCH_SIZE(GENCAP)     (1u << (((GENCAP)>>21)&0x0F))     /**< GENCAP 24-21 bits - maximum supported batch size              */
+#define GC_INTERRUPT_STORAGE(GENCAP)       ((((GENCAP)>>25)&0x3F)*256u) /**< GENCAP 30-25 bits - interrupt message storage size            */
+#define GC_CONF_SUPPORT(GENCAP)             (((GENCAP)>>31)&0x01)       /**< GENCAP bit 31     - configuration support                     */
+
+static const char     DEVICE_NAME[]      = "dsa";
+static const uint32_t DEVICE_NAME_LENGTH = sizeof(DEVICE_NAME) - 2u;  // sizeof is 4 (terminating 0 at index 3), so this is 2 - NOTE(review): one less than strlen("dsa"); confirm this is intended
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  //DML_DSA_ACCEL_CONFIG_H__
diff --git a/include/dml/cpp/middle_layer/descriptor.hpp b/sources/core/src/hw_dispatcher/numa.cpp
similarity index 59%
rename from include/dml/cpp/middle_layer/descriptor.hpp
rename to sources/core/src/hw_dispatcher/numa.cpp
index 28ef3fb..f686429 100644
--- a/include/dml/cpp/middle_layer/descriptor.hpp
+++ b/sources/core/src/hw_dispatcher/numa.cpp
@@ -14,26 +14,27 @@
  *
  */
 
-/**
- * @date 05/19/2021
- * @defgroup dmlml DML Middle Layer
- * @brief Middle Layer for Intel(R) Data Mover Library (Intel® DML)
- */
-
-#ifndef DML_ML_DESCRIPTOR_HPP
-#define DML_ML_DESCRIPTOR_HPP
+#if defined(linux)
+#include <x86intrin.h>
+#endif
 
-#include "types.hpp"
+#include "numa.hpp"
 
-namespace dml::ml
+namespace dml::core::util
 {
-    /**
-     * @todo
-     */
-    struct alignas(64u) descriptor
+    uint32_t get_numa_id() noexcept
     {
-        byte_t bytes[64u]{}; /**< Underlying data array */
-    };
-}  // namespace dml::ml
+#if defined(linux)
+        uint32_t tsc_aux = 0;
+
+        __rdtscp(&tsc_aux);
+
+        // Linux encodes NUMA node into [32:12] of TSC_AUX
+        return tsc_aux >> 12;
+#else
+        // Not supported in Windows yet
+        return 0;
+#endif
+    }
 
-#endif  //DML_ML_DESCRIPTOR_HPP
+}  // namespace dml::core::util
diff --git a/sources/middle_layer/dispatcher/numa.hpp b/sources/core/src/hw_dispatcher/numa.hpp
similarity index 85%
rename from sources/middle_layer/dispatcher/numa.hpp
rename to sources/core/src/hw_dispatcher/numa.hpp
index 2072573..93bf73c 100644
--- a/sources/middle_layer/dispatcher/numa.hpp
+++ b/sources/core/src/hw_dispatcher/numa.hpp
@@ -19,10 +19,9 @@
 
 #include <cstdint>
 
-namespace dml::ml::util {
-
-int32_t get_numa_id() noexcept;
-
+namespace dml::core::util
+{
+    [[nodiscard]] uint32_t get_numa_id() noexcept;
 }
 
-#endif //DML_MIDDLE_LAYER_DISPATCHER_NUMA_HPP_
+#endif  //DML_MIDDLE_LAYER_DISPATCHER_NUMA_HPP_
diff --git a/sources/core/src/kernels.hpp b/sources/core/src/kernels.hpp
new file mode 100644
index 0000000..fa776f5
--- /dev/null
+++ b/sources/core/src/kernels.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_OWN_KERNELS_HPP
+#define DML_CORE_OWN_KERNELS_HPP
+
+#include <core/completion_record_views.hpp>
+#include <core/descriptor_views.hpp>
+
+namespace dml::core::kernels
+{
+    void nop(nop_descriptor dsc, nop_completion_record record) noexcept;
+
+    void batch(batch_descriptor dsc, batch_completion_record record) noexcept;
+
+    void drain(drain_descriptor dsc, drain_completion_record record) noexcept;
+
+    void mem_move(mem_move_descriptor dsc, mem_move_completion_record record) noexcept;
+
+    void fill(fill_descriptor dsc, fill_completion_record record) noexcept;
+
+    void compare(compare_descriptor dsc, compare_completion_record record) noexcept;
+
+    void compare_pattern(compare_pattern_descriptor dsc, compare_pattern_completion_record record) noexcept;
+
+    void create_delta(create_delta_descriptor dsc, create_delta_completion_record record) noexcept;
+
+    void apply_delta(apply_delta_descriptor dsc, apply_delta_completion_record record) noexcept;
+
+    void dualcast(dualcast_descriptor dsc, dualcast_completion_record record) noexcept;
+
+    void crc(crc_descriptor dsc, crc_completion_record record) noexcept;
+
+    void copy_crc(copy_crc_descriptor dsc, crc_completion_record record) noexcept;
+
+    void dif_check(dif_check_descriptor dsc, dif_check_completion_record record) noexcept;
+
+    void dif_insert(dif_insert_descriptor dsc, dif_insert_completion_record record) noexcept;
+
+    void dif_strip(dif_strip_descriptor dsc, dif_strip_completion_record record) noexcept;
+
+    void dif_update(dif_update_descriptor dsc, dif_update_completion_record record) noexcept;
+
+    void cache_flush(cache_flush_descriptor dsc, cache_flush_completion_record record) noexcept;
+}  // namespace dml::core::kernels
+
+#endif  //DML_CORE_OWN_KERNELS_HPP
diff --git a/sources/core/src/mem_move.cpp b/sources/core/src/mem_move.cpp
new file mode 100644
index 0000000..913f10e
--- /dev/null
+++ b/sources/core/src/mem_move.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+#include <dml/detail/common/utils/enum.hpp>
+
+#include "immintrin.h"
+#include "kernels.hpp"
+#include "optimization_dispatcher.hpp"
+
+namespace dml::core::kernels
+{
+    void mem_move(mem_move_descriptor dsc, mem_move_completion_record record) noexcept
+    {
+        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
+        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
+        const auto transfer_size = dsc.transfer_size();
+        // Perform the copy through the dispatch layer, using the addresses and size read from the descriptor
+        dispatch::mem_move(src, dst, transfer_size);
+        // Full memory fence before the status store - presumably so the copied bytes are visible before success is reported (TODO confirm)
+        _mm_mfence();
+        record.status() = to_underlying(dml::detail::execution_status::success);
+    }
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/nop.cpp b/sources/core/src/nop.cpp
new file mode 100644
index 0000000..f166419
--- /dev/null
+++ b/sources/core/src/nop.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <dml/detail/common/status.hpp>
+
+#include "kernels.hpp"
+
+namespace dml::core::kernels
+{
+    void nop(nop_descriptor dsc, nop_completion_record record) noexcept
+    {
+        static_cast<void>(dsc);  // the descriptor is not read by this kernel; the cast marks it as deliberately unused
+        record.status() = static_cast<status_t>(dml::detail::execution_status::success);  // no work to do: always reports success
+    }
+
+}  // namespace dml::core::kernels
diff --git a/sources/core/src/software_device.cpp b/sources/core/src/software_device.cpp
new file mode 100644
index 0000000..7614986
--- /dev/null
+++ b/sources/core/src/software_device.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/completion_record_views.hpp>
+#include <core/descriptor_views.hpp>
+#include <core/operations.hpp>
+#include <dml/detail/common/status.hpp>
+
+#include "core/device.hpp"
+#include "kernels.hpp"
+
+namespace dml::core
+{
+    dml::detail::submission_status software_device::submit(descriptor& dsc, completion_record& completion_record) noexcept
+    {
+        auto dsc_view = any_descriptor(dsc);  // generic view, used here only to read the operation field
+        auto op       = operation(dsc_view.operation());
+        // Run the matching software kernel inline; each kernel gets typed views of the descriptor and the completion record
+        switch (op)
+        {
+            case operation::nop:
+                kernels::nop(nop_descriptor(dsc), nop_completion_record(completion_record));
+                break;
+            case operation::batch:
+                kernels::batch(batch_descriptor(dsc), batch_completion_record(completion_record));
+                break;
+            case operation::drain:
+                kernels::drain(drain_descriptor(dsc), drain_completion_record(completion_record));
+                break;
+            case operation::memory_move:
+                kernels::mem_move(mem_move_descriptor(dsc), mem_move_completion_record(completion_record));
+                break;
+            case operation::fill:
+                kernels::fill(fill_descriptor(dsc), fill_completion_record(completion_record));
+                break;
+            case operation::compare:
+                kernels::compare(compare_descriptor(dsc), compare_completion_record(completion_record));
+                break;
+            case operation::compare_pattern:
+                kernels::compare_pattern(compare_pattern_descriptor(dsc), compare_pattern_completion_record(completion_record));
+                break;
+            case operation::create_delta:
+                kernels::create_delta(create_delta_descriptor(dsc), create_delta_completion_record(completion_record));
+                break;
+            case operation::apply_delta:
+                kernels::apply_delta(apply_delta_descriptor(dsc), apply_delta_completion_record(completion_record));
+                break;
+            case operation::dualcast:
+                kernels::dualcast(dualcast_descriptor(dsc), dualcast_completion_record(completion_record));
+                break;
+            case operation::crc:
+                kernels::crc(crc_descriptor(dsc), crc_completion_record(completion_record));
+                break;
+            case operation::copy_crc:
+                kernels::copy_crc(copy_crc_descriptor(dsc), crc_completion_record(completion_record));  // copy_crc takes the crc completion record type (see kernels.hpp)
+                break;
+            case operation::dif_check:
+                kernels::dif_check(dif_check_descriptor(dsc), dif_check_completion_record(completion_record));
+                break;
+            case operation::dif_insert:
+                kernels::dif_insert(dif_insert_descriptor(dsc), dif_insert_completion_record(completion_record));
+                break;
+            case operation::dif_strip:
+                kernels::dif_strip(dif_strip_descriptor(dsc), dif_strip_completion_record(completion_record));
+                break;
+            case operation::dif_update:
+                kernels::dif_update(dif_update_descriptor(dsc), dif_update_completion_record(completion_record));
+                break;
+            case operation::cache_flush:
+                kernels::cache_flush(cache_flush_descriptor(dsc), cache_flush_completion_record(completion_record));
+                break;
+            default:  // unrecognized operation code: no kernel is run and the completion record is left untouched
+                return dml::detail::submission_status::failure;
+        }
+
+        return dml::detail::submission_status::success;  // the kernel was invoked; its own outcome is reported through the completion record
+    }
+}  // namespace dml::core
diff --git a/sources/core/src/sw_dispatcher/CMakeLists.txt b/sources/core/src/sw_dispatcher/CMakeLists.txt
new file mode 100644
index 0000000..5e0f8de
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/CMakeLists.txt
@@ -0,0 +1,56 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# Object library with the software-path dispatcher sources listed below.
+add_library(dml_sw_dispatcher OBJECT
+        dml_cpuid.h
+        dml_kernels.h
+        optimization_dispatcher.hpp
+
+        optimization_dispatcher.cpp
+        dml_cpuid.c
+        )
+
+# Consumers get this directory on their include path as well (PUBLIC).
+target_include_directories(dml_sw_dispatcher
+        PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+        )
+target_compile_features(dml_sw_dispatcher
+        PUBLIC cxx_std_17
+        PUBLIC c_std_11
+        )
+# Kernel objects come from object libraries that are expected to be declared by the
+# subdirectories added at the end of this file. TARGET_OBJECTS is resolved at generate
+# time, so referencing the targets before they are declared is fine.
+target_sources(dml_sw_dispatcher
+        PUBLIC $<TARGET_OBJECTS:dml_kernels_ref>
+        PUBLIC $<TARGET_OBJECTS:dml_kernels_avx512>
+        PUBLIC $<TARGET_OBJECTS:dml_kernels_cache_flush>
+        )
+target_compile_options(dml_sw_dispatcher
+        PRIVATE ${DML_QUALITY_OPTIONS}
+        PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${DML_CPP_PRIVATE_OPTIONS}>
+        )
+
+# Both operands are quoted so that if() compares the strings themselves: an unquoted operand
+# is replaced by the value of a variable of that name whenever such a variable exists.
+if ("${DML_ARCH}" STREQUAL "avx512")
+    target_compile_definitions(dml_sw_dispatcher PRIVATE DML_AVX512)
+endif ()
+
+# Object libraries referenced by the TARGET_OBJECTS expressions above.
+add_subdirectory(ref)
+add_subdirectory(avx512)
+add_subdirectory(cache_flush)
diff --git a/sources/core/src/sw_dispatcher/avx512/CMakeLists.txt b/sources/core/src/sw_dispatcher/avx512/CMakeLists.txt
new file mode 100644
index 0000000..2739e92
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/CMakeLists.txt
@@ -0,0 +1,40 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# AVX-512 kernels live in their own object library so that the instruction-set
+# flags added below apply to these sources only.
+add_library(dml_kernels_avx512 OBJECT
+        mem_move.c
+        fill.c
+        compare.c
+        compare_pattern.c
+        crc.c
+        )
+
+target_compile_features(dml_kernels_avx512 PRIVATE c_std_11)
+
+target_compile_options(dml_kernels_avx512 PRIVATE ${DML_QUALITY_OPTIONS})
+
+# The kernel sources accept every compiler that defines __GNUC__, which includes Clang.
+# Clang needs the same -march switch as GCC to enable the AVX-512 intrinsics; the
+# MSVC-compatible Clang front end (MSVC is set for it) is left out of this branch.
+if (CMAKE_C_COMPILER_ID MATCHES "^(GNU|Clang|AppleClang)$" AND NOT MSVC)
+    target_compile_options(dml_kernels_avx512 PRIVATE -march=skylake-avx512)
+endif ()
+
+# Microsoft compiler spelling of the same request.
+if (CMAKE_C_COMPILER_ID MATCHES MSVC)
+    target_compile_options(dml_kernels_avx512 PRIVATE /arch:AVX512)
+endif ()
diff --git a/sources/core/src/sw_dispatcher/avx512/compare.c b/sources/core/src/sw_dispatcher/avx512/compare.c
new file mode 100644
index 0000000..70fe8d0
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/compare.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+/**
+ * Compares two byte regions with AVX-512 byte compares, 64 bytes per step plus one masked step for the tail.
+ *
+ * @param src1          first region
+ * @param src2          second region
+ * @param transfer_size number of bytes to compare in each region
+ * @param result        set to 0x0 when the regions match and to 0x1 when they differ
+ *
+ * @return offset of the first differing byte; 0 when the regions match
+ */
+uint32_t dml_avx512_compare(const uint8_t* src1, const uint8_t* src2, uint32_t transfer_size, uint8_t* result)
+{
+    const uint8_t equal     = 0x0;
+    const uint8_t not_equal = 0x1;
+
+    uint32_t  i;
+    __mmask64 msk64 = (__mmask64)0;
+
+    // Test the remaining length instead of `i + 64`: that sum wraps for sizes within 63 bytes of UINT32_MAX,
+    // which would keep the loop running past the end of the buffers.
+    for (i = 0u; (transfer_size - i) >= 64u; i += 64u)
+    {
+        msk64 =
+            _mm512_cmp_epi8_mask(_mm512_loadu_si512((void const*)(src1 + i)), _mm512_loadu_si512((void const*)(src2 + i)), _MM_CMPINT_NE);
+        if (msk64)
+        {
+            *result = not_equal;
+            return i + (uint32_t)_tzcnt_u64((uint64_t)msk64);
+        }
+    }
+
+    // Tail: the mask enables only the low `tail` byte lanes; disabled lanes load as zero on both sides.
+    {
+        uint64_t tail = transfer_size & 63;
+        msk64         = ((uint64_t)1 << tail) - (uint64_t)1;
+        msk64         = _mm512_cmp_epi8_mask(_mm512_maskz_loadu_epi8(msk64, (void const*)(src1 + i)),
+                                     _mm512_maskz_loadu_epi8(msk64, (void const*)(src2 + i)),
+                                     _MM_CMPINT_NE);
+        if (msk64)
+        {
+            *result = not_equal;
+            return i + (uint32_t)_tzcnt_u64((uint64_t)msk64);
+        }
+    }
+
+    *result = equal;
+    return 0;
+}
\ No newline at end of file
diff --git a/sources/core/src/sw_dispatcher/avx512/compare_pattern.c b/sources/core/src/sw_dispatcher/avx512/compare_pattern.c
new file mode 100644
index 0000000..14c9bc5
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/compare_pattern.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+/**
+ * Checks that a memory region consists of repetitions of an 8-byte pattern.
+ *
+ * Whole 8-byte words are compared eight at a time with AVX-512, leftover words with a masked
+ * compare, and the last (transfer_size % 8) bytes one by one against the low-order pattern bytes.
+ *
+ * @param pattern       8-byte pattern; its least significant byte is matched against the lowest address
+ * @param src           region to check
+ * @param transfer_size region size in bytes
+ * @param result        set to 0x0 when the whole region matches and to 0x1 otherwise
+ *
+ * @return 0 on a full match; otherwise the byte offset of the first mismatching 8-byte word, or of the
+ *         exact mismatching byte when the mismatch is in the trailing partial word
+ */
+uint32_t dml_avx512_compare_pattern(uint64_t pattern, const uint8_t* src, uint32_t transfer_size, uint8_t* result)
+{
+    const uint8_t equal     = 0x0;
+    const uint8_t not_equal = 0x1;
+
+    const uint32_t        word_count = transfer_size >> 3;
+    const uint32_t        rest_bytes = transfer_size & 7u;
+    const uint64_t* const words      = (const uint64_t*)src;
+    const __m512i         expected   = _mm512_set1_epi64(pattern);
+
+    // Full vectors: eight pattern words per step.
+    uint32_t index = 0u;
+    while ((index + 8) <= word_count)
+    {
+        const __mmask8 diff = _mm512_cmp_epi64_mask(_mm512_loadu_si512((void const*)(words + index)), expected, _MM_CMPINT_NE);
+        if (diff)
+        {
+            *result = not_equal;
+            return (index + (uint32_t)_tzcnt_u32((uint32_t)diff)) << 3u;
+        }
+        index += 8;
+    }
+
+    // Leftover whole words (1..7): only the enabled lanes are loaded and compared.
+    const uint32_t rest_words = word_count & 7u;
+    if (rest_words)
+    {
+        const __mmask8 lanes = (__mmask8)((1 << rest_words) - 1);
+        const __mmask8 diff  = _mm512_mask_cmp_epi64_mask(lanes,
+                                                         _mm512_maskz_loadu_epi64(lanes, (void const*)(words + index)),
+                                                         expected,
+                                                         _MM_CMPINT_NE);
+        if (diff)
+        {
+            *result = not_equal;
+            return (index + (uint32_t)_tzcnt_u32((uint32_t)diff)) << 3u;
+        }
+    }
+
+    // Trailing bytes: byte k of the partial word must equal byte k of the pattern (lowest byte first).
+    const uint8_t* const rest = src + (transfer_size - rest_bytes);
+    for (uint32_t k = 0u; k < rest_bytes; k++)
+    {
+        if (rest[k] != (uint8_t)(pattern >> (8u * k)))
+        {
+            *result = not_equal;
+            return (word_count << 3) + k;
+        }
+    }
+
+    *result = equal;
+    return 0;
+}
diff --git a/sources/core/src/sw_dispatcher/avx512/crc.c b/sources/core/src/sw_dispatcher/avx512/crc.c
new file mode 100644
index 0000000..f9ac9b3
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/crc.c
@@ -0,0 +1,628 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+#define DML_DISABLE_OPTIMIZATION_
+
+/**
+ * Returns the index of the highest set bit of poly (63 minus its leading-zero count).
+ */
+static inline uint32_t getCRCSize(uint64_t poly)
+{
+    /* For poly == 0 the count is 64 and the unsigned subtraction below wraps. */
+    const uint32_t leading_zeros = (uint32_t)_lzcnt_u64(poly);
+    return 63 - leading_zeros;
+}
+
+/**
+ * Forward declaration of the carry-less-multiply CRC kernel that is defined further down in this file.
+ */
+
+static void own_CRC_8u_opt_k0(const uint8_t* src_ptr, uint32_t init_crc, int len0, int crc_size, uint32_t* koeff_ptr, uint32_t* crc_ptr);
+
+/**
+ * Thin adapter: derives the CRC width from poly and hands the request to own_CRC_8u_opt_k0,
+ * passing optPoly as the kernel's table of 32-bit constants.
+ */
+static void own_CRC_8u_k0(const uint8_t* src_ptr, uint32_t len, uint64_t poly, const uint8_t optPoly[128], uint32_t init, uint32_t* crc_ptr)
+{
+    own_CRC_8u_opt_k0(src_ptr, init, (int)len, (int)getCRCSize(poly), (uint32_t*)optPoly, crc_ptr);
+}
+
+/**
+ * Long division of x^64 by poly over GF(2) with a 32-bit remainder register.
+ * The dividend enters as 72 bits: seven zeros, a single one, then 64 zeros.
+ *
+ * @param poly     divisor; the remainder is masked to 32 bits after every step, so only its low 32 bits matter
+ * @param quit_ptr receives the quotient bits
+ * @param tail_ptr receives the final 32-bit remainder
+ */
+static void poly1x64_32_div(uint64_t poly, uint64_t* quit_ptr, uint32_t* tail_ptr)
+{
+    const int      dividend_bits = 9 * 8; /* nine bytes of dividend */
+    const int      one_position  = 7;     /* index of the only set dividend bit */
+    const uint64_t top_bit       = 0x80000000;
+    const uint64_t low_32_bits   = 0xffffffff;
+    uint64_t       remainder     = 0;
+    uint64_t       quotient      = 0;
+
+    for (int step = 0; step < dividend_bits; ++step)
+    {
+        const uint64_t incoming = (step == one_position) ? 1u : 0u;
+        const uint64_t overflow = remainder & top_bit;
+        remainder               = (remainder << 1) | incoming;
+        quotient <<= 1;
+        if (overflow)
+        {
+            remainder ^= poly;
+            quotient |= 1;
+        }
+        remainder &= low_32_bits;
+    }
+    *quit_ptr = quotient;
+    *tail_ptr = (uint32_t)remainder;
+}
+
+/**
+ * Fills optPoly with the constants consumed by own_CRC_8u_opt_k0, addressed as 32-bit words:
+ *   words 0-1  : poly itself
+ *   words 2-3  : quotient returned by poly1x64_32_div for poly << (32 - CRC width)
+ *   words 4-20 : the low 32 bits of poly repeatedly multiplied by x with reduction; for a 32-bit CRC
+ *                word 4 + n equals x^e mod poly, where e is the n-th entry of `exponents` below
+ * Only the first 84 of the 128 bytes are written.
+ *
+ * @param poly    generator polynomial including its leading term
+ * @param optPoly output buffer for the constants
+ */
+static inline void own_gen_crc_opt_poly_8u(uint64_t poly, uint8_t optPoly[128])
+{
+    static const uint32_t exponents[] = { 64, 96, 160, 224, 288, 352, 416, 480, 544, 608, 672, 736, 800, 864, 928, 992, 1056 };
+    const uint32_t        entry_count = (uint32_t)(sizeof(exponents) / sizeof(exponents[0]));
+
+    uint32_t* const words    = (uint32_t*)optPoly;
+    const uint32_t  crc_bits = getCRCSize(poly);
+    const uint32_t  low_poly = (uint32_t)poly;
+    uint64_t        quotient;
+    uint32_t        remainder; /* produced by the division but not stored in the table */
+
+    *(uint64_t*)words = poly;
+    poly1x64_32_div(poly << (32 - crc_bits), &quotient, &remainder);
+    *(uint64_t*)(words + 2) = quotient;
+
+    /*
+     * `value` is advanced one multiplication by x per step. `degree` is a running counter that starts
+     * at 40 and is carried over from one table entry to the next, so each entry continues where the
+     * previous one stopped.
+     */
+    uint32_t value  = low_poly;
+    uint32_t degree = 40;
+    for (uint32_t entry = 0; entry < entry_count; ++entry)
+    {
+        const uint32_t target = exponents[entry] + 8;
+        for (; degree < target; ++degree)
+        {
+            const uint32_t feedback = (value & 0x80000000) ? low_poly : 0;
+            value                   = (value << 1) ^ feedback;
+        }
+        words[4 + entry] = value;
+    }
+}
+
+/** Updates *crc_ptr (used as the initial value) with the CRC of bytes_to_hash bytes at memory_region_ptr. */
+static inline void dmlc_own_calculate_crc_32u(const uint8_t* const memory_region_ptr, uint32_t bytes_to_hash,
+                                              uint32_t* const crc_ptr, uint32_t polynomial)
+{
+    /* getCRCSize derives the CRC width from the highest set bit, hence bit 32 is set explicitly. */
+    const uint64_t full_poly = ((uint64_t)1u << (uint64_t)32u) | (uint64_t)polynomial;
+    uint8_t        constants[128];
+
+    own_gen_crc_opt_poly_8u(full_poly, constants);
+    own_CRC_8u_k0(memory_region_ptr, bytes_to_hash, full_poly, constants, *crc_ptr, crc_ptr);
+}
+
+#if defined(_MSC_VER)
+#pragma optimize("", off)
+#pragma optimize("O3", on)
+#endif
+
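+/**
+ * Bitwise XOR of two __m128i values carried out with the single-precision form (_mm_xor_ps); the resulting bits are the same as with _mm_xor_si128.
+ */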
+#define _MM_XOR_PS(A, B) _mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(A), _mm_castsi128_ps(B)))
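+/**
+ * Assembly-style alias for the local variable `ecx` of own_CRC_8u_opt_k0, which holds the initial CRC shifted left by (32 - crc_size) bits.
+ */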
+#define arg1_low32 ecx
+/**
+ * CRC kernel that reads like a statement-by-statement port of an assembly routine (the mnemonics survive in the
+ * trailing comments). Input is byte-reversed, folded 128 bytes per iteration with carry-less multiplies, the eight
+ * accumulators are collapsed into one, the rest is folded 16 bytes at a time, and a Barrett reduction yields the
+ * value stored to *crc_ptr. koeff supplies the constants: words 0-1 polynomial, 2-3 Barrett quotient, 4-20 folds.
+ * NOTE: DML_DISABLE_OPTIMIZATION_ is defined in this file, so the short-buffer entry (_less_than_256) is compiled
+ * out: 256 bytes are consumed before len is first tested (callers must pass len0 >= 256) and the statements that
+ * follow the `return` are not reachable in this configuration.
+ */
+static void own_CRC_8u_opt_k0(const uint8_t* src_ptr, uint32_t init_crc, int len0, int crc_size, uint32_t* koeff, uint32_t* crc_ptr)
+{
+    uint64_t pshufb_shf_table[] = { 0x8786858483828100, 0x8f8e8d8c8b8a8988, 0x0706050403020100, 0x000e0d0c0b0a0908 };
+
+    int      len = len0;
+    uint8_t  ttt[128];
+    uint8_t* r11 = ttt;
+    uint8_t* ptr;
+
+    __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm12, xmm13;
+#ifndef DML_DISABLE_OPTIMIZATION_
+    __m128i xmm11;
+#endif  // DML_DISABLE_OPTIMIZATION_
+
+    int     eax, ecx, r9;
+    __m128i ENDIA_SHUF_MASK = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
+    __m128i mask2           = _mm_set_epi64x(0x00000000FFFFFFFF, 0xFFFFFFFFFFFFFFFF);
+    __m128i mask1           = _mm_set_epi64x(0x8080808080808080, 0x8080808080808080);
+
+    uint64_t q = *(uint64_t*)(koeff + 0);
+    q <<= (32 - crc_size);
+    uint64_t u      = *(uint64_t*)(koeff + 2);
+    uint64_t k_64   = ((uint64_t)koeff[4]) << 32;
+    uint64_t k_96   = ((uint64_t)koeff[5]) << 32;
+    uint64_t k_160  = ((uint64_t)koeff[6]) << 32;
+    uint64_t k_224  = ((uint64_t)koeff[7]) << 32;
+    uint64_t k_288  = ((uint64_t)koeff[8]) << 32;
+    uint64_t k_352  = ((uint64_t)koeff[9]) << 32;
+    uint64_t k_416  = ((uint64_t)koeff[10]) << 32;
+    uint64_t k_480  = ((uint64_t)koeff[11]) << 32;
+    uint64_t k_544  = ((uint64_t)koeff[12]) << 32;
+    uint64_t k_608  = ((uint64_t)koeff[13]) << 32;
+    uint64_t k_672  = ((uint64_t)koeff[14]) << 32;
+    uint64_t k_736  = ((uint64_t)koeff[15]) << 32;
+    uint64_t k_800  = ((uint64_t)koeff[16]) << 32;
+    uint64_t k_864  = ((uint64_t)koeff[17]) << 32;
+    uint64_t k_928  = ((uint64_t)koeff[18]) << 32;
+    uint64_t k_992  = ((uint64_t)koeff[19]) << 32;
+    uint64_t k_1056 = ((uint64_t)koeff[20]) << 32;
+
+    ecx = init_crc;
+    //crc16_t10dif_01:
+    ecx = ecx << (32 - crc_size);
+#ifndef DML_DISABLE_OPTIMIZATION_
+    if (len < 256)
+    {
+        goto _less_than_256;
+    }
+#endif  // DML_DISABLE_OPTIMIZATION_
+    //; load the initial crc value
+    xmm10 = _mm_cvtsi32_si128(arg1_low32);  //movd xmm10, arg1_low32; initial crc
+    //; crc value does not need to be byte - reflected, but it needs to be moved to the high part of the register.
+    //; because data will be byte - reflected and will align with initial crc at correct place.
+    xmm10 = _mm_slli_si128(xmm10, 12);
+    //; receive the initial 128B data, xor the initial crc value
+    xmm0 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 0));
+    xmm1 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 1));
+    xmm2 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 2));
+    xmm3 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 3));
+    xmm4 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 4));
+    xmm5 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 5));
+    xmm6 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 6));
+    xmm7 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 7));
+    xmm0 = _mm_shuffle_epi8(xmm0, ENDIA_SHUF_MASK);
+    //; XOR the initial_crc value
+    xmm0  = _mm_xor_si128(xmm0, xmm10);
+    xmm1  = _mm_shuffle_epi8(xmm1, ENDIA_SHUF_MASK);
+    xmm2  = _mm_shuffle_epi8(xmm2, ENDIA_SHUF_MASK);
+    xmm3  = _mm_shuffle_epi8(xmm3, ENDIA_SHUF_MASK);
+    xmm4  = _mm_shuffle_epi8(xmm4, ENDIA_SHUF_MASK);
+    xmm5  = _mm_shuffle_epi8(xmm5, ENDIA_SHUF_MASK);
+    xmm6  = _mm_shuffle_epi8(xmm6, ENDIA_SHUF_MASK);
+    xmm7  = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm10 = _mm_set_epi64x(k_1056 /*rk4*/, k_992 /*rk3*/);
+    //; imm value of pclmulqdq instruction will determine which constant to use
+    //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    //; we subtract 256 instead of 128 to save one instruction from the loop
+    len -= 256;
+//; at this section of the code, there is 128 * x + y(0 <= y < 128) bytes of buffer.The _fold_128_B_loop
+//; loop will fold 128B at a time until we have 128 + y Bytes of buffer
+//; fold 128B at a time.This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+    //; update the buffer pointer
+    src_ptr += 128;
+    xmm9  = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 0));
+    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 1));
+    xmm9  = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
+    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
+    xmm8  = xmm0;
+    xmm13 = xmm1;
+    xmm0  = _mm_clmulepi64_si128(xmm0, xmm10, 0x0);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
+    xmm1  = _mm_clmulepi64_si128(xmm1, xmm10, 0x0);
+    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
+    xmm0  = _mm_xor_si128(xmm0, xmm9);
+    xmm0  = _MM_XOR_PS(xmm0, xmm8);
+    xmm1  = _mm_xor_si128(xmm1, xmm12);
+    xmm1  = _MM_XOR_PS(xmm1, xmm13);
+    xmm9  = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 2));
+    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 3));
+    xmm9  = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
+    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
+    xmm8  = xmm2;
+    xmm13 = xmm3;
+    xmm2  = _mm_clmulepi64_si128(xmm2, xmm10, 0x0);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
+    xmm3  = _mm_clmulepi64_si128(xmm3, xmm10, 0x0);
+    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
+    xmm2  = _mm_xor_si128(xmm2, xmm9);
+    xmm2  = _MM_XOR_PS(xmm2, xmm8);
+    xmm3  = _mm_xor_si128(xmm3, xmm12);
+    xmm3  = _MM_XOR_PS(xmm3, xmm13);
+
+    xmm9  = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 4));
+    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 5));
+    xmm9  = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
+    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
+    xmm8  = xmm4;
+    xmm13 = xmm5;
+    xmm4  = _mm_clmulepi64_si128(xmm4, xmm10, 0x0);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
+    xmm5  = _mm_clmulepi64_si128(xmm5, xmm10, 0x0);
+    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
+    xmm4  = _mm_xor_si128(xmm4, xmm9);
+    xmm4  = _MM_XOR_PS(xmm4, xmm8);
+    xmm5  = _mm_xor_si128(xmm5, xmm12);
+    xmm5  = _MM_XOR_PS(xmm5, xmm13);
+
+    xmm9  = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 6));
+    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 7));
+    xmm9  = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
+    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
+    xmm8  = xmm6;
+    xmm13 = xmm7;
+    xmm6  = _mm_clmulepi64_si128(xmm6, xmm10, 0x0);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
+    xmm7  = _mm_clmulepi64_si128(xmm7, xmm10, 0x0);
+    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
+    xmm6  = _mm_xor_si128(xmm6, xmm9);
+    xmm6  = _MM_XOR_PS(xmm6, xmm8);
+    xmm7  = _mm_xor_si128(xmm7, xmm12);
+    xmm7  = _MM_XOR_PS(xmm7, xmm13);
+
+    len -= 128;
+    //; check if there is another 128B in the buffer to be able to fold
+    if (len >= 0)
+        goto _fold_128_B_loop;  //jge _fold_128_B_loop
+    //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    src_ptr += 128;
+    //; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+    //; fold the 8 xmm registers to 1 xmm register with different constants
+    xmm10 = _mm_set_epi64x(k_928 /*rk10*/, k_864 /*rk9*/);
+    xmm8  = xmm0;
+    xmm0  = _mm_clmulepi64_si128(xmm0, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _MM_XOR_PS(xmm7, xmm0);
+
+    xmm10 = _mm_set_epi64x(k_800 /*rk12*/, k_736 /*rk11*/);
+    xmm8  = xmm1;
+    xmm1  = _mm_clmulepi64_si128(xmm1, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _MM_XOR_PS(xmm7, xmm1);
+    xmm10 = _mm_set_epi64x(k_672 /*rk14*/, k_608 /*rk13*/);
+    xmm8  = xmm2;
+    xmm2  = _mm_clmulepi64_si128(xmm2, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _mm_xor_si128(xmm7, xmm2);
+    xmm10 = _mm_set_epi64x(k_544 /*rk16*/, k_480 /*rk15*/);
+    xmm8  = xmm3;
+    xmm3  = _mm_clmulepi64_si128(xmm3, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _MM_XOR_PS(xmm7, xmm3);
+    xmm10 = _mm_set_epi64x(k_416 /*rk18*/, k_352 /*rk17*/);
+    xmm8  = xmm4;
+    xmm4  = _mm_clmulepi64_si128(xmm4, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _mm_xor_si128(xmm7, xmm4);
+    xmm10 = _mm_set_epi64x(k_288 /*rk20*/, k_224 /*rk19*/);
+    xmm8  = xmm5;
+    xmm5  = _mm_clmulepi64_si128(xmm5, xmm10, 0x11);
+    xmm8  = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7  = _mm_xor_si128(xmm7, xmm8);
+    xmm7  = _MM_XOR_PS(xmm7, xmm5);
+    xmm10 = _mm_set_epi64x(k_160 /*rk2*/, k_96 /*rk1*/);
+    //        ; imm value of pclmulqdq instruction will determine which constant to use
+    xmm8 = xmm6;
+    xmm6 = _mm_clmulepi64_si128(xmm6, xmm10, 0x11);
+    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7 = _mm_xor_si128(xmm7, xmm8);
+    xmm7 = _mm_xor_si128(xmm7, xmm6);
+    //        ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+    //        ; instead of a cmp instruction, we use the negative flag with the jl instruction
+    len += (128 - 16);
+    if (len < 0)
+        goto _final_reduction_for_128;  //        jl _final_reduction_for_128
+//        ; now we have 16 + y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+//        ; we can fold 16 bytes at a time if y >= 16
+//        ; continue folding 16B at a time
+_16B_reduction_loop:
+    xmm8 = xmm7;
+    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
+    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7 = _mm_xor_si128(xmm7, xmm8);
+    xmm0 = _mm_loadu_si128((const __m128i*)src_ptr);
+    xmm0 = _mm_shuffle_epi8(xmm0, ENDIA_SHUF_MASK);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+    src_ptr += 16;
+    len -= 16;
+    //        ; instead of a cmp instruction, we utilize the flags with the jge instruction
+    //        ; equivalent of : cmp arg3, 16 - 16
+    //        ; check if there is any more 16B in the buffer to be able to fold
+
+    if (len >= 0)
+        goto _16B_reduction_loop;  //        jge _16B_reduction_loop
+//        ; now we have 16 + z bytes left to reduce, where 0 <= z < 16.
+//        ; first, we reduce the data in the xmm7 register
+_final_reduction_for_128:
+    //    ; check if any more data to fold.If not, compute the CRC of the final 128 bits
+    len += 16;
+    if (len == 0)
+        goto _128_done;  //        je _128_done
+                         //        ; here we are getting data that is less than 16 bytes.
+        //        ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
+        //        ; after that the registers need to be adjusted.
+#ifndef DML_DISABLE_OPTIMIZATION_
+_get_last_two_xmms:
+#endif  // DML_DISABLE_OPTIMIZATION_
+    xmm2 = xmm7;
+    xmm1 = _mm_loadu_si128((const __m128i*)(src_ptr - 16 + len));
+    xmm1 = _mm_shuffle_epi8(xmm1, ENDIA_SHUF_MASK);
+    //        ; get rid of the extra data that was loaded before
+    //        ; load the shift constant
+    //        lea rax, [pshufb_shf_table + 16]
+    //        sub rax, arg3
+    ptr  = (uint8_t*)pshufb_shf_table + 16 - len;
+    xmm0 = _mm_loadu_si128((const __m128i*)ptr);
+    //
+    //        ; shift xmm2 to the left by arg3 bytes
+    xmm2 = _mm_shuffle_epi8(xmm2, xmm0);
+    xmm0 = _mm_xor_si128(xmm0, mask1);
+    xmm7 = _mm_shuffle_epi8(xmm7, xmm0);
+    xmm1 = _mm_blendv_epi8(xmm1, xmm2, xmm0);
+    //        ; fold 16 Bytes
+    xmm2 = xmm1;
+    xmm8 = xmm7;
+    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
+    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
+    xmm7 = _mm_xor_si128(xmm7, xmm8);
+    xmm7 = _mm_xor_si128(xmm7, xmm2);
+_128_done:
+    //    ; compute crc of a 128 - bit value
+    xmm10 = _mm_set_epi64x(k_64 /*rk6*/, k_96 /*rk5*/);
+    xmm0  = xmm7;
+    //        ; 64b fold
+    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x1);
+    xmm0 = _mm_slli_si128(xmm0, 8);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+    //        ; 32b fold
+    xmm0 = xmm7;
+    xmm0 = _mm_and_si128(xmm0, mask2);
+    xmm7 = _mm_srli_si128(xmm7, 12);
+    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x10);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+//        ; barrett reduction
+_barrett:
+    xmm10 = _mm_set_epi64x(q /*rk8*/, u /*rk7*/);
+    xmm0  = xmm7;
+    xmm7  = _mm_clmulepi64_si128(xmm7, xmm10, 0x01);
+    xmm7  = _mm_slli_si128(xmm7, 4);
+    xmm7  = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
+
+    xmm7 = _mm_slli_si128(xmm7, 4);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+    eax  = _mm_extract_epi32(xmm7, 1);
+#ifndef DML_DISABLE_OPTIMIZATION_
+_cleanup:
+#endif  // DML_DISABLE_OPTIMIZATION_
+    //    ; scale the result back to crc_size bits (undoes the left shift applied to the initial value)
+    eax      = ((uint32_t)eax) >> (32 - crc_size);
+    *crc_ptr = eax;
+    return;  //        ret
+
+    //align 16
+#ifndef DML_DISABLE_OPTIMIZATION_
+_less_than_256:
+    //
+    //; check if there is enough buffer to be able to fold 16B at a time
+    //cmp arg3, 32
+    //jl _less_than_32
+    if (len < 32)
+    {
+        goto _less_than_32;
+    }
+    xmm11 = ENDIA_SHUF_MASK;
+    //; if there is, load the constants
+    xmm10 = _mm_set_epi64x(k_160 /*rk2*/, k_96 /*rk1*/);
+    xmm0  = _mm_cvtsi32_si128(arg1_low32);
+    xmm0  = _mm_slli_si128(xmm0, 12);
+    xmm7  = _mm_loadu_si128((const __m128i*)src_ptr);
+    xmm7  = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7  = _mm_xor_si128(xmm7, xmm0);
+    //
+    //; update the buffer pointer
+    src_ptr += 16;
+    //
+    //; update the counter.subtract 32 instead of 16 to save one instruction from the loop
+    len -= 32;
+    goto _16B_reduction_loop;  //jmp _16B_reduction_loop
+
+//align 16
+_less_than_32:
+    //; mov initial crc to the return value. this is necessary for zero - length buffers.
+    eax = arg1_low32;  //mov eax, arg1_low32
+    //test arg3, arg3
+    if (len == 0)
+        goto _cleanup;  //je _cleanup
+    //
+    xmm11 = ENDIA_SHUF_MASK;
+
+    xmm0 = _mm_cvtsi32_si128(arg1_low32);
+    xmm0 = _mm_slli_si128(xmm0, 12);
+    //cmp arg3, 16
+    if (len == 16)
+        goto _exact_16_left;  //je _exact_16_left
+    if (len < 16)
+        goto _less_than_16_left;  //jl _less_than_16_left
+
+    xmm7 = _mm_loadu_si128((const __m128i*)src_ptr);
+    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+    src_ptr += 16;
+    len -= 16;
+    xmm10 = _mm_set_epi64x(k_160 /*rk2*/, k_96 /*rk1*/);
+    goto _get_last_two_xmms;  //jmp _get_last_two_xmms
+//align 16
+_less_than_16_left:
+#endif  // DML_DISABLE_OPTIMIZATION_
+    //; use stack space to load data less than 16 bytes, zero - out the 16B in memory first.
+    //
+    xmm1 = _mm_setzero_si128();
+
+    _mm_storeu_si128((__m128i*)r11, xmm1);
+    //cmp arg3, 4
+    if (len < 4)
+        goto _only_less_than_4;  //jl _only_less_than_4
+    //; backup the counter value
+    r9 = len;
+    //cmp arg3, 8
+    if (len < 8)
+        goto _less_than_8_left;  //jl _less_than_8_left
+    //; load 8 Bytes
+    *(int64_t*)r11 = *(int64_t*)src_ptr;
+    r11 += 8;
+    len -= 8;
+    src_ptr += 8;
+_less_than_8_left:
+    //cmp arg3, 4
+    if (len < 4)
+        goto _less_than_4_left;  //jl _less_than_4_left
+    //; load 4 Bytes
+    *(int*)r11 = *(int*)src_ptr;
+    r11 += 4;
+    len -= 4;
+    src_ptr += 4;  //add arg2, 4
+_less_than_4_left:
+    //
+    //cmp arg3, 2
+    if (len < 2)
+        goto _less_than_2_left;  //jl _less_than_2_left
+    //
+    //; load 2 Bytes
+    *(short*)r11 = *(short*)src_ptr;
+    r11 += 2;
+    len -= 2;
+    src_ptr += 2;
+_less_than_2_left:
+    //cmp     arg3, 1
+    if (len < 1)
+        goto _zero_left;  //jl      _zero_left
+    //; load 1 Byte
+    *r11 = *src_ptr;
+_zero_left:
+    xmm7 = _mm_loadu_si128((const __m128i*)ttt);
+    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+
+    ptr  = (uint8_t*)pshufb_shf_table + 16 - r9;
+    xmm0 = _mm_loadu_si128((const __m128i*)ptr);
+    xmm0 = _mm_xor_si128(xmm0, mask1);
+    //
+    xmm7 = _mm_shuffle_epi8(xmm7, xmm0);
+    goto _128_done;  //jmp _128_done
+    //align 16
+#ifndef DML_DISABLE_OPTIMIZATION_
+_exact_16_left:
+#endif  // DML_DISABLE_OPTIMIZATION_
+    xmm7 = _mm_loadu_si128((const __m128i*)src_ptr);
+    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+
+    goto _128_done;  //jmp _128_done
+_only_less_than_4:
+    //cmp arg3, 3
+    if (len < 3)
+        goto _only_less_than_3;  //jl _only_less_than_3
+    //; load 3 Bytes
+    r11[0] = src_ptr[0];
+
+    r11[1] = src_ptr[1];
+
+    r11[2] = src_ptr[2];
+    xmm7   = _mm_loadu_si128((const __m128i*)r11);
+    xmm7   = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7   = _mm_xor_si128(xmm7, xmm0);
+    xmm7   = _mm_srli_si128(xmm7, 5);
+    goto _barrett;  //jmp _barrett
+_only_less_than_3:
+    //cmp arg3, 2
+    if (len < 2)
+        goto _only_less_than_2;  //jl _only_less_than_2
+    //; load 2 Bytes
+    r11[0] = src_ptr[0];
+
+    r11[1] = src_ptr[1];
+    xmm7   = _mm_loadu_si128((const __m128i*)r11);
+    xmm7   = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7   = _mm_xor_si128(xmm7, xmm0);
+    xmm7   = _mm_srli_si128(xmm7, 6);
+    goto _barrett;  //jmp _barrett
+_only_less_than_2:
+    //
+    //; load 1 Byte
+    eax    = src_ptr[0];
+    r11[0] = eax;
+
+    xmm7 = _mm_loadu_si128((const __m128i*)r11);
+    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
+    xmm7 = _mm_xor_si128(xmm7, xmm0);
+
+    xmm7 = _mm_srli_si128(xmm7, 7);
+
+    goto _barrett;  //jmp _barrett
+}
+
+/** CRC-32 entry point: sizes the folding kernel can handle go to it, all other sizes to the reference code. */
+uint32_t dml_avx512_crc_u32(const uint8_t* src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial)
+{
+    const uint32_t kernel_min_size = 256u;        /* the kernel reads 256 bytes before it first tests the length */
+    const uint32_t kernel_max_size = 0x7FFFFFFFu; /* the kernel keeps the length in a signed int */
+
+    if (transfer_size < kernel_min_size || transfer_size > kernel_max_size)
+    {
+        return dml_ref_crc_32u(src, transfer_size, crc_value, polynomial);
+    }
+    dmlc_own_calculate_crc_32u(src, transfer_size, &crc_value, polynomial);
+    return crc_value;
+}
diff --git a/sources/core/src/sw_dispatcher/avx512/fill.c b/sources/core/src/sw_dispatcher/avx512/fill.c
new file mode 100644
index 0000000..4431a61
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/fill.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+// Disable optimization for MSVC, because it doesn't support _load_maskXX intrinsic
+#ifndef _MSC_BUILD
+// Fills transfer_size bytes at `src` (the destination, despite its name) with the repeating 8-byte
+// `pattern`: one masked store up to the next 64-byte boundary, aligned 64-byte stores, then a masked
+// tail store. Written for transfer_size >= 64, which is what dml_avx512_fill_u64 passes.
+static inline void fill_big_avx512(uint64_t pattern, uint8_t *const src, uint32_t transfer_size)
+{
+    // Offset of the destination inside its 64-byte line, and the byte count up to the next boundary
+    const size_t unaligned_bytes     = (uintptr_t)src % 64u;
+    const size_t unaligned_part_size = (0u == unaligned_bytes) ? 0u : 64u - unaligned_bytes;
+    __m512i      zmm_pattern         = _mm512_set1_epi64(pattern);
+
+    // Fill unaligned part of destination
+    if (0u != unaligned_part_size)
+    {
+        // Only the low (64 - unaligned_bytes) mask bits stay set: the bytes in front of the boundary
+        unsigned long long mask  = 0xFFFFFFFFFFFFFFFFu >> unaligned_bytes;
+        __mmask64          mmask = _load_mask64(&mask);
+        _mm512_mask_storeu_epi8(src, mmask, zmm_pattern);
+
+        // Re-phase the pattern for the aligned part: rotate left by (unaligned_bytes % 8) bytes.
+        // Shift counts stay within 8..56 bits; shifting a 64-bit value by 64 or more is undefined.
+        const unsigned int rotate_bits = (unsigned int)(unaligned_bytes % 8u) * 8u;
+        if (0u != rotate_bits)
+        {
+            pattern     = (pattern << rotate_bits) | (pattern >> (64u - rotate_bits));
+            zmm_pattern = _mm512_set1_epi64(pattern);
+        }
+    }
+
+    // Split what is left into whole 64-byte blocks (head) and a shorter tail
+    const size_t aligned_part_size = transfer_size - unaligned_part_size;
+    const size_t head_size         = aligned_part_size / sizeof(__m512i);
+    const size_t tail_size         = aligned_part_size % sizeof(__m512i);
+    __m512i     *head_ptr          = (__m512i *)(src + unaligned_part_size);
+    __m512i     *tail_ptr          = head_ptr + head_size;
+
+    // Fill head part with aligned stores (nothing happens when head_size is 0)
+    for (; head_ptr != tail_ptr; ++head_ptr)
+        _mm512_store_si512(head_ptr, zmm_pattern);
+
+    // Fill tail part
+    if (0u != tail_size)
+    {
+        unsigned long long mask  = ~(0xFFFFFFFFFFFFFFFFu << tail_size);
+        __mmask64          mmask = _load_mask64(&mask);
+        _mm512_mask_storeu_epi8(tail_ptr, mmask, zmm_pattern);
+    }
+}
+
+// Fills the first (transfer_size % 64) bytes at `src` (the destination) with the repeating 8-byte
+// `pattern`, using one masked 32-byte store for each half of the 64-byte window.
+static inline void fill_small_avx512(uint64_t pattern, uint8_t *const src, uint32_t transfer_size)
+{
+    const size_t  tail_size    = transfer_size % sizeof(__m512i);
+    const __m256i ymm1_pattern = _mm256_set1_epi64x(pattern);
+
+    // Bit i set <=> byte i is written; tail_size is at most 63, so the shift is well defined.
+    const unsigned long long mask_value = ~(0xFFFFFFFFFFFFFFFFu << tail_size);
+
+    // Split the mask arithmetically rather than re-reading it through a uint32_t pointer, which
+    // breaks strict aliasing and depends on byte order. Bits 0..31 -> first half, 32..63 -> second.
+    _mm256_mask_storeu_epi8(src, (__mmask32)(mask_value & 0xFFFFFFFFu), ymm1_pattern);
+    _mm256_mask_storeu_epi8(src + 32u, (__mmask32)(mask_value >> 32u), ymm1_pattern);
+}
+
+// Pattern-fill entry point: fill_small_avx512 for sizes below 64 bytes, fill_big_avx512 otherwise.
+void dml_avx512_fill_u64(uint64_t pattern, uint8_t *dst, uint32_t transfer_size)
+{
+    const size_t small_bound = 64u;
+
+    if (transfer_size >= small_bound)
+    {
+        fill_big_avx512(pattern, dst, transfer_size);
+        return;
+    }
+
+    fill_small_avx512(pattern, dst, transfer_size);
+}
+#else
+void dml_avx512_fill_u64(uint64_t pattern, uint8_t *dst, uint32_t transfer_size)
+{   // _MSC_BUILD branch: the AVX-512 fill kernels are compiled out, so the call is forwarded as is.
+    dml_ref_fill_u64(pattern, dst, transfer_size);
+}
+#endif
diff --git a/sources/core/src/sw_dispatcher/avx512/mem_move.c b/sources/core/src/sw_dispatcher/avx512/mem_move.c
new file mode 100644
index 0000000..1501cff
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/avx512/mem_move.c
@@ -0,0 +1,862 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_cpuid.h"
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+#if defined(_MSC_VER)  // MSVC spelling: alignment specifier goes in front of the declaration
+#define OWN_ALIGNED_64_ARRAY(array_declaration) __declspec(align(64u)) array_declaration
+#elif defined(__GNUC__)  // GNU spelling: alignment attribute goes behind the declaration
+#define OWN_ALIGNED_64_ARRAY(array_declaration) array_declaration __attribute__((aligned(64u)))
+#endif  // OWN_ALIGNED_64_ARRAY(decl): expands to `decl` declared with 64-byte alignment
+
+// Scalar copy of `length` bytes from `src` to `dst`, in ascending address order.
+// The destination is first brought to a 64-byte boundary byte by byte; the remainder is moved in
+// 64-bit words when both pointers had the same offset inside a 64-byte line, else byte by byte.
+static inline void dmlc_own_px_copy_8u_unrolled(const uint8_t *src, uint8_t *dst, uint32_t length)
+{
+    // Bytes from each pointer up to its next 64-byte boundary; 64 means it is already aligned.
+    const uint32_t dst_to_boundary = 64u - ((uint64_t)dst & 0x3F);
+    const uint32_t src_to_boundary = 64u - ((uint64_t)src & 0x3F);
+
+    if (dst_to_boundary < 64u)
+    {
+        // Leading part: the whole request when it ends before the boundary, else up to the boundary.
+        const uint32_t prefix = (length < dst_to_boundary) ? length : dst_to_boundary;
+
+        for (uint32_t pos = 0u; pos < prefix; ++pos)
+        {
+            dst[pos] = src[pos];
+        }
+        if (length < dst_to_boundary)
+        {
+            return;
+        }
+
+        length -= prefix;
+        src += prefix;
+        dst += prefix;
+    }
+
+    if (dst_to_boundary != src_to_boundary)
+    {
+        // Offsets differ: byte copy, eight bytes per iteration.
+        for (; length > 7u; length -= 8, src += 8u, dst += 8u)
+        {
+            dst[0] = src[0];
+            dst[1] = src[1];
+            dst[2] = src[2];
+            dst[3] = src[3];
+            dst[4] = src[4];
+            dst[5] = src[5];
+            dst[6] = src[6];
+            dst[7] = src[7];
+        }
+
+        // Up to seven leftover bytes.
+        for (uint32_t pos = 0u; pos < length; ++pos)
+        {
+            dst[pos] = src[pos];
+        }
+        return;
+    }
+
+    // Offsets match, so both pointers sit on a 64-byte boundary here: copy 64-bit words.
+    const uint64_t *src_words       = (uint64_t *)src;
+    uint64_t       *dst_words       = (uint64_t *)dst;
+    uint32_t        words_left      = length / sizeof(uint64_t);
+    const uint32_t  first_tail_byte = words_left * sizeof(uint64_t);
+
+    // Four words per iteration.
+    for (; words_left > 3u; words_left -= 4u, src_words += 4u, dst_words += 4u)
+    {
+        dst_words[0] = src_words[0];
+        dst_words[1] = src_words[1];
+        dst_words[2] = src_words[2];
+        dst_words[3] = src_words[3];
+    }
+
+    // Up to three leftover words.
+    for (uint32_t word = 0u; word < words_left; ++word)
+    {
+        dst_words[word] = src_words[word];
+    }
+
+    // Bytes past the last whole word, addressed from the aligned `src`/`dst`.
+    for (uint32_t pos = first_tail_byte; pos < length; ++pos)
+    {
+        dst[pos] = src[pos];
+    }
+}
+// 16-bit index tables loaded for _mm512_permutex2var_epi16 in copy_avx512: permutex_idx_<N>u[i] == N / 2 + i, N = 2, 4, ..., 62.
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_2u[32])  = { 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
+                                                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_4u[32])  = { 2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17,
+                                                              18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_6u[32])  = { 3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                                              19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_8u[32])  = { 4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                                              20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_10u[32]) = { 5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                                               21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_12u[32]) = { 6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+                                                               22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_14u[32]) = { 7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                                               23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_16u[32]) = { 8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                                               24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_18u[32]) = { 9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                                               25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_20u[32]) = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+                                                               26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_22u[32]) = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+                                                               27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_24u[32]) = { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+                                                               28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_26u[32]) = { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+                                                               29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_28u[32]) = { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+                                                               30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_30u[32]) = { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                                               31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_32u[32]) = { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                                               32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_34u[32]) = { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                                                               33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_36u[32]) = { 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+                                                               34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_38u[32]) = { 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+                                                               35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_40u[32]) = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                                                               36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_42u[32]) = { 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+                                                               37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_44u[32]) = { 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+                                                               38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_46u[32]) = { 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+                                                               39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_48u[32]) = { 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                                                               40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_50u[32]) = { 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                                               41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_52u[32]) = { 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+                                                               42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_54u[32]) = { 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+                                                               43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_56u[32]) = { 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+                                                               44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_58u[32]) = { 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+                                                               45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_60u[32]) = { 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+                                                               46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61 };
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_62u[32]) = { 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+                                                               47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62 };
+// The tables above in ascending order of N: slot k holds permutex_idx_<2 * (k + 1)>u, i.e. table N sits in slot (N - 2) / 2.
+static uint16_t *permutex_idx_pptr[31] = { permutex_idx_2u,  permutex_idx_4u,  permutex_idx_6u,  permutex_idx_8u,  permutex_idx_10u,
+                                           permutex_idx_12u, permutex_idx_14u, permutex_idx_16u, permutex_idx_18u, permutex_idx_20u,
+                                           permutex_idx_22u, permutex_idx_24u, permutex_idx_26u, permutex_idx_28u, permutex_idx_30u,
+                                           permutex_idx_32u, permutex_idx_34u, permutex_idx_36u, permutex_idx_38u, permutex_idx_40u,
+                                           permutex_idx_42u, permutex_idx_44u, permutex_idx_46u, permutex_idx_48u, permutex_idx_50u,
+                                           permutex_idx_52u, permutex_idx_54u, permutex_idx_56u, permutex_idx_58u, permutex_idx_60u,
+                                           permutex_idx_62u };
+
+/**
+ * @brief Byte-wise right shift inside each 128-bit lane, with the count chosen at run time.
+ *
+ * _mm512_bsrli_epi128 takes its count as a compile-time immediate, hence one call site per value.
+ *
+ * @param a      vector to shift
+ * @param shift  number of bytes to shift by
+ *
+ * @return `a` itself for 0, the shifted vector for 1..15, an all-zero vector for anything larger
+ */
+static inline __m512i dmlc_own_mm512_bsrli_epi128(__m512i a, uint32_t shift)
+{
+    switch (shift)
+    {
+        case 0:
+            // Shifting by nothing must hand the input back, not wipe it.
+            return a;
+
+        case 1:
+            return _mm512_bsrli_epi128(a, 1);
+
+        case 2:
+            return _mm512_bsrli_epi128(a, 2);
+
+        case 3:
+            return _mm512_bsrli_epi128(a, 3);
+
+        case 4:
+            return _mm512_bsrli_epi128(a, 4);
+
+        case 5:
+            return _mm512_bsrli_epi128(a, 5);
+
+        case 6:
+            return _mm512_bsrli_epi128(a, 6);
+
+        case 7:
+            return _mm512_bsrli_epi128(a, 7);
+
+        case 8:
+            return _mm512_bsrli_epi128(a, 8);
+
+        case 9:
+            return _mm512_bsrli_epi128(a, 9);
+
+        case 10:
+            return _mm512_bsrli_epi128(a, 10);
+
+        case 11:
+            return _mm512_bsrli_epi128(a, 11);
+
+        case 12:
+            return _mm512_bsrli_epi128(a, 12);
+
+        case 13:
+            return _mm512_bsrli_epi128(a, 13);
+
+        case 14:
+            return _mm512_bsrli_epi128(a, 14);
+
+        case 15:
+            return _mm512_bsrli_epi128(a, 15);
+
+        default:
+            // 16 bytes or more shift every byte out of its lane.
+            return _mm512_setzero_si512();
+    }
+}
+
+/**
+ * @brief Byte-wise left shift inside each 128-bit lane, with the count chosen at run time.
+ *
+ * _mm512_bslli_epi128 takes its count as a compile-time immediate, hence one call site per value.
+ *
+ * @param a      vector to shift
+ * @param shift  number of bytes to shift by
+ *
+ * @return `a` itself for 0, the shifted vector for 1..15, an all-zero vector for anything larger
+ */
+static inline __m512i dmlc_own_mm512_bslli_epi128(__m512i a, uint32_t shift)
+{
+    switch (shift)
+    {
+        case 0:
+            // Shifting by nothing must hand the input back, not wipe it.
+            return a;
+
+        case 1:
+            return _mm512_bslli_epi128(a, 1);
+
+        case 2:
+            return _mm512_bslli_epi128(a, 2);
+
+        case 3:
+            return _mm512_bslli_epi128(a, 3);
+
+        case 4:
+            return _mm512_bslli_epi128(a, 4);
+
+        case 5:
+            return _mm512_bslli_epi128(a, 5);
+
+        case 6:
+            return _mm512_bslli_epi128(a, 6);
+
+        case 7:
+            return _mm512_bslli_epi128(a, 7);
+
+        case 8:
+            return _mm512_bslli_epi128(a, 8);
+
+        case 9:
+            return _mm512_bslli_epi128(a, 9);
+
+        case 10:
+            return _mm512_bslli_epi128(a, 10);
+
+        case 11:
+            return _mm512_bslli_epi128(a, 11);
+
+        case 12:
+            return _mm512_bslli_epi128(a, 12);
+
+        case 13:
+            return _mm512_bslli_epi128(a, 13);
+
+        case 14:
+            return _mm512_bslli_epi128(a, 14);
+
+        case 15:
+            return _mm512_bslli_epi128(a, 15);
+
+        default:
+            // 16 bytes or more shift every byte out of its lane.
+            return _mm512_setzero_si512();
+    }
+}
+
+/**
+ * @brief _mm512_alignr_epi32 with a run-time count that is given in bytes instead of 32-bit elements.
+ *
+ * The intrinsic takes its count as a compile-time immediate, hence one call site per value.
+ *
+ * @param a      first operand handed to _mm512_alignr_epi32
+ * @param b      second operand handed to _mm512_alignr_epi32; returned untouched when shift is 0
+ * @param shift  byte count; supported values are the multiples of 4 from 0 to 60
+ * @return _mm512_alignr_epi32(a, b, shift / 4) for a supported count, an all-zero vector otherwise
+ */
+static inline __m512i dmlc_own_mm512_alignr_epi8(__m512i a, __m512i b, uint32_t shift)
+{
+    // Anything that is not a whole number of 32-bit elements, or is above 60 bytes, yields zero.
+    if ((0u != (shift & 3u)) || (shift > 60u))
+    {
+        return _mm512_setzero_si512();
+    }
+
+    switch (shift / 4u)
+    {
+        case 0:
+            return b;
+
+        case 1:
+            return _mm512_alignr_epi32(a, b, 1);
+
+        case 2:
+            return _mm512_alignr_epi32(a, b, 2);
+
+        case 3:
+            return _mm512_alignr_epi32(a, b, 3);
+
+        case 4:
+            return _mm512_alignr_epi32(a, b, 4);
+
+        case 5:
+            return _mm512_alignr_epi32(a, b, 5);
+
+        case 6:
+            return _mm512_alignr_epi32(a, b, 6);
+
+        case 7:
+            return _mm512_alignr_epi32(a, b, 7);
+
+        case 8:
+            return _mm512_alignr_epi32(a, b, 8);
+
+        case 9:
+            return _mm512_alignr_epi32(a, b, 9);
+
+        case 10:
+            return _mm512_alignr_epi32(a, b, 10);
+
+        case 11:
+            return _mm512_alignr_epi32(a, b, 11);
+
+        case 12:
+            return _mm512_alignr_epi32(a, b, 12);
+
+        case 13:
+            return _mm512_alignr_epi32(a, b, 13);
+
+        case 14:
+            return _mm512_alignr_epi32(a, b, 14);
+
+        case 15:
+            return _mm512_alignr_epi32(a, b, 15);
+
+        default:  // not reachable after the guard above; keeps every path returning a value
+            return _mm512_setzero_si512();
+    }
+}
+
+static void copy_avx512(const uint8_t *src, uint8_t *dst, uint32_t transfer_size)
+{
+    const size_t kilobyte = 1024;
+
+    if (transfer_size < kilobyte)
+    {
+        dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+        return;
+    }
+
+    if (transfer_size > (32 * kilobyte))
+    {
+        size_t cache_size = dml_core_get_cache_size();
+        if ((cache_size > 0) && (transfer_size > cache_size))
+        {
+            dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+            return;
+        }
+    }
+
+    const uint32_t alignment    = 64;
+    const uint32_t magic_number = 0x3f;
+
+    uint32_t align_dst = alignment - ((uintptr_t)dst & magic_number);
+    uint32_t align_src = alignment - ((uintptr_t)src & magic_number);
+
+    if (align_dst < alignment)
+    {
+        if (transfer_size < 4 * kilobyte)
+        {
+            dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+            return;
+        }
+
+        dmlc_own_px_copy_8u_unrolled(src, dst, align_dst);
+
+        transfer_size -= align_dst;
+        dst += align_dst;
+        src += align_dst;
+
+        uint32_t length_512u = transfer_size / sizeof(__m512i);
+        uint32_t tail        = transfer_size % sizeof(__m512i);
+
+        if (0u != ((align_src - align_dst) & 15u))
+        {
+            uint32_t shift = (align_dst > align_src) ? (align_dst - align_src) : (64u + align_dst - align_src);
+
+            if (0u == (shift & 3u))
+            {
+                src -= shift;
+                __mmask64 skip_mask = ~((1llu << shift) - 1u);
+                __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+                src += 64u;
+
+                while (length_512u > 4u)
+                {
+                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src);
+                    __m512i zmm2 = dmlc_own_mm512_alignr_epi8(zmm1, zmm0, shift);
+                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src + 64u));
+                    __m512i zmm4 = dmlc_own_mm512_alignr_epi8(zmm3, zmm1, shift);
+                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src + 128u));
+                    __m512i zmm6 = dmlc_own_mm512_alignr_epi8(zmm5, zmm3, shift);
+                    zmm0         = _mm512_load_si512((const __m512i *)(src + 192u));
+                    __m512i zmm7 = dmlc_own_mm512_alignr_epi8(zmm0, zmm5, shift);
+                    _mm512_store_si512((__m512i *)dst, zmm2);
+                    _mm512_store_si512((__m512i *)(dst + 64u), zmm4);
+                    _mm512_store_si512((__m512i *)(dst + 128u), zmm6);
+                    _mm512_store_si512((__m512i *)(dst + 192u), zmm7);
+                    src += 256u;
+                    dst += 256u;
+                    length_512u -= 4u;
+                }
+
+                src -= 64u - shift;
+            }
+            else if (0u == (shift & 1u))
+            {
+                src -= shift;
+                __mmask64 skip_mask = ~((1llu << shift) - 1u);
+                __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+                src += 64u;
+
+                __m512i permutex_idxmm = _mm512_load_si512(permutex_idx_pptr[(shift - 2) / 2]);
+
+                while (length_512u > 4u)
+                {
+                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src);
+                    __m512i zmm2 = _mm512_permutex2var_epi16(zmm0, permutex_idxmm, zmm1);
+                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src + 64u));
+                    __m512i zmm4 = _mm512_permutex2var_epi16(zmm1, permutex_idxmm, zmm3);
+                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src + 128u));
+                    __m512i zmm6 = _mm512_permutex2var_epi16(zmm3, permutex_idxmm, zmm5);
+                    zmm0         = _mm512_load_si512((const __m512i *)(src + 192u));
+                    __m512i zmm7 = _mm512_permutex2var_epi16(zmm5, permutex_idxmm, zmm0);
+                    _mm512_store_si512((__m512i *)dst, zmm2);
+                    _mm512_store_si512((__m512i *)(dst + 64u), zmm4);
+                    _mm512_store_si512((__m512i *)(dst + 128u), zmm6);
+                    _mm512_store_si512((__m512i *)(dst + 192u), zmm7);
+                    src += 256u;
+                    dst += 256u;
+                    length_512u -= 4u;
+                }
+
+                src -= 64u - shift;
+            }
+            else if (shift < 16u)
+            {
+                if (transfer_size < 16 * kilobyte)
+                {
+                    dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+                    return;
+                }
+
+                src -= shift;
+                __mmask64 skip_mask = ~((1llu << shift) - 1u);
+                __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+                src += 64u;
+
+                __m512i permutex_idxmm_higher = _mm512_load_si512(permutex_idx_pptr[(shift - 1) / 2]);
+
+                while (length_512u > 4u)
+                {
+                    __m512i zmm1        = _mm512_load_si512((const __m512i *)src);
+                    __m512i zmm2_lower  = dmlc_own_mm512_bsrli_epi128(zmm0, shift);
+                    __m512i zmm2_higher = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_higher, zmm1);
+                    zmm2_higher         = dmlc_own_mm512_bslli_epi128(zmm2_higher, 1u);
+                    zmm2_higher         = _mm512_mask_mov_epi8(zmm2_higher, 0x0001000100010001, zmm2_lower);
+                    __m512i zmm3        = _mm512_load_si512((const __m512i *)(src + 64u));
+                    __m512i zmm4_lower  = dmlc_own_mm512_bsrli_epi128(zmm1, shift);
+                    __m512i zmm4_higher = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_higher, zmm3);
+                    zmm4_higher         = dmlc_own_mm512_bslli_epi128(zmm4_higher, 1u);
+                    zmm4_higher         = _mm512_mask_mov_epi8(zmm4_higher, 0x0001000100010001, zmm4_lower);
+                    __m512i zmm5        = _mm512_load_si512((const __m512i *)(src + 128u));
+                    __m512i zmm6_lower  = dmlc_own_mm512_bsrli_epi128(zmm3, shift);
+                    __m512i zmm6_higher = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_higher, zmm5);
+                    zmm6_higher         = dmlc_own_mm512_bslli_epi128(zmm6_higher, 1u);
+                    zmm6_higher         = _mm512_mask_mov_epi8(zmm6_higher, 0x0001000100010001, zmm6_lower);
+                    zmm0                = _mm512_load_si512((const __m512i *)(src + 192u));
+                    __m512i zmm7_lower  = dmlc_own_mm512_bsrli_epi128(zmm5, shift);
+                    __m512i zmm7_higher = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_higher, zmm0);
+                    zmm7_higher         = dmlc_own_mm512_bslli_epi128(zmm7_higher, 1u);
+                    zmm7_higher         = _mm512_mask_mov_epi8(zmm7_higher, 0x0001000100010001, zmm7_lower);
+                    _mm512_store_si512((__m512i *)dst, zmm2_higher);
+                    _mm512_store_si512((__m512i *)(dst + 64u), zmm4_higher);
+                    _mm512_store_si512((__m512i *)(dst + 128u), zmm6_higher);
+                    _mm512_store_si512((__m512i *)(dst + 192u), zmm7_higher);
+                    src += 256u;
+                    dst += 256u;
+                    length_512u -= 4u;
+                }
+
+                src -= 64u - shift;
+            }
+            else if (shift > 48u)
+            {
+                if (transfer_size < 16 * kilobyte)
+                {
+                    dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+                    return;
+                }
+
+                src -= shift;
+                __mmask64 skip_mask = ~((1llu << shift) - 1u);
+                __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+                src += 64u;
+
+                __m512i  permutex_idxmm_lower = _mm512_load_si512(permutex_idx_pptr[(shift - 3) / 2]);
+                uint32_t shift_higher         = 64u - shift;
+
+                while (length_512u > 4u)
+                {
+                    __m512i zmm1        = _mm512_load_si512((const __m512i *)src);
+                    __m512i zmm2_lower  = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_lower, zmm1);
+                    zmm2_lower          = dmlc_own_mm512_bsrli_epi128(zmm2_lower, 1u);
+                    __m512i zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm1, shift_higher);
+                    zmm2_higher         = _mm512_mask_mov_epi8(zmm2_higher, 0x7FFF7FFF7FFF7FFF, zmm2_lower);
+                    __m512i zmm3        = _mm512_load_si512((const __m512i *)(src + 64u));
+                    __m512i zmm4_lower  = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_lower, zmm3);
+                    zmm4_lower          = dmlc_own_mm512_bsrli_epi128(zmm4_lower, 1u);
+                    __m512i zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm3, shift_higher);
+                    zmm4_higher         = _mm512_mask_mov_epi8(zmm4_higher, 0x7FFF7FFF7FFF7FFF, zmm4_lower);
+                    __m512i zmm5        = _mm512_load_si512((const __m512i *)(src + 128u));
+                    __m512i zmm6_lower  = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_lower, zmm5);
+                    zmm6_lower          = dmlc_own_mm512_bsrli_epi128(zmm6_lower, 1u);
+                    __m512i zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm5, shift_higher);
+                    zmm6_higher         = _mm512_mask_mov_epi8(zmm6_higher, 0x7FFF7FFF7FFF7FFF, zmm6_lower);
+                    zmm0                = _mm512_load_si512((const __m512i *)(src + 192u));
+                    __m512i zmm7_lower  = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_lower, zmm0);
+                    zmm7_lower          = dmlc_own_mm512_bsrli_epi128(zmm7_lower, 1u);
+                    __m512i zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm0, shift_higher);
+                    zmm7_higher         = _mm512_mask_mov_epi8(zmm7_higher, 0x7FFF7FFF7FFF7FFF, zmm7_lower);
+                    _mm512_store_si512((__m512i *)dst, zmm2_higher);
+                    _mm512_store_si512((__m512i *)(dst + 64u), zmm4_higher);
+                    _mm512_store_si512((__m512i *)(dst + 128u), zmm6_higher);
+                    _mm512_store_si512((__m512i *)(dst + 192u), zmm7_higher);
+                    src += 256u;
+                    dst += 256u;
+                    length_512u -= 4u;
+                }
+
+                src -= 64u - shift;
+            }
+            else
+            {
+                dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+                return;
+            }
+        }
+        else
+        {
+            while (length_512u > 3u)
+            {
+                __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src);
+                __m512i zmm1 = _mm512_loadu_si512((const __m512i *)(src + 64u));
+                __m512i zmm2 = _mm512_loadu_si512((const __m512i *)(src + 128u));
+                __m512i zmm3 = _mm512_loadu_si512((const __m512i *)(src + 192u));
+                _mm512_store_si512((__m512i *)dst, zmm0);
+                _mm512_store_si512((__m512i *)(dst + 64u), zmm1);
+                _mm512_store_si512((__m512i *)(dst + 128u), zmm2);
+                _mm512_store_si512((__m512i *)(dst + 192u), zmm3);
+                src += 256u;
+                dst += 256u;
+                length_512u -= 4;
+            }
+        }
+        while (length_512u > 0u)
+        {
+            __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src);
+            _mm512_store_si512((__m512i *)dst, zmm0);
+            src += 64u;
+            dst += 64u;
+            --length_512u;
+        }
+
+        dmlc_own_px_copy_8u_unrolled(src, dst, tail);
+
+        return;
+    }
+
+    uint32_t length_512u = transfer_size / sizeof(__m512i);
+    uint32_t tail        = transfer_size % sizeof(__m512i);
+
+    if (align_src < 64u)
+    {
+        if (transfer_size < 32 * kilobyte)
+        {
+            dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+            return;
+        }
+
+        uint32_t shift = 64 - align_src;
+
+        if (0u == (shift & 3u))
+        {
+            src -= shift;
+            __mmask64 skip_mask = ~((1llu << shift) - 1u);
+            __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+            src += 64u;
+
+            while (length_512u > 4u)
+            {
+                __m512i zmm1 = _mm512_load_si512((const __m512i *)src);
+                __m512i zmm2 = dmlc_own_mm512_alignr_epi8(zmm1, zmm0, shift);
+                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src + 64u));
+                __m512i zmm4 = dmlc_own_mm512_alignr_epi8(zmm3, zmm1, shift);
+                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src + 128u));
+                __m512i zmm6 = dmlc_own_mm512_alignr_epi8(zmm5, zmm3, shift);
+                zmm0         = _mm512_load_si512((const __m512i *)(src + 192u));
+                __m512i zmm7 = dmlc_own_mm512_alignr_epi8(zmm0, zmm5, shift);
+                _mm512_store_si512((__m512i *)dst, zmm2);
+                _mm512_store_si512((__m512i *)(dst + 64u), zmm4);
+                _mm512_store_si512((__m512i *)(dst + 128u), zmm6);
+                _mm512_store_si512((__m512i *)(dst + 192u), zmm7);
+                src += 256u;
+                dst += 256u;
+                length_512u -= 4u;
+            }
+
+            src -= 64u - shift;
+        }
+        else if (0u == (shift & 1u))
+        {
+            src -= shift;
+            __mmask64 skip_mask = ~((1llu << shift) - 1u);
+            __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+            src += 64u;
+
+            __m512i permutex_idxmm = _mm512_load_si512(permutex_idx_pptr[(shift - 2) / 2]);
+
+            while (length_512u > 4u)
+            {
+                __m512i zmm1 = _mm512_load_si512((const __m512i *)src);
+                __m512i zmm2 = _mm512_permutex2var_epi16(zmm0, permutex_idxmm, zmm1);
+                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src + 64u));
+                __m512i zmm4 = _mm512_permutex2var_epi16(zmm1, permutex_idxmm, zmm3);
+                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src + 128u));
+                __m512i zmm6 = _mm512_permutex2var_epi16(zmm3, permutex_idxmm, zmm5);
+                zmm0         = _mm512_load_si512((const __m512i *)(src + 192u));
+                __m512i zmm7 = _mm512_permutex2var_epi16(zmm5, permutex_idxmm, zmm0);
+                _mm512_store_si512((__m512i *)dst, zmm2);
+                _mm512_store_si512((__m512i *)(dst + 64u), zmm4);
+                _mm512_store_si512((__m512i *)(dst + 128u), zmm6);
+                _mm512_store_si512((__m512i *)(dst + 192u), zmm7);
+                src += 256u;
+                dst += 256u;
+                length_512u -= 4u;
+            }
+
+            src -= 64u - shift;
+        }
+        else if (shift < 16u)
+        {
+            src -= shift;
+            __mmask64 skip_mask = ~((1llu << shift) - 1u);
+            __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+            src += 64u;
+
+            __m512i permutex_idxmm_higher = _mm512_load_si512(permutex_idx_pptr[(shift - 1) / 2]);
+
+            while (length_512u > 4u)
+            {
+                __m512i zmm1        = _mm512_load_si512((const __m512i *)src);
+                __m512i zmm2_lower  = dmlc_own_mm512_bsrli_epi128(zmm0, shift);
+                __m512i zmm2_higher = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_higher, zmm1);
+                zmm2_higher         = dmlc_own_mm512_bslli_epi128(zmm2_higher, 1u);
+                zmm2_higher         = _mm512_mask_mov_epi8(zmm2_higher, 0x0001000100010001, zmm2_lower);
+                __m512i zmm3        = _mm512_load_si512((const __m512i *)(src + 64u));
+                __m512i zmm4_lower  = dmlc_own_mm512_bsrli_epi128(zmm1, shift);
+                __m512i zmm4_higher = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_higher, zmm3);
+                zmm4_higher         = dmlc_own_mm512_bslli_epi128(zmm4_higher, 1u);
+                zmm4_higher         = _mm512_mask_mov_epi8(zmm4_higher, 0x0001000100010001, zmm4_lower);
+                __m512i zmm5        = _mm512_load_si512((const __m512i *)(src + 128u));
+                __m512i zmm6_lower  = dmlc_own_mm512_bsrli_epi128(zmm3, shift);
+                __m512i zmm6_higher = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_higher, zmm5);
+                zmm6_higher         = dmlc_own_mm512_bslli_epi128(zmm6_higher, 1u);
+                zmm6_higher         = _mm512_mask_mov_epi8(zmm6_higher, 0x0001000100010001, zmm6_lower);
+                zmm0                = _mm512_load_si512((const __m512i *)(src + 192u));
+                __m512i zmm7_lower  = dmlc_own_mm512_bsrli_epi128(zmm5, shift);
+                __m512i zmm7_higher = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_higher, zmm0);
+                zmm7_higher         = dmlc_own_mm512_bslli_epi128(zmm7_higher, 1u);
+                zmm7_higher         = _mm512_mask_mov_epi8(zmm7_higher, 0x0001000100010001, zmm7_lower);
+                _mm512_store_si512((__m512i *)dst, zmm2_higher);
+                _mm512_store_si512((__m512i *)(dst + 64u), zmm4_higher);
+                _mm512_store_si512((__m512i *)(dst + 128u), zmm6_higher);
+                _mm512_store_si512((__m512i *)(dst + 192u), zmm7_higher);
+                src += 256u;
+                dst += 256u;
+                length_512u -= 4u;
+            }
+
+            src -= 64u - shift;
+        }
+        else if (shift > 48u)
+        {
+            src -= shift;
+            __mmask64 skip_mask = ~((1llu << shift) - 1u);
+            __m512i   zmm0      = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src);
+            src += 64u;
+
+            __m512i  permutex_idxmm_lower = _mm512_load_si512(permutex_idx_pptr[(shift - 3) / 2]);
+            uint32_t shift_higher         = 64u - shift;
+
+            while (length_512u > 4u)
+            {
+                __m512i zmm1        = _mm512_load_si512((const __m512i *)src);
+                __m512i zmm2_lower  = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_lower, zmm1);
+                zmm2_lower          = dmlc_own_mm512_bsrli_epi128(zmm2_lower, 1u);
+                __m512i zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm1, shift_higher);
+                zmm2_higher         = _mm512_mask_mov_epi8(zmm2_higher, 0x7FFF7FFF7FFF7FFF, zmm2_lower);
+                __m512i zmm3        = _mm512_load_si512((const __m512i *)(src + 64u));
+                __m512i zmm4_lower  = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_lower, zmm3);
+                zmm4_lower          = dmlc_own_mm512_bsrli_epi128(zmm4_lower, 1u);
+                __m512i zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm3, shift_higher);
+                zmm4_higher         = _mm512_mask_mov_epi8(zmm4_higher, 0x7FFF7FFF7FFF7FFF, zmm4_lower);
+                __m512i zmm5        = _mm512_load_si512((const __m512i *)(src + 128u));
+                __m512i zmm6_lower  = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_lower, zmm5);
+                zmm6_lower          = dmlc_own_mm512_bsrli_epi128(zmm6_lower, 1u);
+                __m512i zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm5, shift_higher);
+                zmm6_higher         = _mm512_mask_mov_epi8(zmm6_higher, 0x7FFF7FFF7FFF7FFF, zmm6_lower);
+                zmm0                = _mm512_load_si512((const __m512i *)(src + 192u));
+                __m512i zmm7_lower  = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_lower, zmm0);
+                zmm7_lower          = dmlc_own_mm512_bsrli_epi128(zmm7_lower, 1u);
+                __m512i zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm0, shift_higher);
+                zmm7_higher         = _mm512_mask_mov_epi8(zmm7_higher, 0x7FFF7FFF7FFF7FFF, zmm7_lower);
+                _mm512_store_si512((__m512i *)dst, zmm2_higher);
+                _mm512_store_si512((__m512i *)(dst + 64u), zmm4_higher);
+                _mm512_store_si512((__m512i *)(dst + 128u), zmm6_higher);
+                _mm512_store_si512((__m512i *)(dst + 192u), zmm7_higher);
+                src += 256u;
+                dst += 256u;
+                length_512u -= 4u;
+            }
+
+            src -= 64u - shift;
+        }
+        else
+        {
+            dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+            return;
+        }
+    }
+    else
+    {
+        if (((12 * kilobyte) < transfer_size) && (transfer_size < (32 * kilobyte)))
+        {
+            dmlc_own_px_copy_8u_unrolled(src, dst, transfer_size);
+            return;
+        }
+        while (length_512u > 3u)
+        {
+            __m512i zmm0 = _mm512_load_si512((const __m512i *)src);
+            __m512i zmm1 = _mm512_load_si512((const __m512i *)(src + 64u));
+            __m512i zmm2 = _mm512_load_si512((const __m512i *)(src + 128u));
+            __m512i zmm3 = _mm512_load_si512((const __m512i *)(src + 192u));
+            _mm512_store_si512((__m512i *)dst, zmm0);
+            _mm512_store_si512((__m512i *)(dst + 64u), zmm1);
+            _mm512_store_si512((__m512i *)(dst + 128u), zmm2);
+            _mm512_store_si512((__m512i *)(dst + 192u), zmm3);
+            src += 256u;
+            dst += 256u;
+            length_512u -= 4;
+        }
+    }
+
+    while (length_512u > 0u)
+    {
+        __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src);
+        _mm512_store_si512((__m512i *)dst, zmm0);
+        src += 64u;
+        dst += 64u;
+        --length_512u;
+    }
+
+    dmlc_own_px_copy_8u_unrolled(src, dst, tail);
+}
+
+/*
+ * Moves transfer_size bytes from src to dst.
+ *
+ * Both buffers are treated as half-open byte ranges:
+ *   [src, src + transfer_size) and [dst, dst + transfer_size)
+ *
+ * Two such ranges intersect only when each of them starts before the other
+ * one ends, for example:
+ *
+ * src: |-------|
+ * dst:     |-------|
+ *
+ * Ranges that merely touch (one ends exactly where the other starts) do not
+ * intersect.
+ */
+void dml_avx512_mem_move(const uint8_t *src, uint8_t *dst, uint32_t transfer_size)
+{
+    const uint8_t *const src_limit = src + transfer_size;
+    const uint8_t *const dst_limit = dst + transfer_size;
+
+    const int ranges_intersect = (dst < src_limit) && (src < dst_limit);
+
+    if (ranges_intersect)
+    {
+        /* A plain copy is not safe: fall back to move */
+        dml_ref_mem_move(src, dst, transfer_size);
+        return;
+    }
+
+    /* Disjoint ranges: copy is safe */
+    copy_avx512(src, dst, transfer_size);
+}
diff --git a/sources/core/src/sw_dispatcher/cache_flush/CMakeLists.txt b/sources/core/src/sw_dispatcher/cache_flush/CMakeLists.txt
new file mode 100644
index 0000000..526052e
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/cache_flush/CMakeLists.txt
@@ -0,0 +1,26 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# Object library with the cache flush / write-back kernels.
+add_library(dml_kernels_cache_flush OBJECT
+        cache_flush.c
+        )
+target_compile_features(dml_kernels_cache_flush PRIVATE c_std_11)
+target_compile_options(dml_kernels_cache_flush PRIVATE ${DML_QUALITY_OPTIONS})
+
+# cache_flush.c takes its __GNUC__ branch for Clang as well, so both need the ISA flags for the intrinsics.
+if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
+    target_compile_options(dml_kernels_cache_flush PRIVATE -mclflushopt -mclwb)
+endif ()
diff --git a/sources/core/src/sw_dispatcher/cache_flush/cache_flush.c b/sources/core/src/sw_dispatcher/cache_flush/cache_flush.c
new file mode 100644
index 0000000..ea58990
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/cache_flush/cache_flush.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "../dml_kernels.h"
+
+#if defined(_MSC_BUILD)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#else
+#error "Unsupported compiler"
+#endif
+
+static const size_t cache_line_size = 64u;
+
+/* Flushes, via CLFLUSHOPT, every cache line overlapping [dst, dst + transfer_size); fenced before and after. */
+void dml_clflushopt(uint8_t *dst, uint32_t transfer_size)
+{
+    /* Start at the line holding dst: counting whole lines (size / 64) misses a partial tail or unaligned head. */
+    uintptr_t       line = (uintptr_t)dst & ~((uintptr_t)cache_line_size - 1u);
+    const uintptr_t end  = (0u == transfer_size) ? line : (uintptr_t)dst + transfer_size; /* empty: no flush */
+    _mm_mfence();
+    for (; line < end; line += cache_line_size)
+    {
+        _mm_clflushopt((void *)line);
+    }
+    _mm_mfence();
+}
+
+/* Flushes, via CLFLUSH, every cache line overlapping [dst, dst + transfer_size); fenced before and after. */
+void dml_clflush(uint8_t *dst, uint32_t transfer_size)
+{
+    /* Start at the line holding dst: counting whole lines (size / 64) misses a partial tail or unaligned head. */
+    uintptr_t       line = (uintptr_t)dst & ~((uintptr_t)cache_line_size - 1u);
+    const uintptr_t end  = (0u == transfer_size) ? line : (uintptr_t)dst + transfer_size; /* empty: no flush */
+    _mm_mfence();
+    for (; line < end; line += cache_line_size)
+    {
+        _mm_clflush((const void *)line);
+    }
+    _mm_mfence();
+}
+
+/* Writes back, via CLWB, every cache line overlapping [dst, dst + transfer_size); fenced before and after. */
+void dml_clwb(uint8_t *dst, uint32_t transfer_size)
+{
+    /* Start at the line holding dst: counting whole lines (size / 64) misses a partial tail or unaligned head. */
+    uintptr_t       line = (uintptr_t)dst & ~((uintptr_t)cache_line_size - 1u);
+    const uintptr_t end  = (0u == transfer_size) ? line : (uintptr_t)dst + transfer_size; /* empty: no-op */
+    _mm_mfence();
+    for (; line < end; line += cache_line_size)
+    {
+        _mm_clwb((void *)line);
+    }
+    _mm_mfence();
+}
+
+void dml_clwb_unsupported(uint8_t *dst, uint32_t transfer_size) /* no-op with the same signature as dml_clwb */
+{
+    (void)dst;           /* intentionally unused */
+    (void)transfer_size; /* intentionally unused */
+}
\ No newline at end of file
diff --git a/sources/core/src/sw_dispatcher/dml_cpuid.c b/sources/core/src/sw_dispatcher/dml_cpuid.c
new file mode 100644
index 0000000..9f2184b
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/dml_cpuid.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "dml_cpuid.h"
+
+#ifdef _WIN32
+#include "intrin.h"
+typedef int dml_register_t;
+#else
+#include <cpuid.h>
+typedef unsigned dml_register_t;
+#endif
+
+/* Executes CPUID for the given leaf and sub-leaf and returns the four output registers. */
+dml_core_registers dml_core_cpuidex(dml_register_t leaf, dml_register_t sub_leaf)
+{
+    dml_core_registers result;
+
+#ifdef _WIN32
+    /* __cpuidex writes its output as an array ordered EAX, EBX, ECX, EDX */
+    dml_register_t raw[4];
+    __cpuidex(raw, leaf, sub_leaf);
+
+    result.eax = raw[0];
+    result.ebx = raw[1];
+    result.ecx = raw[2];
+    result.edx = raw[3];
+#else
+    /* __cpuid_count stores straight into the structure fields */
+    __cpuid_count(leaf, sub_leaf, result.eax, result.ebx, result.ecx, result.edx);
+#endif
+
+    return result;
+}
+
+dml_core_registers dml_core_cpuid(dml_register_t leaf) /* CPUID for leaf with the sub-leaf fixed to 0 */
+{
+    return dml_core_cpuidex(leaf, 0x0);
+}
+
+/*
+ * Returns the size in bytes of the largest non-instruction cache enumerated by
+ * CPUID leaf 0x4, or 0 if the leaf enumerates none. A non-zero result is
+ * remembered in a function-local static and returned directly on later calls.
+ */
+size_t dml_core_get_cache_size()
+{
+    static size_t cache_size = 0u;
+
+    if (cache_size > 0)
+    {
+        return cache_size;
+    }
+
+    const dml_register_t cache_parameters = 0x4;
+    const dml_register_t max_cache_types  = 32;
+
+    // Cache type = EAX[4:0]
+    const dml_register_t type_mask         = 0x1f;
+    const dml_register_t null_type         = 0x0;
+    const dml_register_t instruction_cache = 0x2;
+
+    size_t max_cache_size = 0u;
+
+    for (dml_register_t cache_type = 0; cache_type < max_cache_types; cache_type++)
+    {
+        const dml_core_registers registers = dml_core_cpuidex(cache_parameters, cache_type);
+        const dml_register_t     type      = (registers.eax & type_mask);
+
+        // Type 0 (null) means there are no more caches to enumerate
+        if (type == null_type)
+        {
+            break;
+        }
+        if (type == instruction_cache)
+        {
+            continue;
+        }
+
+        // CPUID reports each field as "value - 1", so 1 is added exactly once per field:
+        // Line_Size = EBX[11:0], Partitions = EBX[21:12], Ways = EBX[31:22], Sets = ECX
+        const size_t line_size  = (size_t)(registers.ebx & 0xfff) + 1u;
+        const size_t partitions = (size_t)((registers.ebx >> 12) & 0x3ff) + 1u;
+        const size_t ways       = (size_t)((registers.ebx >> 22) & 0x3ff) + 1u;
+        const size_t sets       = (size_t)(unsigned)registers.ecx + 1u;
+
+        // This cache size in bytes
+        const size_t this_cache_size = ways * partitions * line_size * sets;
+
+        if (this_cache_size > max_cache_size)
+        {
+            max_cache_size = this_cache_size;
+        }
+    }
+
+    cache_size = max_cache_size;
+    return cache_size;
+}
diff --git a/sources/core/src/sw_dispatcher/dml_cpuid.h b/sources/core/src/sw_dispatcher/dml_cpuid.h
new file mode 100644
index 0000000..a5e4ff6
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/dml_cpuid.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_OWN_KERNELS_CPUID_H
+#define DML_CORE_OWN_KERNELS_CPUID_H
+
+#ifdef _WIN32
+typedef int dml_register_t; /* element type of the array dml_cpuid.c hands to __cpuidex */
+#else
+typedef unsigned dml_register_t; /* type of the outputs dml_cpuid.c hands to __cpuid_count */
+#endif
+
+#include <stddef.h>
+
+#define DML_CPUID_EXTENSIONS 0x7 /* CPUID leaf queried by the disabled run-time check in optimization_dispatcher.cpp */
+/* AVX-512 subset bits; that check tests them against EBX of the leaf above */
+#define DML_AVX512F  (1 << 16)
+#define DML_AVX512DQ (1 << 17)
+#define DML_AVX512CD (1 << 28)
+#define DML_AVX512BW (1 << 30)
+#define DML_AVX512VL (1u << 31) /* unsigned literal: 1 << 31 does not fit in a signed 32-bit int */
+/* All five AVX-512 subset bits combined */
+#define DML_AVX512_MASK (DML_AVX512F | DML_AVX512DQ | DML_AVX512CD | DML_AVX512BW | DML_AVX512VL)
+/* Cache line flush / write-back instruction bits, tested against the same EBX value */
+#define DML_CLFLUSHOPT (1 << 23)
+#define DML_CLWB       (1 << 24)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct /* the four registers produced by one CPUID call, as filled in by dml_core_cpuidex */
+{
+    dml_register_t eax;
+    dml_register_t ebx;
+    dml_register_t ecx;
+    dml_register_t edx;
+} dml_core_registers;
+
+dml_core_registers dml_core_cpuidex(dml_register_t leaf, dml_register_t sub_leaf); /* CPUID for leaf / sub_leaf */
+/* Same as dml_core_cpuidex with sub_leaf fixed to 0 */
+dml_core_registers dml_core_cpuid(dml_register_t leaf);
+/* Scans the sub-leaves of CPUID leaf 0x4 and returns the largest size computed for a non-instruction cache */
+size_t dml_core_get_cache_size();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // DML_CORE_OWN_KERNELS_CPUID_H
diff --git a/sources/core/src/sw_dispatcher/dml_kernels.h b/sources/core/src/sw_dispatcher/dml_kernels.h
new file mode 100644
index 0000000..72e1b36
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/dml_kernels.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_OWN_KERNELS_DEFS_H
+#define DML_CORE_OWN_KERNELS_DEFS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void dml_ref_mem_move(const uint8_t *src, uint8_t *dst, uint32_t transfer_size); /* dml_ref_*: reference kernels */
+/* dml_avx512_*: AVX-512 kernels; this one calls dml_ref_mem_move itself when the two ranges overlap */
+void dml_avx512_mem_move(const uint8_t *src, uint8_t *dst, uint32_t transfer_size);
+/* NOTE(review): presumably fills transfer_size bytes at dst with the repeated 64-bit pattern - confirm in ref/fill.c */
+void dml_ref_fill_u64(uint64_t pattern, uint8_t *dst, uint32_t transfer_size);
+
+void dml_avx512_fill_u64(uint64_t pattern, uint8_t *dst, uint32_t transfer_size);
+/* The dispatcher names the compare kernels' return value "mismatch" and passes a status byte as result */
+uint32_t dml_ref_compare(const uint8_t *src1, const uint8_t *src2, uint32_t transfer_size, uint8_t *result);
+
+uint32_t dml_avx512_compare(const uint8_t *src1, const uint8_t *src2, uint32_t transfer_size, uint8_t *result);
+
+uint32_t dml_ref_compare_pattern(uint64_t pattern, const uint8_t *src, uint32_t transfer_size, uint8_t *result);
+
+uint32_t dml_avx512_compare_pattern(uint64_t pattern, const uint8_t *src, uint32_t transfer_size, uint8_t *result);
+/* The dispatcher names this kernel's return value "delta_record_size" */
+uint32_t dml_ref_create_delta(const uint8_t *src1,
+                              const uint8_t *src2,
+                              uint32_t       transfer_size,
+                              uint8_t       *delta_record,
+                              uint32_t       max_delta_record_size,
+                              uint8_t       *result);
+
+void dml_ref_apply_delta(const uint8_t *delta_record, uint8_t *dst, uint32_t delta_record_size);
+
+void dml_ref_dualcast(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, uint32_t transfer_size);
+
+uint32_t dml_ref_crc_32u(const uint8_t *src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial);
+
+uint32_t dml_avx512_crc_u32(const uint8_t *src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial);
+
+uint32_t dml_ref_crc_reflected_u32(const uint8_t *src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial);
+/* Cache-line kernels from cache_flush/cache_flush.c: CLFLUSHOPT, CLFLUSH and CLWB loops between two memory fences */
+void dml_clflushopt(uint8_t *dst, uint32_t transfer_size);
+
+void dml_clflush(uint8_t *dst, uint32_t transfer_size);
+
+void dml_clwb(uint8_t *dst, uint32_t transfer_size);
+/* Does nothing; the dispatcher's initial write-back kernel */
+void dml_clwb_unsupported(uint8_t *dst, uint32_t transfer_size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // DML_CORE_OWN_KERNELS_DEFS_H
diff --git a/sources/core/src/sw_dispatcher/optimization_dispatcher.cpp b/sources/core/src/sw_dispatcher/optimization_dispatcher.cpp
new file mode 100644
index 0000000..e3bdf32
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/optimization_dispatcher.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include "optimization_dispatcher.hpp"
+
+#include <tuple>
+
+#include "dml_cpuid.h"
+#include "dml_kernels.h"
+
+namespace dml::core::dispatch
+{
+    static auto gs_mem_move          = dml_ref_mem_move;  // kernel pointers; each starts at its default kernel
+    static auto gs_fill_u64          = dml_ref_fill_u64;
+    static auto gs_compare           = dml_ref_compare;
+    static auto gs_compare_pattern   = dml_ref_compare_pattern;
+    static auto gs_create_delta      = dml_ref_create_delta;
+    static auto gs_apply_delta       = dml_ref_apply_delta;
+    static auto gs_dualcast          = dml_ref_dualcast;
+    static auto gs_crc_u32           = dml_ref_crc_32u;
+    static auto gs_crc_reflected_u32 = dml_ref_crc_reflected_u32;
+    static auto gs_cache_flush       = dml_clflush;
+    static auto gs_cache_write_back  = dml_clwb_unsupported;
+    // Helper whose constructor re-points the kernel pointers above; it has no other members
+    class dispatcher
+    {
+    public:
+        dispatcher() noexcept
+        {
+#ifdef DML_AVX512  // compile-time selection of the AVX-512, CLFLUSHOPT and CLWB kernels
+            gs_mem_move         = dml_avx512_mem_move;
+            gs_fill_u64         = dml_avx512_fill_u64;
+            gs_compare          = dml_avx512_compare;
+            gs_compare_pattern  = dml_avx512_compare_pattern;
+            gs_crc_u32          = dml_avx512_crc_u32;
+            gs_cache_flush      = dml_clflushopt;
+            gs_cache_write_back = dml_clwb;
+#endif
+
+            // Run-time (CPUID based) selection is compiled out with #if 0 to preserve previous behavior
+#if 0
+            auto registers = dml_core_cpuid(DML_CPUID_EXTENSIONS);
+
+            if ((registers.ebx & DML_AVX512_MASK) == DML_AVX512_MASK)
+            {
+                gs_mem_move        = dml_avx512_mem_move;
+                gs_fill_u64        = dml_avx512_fill_u64;
+                gs_compare         = dml_avx512_compare;
+                gs_compare_pattern = dml_avx512_compare_pattern;
+                gs_crc_u32         = dml_avx512_crc_u32;
+            }
+
+            if ((registers.ebx & DML_CLFLUSHOPT) == DML_CLFLUSHOPT)
+            {
+                gs_cache_flush = dml_clflushopt;
+            }
+
+            if ((registers.ebx & DML_CLWB) == DML_CLWB)
+            {
+                gs_cache_write_back = dml_clwb;
+            }
+#endif
+        }
+    };
+    // Constructing this object during static initialization runs the selection in the constructor above
+    [[maybe_unused]] static auto gs_dispatcher = dispatcher();
+    // Public entry points: each forwards its arguments to the currently selected kernel
+    void mem_move(const uint8_t* src, uint8_t* dst, uint32_t transfer_size) noexcept
+    {
+        gs_mem_move(src, dst, transfer_size);
+    }
+
+    void fill(uint64_t pattern, uint8_t* dst, uint32_t transfer_size) noexcept
+    {
+        gs_fill_u64(pattern, dst, transfer_size);
+    }
+    // Returns the kernel's return value together with the byte the kernel wrote through its result pointer
+    std::tuple<uint32_t, uint8_t> compare(const uint8_t* src1, const uint8_t* src2, uint32_t transfer_size) noexcept
+    {
+        uint8_t result   = 0;
+        auto    mismatch = gs_compare(src1, src2, transfer_size, &result);
+
+        return { mismatch, result };
+    }
+
+    std::tuple<uint32_t, uint8_t> compare_pattern(uint64_t pattern, const uint8_t* src, uint32_t transfer_size) noexcept
+    {
+        uint8_t result   = 0;
+        auto    mismatch = gs_compare_pattern(pattern, src, transfer_size, &result);
+
+        return { mismatch, result };
+    }
+
+    std::tuple<uint32_t, uint8_t> create_delta(const uint8_t* src1,
+                                               const uint8_t* src2,
+                                               uint32_t       transfer_size,
+                                               uint8_t*       delta_record,
+                                               uint32_t       delta_max_size) noexcept
+    {
+        uint8_t result            = 0;
+        auto    delta_record_size = gs_create_delta(src1, src2, transfer_size, delta_record, delta_max_size, &result);
+
+        return { delta_record_size, result };
+    }
+
+    void apply_delta(const uint8_t* delta_record, uint8_t* dst, uint32_t transfer_size) noexcept
+    {
+        gs_apply_delta(delta_record, dst, transfer_size);
+    }
+
+    void dualcast(const uint8_t* src, uint8_t* dst1, uint8_t* dst2, uint32_t transfer_size) noexcept
+    {
+        gs_dualcast(src, dst1, dst2, transfer_size);
+    }
+
+    uint32_t crc(const uint8_t* src, uint32_t transfer_size, uint32_t crc_seed, uint32_t polynomial) noexcept
+    {
+        return gs_crc_u32(src, transfer_size, crc_seed, polynomial);
+    }
+
+    uint32_t crc_reflected(const uint8_t* src, uint32_t transfer_size, uint32_t crc_seed, uint32_t polynomial) noexcept
+    {
+        return gs_crc_reflected_u32(src, transfer_size, crc_seed, polynomial);
+    }
+
+    void cache_flush(uint8_t* dst, uint32_t transfer_size) noexcept
+    {
+        gs_cache_flush(dst, transfer_size);
+    }
+
+    void cache_write_back(uint8_t* dst, uint32_t transfer_size) noexcept
+    {
+        gs_cache_write_back(dst, transfer_size);
+    }
+}  // namespace dml::core::dispatch
diff --git a/sources/core/src/sw_dispatcher/optimization_dispatcher.hpp b/sources/core/src/sw_dispatcher/optimization_dispatcher.hpp
new file mode 100644
index 0000000..db023f7
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/optimization_dispatcher.hpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_CORE_OWN_KERNELS_OPTIMIZATION_DISPATCHER_HPP
+#define DML_CORE_OWN_KERNELS_OPTIMIZATION_DISPATCHER_HPP
+
+#include <cstdint>
+#include <tuple>
+
+namespace dml::core::dispatch  // entry points forwarding to the kernels selected in optimization_dispatcher.cpp
+{
+    void mem_move(const uint8_t* src, uint8_t* dst, uint32_t transfer_size) noexcept;
+
+    void fill(uint64_t pattern, uint8_t* dst, uint32_t transfer_size) noexcept;
+    // Returns { value returned by the compare kernel, byte the kernel stored through its result pointer }
+    std::tuple<uint32_t, uint8_t> compare(const uint8_t* src1, const uint8_t* src2, uint32_t transfer_size) noexcept;
+
+    std::tuple<uint32_t, uint8_t> compare_pattern(uint64_t pattern, const uint8_t* src, uint32_t transfer_size) noexcept;
+    // Returns { delta record size returned by the kernel, byte the kernel stored through its result pointer }
+    std::tuple<uint32_t, uint8_t> create_delta(const uint8_t* src1,
+                                               const uint8_t* src2,
+                                               uint32_t       transfer_size,
+                                               uint8_t*       delta_record,
+                                               uint32_t       delta_max_size) noexcept;
+
+    void apply_delta(const uint8_t* delta_record, uint8_t* dst, uint32_t transfer_size) noexcept;
+
+    void dualcast(const uint8_t* src, uint8_t* dst1, uint8_t* dst2, uint32_t transfer_size) noexcept;
+    // NOTE(review): the default polynomial looks like the CRC-32C (Castagnoli) value - confirm
+    uint32_t crc(const uint8_t* src, uint32_t transfer_size, uint32_t crc_seed, uint32_t polynomial = 0x1EDC6F41u) noexcept;
+
+    uint32_t crc_reflected(const uint8_t* src, uint32_t transfer_size, uint32_t crc_seed, uint32_t polynomial = 0x1EDC6F41u) noexcept;
+    // Forwards to the selected flush kernel (dml_clflush unless the dispatcher replaced it)
+    void cache_flush(uint8_t* dst, uint32_t transfer_size) noexcept;
+    // Forwards to the selected write-back kernel (initially dml_clwb_unsupported, which does nothing)
+    void cache_write_back(uint8_t* dst, uint32_t transfer_size) noexcept;
+}  // namespace dml::core::dispatch
+
+#endif  //DML_CORE_OWN_KERNELS_OPTIMIZATION_DISPATCHER_HPP
diff --git a/sources/core/src/sw_dispatcher/ref/CMakeLists.txt b/sources/core/src/sw_dispatcher/ref/CMakeLists.txt
new file mode 100644
index 0000000..39fdc5b
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/CMakeLists.txt
@@ -0,0 +1,37 @@
+#
+# Copyright 2021 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you ("License"). Unless the License provides otherwise,
+# you may not use, modify, copy, publish, distribute, disclose or transmit
+# this software or the related documents without Intel's prior written
+# permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+# Reference implementations of the DML kernels, compiled from plain C sources into an
+# object library.
+add_library(dml_kernels_ref OBJECT
+        mem_move.c
+        fill.c
+        compare.c
+        compare_pattern.c
+        create_delta.c
+        apply_delta.c
+        dualcast.c
+        crc.c
+        )
+
+target_compile_features(dml_kernels_ref PRIVATE c_std_11)
+
+# Project quality flags first, then AVX2 code generation - requested for GCC and MSVC only.
+target_compile_options(dml_kernels_ref
+        PRIVATE
+        ${DML_QUALITY_OPTIONS}
+        $<$<C_COMPILER_ID:GNU>:-mavx2>
+        $<$<C_COMPILER_ID:MSVC>:/arch:AVX2>
+        )
diff --git a/sources/core/src/sw_dispatcher/ref/apply_delta.c b/sources/core/src/sw_dispatcher/ref/apply_delta.c
new file mode 100644
index 0000000..f48b809
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/apply_delta.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+void dml_ref_apply_delta(const uint8_t *delta_record, uint8_t *dst, uint32_t delta_record_size)
+{
+    /* A delta note is a 2-byte block index followed by the 8-byte block to store at that index in dst. */
+    typedef uint64_t block_t;
+    typedef uint16_t offset_t;
+
+    const size_t delta_note_size = sizeof(offset_t) + sizeof(block_t);
+
+    /* Only whole notes are applied; any trailing bytes that do not form a full note are ignored. */
+    for (size_t position = 0; position + delta_note_size <= delta_record_size; position += delta_note_size)
+    {
+        /* Notes are 10 bytes apart, so their fields are not naturally aligned: move bytes instead of casting. */
+        const uint8_t *const note = delta_record + position;
+        offset_t             offset;
+        for (size_t i = 0; i < sizeof(offset); ++i) { ((uint8_t *)&offset)[i] = note[i]; }
+
+        uint8_t *const block = dst + (sizeof(block_t) * offset);
+        for (size_t i = 0; i < sizeof(block_t); ++i) { block[i] = note[sizeof(offset_t) + i]; }
+    }
+}
diff --git a/sources/core/src/sw_dispatcher/ref/compare.c b/sources/core/src/sw_dispatcher/ref/compare.c
new file mode 100644
index 0000000..5964a88
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/compare.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+uint32_t dml_ref_compare(const uint8_t* src1, const uint8_t* src2, uint32_t transfer_size, uint8_t* result)
+{
+    /* Codes written through result. */
+    enum { buffers_equal = 0x0, buffers_differ = 0x1 };
+
+    /* Walk both buffers in lockstep and stop at the first byte that differs. */
+    uint32_t position = 0;
+    while (position < transfer_size && src1[position] == src2[position])
+    {
+        ++position;
+    }
+
+    const int mismatch_found = (position != transfer_size);
+    /* A mismatch yields the offset of the differing byte; equal buffers yield offset 0. */
+    *result = (uint8_t)(mismatch_found ? buffers_differ : buffers_equal);
+    return mismatch_found ? position : 0u;
+}
diff --git a/sources/core/src/sw_dispatcher/ref/compare_pattern.c b/sources/core/src/sw_dispatcher/ref/compare_pattern.c
new file mode 100644
index 0000000..34ba7b3
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/compare_pattern.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+/*
+ * Compares transfer_size bytes at src with a repeating 64-bit pattern. *result receives 0x0
+ * when everything matches and 0x1 otherwise; the return value locates the first mismatch.
+ */
+uint32_t dml_ref_compare_pattern(uint64_t pattern, const uint8_t *src, uint32_t transfer_size, uint8_t *result)
+{
+    const uint8_t equal     = 0x0;
+    const uint8_t not_equal = 0x1;
+
+    /*
+     * The buffer is matched against the 8-byte pattern repeated back to back, using the
+     * pattern bytes in their in-memory order. src carries no alignment guarantee, so it is
+     * read byte by byte rather than through a uint64_t pointer (a misaligned 64-bit load is
+     * undefined behaviour).
+     */
+    const size_t         chunk_size = sizeof(pattern);
+    const size_t         head_bytes = transfer_size - (transfer_size % chunk_size);  // bytes covered by whole chunks
+    const uint8_t *const pattern_u8 = (const uint8_t *)&pattern;
+
+    for (size_t index = 0; index < transfer_size; ++index)
+    {
+        if (src[index] != pattern_u8[index % chunk_size])
+        {
+            *result = not_equal;
+
+            /*
+             * Reporting granularity is unchanged: a mismatch inside a whole chunk reports the
+             * offset of that chunk's first byte, a mismatch in the tail reports the exact byte.
+             */
+            return (uint32_t)((index < head_bytes) ? (index - (index % chunk_size)) : index);
+        }
+    }
+
+    *result = equal;
+    return 0;
+}
diff --git a/sources/core/src/sw_dispatcher/ref/crc.c b/sources/core/src/sw_dispatcher/ref/crc.c
new file mode 100644
index 0000000..dfc5e97
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/crc.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+static inline uint8_t reverse(uint8_t byte)
+{
+    /* Mirror the bit order by folding the byte: swap its nibbles, then bit pairs, then neighbouring bits. */
+    uint8_t mirrored = (uint8_t)((byte << 4u) | (byte >> 4u));
+    mirrored         = (uint8_t)(((mirrored & 0x33u) << 2u) | ((mirrored & 0xCCu) >> 2u));
+    mirrored         = (uint8_t)(((mirrored & 0x55u) << 1u) | ((mirrored & 0xAAu) >> 1u));
+    return mirrored;
+}
+
+static inline uint32_t calculate_crc_32u(uint32_t crc_value, uint8_t data, uint32_t polynomial)
+{
+    const size_t   byte_width     = 8;
+    const size_t   crc_bit_count  = sizeof(crc_value) * byte_width;
+    const size_t   crc_byte_shift = crc_bit_count - byte_width;
+    const uint32_t high_bit_mask  = (uint32_t)1 << (crc_bit_count - 1);
+    // Both shifts use uint32_t operands: moving a set bit into bit 31 of a plain int is undefined behaviour.
+    crc_value ^= ((uint32_t)data << crc_byte_shift);
+    // The new byte now sits in the top byte of the CRC; run one MSB-first polynomial division step per bit.
+    for (size_t bit = 0u; bit < byte_width; ++bit)
+    {
+        crc_value = (crc_value & high_bit_mask) ? ((crc_value << 1) ^ polynomial) : (crc_value << 1);
+    }
+
+    return crc_value;
+}
+
+uint32_t dml_ref_crc_32u(const uint8_t *src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial)
+{
+    // crc_value is both the seed and the running remainder; the bytes are consumed in buffer order.
+    for (uint32_t remaining = transfer_size; remaining != 0u; --remaining)
+    {
+        crc_value = calculate_crc_32u(crc_value, *src++, polynomial);
+    }
+    return crc_value;
+}
+
+uint32_t dml_ref_crc_reflected_u32(const uint8_t *src, uint32_t transfer_size, uint32_t crc_value, uint32_t polynomial)
+{
+    // Each input byte is bit-reversed before it enters the CRC step; the remainder itself is returned as computed.
+    for (uint32_t offset = 0u; offset < transfer_size; ++offset)
+    {
+        crc_value = calculate_crc_32u(crc_value, reverse(src[offset]), polynomial);
+    }
+    return crc_value;
+}
diff --git a/sources/core/src/sw_dispatcher/ref/create_delta.c b/sources/core/src/sw_dispatcher/ref/create_delta.c
new file mode 100644
index 0000000..75249cb
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/create_delta.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+uint32_t dml_ref_create_delta(const uint8_t *src1,
+                              const uint8_t *src2,
+                              uint32_t       transfer_size,
+                              uint8_t       *delta_record,
+                              uint32_t       delta_record_max_size,
+                              uint8_t       *result)
+{
+    /* A delta note is a 2-byte block index followed by the 8-byte block taken from src2. */
+    typedef uint64_t block_t;
+    typedef uint16_t offset_t;
+
+    const size_t delta_note_size = sizeof(block_t) + sizeof(offset_t);
+    const size_t block_count     = transfer_size / sizeof(block_t);
+
+    uint32_t delta_record_size = 0;
+
+    for (size_t index = 0; index < block_count; ++index)
+    {
+        const uint8_t *const block1 = src1 + (index * sizeof(block_t));
+        const uint8_t *const block2 = src2 + (index * sizeof(block_t));
+
+        /* Blocks are compared through bytes, so the sources need no particular alignment. */
+        size_t matched = 0;
+        while (matched < sizeof(block_t) && block1[matched] == block2[matched]) { ++matched; }
+        if (matched == sizeof(block_t)) { continue; } /* identical blocks produce no note */
+
+        if ((delta_record_size + delta_note_size) > delta_record_max_size)
+        {
+            /* No room for another note: report overflow (0x2) along with the bytes written so far. */
+            *result = 0x2;
+            return delta_record_size;
+        }
+
+        /* Notes are packed 10 bytes apart, so both fields are stored bytewise rather than through casts. */
+        uint8_t *const note   = delta_record + delta_record_size;
+        const offset_t offset = (offset_t)index;
+
+        for (size_t i = 0; i < sizeof(offset); ++i) { note[i] = ((const uint8_t *)&offset)[i]; }
+        for (size_t i = 0; i < sizeof(block_t); ++i) { note[sizeof(offset) + i] = block2[i]; }
+
+        delta_record_size += (uint32_t)delta_note_size;
+    }
+
+    /* 0x0: no block differs and the record is empty; 0x1: at least one note was written. */
+    *result = delta_record_size ? 0x1 : 0x0;
+
+    return delta_record_size;
+}
diff --git a/include/dml/cpp/middle_layer/core.hpp b/sources/core/src/sw_dispatcher/ref/dualcast.c
similarity index 68%
rename from include/dml/cpp/middle_layer/core.hpp
rename to sources/core/src/sw_dispatcher/ref/dualcast.c
index 864c3a2..94767ce 100644
--- a/include/dml/cpp/middle_layer/core.hpp
+++ b/sources/core/src/sw_dispatcher/ref/dualcast.c
@@ -14,15 +14,15 @@
  *
  */
 
-#ifndef DML_ML_CORE_HPP
-#define DML_ML_CORE_HPP
+#include <stddef.h>
 
-#include "descriptor_views.hpp"
-#include "result_views.hpp"
+#include "../dml_kernels.h"
 
-namespace dml::ml::core
+void dml_ref_dualcast(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, uint32_t transfer_size)
 {
-    execution_status submit(descriptor& dsc) noexcept;
-}  // namespace dml::ml::core
-
-#endif  //DML_ML_CORE_HPP
+    for (size_t index = 0; index < transfer_size; ++index)
+    {
+        dst1[index] = src[index];
+        dst2[index] = src[index];
+    }
+}
diff --git a/sources/core/src/sw_dispatcher/ref/fill.c b/sources/core/src/sw_dispatcher/ref/fill.c
new file mode 100644
index 0000000..d00c90c
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/fill.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+void dml_ref_fill_u64(uint64_t pattern, uint8_t *dst, uint32_t transfer_size)
+{
+    // Tile the destination with the pattern's bytes in memory order; a trailing partial tile is simply cut short.
+    const uint8_t *const tile = (const uint8_t *)&pattern;
+
+    for (size_t offset = 0, phase = 0; offset < transfer_size; ++offset, phase = (phase + 1) % sizeof(pattern))
+    {
+        dst[offset] = tile[phase];
+    }
+}
diff --git a/sources/core/src/sw_dispatcher/ref/mem_move.c b/sources/core/src/sw_dispatcher/ref/mem_move.c
new file mode 100644
index 0000000..80ce2f5
--- /dev/null
+++ b/sources/core/src/sw_dispatcher/ref/mem_move.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <stddef.h>
+
+#include "../dml_kernels.h"
+
+static inline void own_copy_forward(const uint8_t *const src, uint8_t *const dst, uint32_t transfer_size)
+{
+    for (size_t offset = 0; offset != transfer_size; ++offset) /* lowest address first */
+    {
+        *(dst + offset) = *(src + offset);
+    }
+}
+
+static inline void own_copy_backward(const uint8_t *const src, uint8_t *const dst, uint32_t transfer_size)
+{
+    /*
+     * Copies from the highest address down to the lowest.
+     * The loop counts the bytes still to copy, so an empty transfer does nothing; computing
+     * "transfer_size - 1" as a start index would wrap around for zero and run off the buffers.
+     */
+    for (size_t remaining = transfer_size; remaining != 0; --remaining)
+    {
+        dst[remaining - 1] = src[remaining - 1];
+    }
+}
+
+void dml_ref_mem_move(const uint8_t *src, uint8_t *dst, uint32_t transfer_size)
+{
+    /* One-past-the-end addresses of the two ranges. */
+    const uint8_t *const src_end = src + transfer_size;
+    const uint8_t *const dst_end = dst + transfer_size;
+
+    /*
+     * Case 1 - the ranges do not share a single byte:
+     *
+     * src: |-------|                  src:          |-------|
+     * dst:          |-------|   or    dst: |-------|
+     *
+     * The end addresses are exclusive, so ranges that merely touch count as disjoint.
+     * Copy direction is irrelevant here.
+     */
+    if ((src_end <= dst) || (dst_end <= src))
+    {
+        own_copy_forward(src, dst, transfer_size);
+        return;
+    }
+
+    /*
+     * Case 2 - the source starts inside the destination and extends past its end:
+     *
+     * src:     |-------|
+     * dst: |-------|
+     *
+     * Ascending order reads every source byte before it gets overwritten.
+     */
+    if ((src < dst_end) && (dst_end < src_end))
+    {
+        own_copy_forward(src, dst, transfer_size);
+        return;
+    }
+
+    /*
+     * Case 3 - the destination starts inside the source:
+     *
+     * src: |-------|
+     * dst:     |-------|
+     *
+     * Descending order reads every source byte before it gets overwritten.
+     */
+    if ((src < dst) && (dst < src_end))
+    {
+        own_copy_backward(src, dst, transfer_size);
+        return;
+    }
+
+    /*
+     * Case 4 - whatever reaches this point is left untouched.
+     * With both ranges having the same length, that is the situation where src and dst
+     * designate the very same memory, so there is nothing to move.
+     */
+}
diff --git a/sources/middle_layer/utils.hpp b/sources/core/src/utils.hpp
similarity index 81%
rename from sources/middle_layer/utils.hpp
rename to sources/core/src/utils.hpp
index b689a59..2616258 100644
--- a/sources/middle_layer/utils.hpp
+++ b/sources/core/src/utils.hpp
@@ -17,14 +17,14 @@
 #ifndef DML_ML_OWN_UTILS_HPP
 #define DML_ML_OWN_UTILS_HPP
 
-#include <dml/cpp/middle_layer/status.hpp>
+#include <dml/detail/common/status.hpp>
 #include <tuple>
 
 #define RETURN_STATUS_IF(expr, status) \
     if ((expr))                        \
     return (status)
 
-namespace dml::ml
+namespace dml::core
 {
     template <typename... args_t>
     bool any_equal_zero(args_t... args) noexcept
@@ -46,6 +46,12 @@ namespace dml::ml
         return ((lhs <= rhs) && ((lhs + lhs_size) > rhs)) || ((rhs <= lhs) && ((rhs + rhs_size) > lhs));
     }
 
+    inline bool adjacent(address_t lhs, transfer_size_t lhs_size, address_t rhs) noexcept
+    {
+        // True when the range of lhs_size bytes at lhs ends exactly where rhs begins; inline because this header may be included by several sources
+        return (lhs + lhs_size) == rhs;
+    }
+
     template <std::size_t alignment, typename... args_t>
     bool any_misaligned(args_t... args) noexcept
     {
@@ -55,14 +61,6 @@ namespace dml::ml
         };
         return (is_misaligned(args) || ...);
     }
-
-//    template <typename elem_t>
-//    constexpr elem_t reverse_bytes(elem_t v) noexcept {
-//        constexpr auto byte_size = elem_t(8);
-//        constexpr auto mask = ~0 ^ (sizeof(v) * byte_size);
-//        v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
-//        return (v >> 16) | (v << 16);
-//    }
-}  // namespace dml::ml
+}  // namespace dml::core
 
 #endif  // DML_ML_OWN_UTILS_HPP
diff --git a/sources/core/src/validation.cpp b/sources/core/src/validation.cpp
new file mode 100644
index 0000000..f1c92ad
--- /dev/null
+++ b/sources/core/src/validation.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/descriptor_views.hpp>
+#include <core/operations.hpp>
+#include <core/validation.hpp>
+#include <dml/detail/common/status.hpp>
+
+#include "utils.hpp"
+
+namespace dml::core
+{
+    static constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };  // indexed by the low two bits of a descriptor's DIF flags
+    // Per-operation validators, one overload per typed descriptor view; the public validate(descriptor&) picks one by operation code.
+    static dml::detail::validation_status validate(nop_descriptor nop) noexcept;
+
+    static dml::detail::validation_status validate(batch_descriptor batch) noexcept;
+
+    static dml::detail::validation_status validate(drain_descriptor drain) noexcept;
+
+    static dml::detail::validation_status validate(mem_move_descriptor mem_move) noexcept;
+
+    static dml::detail::validation_status validate(fill_descriptor fill) noexcept;
+
+    static dml::detail::validation_status validate(compare_descriptor compare) noexcept;
+
+    static dml::detail::validation_status validate(compare_pattern_descriptor compare_pattern) noexcept;
+
+    static dml::detail::validation_status validate(create_delta_descriptor create_delta) noexcept;
+
+    static dml::detail::validation_status validate(apply_delta_descriptor apply_delta) noexcept;
+
+    static dml::detail::validation_status validate(dualcast_descriptor dualcast) noexcept;
+
+    static dml::detail::validation_status validate(crc_descriptor crc) noexcept;
+
+    static dml::detail::validation_status validate(copy_crc_descriptor copy_crc) noexcept;
+
+    static dml::detail::validation_status validate(dif_check_descriptor dif_check) noexcept;
+
+    static dml::detail::validation_status validate(dif_insert_descriptor dif_insert) noexcept;
+
+    static dml::detail::validation_status validate(dif_strip_descriptor dif_strip) noexcept;
+
+    static dml::detail::validation_status validate(dif_update_descriptor dif_update) noexcept;
+
+    static dml::detail::validation_status validate(cache_flush_descriptor cache_flush) noexcept;
+
+    dml::detail::validation_status validate(descriptor &dsc) noexcept
+    {
+        // Read the operation code through the generic view, then hand the descriptor to the matching typed validator.
+        const auto requested = static_cast<operation>(any_descriptor(dsc).operation());
+        switch (requested)
+        {
+            case operation::nop:
+                return validate(nop_descriptor(dsc));
+            case operation::batch:
+                return validate(batch_descriptor(dsc));
+            case operation::drain:
+                return validate(drain_descriptor(dsc));
+            case operation::memory_move:
+                return validate(mem_move_descriptor(dsc));
+            case operation::fill:
+                return validate(fill_descriptor(dsc));
+            case operation::compare:
+                return validate(compare_descriptor(dsc));
+            case operation::compare_pattern:
+                return validate(compare_pattern_descriptor(dsc));
+            case operation::create_delta:
+                return validate(create_delta_descriptor(dsc));
+            case operation::apply_delta:
+                return validate(apply_delta_descriptor(dsc));
+            case operation::dualcast:
+                return validate(dualcast_descriptor(dsc));
+            case operation::crc:
+                return validate(crc_descriptor(dsc));
+            case operation::copy_crc:
+                return validate(copy_crc_descriptor(dsc));
+            case operation::dif_check:
+                return validate(dif_check_descriptor(dsc));
+            case operation::dif_insert:
+                return validate(dif_insert_descriptor(dsc));
+            case operation::dif_strip:
+                return validate(dif_strip_descriptor(dsc));
+            case operation::dif_update:
+                return validate(dif_update_descriptor(dsc));
+            case operation::cache_flush:
+                return validate(cache_flush_descriptor(dsc));
+            default:  // any other operation code has no validator
+                return dml::detail::validation_status::unsupported_operation;
+        }
+    }
+
+    static dml::detail::validation_status validate([[maybe_unused]] nop_descriptor nop) noexcept
+    {
+        // Nothing is checked for a no-op: every descriptor is accepted.
+        using status_code = dml::detail::validation_status;
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate([[maybe_unused]] drain_descriptor drain) noexcept
+    {
+        // Nothing is checked for a drain: every descriptor is accepted.
+        using status_code = dml::detail::validation_status;
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(mem_move_descriptor mem_move) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        // A null source or destination is reported before a zero transfer size.
+        RETURN_STATUS_IF(any_equal_zero(mem_move.source_address(), mem_move.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(mem_move.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(fill_descriptor fill) noexcept
+    {
+        using status_code = dml::detail::validation_status;  // a null destination is reported before a zero size
+        RETURN_STATUS_IF(any_equal_zero(fill.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(fill.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(compare_descriptor compare) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        // Either source being null is reported before a zero transfer size.
+        RETURN_STATUS_IF(any_equal_zero(compare.source_1_address(), compare.source_2_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(compare.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(compare_pattern_descriptor compare_pattern) noexcept
+    {
+        using status_code = dml::detail::validation_status;  // a null source is reported before a zero size
+        RETURN_STATUS_IF(any_equal_zero(compare_pattern.source_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(compare_pattern.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(create_delta_descriptor create_delta) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        constexpr auto max_size = 0x80000;  // largest accepted transfer size
+
+        // Both inputs and the record buffer must be non-null; the transfer size and the record capacity must be non-zero.
+        RETURN_STATUS_IF(
+            any_equal_zero(create_delta.source_1_address(), create_delta.source_2_address(), create_delta.delta_record_address()),
+            status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(create_delta.transfer_size(), create_delta.maximum_delta_record_size()), status_code::null_size);
+
+        // All three addresses must be 8-byte aligned; the transfer size must be a multiple of 8 and at most max_size.
+        RETURN_STATUS_IF(
+            any_misaligned<8u>(create_delta.source_1_address(), create_delta.source_2_address(), create_delta.delta_record_address()),
+            status_code::misalignment);
+        RETURN_STATUS_IF(create_delta.transfer_size() % 8 != 0, status_code::wrong_size);
+        RETURN_STATUS_IF(create_delta.transfer_size() > max_size, status_code::large_size);
+
+        // The record capacity must be a multiple of 10 (the size of one delta note) and not smaller than 80.
+        RETURN_STATUS_IF(create_delta.maximum_delta_record_size() % 10 != 0 || create_delta.maximum_delta_record_size() < 80,
+                         status_code::wrong_delta_size);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(apply_delta_descriptor apply_delta) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        constexpr auto max_size = 0x80000;  // largest accepted transfer size
+
+        // The destination and the record must be non-null, and both sizes non-zero.
+        RETURN_STATUS_IF(any_equal_zero(apply_delta.destination_address(), apply_delta.delta_record_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(apply_delta.transfer_size(), apply_delta.delta_record_size()), status_code::null_size);
+
+        // The delta record must not share memory with the region it is applied to.
+        RETURN_STATUS_IF(overlaps(apply_delta.delta_record_address(),
+                                  apply_delta.delta_record_size(),
+                                  apply_delta.destination_address(),
+                                  apply_delta.transfer_size()),
+                         status_code::overlapping);
+
+        // Both addresses must be 8-byte aligned; the transfer size must be a multiple of 8 and at most max_size.
+        RETURN_STATUS_IF(any_misaligned<8u>(apply_delta.destination_address(), apply_delta.delta_record_address()), status_code::misalignment);
+        RETURN_STATUS_IF(apply_delta.transfer_size() % 8 != 0, status_code::wrong_size);
+        RETURN_STATUS_IF(apply_delta.transfer_size() > max_size, status_code::large_size);
+
+        // The record size must be a multiple of 10 (the size of one delta note).
+        RETURN_STATUS_IF(apply_delta.delta_record_size() % 10 != 0, status_code::wrong_delta_size);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(dualcast_descriptor dualcast) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+
+        // The source and both destinations must be non-null, and the transfer size non-zero.
+        RETURN_STATUS_IF(any_equal_zero(dualcast.source_address(), dualcast.destination_1_address(), dualcast.destination_2_address()),
+                         status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(dualcast.transfer_size()), status_code::null_size);
+
+        // The low 12 bits (mask 0xFFF) of the two destination addresses must be identical.
+        RETURN_STATUS_IF((dualcast.destination_1_address() & 0xFFFu) != (dualcast.destination_2_address() & 0xFFFu),
+                         status_code::wrong_dualcast_address);
+
+        // No two of the three buffers may overlap: source/first, source/second, then first/second destination.
+        RETURN_STATUS_IF(overlaps(dualcast.source_address(), dualcast.destination_1_address(), dualcast.transfer_size()), status_code::overlapping);
+        RETURN_STATUS_IF(overlaps(dualcast.source_address(), dualcast.destination_2_address(), dualcast.transfer_size()), status_code::overlapping);
+        RETURN_STATUS_IF(overlaps(dualcast.destination_1_address(), dualcast.destination_2_address(), dualcast.transfer_size()), status_code::overlapping);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(crc_descriptor crc) noexcept
+    {
+        using status_code = dml::detail::validation_status;  // a null source is reported before a zero size
+        RETURN_STATUS_IF(any_equal_zero(crc.source_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(crc.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(copy_crc_descriptor copy_crc) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        RETURN_STATUS_IF(any_equal_zero(copy_crc.source_address(), copy_crc.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(copy_crc.transfer_size()), status_code::null_size);
+
+        // Source and destination are checked against each other over the same transfer size and may not overlap.
+        RETURN_STATUS_IF(overlaps(copy_crc.source_address(), copy_crc.destination_address(), copy_crc.transfer_size()), status_code::overlapping);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(cache_flush_descriptor cache_flush) noexcept
+    {
+        using status_code = dml::detail::validation_status;  // a null destination is reported before a zero size
+        RETURN_STATUS_IF(any_equal_zero(cache_flush.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(cache_flush.transfer_size()), status_code::null_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(dif_check_descriptor dif_check) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        const auto block_size = dif_block_sizes[dif_check.dif_flags() & 0b11];
+        // The transfer must consist of whole source blocks of block_size + 8 bytes each.
+        RETURN_STATUS_IF(any_equal_zero(dif_check.source_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(dif_check.transfer_size()), status_code::null_size);
+        RETURN_STATUS_IF(dif_check.transfer_size() % (block_size + sizeof(uint64_t)) != 0, status_code::wrong_dif_size);
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(dif_insert_descriptor dif_insert) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        const auto block_size = dif_block_sizes[dif_insert.dif_flags() & 0b11];
+
+        RETURN_STATUS_IF(any_equal_zero(dif_insert.source_address(), dif_insert.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(dif_insert.transfer_size()), status_code::null_size);
+        RETURN_STATUS_IF(dif_insert.transfer_size() % block_size != 0, status_code::wrong_dif_size);
+
+        // The destination is 8 bytes larger per block_size bytes of source; the two ranges must not overlap.
+        const auto src_size = dif_insert.transfer_size();
+        const auto dst_size = src_size + ((src_size / block_size) * 8);
+        RETURN_STATUS_IF(overlaps(dif_insert.source_address(), src_size, dif_insert.destination_address(), dst_size), status_code::overlapping);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(dif_strip_descriptor dif_strip) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        const auto block_size = dif_block_sizes[dif_strip.dif_flags() & 0b11];
+
+        RETURN_STATUS_IF(any_equal_zero(dif_strip.source_address(), dif_strip.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(dif_strip.transfer_size()), status_code::null_size);
+        RETURN_STATUS_IF(dif_strip.transfer_size() % (block_size + sizeof(uint64_t)) != 0, status_code::wrong_dif_size);
+
+        // A source block spans block_size + 8 bytes (see the size check), so that stride - not block_size - gives the number of 8-byte fields removed.
+        const auto src_size = dif_strip.transfer_size();
+        const auto dst_size = src_size - ((src_size / (block_size + 8)) * 8);
+        RETURN_STATUS_IF(overlaps(dif_strip.source_address(), src_size, dif_strip.destination_address(), dst_size), status_code::overlapping);
+
+        // Hardware bug workaround: reject a source that begins between the end of the destination and destination + src_size
+        if (dif_strip.destination_address() < dif_strip.source_address())
+        {
+            if ((dif_strip.destination_address() + dst_size) <= dif_strip.source_address() &&
+                dif_strip.source_address() <= (dif_strip.destination_address() + src_size))
+            {
+                return status_code::dif_strip_adjacent;
+            }
+        }
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(dif_update_descriptor dif_update) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        const auto block_size = dif_block_sizes[dif_update.dif_flags() & 0b11];
+
+        RETURN_STATUS_IF(any_equal_zero(dif_update.source_address(), dif_update.destination_address()), status_code::null_address);
+        RETURN_STATUS_IF(any_equal_zero(dif_update.transfer_size()), status_code::null_size);
+        RETURN_STATUS_IF(dif_update.transfer_size() % (block_size + sizeof(uint64_t)) != 0, status_code::wrong_dif_size);
+        // Source and destination are checked against each other over the same transfer size and may not overlap.
+        RETURN_STATUS_IF(overlaps(dif_update.source_address(), dif_update.destination_address(), dif_update.transfer_size()), status_code::overlapping);
+
+        return status_code::success;
+    }
+
+    static dml::detail::validation_status validate(batch_descriptor batch) noexcept
+    {
+        using status_code = dml::detail::validation_status;
+        RETURN_STATUS_IF(any_equal_zero(batch.descriptor_list_address()), status_code::null_address);
+        RETURN_STATUS_IF(batch.descriptors_count() < 4, status_code::wrong_batch_size);  // fewer than 4 descriptors is rejected
+        return status_code::success;
+    }
+
+}  // namespace dml::core
diff --git a/sources/cores/CMakeLists.txt b/sources/cores/CMakeLists.txt
deleted file mode 100644
index 8ac5538..0000000
--- a/sources/cores/CMakeLists.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Copyright 2020-2021 Intel Corporation.
-#
-# This software and the related documents are Intel copyrighted materials,
-# and your use of them is governed by the express license under which they
-# were provided to you ("License"). Unless the License provides otherwise,
-# you may not use, modify, copy, publish, distribute, disclose or transmit
-# this software or the related documents without Intel's prior written
-# permission.
-#
-# This software and the related documents are provided as is, with no
-# express or implied warranties, other than those that are expressly
-# stated in the License.
-#
-
-add_library(dml_core OBJECT
-    src/dmlc_fill_8u.c
-    src/dmlc_delta_record_8u.c
-    src/dmlc_crc_16u_32u.c
-    src/dmlc_copy_8u.c
-    src/dmlc_compare_8u.c
-    src/dmlc_cache_8u.c
-    )
-
-target_include_directories(dml_core
-    PUBLIC include
-    PRIVATE ../../include
-    PRIVATE src/include
-    )
-
-target_compile_features(dml_core PRIVATE c_std_11)
-
-# TODO: target_compile_options(dml_core PRIVATE ${DML_QUALITY_OPTIONS})
-
-target_compile_definitions(dml_core PRIVATE DML_CORES_BADARG_CHECK)
-
-# TODO: Remove
-if ("${DML_ARCH}" STREQUAL "avx512")
-    target_compile_options(dml_core PRIVATE ${DML_AVX512_OPTIONS})
-    target_compile_definitions(dml_core PRIVATE AVX512)
-else()
-    target_compile_definitions(dml_core PRIVATE PX)
-endif()
diff --git a/sources/cores/include/core_compare.h b/sources/cores/include/core_compare.h
deleted file mode 100644
index 778b16c..0000000
--- a/sources/cores/include/core_compare.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @file
- * @date 2/10/2020
- *
- * @defgroup core_public_compare Compare Features
- * @ingroup core_public_features
- * @{
- *
- * @brief Features to compare memory region with another one or with a pattern.
- *
- * @details Compare group contains optimized cores, which perform the following tasks:
- *	-   Comparing between vectors;
- *	-   Comparing between vector and a pattern;
- *	-   Comparing vector values with some key/range to create a bit mask;
- *	-   Creating delta between two memory regions.
- *
- */
-
-#include "core_definitions.h"
-
-#ifndef DML_KERNEL_COMPARE_H__
-#define DML_KERNEL_COMPARE_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ------ Kernel Compare Defines ------ */
-
-#define DML_COMPARE_STATUS_EQ DML_STATUS_OK                  /**< Redefinition of default status in context of Compare functions */
-#define DML_COMPARE_STATUS_NE DML_STATUS_FALSE_PREDICATE_OK  /**< Redefinition of default status in context of Compare functions */
-
-typedef uint64_t pattern_t;     /**< Special type for 8-byte pattern */
-#define DML_SIZE_PATTERN_T  64  /**< pattern_t size in bits */
-
-
-/* ------ Kernel Compare Functions ------ */
-
-/**
- * @brief Compares specified memory regions.
- *
- * @param[in] first_vector_ptr          pointer to the reference vector
- * @param[in] second_vector_ptr         pointer to the vector to compare
- * @param[in] size                      number of bytes to compare
- * @param[out] mismatch_offset_ptr      first mismatch offset
- *
- * @note No memory alignment required.
- * @note After function execution mismatch_offset_ptr contains the first miss_match offset
- * if vectors are not equal.
- *
- * @return
- *      - @ref DML_COMPARE_STATUS_EQ;
- *      - @ref DML_COMPARE_STATUS_NE;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, compare_8u, (const uint8_t *first_vector_ptr,
-                                         const uint8_t *second_vector_ptr,
-                                         const uint32_t size,
-                                         uint32_t *const mismatch_offset_ptr));
-
-
-/**
- * @brief Compares specified memory region with 8-byte pattern.
- *
- * @param[in] memory_region_ptr         pointer to the base vector
- * @param[in] pattern                   expected 8-byte memory pattern
- * @param[in] size                      number of bytes to compare
- * @param[out] mismatch_offset_ptr      first mismatch offset
- *
- * @note After function execution mismatch_offset_ptr contains the first miss_match offset
- * if vectors are not equal.
- * @note Mismatch_offset_ptr may not be the exact byte location,
- * but it is guaranteed to be no greater than the first difference.
- *
- * @return
- *      - @ref DML_COMPARE_STATUS_EQ;
- *      - @ref DML_COMPARE_STATUS_NE;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, compare_with_pattern_8u, (const uint8_t *memory_region_ptr,
-                                                      const pattern_t pattern,
-                                                      const uint32_t size,
-                                                      uint32_t *const mismatch_offset_ptr));
-
-/**
- * @brief Creates delta record if vectors are not equal
- *
- * @param[in]  reference_vector_ptr    pointer to the base vector
- * @param[in]  second_vector_ptr       pointer to the delta that is written into the delta record
- * @param[in]  compared_bytes          number of bytes to compare
- * @param[in]  delta_record_max_size   maximal delta record size
- * @param[out] delta_record_ptr        pointer to the delta record
- * @param[out] record_size_ptr         created delta record size
- *
- * @warning: Compared vectors addresses must be aligned to a multiple of 8.
- * @warning: Number of bytes to compare must be multiple of 8.
- * @warning: Number of bytes to compare must be less or equal to the maximum supported offset,
- *           which is 524,280 bytes (0x7FFF8).
- * @warning: Number of available bytes in delta record must be multiple of 10.
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR;
- *      - @ref DML_STATUS_DELTA_ALIGN_ERROR in case if vector address is not aligned to be a
- *             multiple of 8;
- *      - @ref DML_STATUS_DELTA_INPUT_SIZE_ERROR in case if input vector size is not multiple of 8, or
- *             in case if input vector size is greater then max delta offset supported;
- *      - @ref DML_STATUS_DELTA_RECORD_SIZE_ERROR in case if max_delta_record_size is not sufficient
- *             for delta record creation, or max_delta_record_size is not a multiple of 10u
- *
- */
-DML_CORE_API(dmlc_status_t, create_delta_record_8u, (const uint8_t *reference_vector_ptr,
-                                                    const uint8_t *second_vector_ptr,
-                                                    const uint32_t compared_bytes,
-                                                    const uint32_t delta_record_max_size,
-                                                    uint8_t* delta_record_ptr,
-                                                    uint32_t *const record_size_ptr));
-
-/**
- * @brief Applies delta record to the contents of memory at destination address
- *
- * @param[out] memory_region_ptr    pointer to a memory region that is updated with a delta
- * @param[in]  delta_record_ptr     pointer to a delta record
- * @param[in]  memory_region_size   destination size
- * @param[in]  delta_record_size    delta record size
- *
- * @warning Memory region byte size must be multiply of 8.
- * @warning Delta record byte size must be multiply of 10.
- * @warning Function does not support vectors' overlap.
- * @warning Maximal supported offset is 524,280 bytes (0x7FFF8).
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR;
- *      - @ref DML_STATUS_DELTA_ALIGN_ERROR in case if memory_region_ptr address is not aligned a
- *             multiple of 8;
- *      - @ref DML_STATUS_DELTA_INPUT_SIZE_ERROR in case if memory region size is not multiple of 8;
- *      - @ref DML_STATUS_DELTA_RECORD_SIZE_ERROR in case if delta record size is not multiple of 10;
- *      - @ref DML_STATUS_OVERLAPPING_BUFFER_ERROR in case if vectors overlap
- *      - @ref DML_STATUS_MEMORY_OVERFLOW_ERROR in case if offset is greater than memory region size
- */
-DML_CORE_API(dmlc_status_t, apply_delta_record_8u, (uint8_t * memory_region_ptr,
-                                                   const uint8_t *delta_record_ptr,
-                                                   const uint32_t memory_region_size,
-                                                   const uint32_t delta_record_size));
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //DML_KERNEL_COMPARE_H__
-/** @} */
diff --git a/sources/cores/include/core_cpu_features.h b/sources/cores/include/core_cpu_features.h
deleted file mode 100644
index dc113ab..0000000
--- a/sources/cores/include/core_cpu_features.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @date 2/25/2020
- *
- * @defgroup core_public_cpu CPU Features
- * @ingroup core_public_features
- * @{
- * @brief Wrappers of CPU features.
- *
- * @details CPU features group includes the following functions:
- *      - Functions that get CPU info;
- *      - Wrappers for cache manipulation instructions;
- *      - etc.
- *
- */
-
-#include "core_definitions.h"
-
-#ifndef DML_KERNEL_CPU_FEATURES_H__
-#define DML_KERNEL_CPU_FEATURES_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-
-#ifdef _WIN32
-
-#include "intrin.h"
-
-/**
- * @brief Return informantion about CPU based on arguments
- *
- * @param[out] info        - 4-int buffer containing result of CPUID (registers EAX, EBX, ECX, EDX)
- * @param[in] info_type    - value of EAX register, setting type of resulting information
- * @param[in] info_subtype - value of ECX register, setting subtype of resulting information
- *
- * @return
- *      Nothing
- *
- */
-DML_CORE_OWN_INLINE(void, cpuid, (int info[4], int info_type, int info_subtype))
-{
-    __cpuidex(info, info_type, info_subtype);
-}
-#else
-
-//  GCC Intrinsics 
-#include <cpuid.h>
-#include <dlfcn.h>
-
-/**
- * @brief Return informantion about CPU based on arguments
- *
- * @param[out] info        - 4-int buffer containing result of CPUID (registers EAX, EBX, ECX, EDX)
- * @param[in] info_type    - value of EAX register, setting type of resulting information
- * @param[in] info_subtype - value of ECX register, setting subtype of resulting information
- *
- * @return
- *      Nothing
- *
- */
-DML_CORE_OWN_INLINE(void, cpuid, (int info[4], int info_type, int info_subtype))
-{
-    __cpuid_count(info_type, info_subtype, info[0], info[1], info[2], info[3]);
-}
-#endif
-
-/**
- * @brief Flushes the processor caches at the destination address with сache line invalidation from all cache hierarchy.
- *
- * @param[in] memory_region_ptr   memory region address to update from cache
- * @param[in] bytes_to_flush      memory region size, in bytes, to flush
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR;
- *
- */
-DML_CORE_API(dmlc_status_t, move_cache_to_memory_8u, (const uint8_t  *memory_region_ptr,
-                                                     const uint32_t bytes_to_flush));
-
-/**
- * @brief Flushes the processor caches at the destination address without cache line invalidation.
- *
- * @param[in] memory_region_ptr - memory region address to update from cache
- * @param[in] bytes_to_flush    - memory region size, in bytes, to flush
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR;
- *
- */
-DML_CORE_API(dmlc_status_t, copy_cache_to_memory_8u, (const uint8_t  *memory_region_ptr,
-                                                     const uint32_t bytes_to_flush));
-
-/**
- * @brief Maximum cache size
- */
-static int32_t max_cache_size = -1;
-
-/**
- * @brief Returns max available cache size
- *
- * @param[out] size - pointer on resulting max cache size
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_UNKNOWN_CACHE_SIZE_ERROR;
- *
- */
-DML_CORE_OWN_INLINE(dml_status_t, get_max_cache_size, (int32_t * size))
-{
-    if (max_cache_size > 0)
-    {
-        *size = max_cache_size;
-        return DML_STATUS_OK;
-    }
-    int32_t tmp_max_size = 0;
-    int32_t info[4] = {0, 0, 0, 0};
-
-    for (int32_t n = 0; n < 32; n++) {
-        dmlc_own_cpuid(info, 4, n);
-        if (!(info[0] & 0x1f))
-        {
-            break;
-        }
-        if ((info[0] & 0x1f) != 2)
-        {
-            // Cache Size in Bytes = (Ways + 1) * (Partitions + 1) * (Line_Size + 1) * (Sets + 1)
-            int32_t tmp_cache_size = info[2] + 1;              // Sets = ECX
-            tmp_cache_size *= (info[1] & 0xfff) + 1;           // Line_Size = EBX[11:0]
-            tmp_cache_size *= ((info[1] >> 12) & 0x3ff) + 1;   // Partitions = EBX[21:12]
-            tmp_cache_size *= ((info[1] >> 22) & 0x3ff) + 1;   // Ways = EBX[31:22]
-            if (tmp_cache_size > tmp_max_size) { tmp_max_size = tmp_cache_size; }
-        }
-    }
-    if (tmp_max_size) 
-    {
-        max_cache_size = tmp_max_size;
-        *size = tmp_max_size;
-        return DML_STATUS_OK;
-    }
-    else 
-    {
-        *size = 0;
-        return DML_STATUS_UNKNOWN_CACHE_SIZE_ERROR;
-    }
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //DML_KERNEL_CPU_FEATURES_H__
-
-/** @} */
diff --git a/sources/cores/include/core_definitions.h b/sources/cores/include/core_definitions.h
deleted file mode 100644
index 2b279cd..0000000
--- a/sources/cores/include/core_definitions.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @file
- * @date 2/10/2020
- *
- * @defgroup core_src Kernel Layer
- * @brief Intel(R) Data Mover Library (Intel® DML) Core functions
- *
- * @details DML Kernels Layer is a software path run on the user CPU, which must implement of the base DML features.
- * The kernel is an atomic function without any dependencies on the upper layers of abstraction
- *
- * @defgroup core_public Public API
- * @ingroup core_src
- *
- * @defgroup core_public_definitions Public Definitions
- * @ingroup core_public
- * @{
- *
- * @brief Contains general definitions for public use in Intel® Data Mover Library (Intel® DML) Cores.
- *
- */
-
-#include <stdint.h>
-#include "dml/dmldefs.h"
-
-#ifndef KERNEL_DEFINITIONS_H__
-#define KERNEL_DEFINITIONS_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ------ Definitions ------ */
-
-#if defined( _WIN32 ) || defined ( _WIN64 )
-#define DML_CORE_STDCALL  __stdcall
-#define DML_CORE_CDECL    __cdecl
-#else
-#define DML_CORE_STDCALL
-#define DML_CORE_CDECL
-#endif
-
-/* ------ Macros ------ */
-/**
- * @brief Defines an internal function declared in the file scope
- */
-#define DML_CORE_OWN_INLINE(type, name, arg) type static inline dmlc_own_##name arg
-
-#if !defined( DML_CORE_API )
-#define DML_CORE_API(type, name, arg) type DML_CORE_STDCALL dmlc_##name arg /**< Declaration macros to manipulate function name */
-#endif
-
-/* ------ Statuses ------ */
-
-typedef dml_status_t dmlc_status_t; /**< Redefinition of @ref dml_status_t for core functions */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //KERNEL_DEFINITIONS_H__
-
-/** @} */
diff --git a/sources/cores/include/core_hash_functions.h b/sources/cores/include/core_hash_functions.h
deleted file mode 100644
index cf038a7..0000000
--- a/sources/cores/include/core_hash_functions.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @date 3/5/2020
- *
- * @defgroup core_public_hash Hash Features
- * @ingroup core_public_features
- * @{
- *
- * @brief Features to calculate CRC
- *
- * @details Hash group contains optimized functions that calculate a hash value using
- * different hash algorithms.
- *
- */
-
-
-#include "core_definitions.h"
-
-#ifndef DML_KERNEL_HASH_H__
-#define DML_KERNEL_HASH_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/**
- * @brief Shift value for extracting next byte for the CRC16 function
- */
-#define OWN_CRC16_BYTE_SHIFT ( 8u )
-
-/**
- * @brief Shift value for extracting next byte the for CRC32 function
- */
-#define OWN_CRC32_BYTE_SHIFT ( 24u )
-
-
-/**
- * @brief Calculates CRC16 hash/checksum for a signified memory region
- *
- * @param[in]     memory_region_ptr        address of memory region to hash
- * @param[in]     bytes_to_hash            memory region size, in bytes, to hash
- * @param[in,out] crc_ptr                  CRC seed / result
- * @param[in]     polynomial	           polynomial to XORing
- *
- * @note No memory alignment is required;
- * @note crc_ptr is the initial seed for CRC16 calculation and result storing
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, calculate_crc_16u, (const uint8_t  *const memory_region_ptr,
-                                                uint32_t bytes_to_hash,
-                                                uint16_t *const crc_ptr,
-                                                uint16_t polynomial));
-
-
-/**
- * @brief Calculates CRC32 hash/checksum for a signified memory region
- *
- * @param[in]     memory_region_ptr        address of memory region to hash
- * @param[in]     bytes_to_hash            memory region size, in bytes, to hash
- * @param[in,out] crc_ptr                  CRC seed / result
- * @param[in]     polynomial	           polynomial to XORing
- *
- * @note No memory alignment is required;
- * @note crc_ptr is the initial seed for CRC32 calculation and result storing
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, calculate_crc_32u, (const uint8_t *const memory_region_ptr,
-                                                uint32_t bytes_to_hash,
-                                                uint32_t *const crc_ptr,
-                                                uint32_t polynomial));
-
-
-/**
- * @brief Calculates CRC32 hash/checksum for a signified memory region with reversed bytes bits
- *
- * @param[in]     memory_region_ptr        address of memory region to hash
- * @param[in]     bytes_to_hash            memory region size, in bytes, to hash
- * @param[in,out] crc_ptr                  CRC seed / result
- * @param[in]     polynomial	           polynomial to XORing
- *
- * @note No memory alignment is required;
- * @note crc_ptr is the initial seed for CRC32 calculation and result storing
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, calculate_crc_reflected_32u, (const uint8_t *const memory_region_ptr,
-                                                          uint32_t bytes_to_hash,
-                                                          uint32_t *const crc_ptr,
-                                                          uint32_t polynomial ) );
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // DML_KERNEL_HASH_H__
-/** @} */
diff --git a/sources/cores/include/core_memory.h b/sources/cores/include/core_memory.h
deleted file mode 100644
index a2932a9..0000000
--- a/sources/cores/include/core_memory.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @date 2/20/2020
- *
- * @defgroup core_public_memory Memory Features
- * @ingroup core_public_features
- * @{
- *
- * @brief Features to move or copy memory region or to fill it with a pattern.
- *
- * @details Memory group contains optimized functions that perform the following tasks:
- *	-   Copying the data from source to destination;
- *	-   Movement the data from one memory region into another;
- *	-   Filling vectors with some value, pattern.
- *
- */
-
-
- #include "core_definitions.h"
-
-#ifndef DML_KERNEL_MEMORY_H__
-#define DML_KERNEL_MEMORY_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Copies bytes from vector to another vector.
- *
- * @param[in]  source_ptr              pointer to source start
- * @param[out] destination_ptr         pointer to destination start
- * @param[in]  bytes_to_process        number of bytes to process
- *
- * @note No memory alignment is required.
- *
- * @return
- *      - @ref DML_STATUS_OK;
- */
-DML_CORE_API(dmlc_status_t, copy_8u, ( const uint8_t  *const source_ptr,
-                                                      uint8_t  *const destination_ptr,
-                                                      uint32_t        bytes_to_process ) );
-
-
-/**
- * @brief Moves bytes from vector to another vector.
- *
- * @param[in]  source_ptr              pointer to source start
- * @param[out] destination_ptr         pointer to destination start
- * @param[in]  bytes_to_process        count of bytes to process
- *
- * @note No memory alignment is required.
- *
- * @return
- *      - @ref DML_STATUS_OK;
- */
-DML_CORE_API(dmlc_status_t, move_8u, (const uint8_t *const source_ptr,
-                                               uint8_t *const destination_ptr,
-                                               uint32_t bytes_to_process));
-
-
-/**
- * @brief Copies bytes from vector to two vectors.
- *
- * @param[in]  source_ptr              pointer to source start
- * @param[out] first_destination_ptr   pointer to first destination start
- * @param[out] second_destination_ptr  pointer to second destination start
- * @param[in]  bytes_to_process        number of bytes to process
- *
- * @warning 0:11 bits in destination_first_ptr and destination_second_ptr must be equal.
- * @warning Function does not support vectors' overlap.
- *
- * @return
- *      - @ref DML_STATUS_OK;
- */
- DML_CORE_API(dmlc_status_t, dualcast_copy_8u, (const uint8_t *const source_ptr,
-                                                uint8_t *const first_destination_ptr,
-                                                uint8_t *const second_destination_ptr,
-                                                uint32_t bytes_to_process));
-
-
-/**
- * @brief Fills the source vector with the value in the pattern field.
- *
- * @param[in]  pattern                 64-bit pattern to fill
- * @param[out] memory_region_ptr       memory region address
- * @param[in]  bytes_to_process        count of bytes to process
- *
- * @note No memory alignment is required.
- *
- * @return
- *      - @ref DML_STATUS_OK;
- *      - @ref DML_STATUS_NULL_POINTER_ERROR.
- */
-DML_CORE_API(dmlc_status_t, fill_with_pattern_8u, (uint64_t pattern,
-                                                   uint8_t *const memory_region_ptr,
-                                                   uint32_t bytes_to_process));
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // DML_KERNEL_MEMORY_H__
-/** @} */
diff --git a/sources/cores/src/avx512/dmlc_compare_8u_k0.cxx b/sources/cores/src/avx512/dmlc_compare_8u_k0.cxx
deleted file mode 100644
index bc39a33..0000000
--- a/sources/cores/src/avx512/dmlc_compare_8u_k0.cxx
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
- /**
-  * @brief Contain default implementation of the follow functions:
-  *      - @ref dmlc_own_compare_8u()
-  *      - @ref dmlc_own_compare_with_pattern_8u()
-  *
-  * @date 07/06/2021
-  *
-  */
-
-
-DML_CORE_OWN_INLINE(dmlc_status_t, compare_8u, (const uint8_t* first_vector_ptr,
-    const uint8_t* second_vector_ptr,
-    const uint32_t size,
-    uint32_t* const mismatch_offset_ptr))
-{
-    uint32_t    i;
-    __mmask64   msk64 = (__mmask64)0;
-    for (i = 0u; (i + 64) <= size; i += 64) {
-        msk64 = _mm512_cmp_epi8_mask(
-            _mm512_loadu_si512((void const*)(first_vector_ptr + i)),
-            _mm512_loadu_si512((void const*)(second_vector_ptr + i)),
-            _MM_CMPINT_NE);
-        if (msk64) {
-            *mismatch_offset_ptr = i + (uint32_t)_tzcnt_u64((uint64_t)msk64);
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-    {
-        uint64_t    tail = size & 63;
-        msk64 = ((uint64_t)1 << tail) - (uint64_t)1;
-        msk64 = _mm512_cmp_epi8_mask(
-            _mm512_maskz_loadu_epi8(msk64, (void const*)(first_vector_ptr + i)),
-            _mm512_maskz_loadu_epi8(msk64, (void const*)(second_vector_ptr + i)),
-            _MM_CMPINT_NE);
-        if (msk64) {
-            *mismatch_offset_ptr = i + (uint32_t)_tzcnt_u64((uint64_t)msk64);
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-    return DML_COMPARE_STATUS_EQ;
-}
-
-DML_CORE_OWN_INLINE(dmlc_status_t, compare_with_pattern_8u, (const uint8_t* memory_region_ptr,
-    const pattern_t pattern,
-    const uint32_t size,
-    uint32_t* const mismatch_offset_ptr))
-{
-#if (DML_SIZE_PATTERN_T == 64)
-    const uint32_t pattern_chunk_count = size >> 3;
-    const uint64_t tail_bytes_count = size & 7;
-    const uint64_t* const pattern_region_ptr = (uint64_t*)memory_region_ptr;
-
-    __m512i     x_pattern = _mm512_set1_epi64(pattern);
-    uint32_t    i;
-    __mmask8    msk8 = (__mmask8)0;
-
-    for (i = 0u; (i + 8) <= pattern_chunk_count; i += 8) {
-        msk8 = _mm512_cmp_epi64_mask(_mm512_loadu_si512((void const*)(pattern_region_ptr + i)),
-            x_pattern,
-            _MM_CMPINT_NE);
-        if (msk8) {
-            *mismatch_offset_ptr = (i + (uint32_t)_tzcnt_u32((uint32_t)msk8)) << 3u;
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-    {
-        uint64_t    tail = pattern_chunk_count & 7;
-        if (tail) {
-            msk8 = (__mmask8)((1 << tail) - 1);
-            msk8 = _mm512_mask_cmp_epi64_mask(msk8,
-                _mm512_maskz_loadu_epi64(msk8, (void const*)(pattern_region_ptr + i)),
-                x_pattern,
-                _MM_CMPINT_NE);
-            if (msk8) {
-                *mismatch_offset_ptr = (i + (uint32_t)_tzcnt_u32((uint32_t)msk8)) << 3u;
-                return DML_COMPARE_STATUS_NE;
-            }
-        }
-    }
-    if (tail_bytes_count) {
-        memory_region_ptr += size - tail_bytes_count;
-        pattern_t byte_pattern = pattern;
-        // Compare tail
-        for (uint32_t i = 0; i < tail_bytes_count; i++)
-        {
-            if (memory_region_ptr[i] != (uint8_t)byte_pattern)
-            {
-                *mismatch_offset_ptr = (pattern_chunk_count << 3) + i;
-
-                return DML_COMPARE_STATUS_NE;
-            }
-            byte_pattern >>= OWN_BYTE_BIT_LENGTH;
-        }
-    }
-    return DML_COMPARE_STATUS_EQ;
-#else
-    //Constants
-    const uint32_t pattern_size = sizeof(pattern_t);
-    const uint32_t pattern_chunk_count = size / pattern_size;
-    const uint64_t tail_bytes_count = size % pattern_size;
-    const uint64_t* const pattern_region_ptr = (uint64_t*)memory_region_ptr;
-
-    // Compare by pattern chunks
-    for (uint32_t i = 0u; i < pattern_chunk_count; i++)
-    {
-        if (pattern_region_ptr[i] != pattern)
-        {
-            *mismatch_offset_ptr = i * pattern_size;
-
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-
-    memory_region_ptr += size - tail_bytes_count;
-    pattern_t byte_pattern = pattern;
-
-    // Compare tail
-    for (uint32_t i = 0; i < tail_bytes_count; i++)
-    {
-        if (memory_region_ptr[i] != (uint8_t)byte_pattern)
-        {
-            *mismatch_offset_ptr = pattern_chunk_count * pattern_size + i;
-
-            return DML_COMPARE_STATUS_NE;
-        }
-
-        byte_pattern >>= OWN_BYTE_BIT_LENGTH;
-    }
-    return DML_COMPARE_STATUS_EQ;
-#endif
-}
-
-
diff --git a/sources/cores/src/avx512/dmlc_copy_8u_k0.cxx b/sources/cores/src/avx512/dmlc_copy_8u_k0.cxx
deleted file mode 100644
index 2d56260..0000000
--- a/sources/cores/src/avx512/dmlc_copy_8u_k0.cxx
+++ /dev/null
@@ -1,779 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
- /**
-  * @brief Contain optimized AVX512 implementation of the follow functions:
-  *      - @ref dmlc_copy_8u()
-  *      - @ref dmlc_move_8u()
-  *      - @ref dmlc_dualcast_copy_8u()
-  *
-  * @date 5/26/2021
-  *
-  */
-
-#include "core_cpu_features.h"
-
-#if defined(_MSC_VER)
-#define OWN_ALIGNED_64_ARRAY(array_declaration) __declspec(align(64u)) array_declaration
-#elif defined(__GNUC__)
-#define OWN_ALIGNED_64_ARRAY(array_declaration) array_declaration __attribute__((aligned(64u)))
-#endif
-
-DML_CORE_OWN_INLINE(void, px_copy_8u_unrolled, (const uint8_t *src_ptr, uint8_t *dst_ptr, uint32_t length)) {
-    uint32_t align_dst = 64u - ((uint64_t)dst_ptr & 0x3F);
-    uint32_t align_src = 64u - ((uint64_t)src_ptr & 0x3F);
-
-    if (align_dst < 64u) {
-        if (length < align_dst) {
-            align_dst = length;
-            for (uint32_t i = 0u; i < align_dst; ++i) {
-                dst_ptr[i] = src_ptr[i];
-            }
-            return;
-        }
-        for (uint32_t i = 0u; i < align_dst; ++i) {
-            dst_ptr[i] = src_ptr[i];
-        }
-        length -= align_dst;
-        src_ptr += align_dst;
-        dst_ptr += align_dst;
-    }
-
-    if (align_dst == align_src) {
-        const uint64_t *src_64u_ptr = (uint64_t *)src_ptr;
-        uint64_t *dst_64u_ptr = (uint64_t *)dst_ptr;
-
-        uint32_t length_64u = length / sizeof(uint64_t);
-        uint32_t tail_start = length_64u * sizeof(uint64_t);
-
-        while (length_64u > 3u) {
-            dst_64u_ptr[0] = src_64u_ptr[0];
-            dst_64u_ptr[1] = src_64u_ptr[1];
-            dst_64u_ptr[2] = src_64u_ptr[2];
-            dst_64u_ptr[3] = src_64u_ptr[3];
-            dst_64u_ptr += 4u;
-            src_64u_ptr += 4u;
-            length_64u -= 4u;
-        }
-
-        for (uint32_t i = 0u; i < length_64u; ++i) {
-            dst_64u_ptr[i] = src_64u_ptr[i];
-        }
-
-        for (uint32_t i = tail_start; i < length; ++i) {
-            dst_ptr[i] = src_ptr[i];
-        }
-    }
-    else {
-        while (length > 7u) {
-            dst_ptr[0] = src_ptr[0];
-            dst_ptr[1] = src_ptr[1];
-            dst_ptr[2] = src_ptr[2];
-            dst_ptr[3] = src_ptr[3];
-            dst_ptr[4] = src_ptr[4];
-            dst_ptr[5] = src_ptr[5];
-            dst_ptr[6] = src_ptr[6];
-            dst_ptr[7] = src_ptr[7];
-
-            dst_ptr += 8u;
-            src_ptr += 8u;
-            length -= 8;
-        }
-
-        for (uint32_t i = 0u; i < length; ++i) {
-            dst_ptr[i] = src_ptr[i];
-        }
-    }
-}
-
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_2u[32])  = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_4u[32])  = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_6u[32])  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_8u[32])  = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_10u[32]) = {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_12u[32]) = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_14u[32]) = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_16u[32]) = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_18u[32]) = {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_20u[32]) = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_22u[32]) = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_24u[32]) = {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_26u[32]) = {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_28u[32]) = {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_30u[32]) = {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_32u[32]) = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_34u[32]) = {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_36u[32]) = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_38u[32]) = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_40u[32]) = {20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_42u[32]) = {21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_44u[32]) = {22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_46u[32]) = {23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_48u[32]) = {24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_50u[32]) = {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_52u[32]) = {26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_54u[32]) = {27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_56u[32]) = {28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_58u[32]) = {29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_60u[32]) = {30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_62u[32]) = {31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62};
-
-static uint16_t *permutex_idx_pptr[31] = {
-    permutex_idx_2u, permutex_idx_4u, permutex_idx_6u, permutex_idx_8u, permutex_idx_10u, permutex_idx_12u, permutex_idx_14u, permutex_idx_16u,
-    permutex_idx_18u, permutex_idx_20u, permutex_idx_22u, permutex_idx_24u, permutex_idx_26u, permutex_idx_28u, permutex_idx_30u, permutex_idx_32u,
-    permutex_idx_34u, permutex_idx_36u, permutex_idx_38u, permutex_idx_40u, permutex_idx_42u, permutex_idx_44u, permutex_idx_46u, permutex_idx_48u,
-    permutex_idx_50u, permutex_idx_52u, permutex_idx_54u, permutex_idx_56u, permutex_idx_58u, permutex_idx_60u, permutex_idx_62u};
-
-DML_CORE_OWN_INLINE(__m512i, mm512_bsrli_epi128, (__m512i a, uint32_t shift))
-{
-    switch (shift)
-    {
-    case 1: {
-        return _mm512_bsrli_epi128(a, 1);
-    }
-    case 2: {
-        return _mm512_bsrli_epi128(a, 2);
-    }
-    case 3: {
-        return _mm512_bsrli_epi128(a, 3);
-    }
-    case 4: {
-        return _mm512_bsrli_epi128(a, 4);
-    }
-    case 5: {
-        return _mm512_bsrli_epi128(a, 5);
-    }
-    case 6: {
-        return _mm512_bsrli_epi128(a, 6);
-    }
-    case 7: {
-        return _mm512_bsrli_epi128(a, 7);
-    }
-    case 8: {
-        return _mm512_bsrli_epi128(a, 8);
-    }
-    case 9: {
-        return _mm512_bsrli_epi128(a, 9);
-    }
-    case 10: {
-        return _mm512_bsrli_epi128(a, 10);
-    }
-    case 11: {
-        return _mm512_bsrli_epi128(a, 11);
-    }
-    case 12: {
-        return _mm512_bsrli_epi128(a, 12);
-    }
-    case 13: {
-        return _mm512_bsrli_epi128(a, 13);
-    }
-    case 14: {
-        return _mm512_bsrli_epi128(a, 14);
-    }
-    case 15: {
-        return _mm512_bsrli_epi128(a, 15);
-    }
-    default:
-        return _mm512_setzero_si512();
-    }
-}
-
-DML_CORE_OWN_INLINE(__m512i, mm512_bslli_epi128, (__m512i a, uint32_t shift))
-{
-    switch (shift)
-    {
-    case 1: {
-        return _mm512_bslli_epi128(a, 1);
-    }
-    case 2: {
-        return _mm512_bslli_epi128(a, 2);
-    }
-    case 3: {
-        return _mm512_bslli_epi128(a, 3);
-    }
-    case 4: {
-        return _mm512_bslli_epi128(a, 4);
-    }
-    case 5: {
-        return _mm512_bslli_epi128(a, 5);
-    }
-    case 6: {
-        return _mm512_bslli_epi128(a, 6);
-    }
-    case 7: {
-        return _mm512_bslli_epi128(a, 7);
-    }
-    case 8: {
-        return _mm512_bslli_epi128(a, 8);
-    }
-    case 9: {
-        return _mm512_bslli_epi128(a, 9);
-    }
-    case 10: {
-        return _mm512_bslli_epi128(a, 10);
-    }
-    case 11: {
-        return _mm512_bslli_epi128(a, 11);
-    }
-    case 12: {
-        return _mm512_bslli_epi128(a, 12);
-    }
-    case 13: {
-        return _mm512_bslli_epi128(a, 13);
-    }
-    case 14: {
-        return _mm512_bslli_epi128(a, 14);
-    }
-    case 15: {
-        return _mm512_bslli_epi128(a, 15);
-    }
-    default:
-        return _mm512_setzero_si512();
-    }
-}
-
-DML_CORE_OWN_INLINE(__m512i, mm512_alignr_epi8, (__m512i a, __m512i b, uint32_t shift))
-{
-    switch (shift)
-    {
-    case 0: {
-        return b;
-    }
-    case 4: {
-        return _mm512_alignr_epi32(a, b, 1);
-    }
-    case 8: {
-        return _mm512_alignr_epi32(a, b, 2);
-    }
-    case 12: {
-        return _mm512_alignr_epi32(a, b, 3);
-    }
-    case 16: {
-        return _mm512_alignr_epi32(a, b, 4);
-    }
-    case 20: {
-        return _mm512_alignr_epi32(a, b, 5);
-    }
-    case 24: {
-        return _mm512_alignr_epi32(a, b, 6);
-    }
-    case 28: {
-        return _mm512_alignr_epi32(a, b, 7);
-    }
-    case 32: {
-        return _mm512_alignr_epi32(a, b, 8);
-    }
-    case 36: {
-        return _mm512_alignr_epi32(a, b, 9);
-    }
-    case 40: {
-        return _mm512_alignr_epi32(a, b, 10);
-    }
-    case 44: {
-        return _mm512_alignr_epi32(a, b, 11);
-    }
-    case 48: {
-        return _mm512_alignr_epi32(a, b, 12);
-    }
-    case 52: {
-        return _mm512_alignr_epi32(a, b, 13);
-    }
-    case 56: {
-        return _mm512_alignr_epi32(a, b, 14);
-    }
-    case 60: {
-        return _mm512_alignr_epi32(a, b, 15);
-    }
-    default:
-        return _mm512_setzero_si512();
-    }
-}
-
-DML_CORE_OWN_INLINE(void, copy_8u, (const uint8_t *src_ptr,
-    uint8_t *dst_ptr,
-    uint32_t length))
-{
-    if (length < 1024u) {
-        dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-        return;
-    }
-
-    if (length > 32000) {
-        int32_t size = 0u;
-        dmlc_own_get_max_cache_size(&size);
-        if ((size > 0) && (length > (uint32_t)size)) {
-            dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-            return;
-        }
-    }
-
-    uint32_t align_dst = 64u - ((uint64_t)dst_ptr & 0x3F);
-    uint32_t align_src = 64u - ((uint64_t)src_ptr & 0x3F);
-    if (align_dst < 64u)
-    {
-        if (length < 4000u) {
-            dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-            return;
-        }
-        dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, align_dst);
-        length -= align_dst;
-        dst_ptr += align_dst;
-        src_ptr += align_dst;
-        uint32_t length512u = length / sizeof(__m512i);
-        uint32_t tail = length % sizeof(__m512i);
-
-        if (0u != ((align_src - align_dst) & 15u))
-        {
-            uint32_t shift = (align_dst > align_src)? (align_dst - align_src) : (64u + align_dst - align_src);
-
-            if (0u == (shift & 3u)) {
-                src_ptr -= shift;
-                __mmask64 skip_mask = ~((1llu << shift) - 1u);
-                __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-                src_ptr += 64u;
-
-                while (length512u > 4u) {
-                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                    __m512i zmm2 = dmlc_own_mm512_alignr_epi8(zmm1, zmm0, shift);
-                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                    __m512i zmm4 = dmlc_own_mm512_alignr_epi8(zmm3, zmm1, shift);
-                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                    __m512i zmm6 = dmlc_own_mm512_alignr_epi8(zmm5, zmm3, shift);
-                    zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                    __m512i zmm7 = dmlc_own_mm512_alignr_epi8(zmm0, zmm5, shift);
-                    _mm512_store_si512((__m512i *)dst_ptr, zmm2);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7);
-                    src_ptr += 256u;
-                    dst_ptr += 256u;
-                    length512u -= 4u;
-                }
-
-                src_ptr -= 64u - shift;
-            }
-            else if (0u == (shift & 1u)) {
-                src_ptr -= shift;
-                __mmask64 skip_mask = ~((1llu << shift) - 1u);
-                __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-                src_ptr += 64u;
-
-                __m512i permutex_idxmm = _mm512_load_si512(permutex_idx_pptr[(shift - 2) / 2]);
-
-                while (length512u > 4u) {
-                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                    __m512i zmm2 = _mm512_permutex2var_epi16(zmm0, permutex_idxmm, zmm1);
-                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                    __m512i zmm4 = _mm512_permutex2var_epi16(zmm1, permutex_idxmm, zmm3);
-                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                    __m512i zmm6 = _mm512_permutex2var_epi16(zmm3, permutex_idxmm, zmm5);
-                    zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                    __m512i zmm7 = _mm512_permutex2var_epi16(zmm5, permutex_idxmm, zmm0);
-                    _mm512_store_si512((__m512i *)dst_ptr, zmm2);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7);
-                    src_ptr += 256u;
-                    dst_ptr += 256u;
-                    length512u -= 4u;
-                }
-
-                src_ptr -= 64u - shift;
-            }
-            else if (shift < 16u) {
-                if (length < 16000u) {
-                    dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-                    return;
-                }
-                src_ptr -= shift;
-                __mmask64 skip_mask = ~((1llu << shift) - 1u);
-                __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-                src_ptr += 64u;
-
-                __m512i permutex_idxmm_higher = _mm512_load_si512(permutex_idx_pptr[(shift - 1) / 2]);
-
-                while (length512u > 4u) {
-                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                    __m512i zmm2_lower = dmlc_own_mm512_bsrli_epi128(zmm0, shift);
-                    __m512i zmm2_higher = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_higher, zmm1);
-                    zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm2_higher, 1u);
-                    zmm2_higher = _mm512_mask_mov_epi8(zmm2_higher, 0x0001000100010001, zmm2_lower);
-                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                    __m512i zmm4_lower = dmlc_own_mm512_bsrli_epi128(zmm1, shift);
-                    __m512i zmm4_higher = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_higher, zmm3);
-                    zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm4_higher, 1u);
-                    zmm4_higher = _mm512_mask_mov_epi8(zmm4_higher, 0x0001000100010001, zmm4_lower);
-                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                    __m512i zmm6_lower = dmlc_own_mm512_bsrli_epi128(zmm3, shift);
-                    __m512i zmm6_higher = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_higher, zmm5);
-                    zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm6_higher, 1u);
-                    zmm6_higher = _mm512_mask_mov_epi8(zmm6_higher, 0x0001000100010001, zmm6_lower);
-                    zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                    __m512i zmm7_lower = dmlc_own_mm512_bsrli_epi128(zmm5, shift);
-                    __m512i zmm7_higher = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_higher, zmm0);
-                    zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm7_higher, 1u);
-                    zmm7_higher = _mm512_mask_mov_epi8(zmm7_higher, 0x0001000100010001, zmm7_lower);
-                    _mm512_store_si512((__m512i *)dst_ptr, zmm2_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7_higher);
-                    src_ptr += 256u;
-                    dst_ptr += 256u;
-                    length512u -= 4u;
-                }
-
-                src_ptr -= 64u - shift;
-            }
-            else if (shift > 48u) {
-                if (length < 16000u) {
-                    dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-                    return;
-                }
-                src_ptr -= shift;
-                __mmask64 skip_mask = ~((1llu << shift) - 1u);
-                __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-                src_ptr += 64u;
-
-                __m512i permutex_idxmm_lower = _mm512_load_si512(permutex_idx_pptr[(shift - 3) / 2]);
-                uint32_t shift_higher = 64u - shift;
-
-                while (length512u > 4u) {
-                    __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                    __m512i zmm2_lower = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_lower, zmm1);
-                    zmm2_lower = dmlc_own_mm512_bsrli_epi128(zmm2_lower, 1u);
-                    __m512i zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm1, shift_higher);
-                    zmm2_higher = _mm512_mask_mov_epi8(zmm2_higher, 0x7FFF7FFF7FFF7FFF, zmm2_lower);
-                    __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                    __m512i zmm4_lower = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_lower, zmm3);
-                    zmm4_lower = dmlc_own_mm512_bsrli_epi128(zmm4_lower, 1u);
-                    __m512i zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm3, shift_higher);
-                    zmm4_higher = _mm512_mask_mov_epi8(zmm4_higher, 0x7FFF7FFF7FFF7FFF, zmm4_lower);
-                    __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                    __m512i zmm6_lower = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_lower, zmm5);
-                    zmm6_lower = dmlc_own_mm512_bsrli_epi128(zmm6_lower, 1u);
-                    __m512i zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm5, shift_higher);
-                    zmm6_higher = _mm512_mask_mov_epi8(zmm6_higher, 0x7FFF7FFF7FFF7FFF, zmm6_lower);
-                    zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                    __m512i zmm7_lower = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_lower, zmm0);
-                    zmm7_lower = dmlc_own_mm512_bsrli_epi128(zmm7_lower, 1u);
-                    __m512i zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm0, shift_higher);
-                    zmm7_higher = _mm512_mask_mov_epi8(zmm7_higher, 0x7FFF7FFF7FFF7FFF, zmm7_lower);
-                    _mm512_store_si512((__m512i *)dst_ptr, zmm2_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6_higher);
-                    _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7_higher);
-                    src_ptr += 256u;
-                    dst_ptr += 256u;
-                    length512u -= 4u;
-                }
-
-                src_ptr -= 64u - shift;
-            }
-            else {
-                dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-                return;
-            }
-        }
-        else
-        {
-            while (length512u > 3u) {
-                __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src_ptr);
-                __m512i zmm1 = _mm512_loadu_si512((const __m512i *)(src_ptr + 64u));
-                __m512i zmm2 = _mm512_loadu_si512((const __m512i *)(src_ptr + 128u));
-                __m512i zmm3 = _mm512_loadu_si512((const __m512i *)(src_ptr + 192u));
-                _mm512_store_si512((__m512i *)dst_ptr, zmm0);
-                _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm1);
-                _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm2);
-                _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm3);
-                src_ptr += 256u;
-                dst_ptr += 256u;
-                length512u -= 4;
-            }
-        }
-        while (length512u > 0u) {
-            __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src_ptr);
-            _mm512_store_si512((__m512i *)dst_ptr, zmm0);
-            src_ptr += 64u;
-            dst_ptr += 64u;
-            --length512u;
-        }
-
-        dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, tail);
-
-        return;
-    }
-
-    uint32_t length512u = length / sizeof(__m512i);
-    uint32_t tail = length % sizeof(__m512i);
-
-    if (align_src < 64u)
-    {
-        if (length < 32000u) {
-            dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-            return;
-        }
-        uint32_t shift = 64 - align_src;
-
-        if (0u == (shift & 3u)) {
-            src_ptr -= shift;
-            __mmask64 skip_mask = ~((1llu << shift) - 1u);
-            __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-            src_ptr += 64u;
-
-            while (length512u > 4u) {
-                __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                __m512i zmm2 = dmlc_own_mm512_alignr_epi8(zmm1, zmm0, shift);
-                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                __m512i zmm4 = dmlc_own_mm512_alignr_epi8(zmm3, zmm1, shift);
-                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                __m512i zmm6 = dmlc_own_mm512_alignr_epi8(zmm5, zmm3, shift);
-                zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                __m512i zmm7 = dmlc_own_mm512_alignr_epi8(zmm0, zmm5, shift);
-                _mm512_store_si512((__m512i *)dst_ptr, zmm2);
-                _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4);
-                _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6);
-                _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7);
-                src_ptr += 256u;
-                dst_ptr += 256u;
-                length512u -= 4u;
-            }
-
-            src_ptr -= 64u - shift;
-        }
-        else if (0u == (shift & 1u)) {
-            src_ptr -= shift;
-            __mmask64 skip_mask = ~((1llu << shift) - 1u);
-            __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-            src_ptr += 64u;
-
-            __m512i permutex_idxmm = _mm512_load_si512(permutex_idx_pptr[(shift - 2) / 2]);
-
-            while (length512u > 4u) {
-                __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                __m512i zmm2 = _mm512_permutex2var_epi16(zmm0, permutex_idxmm, zmm1);
-                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                __m512i zmm4 = _mm512_permutex2var_epi16(zmm1, permutex_idxmm, zmm3);
-                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                __m512i zmm6 = _mm512_permutex2var_epi16(zmm3, permutex_idxmm, zmm5);
-                zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                __m512i zmm7 = _mm512_permutex2var_epi16(zmm5, permutex_idxmm, zmm0);
-                _mm512_store_si512((__m512i *)dst_ptr, zmm2);
-                _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4);
-                _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6);
-                _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7);
-                src_ptr += 256u;
-                dst_ptr += 256u;
-                length512u -= 4u;
-            }
-
-            src_ptr -= 64u - shift;
-        }
-        else if (shift < 16u) {
-            src_ptr -= shift;
-            __mmask64 skip_mask = ~((1llu << shift) - 1u);
-            __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-            src_ptr += 64u;
-
-            __m512i permutex_idxmm_higher = _mm512_load_si512(permutex_idx_pptr[(shift - 1) / 2]);
-
-            while (length512u > 4u) {
-                __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                __m512i zmm2_lower = dmlc_own_mm512_bsrli_epi128(zmm0, shift);
-                __m512i zmm2_higher = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_higher, zmm1);
-                zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm2_higher, 1u);
-                zmm2_higher = _mm512_mask_mov_epi8(zmm2_higher, 0x0001000100010001, zmm2_lower);
-                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                __m512i zmm4_lower = dmlc_own_mm512_bsrli_epi128(zmm1, shift);
-                __m512i zmm4_higher = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_higher, zmm3);
-                zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm4_higher, 1u);
-                zmm4_higher = _mm512_mask_mov_epi8(zmm4_higher, 0x0001000100010001, zmm4_lower);
-                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                __m512i zmm6_lower = dmlc_own_mm512_bsrli_epi128(zmm3, shift);
-                __m512i zmm6_higher = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_higher, zmm5);
-                zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm6_higher, 1u);
-                zmm6_higher = _mm512_mask_mov_epi8(zmm6_higher, 0x0001000100010001, zmm6_lower);
-                zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                __m512i zmm7_lower = dmlc_own_mm512_bsrli_epi128(zmm5, shift);
-                __m512i zmm7_higher = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_higher, zmm0);
-                zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm7_higher, 1u);
-                zmm7_higher = _mm512_mask_mov_epi8(zmm7_higher, 0x0001000100010001, zmm7_lower);
-                _mm512_store_si512((__m512i *)dst_ptr, zmm2_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7_higher);
-                src_ptr += 256u;
-                dst_ptr += 256u;
-                length512u -= 4u;
-            }
-
-            src_ptr -= 64u - shift;
-        }
-        else if (shift > 48u) {
-            src_ptr -= shift;
-            __mmask64 skip_mask = ~((1llu << shift) - 1u);
-            __m512i zmm0 = _mm512_maskz_loadu_epi8(skip_mask, (const __m512i *)src_ptr);
-            src_ptr += 64u;
-
-            __m512i permutex_idxmm_lower = _mm512_load_si512(permutex_idx_pptr[(shift - 3) / 2]);
-            uint32_t shift_higher = 64u - shift;
-
-            while (length512u > 4u) {
-                __m512i zmm1 = _mm512_load_si512((const __m512i *)src_ptr);
-                __m512i zmm2_lower = _mm512_permutex2var_epi16(zmm0, permutex_idxmm_lower, zmm1);
-                zmm2_lower = dmlc_own_mm512_bsrli_epi128(zmm2_lower, 1u);
-                __m512i zmm2_higher = dmlc_own_mm512_bslli_epi128(zmm1, shift_higher);
-                zmm2_higher = _mm512_mask_mov_epi8(zmm2_higher, 0x7FFF7FFF7FFF7FFF, zmm2_lower);
-                __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-                __m512i zmm4_lower = _mm512_permutex2var_epi16(zmm1, permutex_idxmm_lower, zmm3);
-                zmm4_lower = dmlc_own_mm512_bsrli_epi128(zmm4_lower, 1u);
-                __m512i zmm4_higher = dmlc_own_mm512_bslli_epi128(zmm3, shift_higher);
-                zmm4_higher = _mm512_mask_mov_epi8(zmm4_higher, 0x7FFF7FFF7FFF7FFF, zmm4_lower);
-                __m512i zmm5 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-                __m512i zmm6_lower = _mm512_permutex2var_epi16(zmm3, permutex_idxmm_lower, zmm5);
-                zmm6_lower = dmlc_own_mm512_bsrli_epi128(zmm6_lower, 1u);
-                __m512i zmm6_higher = dmlc_own_mm512_bslli_epi128(zmm5, shift_higher);
-                zmm6_higher = _mm512_mask_mov_epi8(zmm6_higher, 0x7FFF7FFF7FFF7FFF, zmm6_lower);
-                zmm0 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-                __m512i zmm7_lower = _mm512_permutex2var_epi16(zmm5, permutex_idxmm_lower, zmm0);
-                zmm7_lower = dmlc_own_mm512_bsrli_epi128(zmm7_lower, 1u);
-                __m512i zmm7_higher = dmlc_own_mm512_bslli_epi128(zmm0, shift_higher);
-                zmm7_higher = _mm512_mask_mov_epi8(zmm7_higher, 0x7FFF7FFF7FFF7FFF, zmm7_lower);
-                _mm512_store_si512((__m512i *)dst_ptr, zmm2_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm4_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm6_higher);
-                _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm7_higher);
-                src_ptr += 256u;
-                dst_ptr += 256u;
-                length512u -= 4u;
-            }
-
-            src_ptr -= 64u - shift;
-        }
-        else {
-            dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-            return;
-        }
-    }
-    else
-    {
-        if ((12000 < length) && (length < 32000)) {
-            dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, length);
-            return;
-        }
-        while (length512u > 3u) {
-            __m512i zmm0 = _mm512_load_si512((const __m512i *)src_ptr);
-            __m512i zmm1 = _mm512_load_si512((const __m512i *)(src_ptr + 64u));
-            __m512i zmm2 = _mm512_load_si512((const __m512i *)(src_ptr + 128u));
-            __m512i zmm3 = _mm512_load_si512((const __m512i *)(src_ptr + 192u));
-            _mm512_store_si512((__m512i *)dst_ptr, zmm0);
-            _mm512_store_si512((__m512i *)(dst_ptr + 64u), zmm1);
-            _mm512_store_si512((__m512i *)(dst_ptr + 128u), zmm2);
-            _mm512_store_si512((__m512i *)(dst_ptr + 192u), zmm3);
-            src_ptr += 256u;
-            dst_ptr += 256u;
-            length512u -= 4;
-        }
-    }
-
-    while (length512u > 0u) {
-        __m512i zmm0 = _mm512_loadu_si512((const __m512i *)src_ptr);
-        _mm512_store_si512((__m512i *)dst_ptr, zmm0);
-        src_ptr += 64u;
-        dst_ptr += 64u;
-        --length512u;
-    }
-
-    dmlc_own_px_copy_8u_unrolled(src_ptr, dst_ptr, tail);
-}
-
-DML_CORE_OWN_INLINE(void, px_copy_8u_not_unrolled, (const uint8_t *src_ptr, uint8_t *dst_ptr, uint32_t length)) {
-    const uint64_t *src_64u_ptr = (uint64_t *)src_ptr;
-    uint64_t *dst_64u_ptr = (uint64_t *)dst_ptr;
-
-    uint32_t length_64u = length / sizeof(uint64_t);
-    uint32_t tail_start = length_64u * sizeof(uint64_t);
-
-    for (uint32_t i = 0u; i < length_64u; ++i) {
-        dst_64u_ptr[i] = src_64u_ptr[i];
-    }
-
-    for (uint32_t i = tail_start; i < length; ++i) {
-        dst_ptr[i] = src_ptr[i];
-    }
-}
-
-DML_CORE_OWN_INLINE(void, move_8u, (const uint8_t *const source_ptr,
-    uint8_t *const destination_ptr,
-    uint32_t       bytes_to_process))
-{
-    // Current position in source vector
-    const uint8_t *source_current_ptr = (const uint8_t *)(source_ptr + bytes_to_process);
-
-    // Current position in destination vector
-    uint8_t *destination_current_ptr = (uint8_t *)(destination_ptr + bytes_to_process);
-
-    while (0u < bytes_to_process)
-    {
-        // Shift position in destination vector
-        destination_current_ptr--;
-
-        // Shift position in source vector
-        source_current_ptr--;
-
-        // Copy 1 byte
-        (*destination_current_ptr) = (*source_current_ptr);
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-}
-
-
-DML_CORE_OWN_INLINE(void, dualcast_copy_8u, (const uint8_t *const source_ptr,
-    uint8_t *const first_destination_ptr,
-    uint8_t *const second_destination_ptr,
-    uint32_t       bytes_to_process))
-{
-    // Current position in source vector 64u
-    const uint8_t *source_current_ptr = (const uint8_t *)source_ptr;
-
-    // Current position in first destination vector 64u
-    uint8_t *first_destination_current_ptr = (uint8_t *)first_destination_ptr;
-
-    // Current position in second destination vector 64u
-    uint8_t *second_destination_current_ptr = (uint8_t *)second_destination_ptr;
-
-    while (0 < bytes_to_process)
-    {
-        // Copy 1 byte to first destination vector
-        (*first_destination_current_ptr) = (*source_current_ptr);
-
-        // Copy 1 byte to second destination vector
-        (*second_destination_current_ptr) = (*source_current_ptr);
-
-        // Shift position in first destination vector
-        first_destination_current_ptr++;
-
-        // Shift position in second destination vector
-        second_destination_current_ptr++;
-
-        // Shift position in source vector
-        source_current_ptr++;
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-}
diff --git a/sources/cores/src/avx512/dmlc_crc_16u_32u_k0.cxx b/sources/cores/src/avx512/dmlc_crc_16u_32u_k0.cxx
deleted file mode 100644
index 17cc6d8..0000000
--- a/sources/cores/src/avx512/dmlc_crc_16u_32u_k0.cxx
+++ /dev/null
@@ -1,618 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_calculate_crc_32u()
- *
- * @date 7/20/2021
- *
- */
-
-#define DML_DISABLE_OPTIMIZATION_
-
-/**
-*  @todo
-*/
-static inline uint32_t getCRCSize(uint64_t poly)
-{
-    uint32_t    crcSize;
-    crcSize = 63 - (uint32_t)_lzcnt_u64(poly);
-    return crcSize;
-}
-
-/**
-*  @todo
-*/
-
-static void own_CRC_8u_opt_k0(const uint8_t* src_ptr, uint32_t init_crc, int len0, int crc_size, uint32_t* koeff_ptr, uint32_t* crc_ptr);
-
-/**
-*  @todo
-*/
-static void own_CRC_8u_k0(const uint8_t* src_ptr, uint32_t len, uint64_t poly, const uint8_t optPoly[128], uint32_t init, uint32_t* crc_ptr)
-{
-    int crc_size = getCRCSize(poly);
-    own_CRC_8u_opt_k0(src_ptr, init, len, crc_size, (uint32_t*)optPoly, crc_ptr);
-}
-
-/**
-*  @todo
-*/
-static void poly1x64_32_div(uint64_t poly, uint64_t* quit_ptr, uint32_t* tail_ptr)
-{
-    int i, j;
-    uint64_t tail = 0;
-    uint64_t quot = 0;
-    uint8_t  byte = 0x01;
-    for (i = 0; i < 9; i++) {
-        uint8_t bit;
-        uint64_t hbit;
-        for (j = 0; j < 8; j++) {
-            bit = (byte & 0x80) >> 7;
-            byte <<= 1;
-            hbit = tail & 0x80000000;
-            tail <<= 1;
-            tail |= bit;
-            quot <<= 1;
-            if (hbit) {
-                tail = tail ^ poly;
-                quot |= 1;
-            }
-            tail = tail & 0xffffffff;
-        }
-    }
-    *quit_ptr = quot;
-    *tail_ptr = (uint32_t)tail;
-    return;
-}
-
-/**
-*  @todo
-*/
-static inline void own_gen_crc_opt_poly_8u(uint64_t poly, uint8_t optPoly[128])
-{
-    uint64_t u;
-    uint32_t i, k, crc_size;
-    uint32_t t;
-
-    crc_size = getCRCSize(poly);
-    uint32_t* opt = (uint32_t*)optPoly;
-    *(uint64_t*)opt = poly;     /*copy poly*/
-    uint64_t poly32 = poly << (32 - crc_size);
-    poly1x64_32_div(poly32, &u, &t); /*for 1^64 and U*/
-    *(uint64_t*)(opt + 2) = u;
-    int bits[] = {64, 96, 160, 224, 288, 352, 416, 480, 544, 608, 672, 736, 800, 864, 928, 992, 1056, 2016, 2080};
-    uint32_t tail = 0;
-    uint32_t poly_32 = (uint32_t)poly;
-    int j;
-
-    k = bits[0] + 8;
-    tail = poly_32;
-    for (j = 40; (uint32_t)j < k; j++) {
-        uint32_t mask;
-        mask = (tail & 0x80000000) ? poly_32 : 0;
-        tail += tail;
-        tail ^= mask;
-    }
-    opt[4 + 0] = (uint32_t)tail;
-
-    for (i = 1; i < ((sizeof(bits) / sizeof(bits[0])) - 2); i++) {
-        k = bits[i] + 8;
-        for (; (uint32_t)j < k; j++) {
-            uint32_t mask;
-            mask = (tail & 0x80000000) ? poly_32 : 0;
-            tail += tail;
-            tail ^= mask;
-        }
-        opt[4 + i] = (uint32_t)tail;
-    }
-}
-
-DML_CORE_OWN_INLINE(dmlc_status_t, calculate_crc_32u, (const uint8_t* const memory_region_ptr,
-        uint32_t bytes_to_hash,
-        uint32_t* const crc_ptr,
-        uint32_t polynomial))
-{
-    uint64_t poly = (uint64_t)polynomial | ((uint64_t)1u << (uint64_t)32u);
-    uint8_t  opt_poly_ptr[128];
-
-    own_gen_crc_opt_poly_8u(poly, opt_poly_ptr);
-    own_CRC_8u_k0(memory_region_ptr, bytes_to_hash, poly, opt_poly_ptr, *crc_ptr, crc_ptr);
-    return DML_STATUS_OK;
-}
-
-#if defined(_MSC_VER)
-#pragma optimize("", off)
-#pragma optimize("O3", on)
-#endif
-
-/**
-*  @todo
-*/
-#define _MM_XOR_PS(A,B) _mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(A),_mm_castsi128_ps(B)))
-/**
-*  @todo
-*/
-#define arg1_low32 ecx
-/**
-*  @todo
-*/
-static void own_CRC_8u_opt_k0(const uint8_t* src_ptr, uint32_t init_crc, int len0, int crc_size, uint32_t* koeff, uint32_t* crc_ptr)
-{
-    uint64_t pshufb_shf_table[] = {
-            0x8786858483828100, 0x8f8e8d8c8b8a8988,
-            0x0706050403020100, 0x000e0d0c0b0a0908};
-
-    int len = len0;
-    uint8_t ttt[128];
-    uint8_t* r11 = ttt;
-    uint8_t* ptr;
-
-    __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm12, xmm13;
-#ifndef DML_DISABLE_OPTIMIZATION_
-    __m128i xmm11;
-#endif // DML_DISABLE_OPTIMIZATION_
-
-    int eax, ecx, r9;
-    __m128i ENDIA_SHUF_MASK = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
-    __m128i mask2 = _mm_set_epi64x(0x00000000FFFFFFFF, 0xFFFFFFFFFFFFFFFF);
-    __m128i mask1 = _mm_set_epi64x(0x8080808080808080, 0x8080808080808080);
-
-    uint64_t q = *(uint64_t*)(koeff + 0);
-    q <<= (32 - crc_size);
-    uint64_t u = *(uint64_t*)(koeff + 2);
-    uint64_t k_64 = ((uint64_t)koeff[4]) << 32;
-    uint64_t k_96 = ((uint64_t)koeff[5]) << 32;
-    uint64_t k_160 = ((uint64_t)koeff[6]) << 32;
-    uint64_t k_224 = ((uint64_t)koeff[7]) << 32;
-    uint64_t k_288 = ((uint64_t)koeff[8]) << 32;
-    uint64_t k_352 = ((uint64_t)koeff[9]) << 32;
-    uint64_t k_416 = ((uint64_t)koeff[10]) << 32;
-    uint64_t k_480 = ((uint64_t)koeff[11]) << 32;
-    uint64_t k_544 = ((uint64_t)koeff[12]) << 32;
-    uint64_t k_608 = ((uint64_t)koeff[13]) << 32;
-    uint64_t k_672 = ((uint64_t)koeff[14]) << 32;
-    uint64_t k_736 = ((uint64_t)koeff[15]) << 32;
-    uint64_t k_800 = ((uint64_t)koeff[16]) << 32;
-    uint64_t k_864 = ((uint64_t)koeff[17]) << 32;
-    uint64_t k_928 = ((uint64_t)koeff[18]) << 32;
-    uint64_t k_992 = ((uint64_t)koeff[19]) << 32;
-    uint64_t k_1056 = ((uint64_t)koeff[20]) << 32;
-
-
-    ecx = init_crc;
-    //crc16_t10dif_01:
-    ecx = ecx << (32 - crc_size);
-#ifndef DML_DISABLE_OPTIMIZATION_
-    if (len < 256) {
-        goto _less_than_256;
-    }
-#endif // DML_DISABLE_OPTIMIZATION_
-    //; load the initial crc value
-    xmm10 = _mm_cvtsi32_si128(arg1_low32);               //movd	xmm10, arg1_low32; initial crc
-    //; crc value does not need to be byte - reflected, but it needs to be moved to the high part of the register.
-    //; because data will be byte - reflected and will align with initial crc at correct place.
-    xmm10 = _mm_slli_si128(xmm10, 12);
-    //; receive the initial 128B data, xor the initial crc value
-    xmm0 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 0));
-    xmm1 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 1));
-    xmm2 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 2));
-    xmm3 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 3));
-    xmm4 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 4));
-    xmm5 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 5));
-    xmm6 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 6));
-    xmm7 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 7));
-    xmm0 = _mm_shuffle_epi8(xmm0, ENDIA_SHUF_MASK);
-    //; XOR the initial_crc value
-    xmm0 = _mm_xor_si128(xmm0, xmm10);
-    xmm1 = _mm_shuffle_epi8(xmm1, ENDIA_SHUF_MASK);
-    xmm2 = _mm_shuffle_epi8(xmm2, ENDIA_SHUF_MASK);
-    xmm3 = _mm_shuffle_epi8(xmm3, ENDIA_SHUF_MASK);
-    xmm4 = _mm_shuffle_epi8(xmm4, ENDIA_SHUF_MASK);
-    xmm5 = _mm_shuffle_epi8(xmm5, ENDIA_SHUF_MASK);
-    xmm6 = _mm_shuffle_epi8(xmm6, ENDIA_SHUF_MASK);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm10 = _mm_set_epi64x(k_1056/*rk4*/, k_992/*rk3*/);
-    //; imm value of pclmulqdq instruction will determine which constant to use
-    //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    //; we subtract 256 instead of 128 to save one instruction from the loop
-    len -= 256;
-    //
-    //; at this section of the code, there is 128 * x + y(0 <= y < 128) bytes of buffer.The _fold_128_B_loop
-    //; loop will fold 128B at a time until we have 128 + y Bytes of buffer
-    //
-    //
-    //; fold 128B at a time.This section of the code folds 8 xmm registers in parallel
-    _fold_128_B_loop:
-    //
-    //; update the buffer pointer
-    src_ptr += 128;
-    xmm9 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 0));
-    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 1));
-    xmm9 = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
-    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
-    xmm8 = xmm0;
-    xmm13 = xmm1;
-    xmm0 = _mm_clmulepi64_si128(xmm0, xmm10, 0x0);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
-    xmm1 = _mm_clmulepi64_si128(xmm1, xmm10, 0x0);
-    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
-    xmm0 = _mm_xor_si128(xmm0, xmm9);
-    xmm0 = _MM_XOR_PS(xmm0, xmm8);
-    xmm1 = _mm_xor_si128(xmm1, xmm12);
-    xmm1 = _MM_XOR_PS(xmm1, xmm13);
-    xmm9 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 2));
-    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 3));
-    xmm9 = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
-    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
-    xmm8 = xmm2;
-    xmm13 = xmm3;
-    xmm2 = _mm_clmulepi64_si128(xmm2, xmm10, 0x0);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
-    xmm3 = _mm_clmulepi64_si128(xmm3, xmm10, 0x0);
-    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
-    xmm2 = _mm_xor_si128(xmm2, xmm9);
-    xmm2 = _MM_XOR_PS(xmm2, xmm8);
-    xmm3 = _mm_xor_si128(xmm3, xmm12);
-    xmm3 = _MM_XOR_PS(xmm3, xmm13);
-
-    xmm9 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 4));
-    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 5));
-    xmm9 = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
-    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
-    xmm8 = xmm4;
-    xmm13 = xmm5;
-    xmm4 = _mm_clmulepi64_si128(xmm4, xmm10, 0x0);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
-    xmm5 = _mm_clmulepi64_si128(xmm5, xmm10, 0x0);
-    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
-    xmm4 = _mm_xor_si128(xmm4, xmm9);
-    xmm4 = _MM_XOR_PS(xmm4, xmm8);
-    xmm5 = _mm_xor_si128(xmm5, xmm12);
-    xmm5 = _MM_XOR_PS(xmm5, xmm13);
-
-    xmm9 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 6));
-    xmm12 = _mm_loadu_si128((const __m128i*)(src_ptr + 16 * 7));
-    xmm9 = _mm_shuffle_epi8(xmm9, ENDIA_SHUF_MASK);
-    xmm12 = _mm_shuffle_epi8(xmm12, ENDIA_SHUF_MASK);
-    xmm8 = xmm6;
-    xmm13 = xmm7;
-    xmm6 = _mm_clmulepi64_si128(xmm6, xmm10, 0x0);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x11);
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x0);
-    xmm13 = _mm_clmulepi64_si128(xmm13, xmm10, 0x11);
-    xmm6 = _mm_xor_si128(xmm6, xmm9);
-    xmm6 = _MM_XOR_PS(xmm6, xmm8);
-    xmm7 = _mm_xor_si128(xmm7, xmm12);
-    xmm7 = _MM_XOR_PS(xmm7, xmm13);
-
-    len -= 128;
-    //; check if there is another 128B in the buffer to be able to fold
-    if (len >= 0) goto _fold_128_B_loop;                 //jge	_fold_128_B_loop
-    //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-    src_ptr += 128;
-    //; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
-    //; fold the 8 xmm registers to 1 xmm register with different constants
-    //
-    xmm10 = _mm_set_epi64x(k_928/*rk10*/, k_864/*rk9*/);
-    xmm8 = xmm0;
-    xmm0 = _mm_clmulepi64_si128(xmm0, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _MM_XOR_PS(xmm7, xmm0);
-
-    xmm10 = _mm_set_epi64x(k_800/*rk12*/, k_736/*rk11*/);
-    xmm8 = xmm1;
-    xmm1 = _mm_clmulepi64_si128(xmm1, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _MM_XOR_PS(xmm7, xmm1);
-    xmm10 = _mm_set_epi64x(k_672/*rk14*/, k_608/*rk13*/);
-    xmm8 = xmm2;
-    xmm2 = _mm_clmulepi64_si128(xmm2, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _mm_xor_si128(xmm7, xmm2);
-    xmm10 = _mm_set_epi64x(k_544/*rk16*/, k_480/*rk15*/);
-    xmm8 = xmm3;
-    xmm3 = _mm_clmulepi64_si128(xmm3, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _MM_XOR_PS(xmm7, xmm3);
-    xmm10 = _mm_set_epi64x(k_416/*rk18*/, k_352/*rk17*/);
-    xmm8 = xmm4;
-    xmm4 = _mm_clmulepi64_si128(xmm4, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _mm_xor_si128(xmm7, xmm4);
-    xmm10 = _mm_set_epi64x(k_288/*rk20*/, k_224/*rk19*/);
-    xmm8 = xmm5;
-    xmm5 = _mm_clmulepi64_si128(xmm5, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _MM_XOR_PS(xmm7, xmm5);
-    xmm10 = _mm_set_epi64x(k_160/*rk2*/, k_96/*rk1*/);
-    //        ; imm value of pclmulqdq instruction will determine which constant to use
-    xmm8 = xmm6;
-    xmm6 = _mm_clmulepi64_si128(xmm6, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _mm_xor_si128(xmm7, xmm6);
-    //
-    //        ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
-    //        ; instead of a cmp instruction, we use the negative flag with the jl instruction
-    len += (128 - 16);
-    if (len < 0) goto _final_reduction_for_128;          //        jl	_final_reduction_for_128
-    //        ; now we have 16 + y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
-    //        ; we can fold 16 bytes at a time if y >= 16
-    //        ; continue folding 16B at a time
-    _16B_reduction_loop:
-    xmm8 = xmm7;
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm0 = _mm_loadu_si128((const __m128i*)src_ptr);
-    xmm0 = _mm_shuffle_epi8(xmm0, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    src_ptr += 16;
-    len -= 16;
-    //        ; instead of a cmp instruction, we utilize the flags with the jge instruction
-    //        ; equivalent of : cmp arg3, 16 - 16
-    //        ; check if there is any more 16B in the buffer to be able to fold
-
-    if (len >= 0) goto _16B_reduction_loop;              //        jge	_16B_reduction_loop
-    //        ; now we have 16 + z bytes left to reduce, where 0 <= z < 16.
-    //        ; first, we reduce the data in the xmm7 register
-    _final_reduction_for_128:
-    //    ; check if any more data to fold.If not, compute the CRC of the final 128 bits
-    len += 16;
-    if (len == 0) goto _128_done;                        //        je	_128_done
-    //        ; here we are getting data that is less than 16 bytes.
-    //        ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
-    //        ; after that the registers need to be adjusted.
-#ifndef DML_DISABLE_OPTIMIZATION_
-    _get_last_two_xmms:
-#endif // DML_DISABLE_OPTIMIZATION_
-    xmm2 = xmm7;
-    xmm1 = _mm_loadu_si128((const __m128i*)(src_ptr - 16 + len));
-    xmm1 = _mm_shuffle_epi8(xmm1, ENDIA_SHUF_MASK);
-    //        ; get rid of the extra data that was loaded before
-    //        ; load the shift constant
-    //        lea	rax, [pshufb_shf_table + 16]
-    //        sub	rax, arg3
-    ptr = (uint8_t*)pshufb_shf_table + 16 - len;
-    xmm0 = _mm_loadu_si128((const __m128i*)ptr);
-    //
-    //        ; shift xmm2 to the left by arg3 bytes
-    xmm2 = _mm_shuffle_epi8(xmm2, xmm0);
-    xmm0 = _mm_xor_si128(xmm0, mask1);
-    xmm7 = _mm_shuffle_epi8(xmm7, xmm0);
-    xmm1 = _mm_blendv_epi8(xmm1, xmm2, xmm0);
-    //        ; fold 16 Bytes
-    xmm2 = xmm1;
-    xmm8 = xmm7;
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
-    xmm8 = _mm_clmulepi64_si128(xmm8, xmm10, 0x0);
-    xmm7 = _mm_xor_si128(xmm7, xmm8);
-    xmm7 = _mm_xor_si128(xmm7, xmm2);
-    _128_done:
-    //    ; compute crc of a 128 - bit value
-    xmm10 = _mm_set_epi64x(k_64/*rk6*/, k_96/*rk5*/);
-    xmm0 = xmm7;
-    //        ; 64b fold
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x1);
-    xmm0 = _mm_slli_si128(xmm0, 8);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    //        ; 32b fold
-    xmm0 = xmm7;
-    xmm0 = _mm_and_si128(xmm0, mask2);
-    xmm7 = _mm_srli_si128(xmm7, 12);
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x10);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    //        ; barrett reduction
-    _barrett:
-    xmm10 = _mm_set_epi64x(q/*rk8*/, u/*rk7*/);
-    xmm0 = xmm7;
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x01);
-    xmm7 = _mm_slli_si128(xmm7, 4);
-    xmm7 = _mm_clmulepi64_si128(xmm7, xmm10, 0x11);
-
-    xmm7 = _mm_slli_si128(xmm7, 4);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    eax = _mm_extract_epi32(xmm7, 1);
-#ifndef DML_DISABLE_OPTIMIZATION_
-    _cleanup:
-#endif // DML_DISABLE_OPTIMIZATION_
-    //    ; scale the result back to 16 bits
-    eax = ((uint32_t)eax) >> (32 - crc_size);
-    *crc_ptr = eax;
-    return;                                             //        ret
-
-    //align 16
-#ifndef DML_DISABLE_OPTIMIZATION_
-    _less_than_256:
-    //
-    //; check if there is enough buffer to be able to fold 16B at a time
-    //cmp	arg3, 32
-    //jl	_less_than_32
-    if (len < 32) {
-        goto _less_than_32;
-    }
-    xmm11 = ENDIA_SHUF_MASK;
-    //; if there is, load the constants
-    xmm10 = _mm_set_epi64x(k_160/*rk2*/, k_96/*rk1*/);
-    xmm0 = _mm_cvtsi32_si128(arg1_low32);
-    xmm0 = _mm_slli_si128(xmm0, 12);
-    xmm7 = _mm_loadu_si128((const __m128i*)src_ptr);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    //
-    //; update the buffer pointer
-    src_ptr += 16;
-    //
-    //; update the counter.subtract 32 instead of 16 to save one instruction from the loop
-    len -= 32;
-    goto _16B_reduction_loop;                            //jmp	_16B_reduction_loop
-
-    //align 16
-    _less_than_32:
-    //; mov initial crc to the return value. this is necessary for zero - length buffers.
-    eax = arg1_low32;                                    //mov	eax, arg1_low32
-    //test	arg3, arg3
-    if (len == 0) goto _cleanup;                         //je	_cleanup
-    //
-    xmm11 = ENDIA_SHUF_MASK;
-
-    xmm0 = _mm_cvtsi32_si128(arg1_low32);
-    xmm0 = _mm_slli_si128(xmm0, 12);
-    //cmp	arg3, 16
-    if (len == 16) goto _exact_16_left;                  //je	_exact_16_left
-    if (len < 16) goto _less_than_16_left;               //jl	_less_than_16_left
-
-    xmm7 = _mm_loadu_si128((const __m128i*)src_ptr);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    src_ptr += 16;
-    len -= 16;
-    xmm10 = _mm_set_epi64x(k_160/*rk2*/, k_96/*rk1*/);
-    goto _get_last_two_xmms;                             //jmp	_get_last_two_xmms
-    //align 16
-    _less_than_16_left:
-#endif // DML_DISABLE_OPTIMIZATION_
-    //; use stack space to load data less than 16 bytes, zero - out the 16B in memory first.
-    //
-    xmm1 = _mm_setzero_si128();
-
-    _mm_storeu_si128((__m128i*)r11, xmm1);
-    //cmp	arg3, 4
-    if (len < 4) goto _only_less_than_4;                 //jl	_only_less_than_4
-    //;	backup the counter value
-    r9 = len;
-    //cmp	arg3, 8
-    if (len < 8) goto _less_than_8_left;                 //jl	_less_than_8_left
-    //; load 8 Bytes
-    *(int64_t*)r11 = *(int64_t*)src_ptr;
-    r11 += 8;
-    len -= 8;
-    src_ptr += 8;
-    _less_than_8_left:
-    //cmp	arg3, 4
-    if (len < 4) goto _less_than_4_left;                 //jl	_less_than_4_left
-    //; load 4 Bytes
-    *(int*)r11 = *(int*)src_ptr;
-    r11 += 4;
-    len -= 4;
-    src_ptr += 4;                                        //add	arg2, 4
-    _less_than_4_left:
-    //
-    //cmp	arg3, 2
-    if (len < 2) goto _less_than_2_left;                 //jl	_less_than_2_left
-    //
-    //; load 2 Bytes
-    *(short*)r11 = *(short*)src_ptr;
-    r11 += 2;
-    len -= 2;
-    src_ptr += 2;
-    _less_than_2_left:
-    //cmp     arg3, 1
-    if (len < 1) goto _zero_left;                        //jl      _zero_left
-    //; load 1 Byte
-    *r11 = *src_ptr;
-    _zero_left:
-    xmm7 = _mm_loadu_si128((const __m128i*)ttt);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-
-    ptr = (uint8_t*)pshufb_shf_table + 16 - r9;
-    xmm0 = _mm_loadu_si128((const __m128i*)ptr);
-    xmm0 = _mm_xor_si128(xmm0, mask1);
-    //
-    xmm7 = _mm_shuffle_epi8(xmm7, xmm0);
-    goto _128_done;                                      //jmp	_128_done
-    //align 16
-#ifndef DML_DISABLE_OPTIMIZATION_
-    _exact_16_left:
-#endif // DML_DISABLE_OPTIMIZATION_
-    xmm7 = _mm_loadu_si128((const __m128i*)src_ptr);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-
-    goto _128_done;                                      //jmp	_128_done
-    _only_less_than_4:
-    //cmp	arg3, 3
-    if (len < 3) goto _only_less_than_3;                 //jl	_only_less_than_3
-    //; load 3 Bytes
-    r11[0] = src_ptr[0];
-
-    r11[1] = src_ptr[1];
-
-    r11[2] = src_ptr[2];
-    xmm7 = _mm_loadu_si128((const __m128i*)r11);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    xmm7 = _mm_srli_si128(xmm7, 5);
-    goto _barrett;                                       //jmp	_barrett
-    _only_less_than_3:
-    //cmp	arg3, 2
-    if (len < 2) goto _only_less_than_2;                 //jl	_only_less_than_2
-    //; load 2 Bytes
-    r11[0] = src_ptr[0];
-
-    r11[1] = src_ptr[1];
-    xmm7 = _mm_loadu_si128((const __m128i*)r11);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-    xmm7 = _mm_srli_si128(xmm7, 6);
-    goto _barrett;                                       //jmp	_barrett
-    _only_less_than_2:
-    //
-    //; load 1 Byte
-    eax = src_ptr[0];
-    r11[0] = eax;
-
-    xmm7 = _mm_loadu_si128((const __m128i*)r11);
-    xmm7 = _mm_shuffle_epi8(xmm7, ENDIA_SHUF_MASK);
-    xmm7 = _mm_xor_si128(xmm7, xmm0);
-
-    xmm7 = _mm_srli_si128(xmm7, 7);
-
-    goto _barrett;                                       //jmp	_barrett
-}
-
-/**
-*  @todo
-*/
-dmlc_status_t dmlc_own_calculate_crc_32u_noopt(const uint8_t* const memory_region_ptr,
-                                               uint32_t bytes_to_hash,
-                                               uint32_t* const crc_ptr,
-                                               uint32_t polynomial)
-{
-    // Current crc value
-    uint32_t current_crc = (*crc_ptr);
-
-    // Through all bytes
-    for (uint32_t i = 0u; i < bytes_to_hash; ++i)
-    {
-        // Calculate crc for current byte
-        current_crc = dmlc_own_crc_byte_32u(current_crc, memory_region_ptr[i], polynomial);
-    }
-
-    // Store result
-    (*crc_ptr) = current_crc;
-
-    return DML_STATUS_OK;
-}
\ No newline at end of file
diff --git a/sources/cores/src/avx512/dmlc_fill_8u.cxx b/sources/cores/src/avx512/dmlc_fill_8u.cxx
deleted file mode 100644
index 24de573..0000000
--- a/sources/cores/src/avx512/dmlc_fill_8u.cxx
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain optimized AVX512 implementation of the follow functions:
- *      - @ref dmlc_fill_with_pattern_8u()
- *
- * @date 10/29/2020
- *
- */
-
-#include "core_memory.h"
-#include "own_dmlc_definitions.h"
-
-DML_CORE_OWN_INLINE(void, opt_fill_with_pattern_8u_big, ( uint64_t        pattern,
-                                                          uint8_t  *const memory_region_ptr,
-                                                          uint32_t        bytes_to_process ) );
-
-DML_CORE_OWN_INLINE(void, opt_fill_with_pattern_8u_small, ( uint64_t        pattern,
-                                                           uint8_t  *const memory_region_ptr,
-                                                           uint32_t        bytes_to_process ) );
-
-DML_CORE_OWN_INLINE(dmlc_status_t, opt_fill_with_pattern_8u, ( uint64_t        pattern,
-                                                               uint8_t  *const memory_region_ptr,
-                                                               uint32_t        bytes_to_process ) )
-{
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr);
-
-    if (bytes_to_process < 64u)
-    {
-        dmlc_own_opt_fill_with_pattern_8u_small(pattern, memory_region_ptr, bytes_to_process);
-    }
-    else
-    {
-        dmlc_own_opt_fill_with_pattern_8u_big(pattern, memory_region_ptr, bytes_to_process);
-    }
-
-    return DML_STATUS_OK;
-}
-
-DML_CORE_OWN_INLINE(void, opt_fill_with_pattern_8u_big, ( uint64_t        pattern,
-                                                          uint8_t  *const memory_region_ptr,
-                                                          uint32_t        bytes_to_process ) )
-{
-    // Check pointer alignment
-    const size_t unaligned_bytes = (uintptr_t)memory_region_ptr % 64u;
-    const size_t unaligned_part_size = (0u == unaligned_bytes) ? 0u : 64u - unaligned_bytes;
-
-    __m512i zmm_pattern = _mm512_set1_epi64(pattern);
-
-    // Fill unaligned part of destination
-    if (0u != unaligned_part_size)
-    {
-        unsigned long long mask = 0xFFFFFFFFFFFFFFFFu >> unaligned_bytes;
-        __mmask64 mmask = _load_mask64(&mask);
-
-        _mm512_mask_storeu_epi8(memory_region_ptr, mmask, zmm_pattern);
-
-        pattern = (pattern << (unaligned_bytes * 8u)) | (pattern >> (64u - (unaligned_bytes * 8u)));
-        zmm_pattern = _mm512_set1_epi64(pattern);
-    }
-
-    // Fill aligned part of destination
-    const size_t aligned_part_size = bytes_to_process - unaligned_part_size;
-    const size_t head_size = aligned_part_size / sizeof(__m512i);
-    const size_t tail_size = aligned_part_size % sizeof(__m512i);
-
-    uint8_t *const aligned_memory_region_ptr = memory_region_ptr + unaligned_part_size;
-    __m512i *head_ptr = (__m512i *)aligned_memory_region_ptr;
-    __m512i *tail_ptr = (__m512i *)head_ptr + head_size;
-
-    // Fill head part
-    if (0u != head_size)
-    {
-        while (head_ptr != tail_ptr)
-        {
-            _mm512_store_si512(head_ptr, zmm_pattern);
-            head_ptr++;
-        }
-    }
-
-    // Fill tail part
-    if (0u != tail_size)
-    {
-        unsigned long long mask = ~(0xFFFFFFFFFFFFFFFFu << tail_size);
-        __mmask64 mmask = _load_mask64(&mask);
-        _mm512_mask_storeu_epi8(tail_ptr, mmask, zmm_pattern);
-    }
-
-}
-
-DML_CORE_OWN_INLINE(void, opt_fill_with_pattern_8u_small, ( uint64_t        pattern,
-                                                            uint8_t  *const memory_region_ptr,
-                                                            uint32_t        bytes_to_process ) )
-{
-    // Fill 1-63 bytes with 64bit pattern via two _mm256_mask_storeu_epi8 calls.
-
-    const size_t tail_size = bytes_to_process % sizeof(__m512i);
-
-    __m256i ymm1_pattern = _mm256_set1_epi64x(pattern);
-
-    unsigned long long mask_value = ~(0xFFFFFFFFFFFFFFFFu << tail_size);
-    __mmask32 mask_first = _load_mask32((uint32_t *)&mask_value);
-    _mm256_mask_storeu_epi8(memory_region_ptr, mask_first, ymm1_pattern);
-
-    __mmask32 mask_second = _load_mask32((uint32_t *)&mask_value + 1u);
-    _mm256_mask_storeu_epi8(memory_region_ptr + 32u, mask_second, ymm1_pattern);
-}
diff --git a/sources/cores/src/default/dmlc_compare_8u_px.cxx b/sources/cores/src/default/dmlc_compare_8u_px.cxx
deleted file mode 100644
index 9c93445..0000000
--- a/sources/cores/src/default/dmlc_compare_8u_px.cxx
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
- /**
-  * @brief Contain default implementation of the follow functions:
-  *      - @ref dmlc_own_compare_8u()
-  *      - @ref dmlc_own_compare_with_pattern_8u()
-  *
-  * @date 07/06/2021
-  *
-  */
-
-DML_CORE_OWN_INLINE(dmlc_status_t, compare_8u, (const uint8_t* first_vector_ptr,
-    const uint8_t* second_vector_ptr,
-    const uint32_t size,
-    uint32_t* const mismatch_offset_ptr))
-{
-    for (uint32_t i = 0u; i < size; i++)
-    {
-        if (first_vector_ptr[i] != second_vector_ptr[i])
-        {
-            *mismatch_offset_ptr = i;
-
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-
-    return DML_COMPARE_STATUS_EQ;
-}
-
-DML_CORE_OWN_INLINE(dmlc_status_t, compare_with_pattern_8u, (const uint8_t* memory_region_ptr,
-    const pattern_t pattern,
-    const uint32_t size,
-    uint32_t* const mismatch_offset_ptr))
-{
-    //Constants
-    const uint32_t pattern_size = sizeof(pattern_t);
-    const uint32_t pattern_chunk_count = size / pattern_size;
-    const uint64_t tail_bytes_count = size % pattern_size;
-    const uint64_t* const pattern_region_ptr = (uint64_t*)memory_region_ptr;
-
-    // Compare by pattern chunks
-    for (uint32_t i = 0u; i < pattern_chunk_count; i++)
-    {
-        if (pattern_region_ptr[i] != pattern)
-        {
-            *mismatch_offset_ptr = i * pattern_size;
-
-            return DML_COMPARE_STATUS_NE;
-        }
-    }
-
-    memory_region_ptr += size - tail_bytes_count;
-    pattern_t byte_pattern = pattern;
-
-    // Compare tail
-    for (uint32_t i = 0; i < tail_bytes_count; i++)
-    {
-        if (memory_region_ptr[i] != (uint8_t)byte_pattern)
-        {
-            *mismatch_offset_ptr = pattern_chunk_count * pattern_size + i;
-
-            return DML_COMPARE_STATUS_NE;
-        }
-
-        byte_pattern >>= OWN_BYTE_BIT_LENGTH;
-    }
-
-    return DML_COMPARE_STATUS_EQ;
-}
diff --git a/sources/cores/src/default/dmlc_copy_8u_px.cxx b/sources/cores/src/default/dmlc_copy_8u_px.cxx
deleted file mode 100644
index 3b84a9e..0000000
--- a/sources/cores/src/default/dmlc_copy_8u_px.cxx
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
- /**
-  * @brief Contain default implementation of the follow functions:
-  *      - @ref dmlc_own_copy_8u()
-  *      - @ref dmlc_own_move_8u()
-  *      - @ref dmlc_own_dualcast_copy_8u()
-  *
-  * @date 5/26/2021
-  *
-  */
-
-DML_CORE_OWN_INLINE(void, copy_8u, (const uint8_t *const source_ptr,
-    uint8_t *const destination_ptr,
-    uint32_t       bytes_to_process))
-{
-    // Current position in source vector
-    const uint8_t *source_current_ptr = (const uint8_t *)source_ptr;
-
-    // Current position in destination vector
-    uint8_t *destination_current_ptr = (uint8_t *)destination_ptr;
-
-    while (0u < bytes_to_process)
-    {
-        // Copy 1 byte
-        *destination_current_ptr = *source_current_ptr;
-
-        // Shift position in destination vector
-        destination_current_ptr++;
-
-        // Shift position in source vector
-        source_current_ptr++;
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-}
-
-
-DML_CORE_OWN_INLINE(void, move_8u, (const uint8_t *const source_ptr,
-    uint8_t *const destination_ptr,
-    uint32_t       bytes_to_process))
-{
-    // Current position in source vector
-    const uint8_t *source_current_ptr = (const uint8_t *)(source_ptr + bytes_to_process);
-
-    // Current position in destination vector
-    uint8_t *destination_current_ptr = (uint8_t *)(destination_ptr + bytes_to_process);
-
-    while (0u < bytes_to_process)
-    {
-        // Shift position in destination vector
-        destination_current_ptr--;
-
-        // Shift position in source vector
-        source_current_ptr--;
-
-        // Copy 1 byte
-        (*destination_current_ptr) = (*source_current_ptr);
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-}
-
-
-DML_CORE_OWN_INLINE(void, dualcast_copy_8u, (const uint8_t *const source_ptr,
-    uint8_t *const first_destination_ptr,
-    uint8_t *const second_destination_ptr,
-    uint32_t       bytes_to_process))
-{
-    // Current position in source vector 64u
-    const uint8_t *source_current_ptr = (const uint8_t *)source_ptr;
-
-    // Current position in first destination vector 64u
-    uint8_t *first_destination_current_ptr = (uint8_t *)first_destination_ptr;
-
-    // Current position in second destination vector 64u
-    uint8_t *second_destination_current_ptr = (uint8_t *)second_destination_ptr;
-
-    while (0 < bytes_to_process)
-    {
-        // Copy 1 byte to first destination vector
-        (*first_destination_current_ptr) = (*source_current_ptr);
-
-        // Copy 1 byte to second destination vector
-        (*second_destination_current_ptr) = (*source_current_ptr);
-
-        // Shift position in first destination vector
-        first_destination_current_ptr++;
-
-        // Shift position in second destination vector
-        second_destination_current_ptr++;
-
-        // Shift position in source vector
-        source_current_ptr++;
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-}
diff --git a/sources/cores/src/default/dmlc_crc_16u_32u_px.cxx b/sources/cores/src/default/dmlc_crc_16u_32u_px.cxx
deleted file mode 100644
index 16db5e7..0000000
--- a/sources/cores/src/default/dmlc_crc_16u_32u_px.cxx
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
- /**
-  * @brief Contain implementation of the follow functions:
-  *      - @ref dmlc_calculate_crc_32u()
-  *
-  * @date 7/20/2021
-  *
-  */
-
-DML_CORE_OWN_INLINE(dmlc_status_t, calculate_crc_32u, (const uint8_t* const memory_region_ptr,
-    uint32_t bytes_to_hash,
-    uint32_t* const crc_ptr,
-    uint32_t polynomial))
-{
-     // Current crc value
-    uint32_t current_crc = (*crc_ptr);
-
-    // Through all bytes
-    for (uint32_t i = 0u; i < bytes_to_hash; ++i)
-    {
-        // Calculate crc for current byte
-        current_crc = dmlc_own_crc_byte_32u(current_crc, memory_region_ptr[i], polynomial);
-    }
-
-    // Store result
-    (*crc_ptr) = current_crc;
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/default/dmlc_fill_8u.cxx b/sources/cores/src/default/dmlc_fill_8u.cxx
deleted file mode 100644
index 1d4c9cd..0000000
--- a/sources/cores/src/default/dmlc_fill_8u.cxx
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain default implementation of the follow functions:
- *      - @ref dmlc_fill_with_pattern_8u()
- *
- * @date 10/29/2020
- *
- */
-
-#include "core_memory.h"
-#include "own_dmlc_definitions.h"
-
-DML_CORE_OWN_INLINE(dmlc_status_t, opt_fill_with_pattern_8u, ( uint64_t        pattern,
-                                                               uint8_t  *const memory_region_ptr,
-                                                               uint32_t        bytes_to_process ) )
-{
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-
-    // Current position in memory region 64u
-    uint64_t *memory_region_current_64u_ptr = (uint64_t *)memory_region_ptr;
-
-    // Current pattern 64u to fill with
-    const uint64_t *pattern_current_64u_ptr = (const uint64_t *)(&pattern);
-
-    // Current pattern 8u to fill with
-    const uint8_t *pattern_current_8u_ptr   = (const uint8_t *)(&pattern);
-
-    // Current position in memory region 8u
-    uint8_t *memory_region_current_8u_ptr;
-
-    // Fill body
-    while(sizeof(uint64_t) <= bytes_to_process)
-    {
-        // Put 8 bytes to current memory region
-        (*memory_region_current_64u_ptr) =
-        (*pattern_current_64u_ptr);
-
-        // Shift position in memory region
-        memory_region_current_64u_ptr++;
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint64_t);
-    }
-
-    // Get position in memory region
-    memory_region_current_8u_ptr = (uint8_t *)memory_region_current_64u_ptr;
-
-    // Fill tail
-    while(0 < bytes_to_process)
-    {
-        // Put 1 byte to current memory region
-        (*memory_region_current_8u_ptr) =
-        (*pattern_current_8u_ptr);
-
-        // Shift position in memory region
-        memory_region_current_8u_ptr++;
-
-        // Shift position in current pattern
-        pattern_current_8u_ptr++;
-
-        // Decrease bytes counter
-        bytes_to_process -= sizeof(uint8_t);
-    }
-
-    // Success
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/dmlc_cache_8u.c b/sources/cores/src/dmlc_cache_8u.c
deleted file mode 100644
index ac388e6..0000000
--- a/sources/cores/src/dmlc_cache_8u.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of functions for cache manipulation
- * @date 2/25/2020
- *
- * @details Function list:
- *          - @ref dmlc_move_cache_to_memory
- *          - @ref dmlc_copy_cache_to_memory
- *
- */
-
-
-#include "core_cpu_features.h"
-#include "own_dmlc_definitions.h"
-
-#define OWN_CACHE_LINE_BYTE_SIZE 64u
-
-DML_CORE_API(dmlc_status_t, move_cache_to_memory_8u, (const uint8_t  *memory_region_ptr,
-                                                      const uint32_t bytes_to_flush))
-{
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-
-    // Constants
-    const uint64_t start_address    = (uint64_t) memory_region_ptr;
-    const uint32_t cache_align      = OWN_CACHE_LINE_BYTE_SIZE - (start_address % OWN_CACHE_LINE_BYTE_SIZE);
-    const uint32_t cache_line_count = (bytes_to_flush + OWN_CACHE_LINE_BYTE_SIZE) / OWN_CACHE_LINE_BYTE_SIZE - 1u;
-
-    // Variables
-    void *memory_address_ptr = (void*) memory_region_ptr;
-
-    #if !defined (PX)
-    _mm_clflushopt(memory_address_ptr);
-    #else
-    _mm_clflush(memory_address_ptr);
-    #endif
-
-    // Align pointer
-    memory_region_ptr += cache_align;
-
-    for (uint32_t i = 0u; i < cache_line_count; i++)
-    {
-        #if !defined (PX)
-        _mm_clflushopt(memory_address_ptr);
-        #else
-        _mm_clflush(memory_address_ptr);
-        #endif
-        memory_region_ptr += OWN_CACHE_LINE_BYTE_SIZE;
-    }
-
-    return DML_STATUS_OK;
-}
-
-
-DML_CORE_API(dmlc_status_t, copy_cache_to_memory_8u, (const uint8_t  *memory_region_ptr,
-                                                           const uint32_t bytes_to_flush))
-{
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-
-    #if !defined (PX)
-    // Constants
-    const uint64_t start_address    = (uint64_t) memory_region_ptr;
-    const uint32_t cache_align      = OWN_CACHE_LINE_BYTE_SIZE - (start_address % OWN_CACHE_LINE_BYTE_SIZE);
-    const uint32_t cache_line_count = (bytes_to_flush + OWN_CACHE_LINE_BYTE_SIZE) / OWN_CACHE_LINE_BYTE_SIZE - 1u;
-
-    // Variable
-    void *memory_address_ptr = (void*) memory_region_ptr;
-
-    _mm_clwb(memory_address_ptr);
-
-    // Align pointer
-    memory_region_ptr += cache_align;
-
-    for (uint32_t i = 0u; i < cache_line_count; i++)
-    {
-        _mm_clwb(memory_address_ptr);
-        memory_region_ptr += OWN_CACHE_LINE_BYTE_SIZE;
-    }
-    #endif
-
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/dmlc_compare_8u.c b/sources/cores/src/dmlc_compare_8u.c
deleted file mode 100644
index 7c9c23e..0000000
--- a/sources/cores/src/dmlc_compare_8u.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_compare_8u()
- *      - @ref dmlc_compare_pattern_8u()
- *
- * @date 2/10/2020
- *
- */
-
-#include "core_compare.h"
-#include "own_dmlc_definitions.h"
-#if defined(AVX512)
-#include "avx512/dmlc_compare_8u_k0.cxx"
-#else
-#include "default/dmlc_compare_8u_px.cxx"
-#endif
-
-
-DML_CORE_API(dmlc_status_t, compare_8u, (const uint8_t* first_vector_ptr,
-    const uint8_t* second_vector_ptr,
-    const uint32_t size,
-    uint32_t* const mismatch_offset_ptr))
-{
-    return dmlc_own_compare_8u(first_vector_ptr, second_vector_ptr, size, mismatch_offset_ptr);
-}
-
-DML_CORE_API(dmlc_status_t, compare_with_pattern_8u, (const uint8_t *memory_region_ptr,
-                                                      const pattern_t pattern,
-                                                      const uint32_t size,
-                                                      uint32_t *const mismatch_offset_ptr))
-{
-    return dmlc_own_compare_with_pattern_8u(memory_region_ptr, pattern, size, mismatch_offset_ptr);
-}
diff --git a/sources/cores/src/dmlc_copy_8u.c b/sources/cores/src/dmlc_copy_8u.c
deleted file mode 100644
index 220d603..0000000
--- a/sources/cores/src/dmlc_copy_8u.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_copy_forward_8u()
- *      - @ref dmlc_copy_backward_8u()
- *      - @ref dmlc_dualcast_copy_8u()
- *
- * @date 2/20/2020
- *
- */
-
-
-#include "core_memory.h"
-#include "own_dmlc_definitions.h"
-#if defined(AVX512)
-#include "avx512/dmlc_copy_8u_k0.cxx"
-#else
-#include "default/dmlc_copy_8u_px.cxx"
-#endif
-
-/** Checks 0:11 bits for equality **/
-#define OWN_BAD_ARGUMENT_DUALCAST_DST_ALIGNMENT(dst_ptr1, dst_ptr2)         \
-        DML_CORE_BAD_ARGUMENT_RETURN( ((((uint64_t) (dst_ptr1)) & 0xFFFu) != \
-                                      (((uint64_t) (dst_ptr2)) & 0xFFFu)),  \
-                                          DML_STATUS_DUALCAST_ALIGN_ERROR )
-
-
-DML_CORE_API(dmlc_status_t, copy_8u, ( const uint8_t  *const source_ptr,
-                                                     uint8_t  *const destination_ptr,
-                                                     uint32_t        bytes_to_process ) )
-{
-    // Main action
-    dmlc_own_copy_8u(source_ptr, destination_ptr, bytes_to_process);
-
-    // Success
-    return DML_STATUS_OK;
-}
-
-
-DML_CORE_API(dmlc_status_t, move_8u, ( const uint8_t  *const source_ptr,
-                                                      uint8_t  *const destination_ptr,
-                                                      uint32_t        bytes_to_process ) )
-{
-    const uint8_t * const src_begin = source_ptr;
-    const uint8_t * const src_end   = source_ptr + bytes_to_process;
-    const uint8_t * const dst_begin = destination_ptr;
-    const uint8_t * const dst_end   = destination_ptr + bytes_to_process;
-
-    // If memory regions do not overlap:
-    if (src_end <= dst_begin || src_begin >= dst_end)
-    {
-        return dmlc_copy_8u(source_ptr, destination_ptr, bytes_to_process);
-    }
-
-    dmlc_own_move_8u(source_ptr, destination_ptr, bytes_to_process);
-
-    // Success
-    return DML_STATUS_OK;
-}
-
-
- DML_CORE_API(dmlc_status_t, dualcast_copy_8u, ( const uint8_t  *const source_ptr,
-                                                       uint8_t  *const first_destination_ptr,
-                                                       uint8_t  *const second_destination_ptr,
-                                                       uint32_t        bytes_to_process ) )
-{
-    // Main action
-    dmlc_own_dualcast_copy_8u(source_ptr, first_destination_ptr, second_destination_ptr, bytes_to_process);
-
-    // Success
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/dmlc_crc_16u_32u.c b/sources/cores/src/dmlc_crc_16u_32u.c
deleted file mode 100644
index edf7d2d..0000000
--- a/sources/cores/src/dmlc_crc_16u_32u.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_calculate_crc_16u()
- *      - @ref dmlc_calculate_crc_32u()
- *      - @ref dmlc_calculate_crc_reflected_32u()
- *
- * @date 2/5/2020
- *
- */
-
-
-#include "core_hash_functions.h"
-#include "own_dmlc_definitions.h"
-#include "own_dmlc_crc_16u_32u.cxx"
-#include "own_dmlc_byte_op.cxx"
-
-#if defined(AVX512)
-#include "avx512/dmlc_crc_16u_32u_k0.cxx"
-#else
-#include "default/dmlc_crc_16u_32u_px.cxx"
-#endif
-
-
-
-DML_CORE_API(dmlc_status_t, calculate_crc_16u, (const uint8_t  *const memory_region_ptr,
-                                                      uint32_t bytes_to_hash,
-                                                      uint16_t *const crc_ptr,
-                                                      uint16_t polynomial))
-{
-    // Check input arguments
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-    DML_CORE_CHECK_NULL_POINTER(crc_ptr)
-
-    // Current crc value
-    uint16_t current_crc = (*crc_ptr);
-
-    // Through all bytes
-    for(uint32_t i = 0u; i < bytes_to_hash; ++i)
-    {
-        // Calculate crc for current byte
-        current_crc  = dmlc_own_crc_byte_16u(current_crc, memory_region_ptr[i], polynomial);
-    }
-
-    // Store result
-    (*crc_ptr) = current_crc;
-
-    return DML_STATUS_OK;
-}
-
-#if defined(_MSC_VER)
-#define BORDER_OPT  256
-#else
-#define BORDER_OPT  256
-#endif
-
-
-DML_CORE_API(dmlc_status_t, calculate_crc_32u, (const uint8_t  *const memory_region_ptr,
-                                                      uint32_t bytes_to_hash,
-                                                      uint32_t *const crc_ptr,
-                                                      uint32_t polynomial))
-{
-#if defined(AVX512)
-    if (bytes_to_hash < BORDER_OPT)
-        return dmlc_own_calculate_crc_32u_noopt(memory_region_ptr, bytes_to_hash, crc_ptr, polynomial);
-#endif 
-    return dmlc_own_calculate_crc_32u(memory_region_ptr, bytes_to_hash, crc_ptr, polynomial);
-}
-
-
-DML_CORE_API(dmlc_status_t, calculate_crc_reflected_32u, (const uint8_t  *const memory_region_ptr,
-                                                                uint32_t bytes_to_hash,
-                                                                uint32_t *const crc_ptr,
-                                                                uint32_t polynomial))
-{
-    // Check input arguments
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-    DML_CORE_CHECK_NULL_POINTER(crc_ptr)
-
-    // Current crc value
-    uint32_t current_crc = (*crc_ptr);
-
-    // Temporary storage for reversed value
-    uint8_t  reversed_value;
-
-    // Through all bytes
-    for(uint32_t i = 0u; i < bytes_to_hash; ++i)
-    {
-        // Get current byte
-        reversed_value = memory_region_ptr[i];
-
-        // Reverse bits
-        reversed_value = dmlc_own_reverse_8u(reversed_value);
-
-        // Calculate crc for current reversed byte
-        current_crc = dmlc_own_crc_byte_32u(current_crc, reversed_value, polynomial);
-    }
-
-    // Store result
-    (*crc_ptr) = current_crc;
-
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/dmlc_delta_record_8u.c b/sources/cores/src/dmlc_delta_record_8u.c
deleted file mode 100644
index 3bd46cc..0000000
--- a/sources/cores/src/dmlc_delta_record_8u.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @details Contain implementation for Delta Record feature:
- *       - @ref dmlc_create_delta_record_8u
- *
- * @date 2/17/2020
- *
- */
-
-#include "core_compare.h"
-#include "own_dmlc_definitions.h"
-
-
-/**
- * @defgroup own_delta_record Delta record own API
- * @ingroup core_own
- * @{
- * @brief Delta record own definitions and functions
- *
- */
-
-/* ------ DELTA RECORD INTERNAL DEFINITIONS ------ */
-
-typedef pattern_t region_t;            /**< Redefinition for the @ref pattern_t type        */
-typedef uint16_t  offset_t;            /**< Redefinition to make more abstract offset type  */
-
-/* Delta note sizes  */
-#define DELTA_NOTE_OFFSET_FIELD_SIZE  sizeof(offset_t)    /**< Size of offset field in delta note */
-#define DELTA_NOTE_REGION_FIELD_SIZE  sizeof(region_t)    /**< Size of delta field in delta note */
-#define DELTA_NOTE_SIZE (DELTA_NOTE_OFFSET_FIELD_SIZE + DELTA_NOTE_REGION_FIELD_SIZE) /**< Delta note size*/
-
-/* Checkers */
-
-/**< Input pointer must be aligned to 8 bytes */
-#define OWN_DELTA_CHECK_PTR_ALIGNMENT(ptr) \
-    DML_CORE_BAD_ARGUMENT_RETURN((((uint64_t)ptr) % 8u), DML_STATUS_DELTA_ALIGN_ERROR)
-
-#define MAX_AVAILABLE_INPUT_SIZE 0x7FFF8u                 /**< Input vector size limit*/
-
-#if defined(__GNUC__)
-    typedef struct __attribute__ ((__packed__))
-#elif(_MSC_VER)
-    #pragma pack(2)
-    typedef struct
-#else
-    #error Compiler not supported
-#endif
-{
-    offset_t offset;          /**< Offset of mismatched region, which delta written in delta field*/
-    region_t reference_data;  /**< Contain delta between standard vector and compared*/
-} own_delta_note_t;           /**< Presents a single element of Delta Record stream */
-
-/** @} */
-
-/* ------ DELTA RECORD PUBLIC FUNCTIONS IMPLEMENTATION ------ */
-
-DML_CORE_API(dmlc_status_t, create_delta_record_8u, (const uint8_t *reference_vector_ptr,
-                                                     const uint8_t *second_vector_ptr,
-                                                     const uint32_t compared_bytes,
-                                                     const uint32_t delta_record_max_size,
-                                                     uint8_t* delta_record_ptr,
-                                                     uint32_t *const record_size_ptr))
-{
-    DML_CORE_CHECK_NULL_POINTER(reference_vector_ptr)
-    DML_CORE_CHECK_NULL_POINTER(second_vector_ptr)
-    DML_CORE_CHECK_NULL_POINTER(delta_record_ptr)
-    DML_CORE_CHECK_NULL_POINTER(record_size_ptr)
-    OWN_DELTA_CHECK_PTR_ALIGNMENT(reference_vector_ptr)
-    OWN_DELTA_CHECK_PTR_ALIGNMENT(second_vector_ptr)
-
-    (*record_size_ptr) = 0u;
-
-    DML_CORE_CHECK_INPUT_SIZE(compared_bytes % DELTA_NOTE_REGION_FIELD_SIZE, DML_STATUS_DELTA_ALIGN_ERROR)
-    DML_CORE_CHECK_INPUT_SIZE(compared_bytes > MAX_AVAILABLE_INPUT_SIZE, DML_STATUS_DELTA_OFFSET_ERROR)
-    DML_CORE_CHECK_OUTPUT_SIZE(delta_record_max_size % DELTA_NOTE_SIZE, DML_STATUS_DELTA_INPUT_SIZE_ERROR)
-    DML_CORE_CHECK_OUTPUT_SIZE(0u == delta_record_max_size, DML_STATUS_DELTA_INPUT_SIZE_ERROR)
-
-    // Delta Record
-    const uint32_t delta_note_count           = delta_record_max_size / DELTA_NOTE_SIZE;
-    const offset_t regions_count              = (offset_t)(compared_bytes / DELTA_NOTE_REGION_FIELD_SIZE);
-    own_delta_note_t* current_delta_notes_ptr = (own_delta_note_t*) delta_record_ptr;
-    own_delta_note_t* end_delta_notes_ptr     = current_delta_notes_ptr + delta_note_count;
-
-    // Create delta
-    for (offset_t i = 0u; i < regions_count; i++)
-    {
-        const uint64_t base_region   = *(uint64_t *) reference_vector_ptr;
-        const uint64_t vector_region = *(uint64_t *) second_vector_ptr;
-
-        reference_vector_ptr += DELTA_NOTE_REGION_FIELD_SIZE;
-        second_vector_ptr    += DELTA_NOTE_REGION_FIELD_SIZE;
-
-        // Write delta note into delta record in case:
-        if (base_region != vector_region)
-        {
-            if(current_delta_notes_ptr < end_delta_notes_ptr)
-            {
-                current_delta_notes_ptr->reference_data = base_region;
-                current_delta_notes_ptr->offset         = i;
-                current_delta_notes_ptr++;
-                (*record_size_ptr) += DELTA_NOTE_SIZE;
-            }
-            else
-            {
-                return DML_STATUS_DELTA_RECORD_SIZE_ERROR;
-            }
-        }
-    }
-
-    return DML_STATUS_OK;
-}
-
-
-DML_CORE_API(dmlc_status_t, apply_delta_record_8u, (uint8_t * memory_region_ptr,
-                                                   const uint8_t  *delta_record_ptr,
-                                                   const uint32_t memory_region_size,
-                                                   const uint32_t delta_record_size))
-{
-    DML_CORE_CHECK_NULL_POINTER(memory_region_ptr)
-    DML_CORE_CHECK_NULL_POINTER(delta_record_ptr)
-    OWN_DELTA_CHECK_PTR_ALIGNMENT(memory_region_ptr)
-    DML_CORE_CHECK_INPUT_SIZE(memory_region_size > MAX_AVAILABLE_INPUT_SIZE, DML_STATUS_DELTA_INPUT_SIZE_ERROR)
-    DML_CORE_CHECK_INPUT_SIZE(memory_region_size % DELTA_NOTE_REGION_FIELD_SIZE, DML_STATUS_DELTA_ALIGN_ERROR)
-    DML_CORE_CHECK_INPUT_SIZE(delta_record_size % DELTA_NOTE_SIZE, DML_STATUS_DELTA_RECORD_SIZE_ERROR)
-    DML_CORE_CHECK_OVERLAPPING_FORWARD(delta_record_ptr, memory_region_ptr, memory_region_size)
-    DML_CORE_CHECK_OVERLAPPING_FORWARD(memory_region_ptr, delta_record_ptr, delta_record_size)
-
-    // Constants
-    const uint32_t delta_notes_count = delta_record_size / DELTA_NOTE_SIZE;
-
-    // Variables
-    own_delta_note_t *delta_note_ptr = (own_delta_note_t *) delta_record_ptr;
-    region_t *regions_ptr            = (region_t *) memory_region_ptr;
-
-    for (uint32_t i = 0u; i < delta_notes_count; i++)
-    {
-        const offset_t region_offset  = delta_note_ptr[i].offset;
-        const region_t reference_data = delta_note_ptr[i].reference_data;
-
-        if (region_offset < memory_region_size)
-        {
-            regions_ptr[region_offset] = reference_data;
-        }
-        else
-        {
-            return DML_STATUS_MEMORY_OVERFLOW_ERROR;
-        }
-    }
-
-    return DML_STATUS_OK;
-}
diff --git a/sources/cores/src/dmlc_fill_8u.c b/sources/cores/src/dmlc_fill_8u.c
deleted file mode 100644
index e213645..0000000
--- a/sources/cores/src/dmlc_fill_8u.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_fill_with_pattern_8u()
- *
- * @date 2/21/2020
- *
- */
-
-
-#include "core_memory.h"
-#include "own_dmlc_definitions.h"
-
-#if defined(AVX512)
-    // TODO: I cannot load mask on MSVC17, so I disabled optimizations
-    #if (_MSC_VER >= 1928) || defined(__GNUC__)
-        #include "avx512/dmlc_fill_8u.cxx"
-    #else
-        #include "default/dmlc_fill_8u.cxx"
-    #endif
-#else
-    #include "default/dmlc_fill_8u.cxx"
-#endif
-
-DML_CORE_API(dmlc_status_t, fill_with_pattern_8u, ( uint64_t        pattern,
-                                                     uint8_t  *const memory_region_ptr,
-                                                     uint32_t        bytes_to_process ) )
-{
-    return dmlc_own_opt_fill_with_pattern_8u(pattern, memory_region_ptr, bytes_to_process);
-}
diff --git a/sources/cores/src/include/own_dmlc_checkers.h b/sources/cores/src/include/own_dmlc_checkers.h
deleted file mode 100644
index 063a691..0000000
--- a/sources/cores/src/include/own_dmlc_checkers.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
-* @brief
-* @date 2/10/2020
-*
-* @addtogroup core_own
-* @{
-*/
-
-#include "core_definitions.h"
-
-#ifndef DML_OWN_BAD_ARGUMENT_MACROS_HPP_
-#define DML_OWN_BAD_ARGUMENT_MACROS_HPP_
-
-#ifdef DML_CORES_BADARG_CHECK
-    #define DML_CORE_BAD_ARGUMENT_RETURN(expression, error_code) \
-    if(expression) \
-    { \
-        return (error_code); \
-    }
-
-#else
-    #define DML_CORE_BAD_ARGUMENT_RETURN(expression, error_code ) /**< */
-#endif
-
-    // Bad Argument wrappers
-    #define DML_CORE_CHECK_NULL_POINTER(pointer) \
-        DML_CORE_BAD_ARGUMENT_RETURN( NULL == (pointer), DML_STATUS_NULL_POINTER_ERROR)
-
-    #define DML_CORE_CHECK_OVERLAPPING_FORWARD(dst_ptr, src_ptr, length) \
-        DML_CORE_BAD_ARGUMENT_RETURN((( (uint64_t) (src_ptr) ) <= ( (uint64_t) (dst_ptr) )) && \
-                                     ( ( (uint64_t)((src_ptr) + (length)) ) > ( (uint64_t) (dst_ptr)) ), DML_STATUS_OVERLAPPING_BUFFER_ERROR)
-
-    #define DML_CORE_CHECK_OVERLAPPING_BACKWARD(dst_ptr, src_ptr, length) \
-        DML_CORE_CHECK_OVERLAPPING_FORWARD(src_ptr, dst_ptr, length)
-
-    #define DML_CORE_CHECK_OVERLAPPING(pointer1, pointer2, length) \
-        DML_CORE_CHECK_OVERLAPPING_FORWARD(pointer1, pointer2, length) \
-        DML_CORE_CHECK_OVERLAPPING_BACKWARD(pointer1, pointer2, length)
-
-
-    #define DML_CORE_CHECK_INPUT_SIZE(condition, status) \
-        DML_CORE_BAD_ARGUMENT_RETURN((condition), (status))
-
-    #define DML_CORE_CHECK_OUTPUT_SIZE(condition, status) \
-        DML_CORE_BAD_ARGUMENT_RETURN((condition), (status))
-
-#endif //DML_OWN_BAD_ARGUMENT_MACROS_HPP_
-
-/** @} */
diff --git a/sources/cores/src/include/own_dmlc_definitions.h b/sources/cores/src/include/own_dmlc_definitions.h
deleted file mode 100644
index e27cdc8..0000000
--- a/sources/cores/src/include/own_dmlc_definitions.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @file
- * @date 2/10/2020
- *
- * @defgroup core_own Own API
- * @ingroup core_src
- *
- * @defgroup core_own_definition Internal Definitions
- * @ingroup core_own
- * @{
- *
- * @brief Contains general definitions for internal use in Intel® Data Mover Library (Intel® DML) Cores.
- *
- */
-
-#if defined(_MSC_BUILD)
-#include <intrin.h>
-#elif defined(__GNUC__)
-#include <x86intrin.h>
-#else
-#error "Unsupported compiler"
-#endif
-
-#include "core_definitions.h"
-#include "own_dmlc_checkers.h"
-
-#ifndef OWN_KERNEL_DEFINITIONS_H__
-#define OWN_KERNEL_DEFINITIONS_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Define NULL pointer value */
-#ifndef NULL
-    #ifdef  __cplusplus
-        #define NULL    0
-    #else
-        #define NULL    ((void *)0)
-    #endif
-#endif
-
-/* ------ Defines ------ */
-#define OWN_BYTE_BIT_LENGTH 8u                /**< Byte bit-length*/
-#define OWN_HIGH_BIT_MASK_16U ( 0x8000u )     /**< Mask for checking high bit in uint16 value */
-#define OWN_HIGH_BIT_MASK_32U ( 0x80000000u ) /**< Mask for checking high bit in uint16 value */
-
-/* ------ Enumerations ------ */
-
-/**
- * @brief Kernel boolean type
- */
-typedef enum
-{
-    OWN_BOOL_FALSE = 0u,    /**< Bool True  */
-    OWN_BOOL_TRUE  = 1u     /**< Bool False */
-} kernel_bool_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //OWN_KERNEL_DEFINITIONS_H__
-
-/** @} */
diff --git a/sources/cores/src/own_dmlc_byte_op.cxx b/sources/cores/src/own_dmlc_byte_op.cxx
deleted file mode 100644
index bee00e5..0000000
--- a/sources/cores/src/own_dmlc_byte_op.cxx
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_own_reverse_8u()
- *
- * @date 3/5/2020
- *
- */
-
-
-DML_CORE_OWN_INLINE(uint8_t, reverse_8u, ( uint8_t byte ) )
-{
-    // Value to return
-    uint8_t reversed_value = byte;
-
-    reversed_value = ((reversed_value & 0x55u) << 1u) | ((reversed_value & 0xAAu) >> 1u);
-    reversed_value = ((reversed_value & 0x33u) << 2u) | ((reversed_value & 0xCCu) >> 2u);
-    reversed_value = ((reversed_value & 0x0Fu) << 4u) | ((reversed_value & 0xF0u) >> 4u);
-
-    return reversed_value;
-}
diff --git a/sources/cores/src/own_dmlc_crc_16u_32u.cxx b/sources/cores/src/own_dmlc_crc_16u_32u.cxx
deleted file mode 100644
index e096ab1..0000000
--- a/sources/cores/src/own_dmlc_crc_16u_32u.cxx
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contain implementation of the follow functions:
- *      - @ref dmlc_own_crc_byte_16u()
- *      - @ref dmlc_own_crc_byte_32u()
- *
- * @date 2/24/2020
- *
- */
-
-DML_CORE_OWN_INLINE(uint16_t, crc_byte_16u, ( uint16_t init_crc,
-                                             uint8_t  next_byte,
-                                             uint16_t polynomial ) )
-{
-    // Current crc value
-    uint16_t current_crc = init_crc ^ (next_byte << OWN_CRC16_BYTE_SHIFT);
-
-    // Modulo-2 division bit by bit
-    for(uint8_t bit = 0u; bit < OWN_BYTE_BIT_LENGTH; ++bit)
-    {
-        // Check high bit
-        current_crc = (current_crc & OWN_HIGH_BIT_MASK_16U) ?
-                      // If high bit is set - shift it to the left and XOR with polynomial
-                      ((current_crc << 1u) ^ polynomial) :
-                      // Else just shift it to the left
-                      (current_crc << 1u);
-    }
-
-    return current_crc;
-}
-
-
-DML_CORE_OWN_INLINE(uint32_t, crc_byte_32u, ( uint32_t init_crc,
-                                             uint8_t  next_byte,
-                                             uint32_t polynomial ) )
-{
-    // Current crc value
-    uint32_t current_crc = init_crc ^ (next_byte << OWN_CRC32_BYTE_SHIFT);
-
-    // Modulo-2 division bit by bit
-    for(uint8_t bit = 0u; bit < OWN_BYTE_BIT_LENGTH; ++bit)
-    {
-        // Check high bit
-        current_crc = (current_crc & OWN_HIGH_BIT_MASK_32U) ?
-                      // If high bit is set - shift it to the left and XOR with polynomial
-                      ((current_crc << 1u) ^ polynomial) :
-                      // Else just shift it to the left
-                      (current_crc << 1u);
-    }
-
-    return current_crc;
-}
diff --git a/sources/cpp_api/CMakeLists.txt b/sources/cpp_api/CMakeLists.txt
index 7f266d3..b47de96 100644
--- a/sources/cpp_api/CMakeLists.txt
+++ b/sources/cpp_api/CMakeLists.txt
@@ -14,32 +14,36 @@
 #
 
 add_library(dmlhl STATIC
-    $<TARGET_OBJECTS:dml_middle_layer>
-    $<TARGET_OBJECTS:dml_core> # TODO: Remove
-    )
+        $<TARGET_OBJECTS:dml_middle_layer>
+        )
 
 target_include_directories(dmlhl
-    PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../../include>
-    PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUEDIR}>
-    )
+        PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../../include>  # headers used when building in-tree
+        PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>  # headers for consumers of the installed package
+        )
+target_sources(dmlhl
+        PRIVATE $<TARGET_PROPERTY:dml_middle_layer,INTERFACE_SOURCES>
+        )
+target_compile_features(dmlhl
+        PUBLIC cxx_std_17
+        )
+target_compile_options(dmlhl
+        PRIVATE ${DML_QUALITY_OPTIONS}
+        )
 
-target_compile_features(dmlhl PUBLIC cxx_std_17)
-
-# TODO: Remove
-if (DML_HW)
+if(DML_HW)
     target_link_libraries(dmlhl PRIVATE ${CMAKE_DL_LIBS})
-    target_compile_definitions(dmlhl PUBLIC DML_HW)
 endif()
 
 set_target_properties(dmlhl PROPERTIES
-    CXX_STANDARD 17
-    CXX_STANDARD_REQUIRED ON
-    CXX_EXTENSIONS OFF
-    VERSION ${PROJECT_VERSION}
-    SOVERSION ${PROJECT_SOVERSION})
+        CXX_STANDARD 17
+        CXX_STANDARD_REQUIRED ON
+        CXX_EXTENSIONS OFF
+        VERSION ${PROJECT_VERSION}
+        SOVERSION ${PROJECT_SOVERSION})
 
 install(TARGETS dmlhl
-    EXPORT ${PROJECT_NAME}Targets
-    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+        EXPORT ${PROJECT_NAME}Targets
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/sources/hw-path/include/own_dsa_accel_constants.h b/sources/hw-path/include/own_dsa_accel_constants.h
deleted file mode 100644
index a7a138d..0000000
--- a/sources/hw-path/include/own_dsa_accel_constants.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright 2020-2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-/**
- * @brief Contains a constant, which are used to connect with hardware
- * @date 3/23/2020
- *
- */
-
-#include <stdint.h>
-
-#ifndef DML_DSA_ACCEL_CONFIG_H__
-#define DML_DSA_ACCEL_CONFIG_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DSA_DEVICE_ID ((uint32_t) (((uint32_t)0xFF << 24u)    \
-                                   | ((uint32_t)('a') << 16u) \
-                                   | ((uint32_t)('s') << 8u)  \
-                                   | (uint32_t)('d')))
-
-#define CHAR_MSK              0xFF202020
-#define MAX_DEVICE_COUNT      100u
-#define MAX_WORK_QUEUE_COUNT  100u
-#define OWN_PAGE_MASK         0x0FFFllu     /**< Defines page mask for portal incrementing */
-
-// General Capabilities Register unwrappers
-#define GC_BLOCK_ON_FAULT_SUP(gen_cap)           ((gen_cap >> 0u) & 0x0000000000000001ull)
-#define GC_OVERLAPPING_COPY_SUPPORT(gen_cap)     ((gen_cap >> 1u) & 0x0000000000000001ull)
-#define GC_M_CACHE_CONTROL_SUPPORT(gen_cap)      ((gen_cap >> 2u) & 0x0000000000000001ull)
-#define GC_F_CACHE_CONTROL_SUPPORT(gen_cap)      ((gen_cap >> 3u) & 0x0000000000000001ull)
-#define GC_INTERRUPT_HANDLE_REQUEST(gen_cap)     ((gen_cap >> 7u) & 0x0000000000000001ull)
-#define GC_DESTINATION_READBACK_SUPPORT(gen_cap) ((gen_cap >> 8u) & 0x0000000000000001ull)
-#define GC_DESCRIPTOR_READBACK_SUPPORT(gen_cap)  ((gen_cap >> 9u) & 0x0000000000000001ull)
-#define GC_MAX_TRANSFER_SIZE(gen_cap)            (1u << ((gen_cap >> 16u) & 0x000000000000001Full))
-#define GC_MAX_BATCH_SIZE(gen_cap)               (1u << ((gen_cap >> 21u) & 0x000000000000000Full))
-#define GC_MESSAGE_SIZE(gen_cap)                 (256u * ((gen_cap >> 25u) & 0x000000000000003Full))
-#define GC_CONFIGURATION_SUPPORT(gen_cap)        ((gen_cap >> 31u) & 0x0000000000000001ull)
-#define GC_MAX_DESCRIPTORS(gen_cap)              ((gen_cap >> 32u) & 0x00000000000000FFull)
-
-static const char     *DLL_NAME          = "/usr/lib64/libaccel-config.so";
-static const char     DEVICE_NAME[]      = "dsa";
-static const uint32_t DEVICE_NAME_LENGTH =
-                              sizeof(DEVICE_NAME) - 2u; //sizeof will return 4, position of terminating 0 is 3
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif //DML_DSA_ACCEL_CONFIG_H__
diff --git a/sources/middle_layer/CMakeLists.txt b/sources/middle_layer/CMakeLists.txt
index c66a710..cc39eba 100644
--- a/sources/middle_layer/CMakeLists.txt
+++ b/sources/middle_layer/CMakeLists.txt
@@ -16,34 +16,32 @@
 project(dml_middle_layer CXX)
 
 add_library(dml_middle_layer OBJECT
-    # Sources
-    awaiter.cpp
-    validation.cpp
-    core.cpp
+        src/operation.cpp
+        src/result.cpp
+        src/execution_path.cpp
+        src/validation.cpp
 
-    sw_path_legacy/dif.c
-    )
+        ../../include/dml/detail/ml/options.hpp
+        ../../include/dml/detail/ml/operation.hpp
+        ../../include/dml/detail/ml/result.hpp
+        ../../include/dml/detail/ml/execution_path.hpp
+        ../../include/dml/detail/ml/validation.hpp
+        )
 
+target_link_libraries(dml_middle_layer
+        PRIVATE dml_core
+        )
 target_include_directories(dml_middle_layer
-    PRIVATE ../../include
-    PRIVATE ../cores/include
-    PRIVATE dispatcher)
-
-target_compile_features(dml_middle_layer PUBLIC cxx_std_17)
-
-target_compile_definitions(dml_middle_layer PRIVATE $<$<BOOL:${EFFICIENT_WAIT}>: DML_EFFICIENT_WAIT>)
-
-if (DML_HW)
-    target_sources(dml_middle_layer PRIVATE
-        dispatcher/hw_device.cpp
-        dispatcher/hw_dispatcher.cpp
-        dispatcher/hw_queue.cpp
-        dispatcher/numa.cpp
-        device.cpp
-        hw_configuration_driver.c
+        PUBLIC ../../include
+        )
+target_sources(dml_middle_layer
+        PUBLIC $<TARGET_OBJECTS:dml_core>
+        PUBLIC $<TARGET_PROPERTY:dml_core,INTERFACE_SOURCES>
+        )
+target_compile_features(dml_middle_layer
+        PUBLIC cxx_std_17
+        )
+target_compile_options(dml_middle_layer
+        PRIVATE ${DML_QUALITY_OPTIONS}
+        PRIVATE ${DML_CPP_PRIVATE_OPTIONS}
         )
-    target_include_directories(dml_middle_layer PRIVATE ../hw-path/include)
-    target_compile_definitions(dml_middle_layer
-        PUBLIC DML_HW
-        PRIVATE $<$<BOOL:${LIB_ACCEL_3_2}>: LIB_ACCEL_VERSION_3_2>)
-endif ()
diff --git a/sources/middle_layer/awaiter.cpp b/sources/middle_layer/awaiter.cpp
deleted file mode 100644
index 1ac06ca..0000000
--- a/sources/middle_layer/awaiter.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include <dml/cpp/middle_layer/awaiter.hpp>
-
-#if defined(linux)
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#include <emmintrin.h>
-#endif
-
-namespace dml::ml {
-
-#ifdef DML_EFFICIENT_WAIT
-    static inline uint64_t current_time() {
-        return __rdtsc();
-    }
-
-    static inline void monitor_address(volatile void *address) {
-        asm volatile(".byte 0xf3, 0x48, 0x0f, 0xae, 0xf0" : : "a"(address));
-    }
-
-    static inline int wait_until(unsigned long timeout, unsigned int state) {
-        uint8_t r            = 0u;
-        auto    timeout_low  = static_cast<uint32_t>(timeout);
-        auto    timeout_high = static_cast<uint32_t>(timeout >> 32);
-
-        asm volatile(".byte 0xf2, 0x48, 0x0f, 0xae, 0xf1\t\n"
-                     "setc %0\t\n"
-        : "=r"(r)
-        : "c"(state), "a"(timeout_low), "d"(timeout_high));
-
-        return r;
-    }
-#endif
-
-    awaiter::awaiter(volatile void *address,
-                     uint8_t initial_value,
-                     uint32_t period) noexcept
-            : address_ptr_(reinterpret_cast<volatile uint8_t *>(address)),
-              period_(period),
-              initial_value_(initial_value) {
-        // Empty constructor
-    }
-
-    awaiter::~awaiter() noexcept {
-#ifdef DML_EFFICIENT_WAIT
-        while (initial_value_ == *address_ptr_) {
-            monitor_address(address_ptr_);
-
-            auto start = current_time();
-            wait_until(start + period_, idle_state_);
-        }
-#else
-        while (initial_value_ == *address_ptr_) {
-            _mm_pause();
-        }
-#endif
-    }
-}
diff --git a/sources/middle_layer/core.cpp b/sources/middle_layer/core.cpp
deleted file mode 100644
index af3b801..0000000
--- a/sources/middle_layer/core.cpp
+++ /dev/null
@@ -1,780 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include "dml/cpp/middle_layer/core.hpp"
-
-#include <immintrin.h>
-
-#include "core_api.h"
-//#include "dif.hpp"
-#include "dml/cpp/middle_layer/descriptor_views.hpp"
-#include "dml/cpp/middle_layer/options.hpp"
-#include "dml/cpp/middle_layer/result_views.hpp"
-
-// TODO: Only for DIFs
-#include <dml/dmldefs.h>
-
-#include <cstring>
-
-#include "sw_path_legacy/dif.h"
-
-namespace dml::ml::core
-{
-    static inline void write_status(execution_status from_status, status_t &to_status) noexcept
-    {
-        _mm_sfence();
-        to_status = static_cast<status_t>(from_status);
-    }
-
-    static inline execution_status evaluate(views::nop_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::batch_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::drain_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::mem_move_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::fill_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::compare_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::compare_pattern_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::create_delta_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::apply_delta_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::dualcast_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::crc_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::copy_crc_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::dif_check_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::dif_insert_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::dif_strip_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::dif_update_descriptor dsc) noexcept;
-
-    static inline execution_status evaluate(views::cache_flush_descriptor dsc) noexcept;
-
-    execution_status submit(descriptor &dsc) noexcept
-    {
-        auto view = views::any_descriptor(dsc);
-
-        switch (static_cast<operation>(view.operation()))
-        {
-            case operation::nop:
-                return core::evaluate(views::nop_descriptor(dsc));
-                break;
-            case operation::batch:
-                return core::evaluate(views::batch_descriptor(dsc));
-                break;
-            case operation::drain:
-                return core::evaluate(views::drain_descriptor(dsc));
-                break;
-            case operation::memory_move:
-                return core::evaluate(views::mem_move_descriptor(dsc));
-                break;
-            case operation::fill:
-                return core::evaluate(views::fill_descriptor(dsc));
-                break;
-            case operation::compare:
-                return core::evaluate(views::compare_descriptor(dsc));
-                break;
-            case operation::compare_pattern:
-                return core::evaluate(views::compare_pattern_descriptor(dsc));
-                break;
-            case operation::create_delta:
-                return core::evaluate(views::create_delta_descriptor(dsc));
-                break;
-            case operation::apply_delta:
-                return core::evaluate(views::apply_delta_descriptor(dsc));
-                break;
-            case operation::dualcast:
-                return core::evaluate(views::dualcast_descriptor(dsc));
-                break;
-            case operation::crc:
-                return core::evaluate(views::crc_descriptor(dsc));
-                break;
-            case operation::copy_crc:
-                return core::evaluate(views::copy_crc_descriptor(dsc));
-                break;
-            case operation::dif_check:
-                return core::evaluate(views::dif_check_descriptor(dsc));
-                break;
-            case operation::dif_insert:
-                return core::evaluate(views::dif_insert_descriptor(dsc));
-                break;
-            case operation::dif_strip:
-                return core::evaluate(views::dif_strip_descriptor(dsc));
-                break;
-            case operation::dif_update:
-                return core::evaluate(views::dif_update_descriptor(dsc));
-                break;
-            case operation::cache_flush:
-                return core::evaluate(views::cache_flush_descriptor(dsc));
-                break;
-            default:
-                return execution_status::unexpected;
-        }
-    }
-
-    static inline execution_status evaluate(views::nop_descriptor dsc) noexcept
-    {
-        auto final_status = execution_status::success;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::nop_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::drain_descriptor dsc) noexcept
-    {
-        auto final_status = execution_status::success;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::drain_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::mem_move_descriptor dsc) noexcept
-    {
-        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size = dsc.transfer_size();
-
-        const auto status = dmlc_move_8u(src, dst, transfer_size);
-
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::mem_move_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::fill_descriptor dsc) noexcept
-    {
-        const auto pattern       = dsc.pattern();
-        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size = dsc.transfer_size();
-
-        const auto status = dmlc_fill_with_pattern_8u(pattern, dst, transfer_size);
-
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::fill_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::compare_descriptor dsc) noexcept
-    {
-        const auto src1            = reinterpret_cast<byte_t *>(dsc.source_1_address());
-        const auto src2            = reinterpret_cast<byte_t *>(dsc.source_2_address());
-        const auto transfer_size   = dsc.transfer_size();
-        const auto expected_result = dsc.expected_result();
-        const auto options         = compare_options(dsc.flags());
-
-        auto mismatch = transfer_size_t(0);
-
-        const auto status = dmlc_compare_8u(src1, src2, transfer_size, &mismatch);
-
-        result_t actual_result;
-        if (status == DML_COMPARE_STATUS_EQ)
-        {
-            actual_result = 0;
-        }
-        else if (status == DML_COMPARE_STATUS_NE)
-        {
-            actual_result = 1;
-        }
-        else
-        {
-            if (dsc.completion_record_address())
-            {
-                auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-                auto result = views::compare_result(*record);
-                write_status(execution_status::unexpected, result.status());
-            }
-            return execution_status::unexpected;
-        }
-
-        auto final_status = options.contains(compare_option::check_result)
-                                ? (expected_result == actual_result) ? execution_status::success : execution_status::false_predicate_success
-                                : execution_status::success;
-
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::compare_result(*record);
-
-            result.result()          = actual_result;
-            result.bytes_completed() = mismatch;
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::compare_pattern_descriptor dsc) noexcept
-    {
-        const auto pattern         = dsc.pattern();
-        const auto src             = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto transfer_size   = dsc.transfer_size();
-        const auto expected_result = dsc.expected_result();
-        const auto options         = compare_options(dsc.flags());
-
-        auto mismatch = transfer_size_t(0);
-
-        const auto status = dmlc_compare_with_pattern_8u(src, pattern, transfer_size, &mismatch);
-
-        result_t actual_result;
-        if (status == DML_COMPARE_STATUS_EQ)
-        {
-            actual_result = 0;
-        }
-        else if (status == DML_COMPARE_STATUS_NE)
-        {
-            actual_result = 1;
-        }
-        else
-        {
-            if (dsc.completion_record_address())
-            {
-                auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-                auto result = views::compare_result(*record);
-                write_status(execution_status::unexpected, result.status());
-            }
-            return execution_status::unexpected;
-        }
-
-        auto final_status = options.contains(compare_option::check_result)
-                                ? (expected_result == actual_result) ? execution_status::success : execution_status::false_predicate_success
-                                : execution_status::success;
-
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::compare_result(*record);
-
-            result.result()          = actual_result;
-            result.bytes_completed() = mismatch;
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::create_delta_descriptor dsc) noexcept
-    {
-        const auto src1            = reinterpret_cast<byte_t *>(dsc.source_1_address());
-        const auto src2            = reinterpret_cast<byte_t *>(dsc.source_2_address());
-        const auto delta_record    = reinterpret_cast<byte_t *>(dsc.delta_record_address());
-        const auto delta_max_size  = dsc.maximum_delta_record_size();
-        const auto transfer_size   = dsc.transfer_size();
-        const auto expected_result = dsc.expected_result_mask();
-        const auto options         = create_delta_options(dsc.flags());
-
-        auto delta_record_size = transfer_size_t(0);
-
-        // Flip src1 and src2 due to core differs from hardware spec
-        const auto status = dmlc_create_delta_record_8u(src2, src1, transfer_size, delta_max_size, delta_record, &delta_record_size);
-
-        result_t actual_result;
-        if (status == DML_STATUS_DELTA_RECORD_SIZE_ERROR)
-        {
-            actual_result = static_cast<result_t>(delta_expected_result_option::expect_overflow);
-        }
-        else if (status == DML_STATUS_OK)
-        {
-            actual_result = delta_record_size ? static_cast<result_t>(delta_expected_result_option::expect_not_equal)
-                                              : static_cast<result_t>(delta_expected_result_option::expect_equal);
-        }
-        else
-        {
-            if (dsc.completion_record_address())
-            {
-                auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-                auto result = views::create_delta_result(*record);
-
-                write_status(execution_status::unexpected, result.status());
-            }
-            return execution_status::unexpected;
-        }
-
-        auto final_status = options.contains(compare_option::check_result)
-                                ? (expected_result == actual_result) ? execution_status::success : execution_status::false_predicate_success
-                                : execution_status::success;
-
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::create_delta_result(*record);
-
-            result.delta_record_size() = delta_record_size;
-            result.result()            = actual_result >> 1; // Hack, because of result and result mask uses different values
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::apply_delta_descriptor dsc) noexcept
-    {
-        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto delta_record  = reinterpret_cast<byte_t *>(dsc.delta_record_address());
-        const auto delta_size    = dsc.delta_record_size();
-        const auto transfer_size = dsc.transfer_size();
-
-        const auto status = dmlc_apply_delta_record_8u(dst, delta_record, transfer_size, delta_size);
-
-        auto final_status = status == DML_STATUS_OK ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::apply_delta_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::dualcast_descriptor dsc) noexcept
-    {
-        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst1          = reinterpret_cast<byte_t *>(dsc.destination_1_address());
-        const auto dst2          = reinterpret_cast<byte_t *>(dsc.destination_2_address());
-        const auto transfer_size = dsc.transfer_size();
-
-        const auto status = dmlc_dualcast_copy_8u(src, dst1, dst2, transfer_size);
-
-        auto final_status = status == DML_STATUS_OK ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::dualcast_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::crc_descriptor dsc) noexcept
-    {
-        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto transfer_size = dsc.transfer_size();
-        const auto crc_seed      = dsc.crc_seed();
-        const auto options       = crc_additional_options(dsc.operation_specific_flags());
-
-        constexpr auto polynomial = 0x1EDC6F41u;
-        auto           reverse    = [](uint32_t value)
-        {
-            value = (value & 0x55555555u) << 1u | (value & 0xAAAAAAAAu) >> 1u;
-            value = (value & 0x33333333u) << 2u | (value & 0xCCCCCCCCu) >> 2u;
-            value = (value & 0x0F0F0F0Fu) << 4u | (value & 0xF0F0F0F0u) >> 4u;
-            value = (value & 0x00FF00FFu) << 8u | (value & 0xFF00FF00u) >> 8u;
-            value = (value & 0x0000FFFFu) << 16u | (value & 0xFFFF0000u) >> 16u;
-
-            return value;
-        };
-
-        const auto bypass_reflection      = options.contains(crc_additional_option::bypass_reflection);
-        const auto bypass_data_reflection = options.contains(crc_additional_option::bypass_data_reflection);
-
-        auto crc_value = crc_seed;
-
-        // Bypass inversion and use reverse bit order for CRC completion_record
-        if (!bypass_reflection)
-        {
-            crc_value = ~(crc_value);
-            crc_value = reverse(crc_value);
-        }
-
-        // Bypass Data Reflection in case if DML_FLAG_DATA_REFLECTION set
-        auto status = (!bypass_data_reflection) ? dmlc_calculate_crc_reflected_32u(src, transfer_size, &crc_value, polynomial)
-                                                : dmlc_calculate_crc_32u(src, transfer_size, &crc_value, polynomial);
-
-        // Bypass inversion and use reverse bit order for CRC completion_record
-        if (!bypass_reflection)
-        {
-            crc_value = reverse(crc_value);
-            crc_value = ~(crc_value);
-        }
-
-        auto final_status = status == DML_STATUS_OK ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::crc_result(*record);
-
-            result.crc_value() = crc_value;
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::copy_crc_descriptor dsc) noexcept
-    {
-        const auto src           = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size = dsc.transfer_size();
-        const auto crc_seed      = dsc.crc_seed();
-        const auto options       = crc_additional_options(dsc.operation_specific_flags());
-
-        {
-            const auto status = dmlc_move_8u(src, dst, transfer_size);
-
-            if (status != DML_STATUS_OK)
-            {
-                auto final_status = execution_status::unexpected;
-                if (dsc.completion_record_address())
-                {
-                    auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-                    auto result = views::any_result(*record);
-
-                    write_status(final_status, result.status());
-                }
-                return final_status;
-            }
-        }
-
-        constexpr auto polynomial = 0x1EDC6F41u;
-        auto           reverse    = [](uint32_t value)
-        {
-            value = (value & 0x55555555u) << 1u | (value & 0xAAAAAAAAu) >> 1u;
-            value = (value & 0x33333333u) << 2u | (value & 0xCCCCCCCCu) >> 2u;
-            value = (value & 0x0F0F0F0Fu) << 4u | (value & 0xF0F0F0F0u) >> 4u;
-            value = (value & 0x00FF00FFu) << 8u | (value & 0xFF00FF00u) >> 8u;
-            value = (value & 0x0000FFFFu) << 16u | (value & 0xFFFF0000u) >> 16u;
-
-            return value;
-        };
-
-        const auto bypass_reflection      = options.contains(crc_additional_option::bypass_reflection);
-        const auto bypass_data_reflection = options.contains(crc_additional_option::bypass_data_reflection);
-
-        auto crc_value = crc_seed;
-
-        // Bypass inversion and use reverse bit order for CRC completion_record
-        if (!bypass_reflection)
-        {
-            crc_value = ~(crc_value);
-            crc_value = reverse(crc_value);
-        }
-
-        // Bypass Data Reflection in case if DML_FLAG_DATA_REFLECTION set
-        const auto status = (!bypass_data_reflection) ? dmlc_calculate_crc_reflected_32u(src, transfer_size, &crc_value, polynomial)
-                                                      : dmlc_calculate_crc_32u(src, transfer_size, &crc_value, polynomial);
-
-        // Bypass inversion and use reverse bit order for CRC completion_record
-        if (!bypass_reflection)
-        {
-            crc_value = reverse(crc_value);
-            crc_value = ~(crc_value);
-        }
-
-        auto final_status = status == DML_STATUS_OK ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::crc_result(*record);
-
-            result.crc_value() = crc_value;
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::dif_check_descriptor dsc) noexcept
-    {
-        constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
-
-        const auto src              = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto transfer_size    = dsc.transfer_size();
-        const auto options          = dif_check_options(dsc.flags());
-        const auto dif_options      = dif_additional_options(dsc.dif_flags());
-        const auto dif_src_options  = dif_additional_src_options(dsc.source_dif_flags());
-        const auto src_app_tag_mask = dsc.source_app_tag_mask();
-
-        const auto src_ref_tag = dsc.source_ref_tag();
-        const auto src_app_tag = dsc.source_app_tag();
-
-        dml_job_t job;
-        memset(&job, 0, sizeof(dml_job_t));
-        job.source_first_ptr                       = src;
-        job.source_length                          = transfer_size;
-        job.operation                              = DML_OP_DIF_CHECK;
-        job.dif_config.source_reference_tag_seed   = src_ref_tag;
-        job.dif_config.source_application_tag_seed = src_app_tag;
-        job.dif_config.source_application_tag_mask = src_app_tag_mask;
-        job.dif_config.block_size = static_cast<dml_dif_block_size_t>(static_cast<operation_specific_flags_t>(dif_options) & 0b11);
-
-        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
-        job.dif_config.flags =
-            (static_cast<uint64_t>(static_cast<dif_flags_t>(dif_options)) << 16) | (static_cast<dif_flags_t>(dif_src_options));
-
-        job.flags = static_cast<flags_t>(options);
-
-        auto status = dml_legacy_dif_check(&job);
-
-        // Unsupported operation
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::dif_control_error;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::dif_check_result(*record);
-
-            result.dif_status()      = job.result;
-            result.bytes_completed() = job.offset;
-            // TODO: Should also write values for tags
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::dif_insert_descriptor dsc) noexcept
-    {
-        const auto src              = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst              = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size    = dsc.transfer_size();
-        const auto options          = dif_insert_options(dsc.flags());
-        const auto dif_options      = dif_additional_options(dsc.dif_flags());
-        const auto dif_dst_options  = dif_additional_src_options(dsc.destination_dif_flags());
-        const auto dst_app_tag_mask = dsc.destination_app_tag_mask();
-
-        auto dst_ref_tag = dsc.destination_ref_tag();
-        auto dst_app_tag = dsc.destination_app_tag();
-
-        dml_job_t job;
-        memset(&job, 0, sizeof(dml_job_t));
-        job.source_first_ptr                            = src;
-        job.destination_first_ptr                       = dst;
-        job.source_length                               = transfer_size;
-        job.operation                                   = DML_OP_DIF_INSERT;
-        job.dif_config.destination_reference_tag_seed   = dst_ref_tag;
-        job.dif_config.destination_application_tag_seed = dst_app_tag;
-        job.dif_config.destination_application_tag_mask = dst_app_tag_mask;
-        job.dif_config.block_size = static_cast<dml_dif_block_size_t>(static_cast<operation_specific_flags_t>(dif_options) & 0b11);
-
-        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
-        job.dif_config.flags =
-            (static_cast<uint64_t>(static_cast<dif_flags_t>(dif_options)) << 16) | (static_cast<dif_flags_t>(dif_dst_options) << 8);
-
-        job.flags = static_cast<flags_t>(options);
-
-        auto status = dml_legacy_dif_insert(&job);
-
-        // Unsupported operation
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::dif_control_error;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::dif_insert_result(*record);
-
-            result.bytes_completed() = job.offset;
-            // TODO: Should also write values for tags
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::dif_strip_descriptor dsc) noexcept
-    {
-        const auto src              = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst              = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size    = dsc.transfer_size();
-        const auto options          = dif_strip_options(dsc.flags());
-        const auto dif_options      = dif_additional_options(dsc.dif_flags());
-        const auto dif_src_options  = dif_additional_src_options(dsc.source_dif_flags());
-        const auto src_app_tag_mask = dsc.source_app_tag_mask();
-
-        auto src_ref_tag = dsc.source_ref_tag();
-        auto src_app_tag = dsc.source_app_tag();
-
-        dml_job_t job;
-        memset(&job, 0, sizeof(dml_job_t));
-        job.source_first_ptr                       = src;
-        job.destination_first_ptr                  = dst;
-        job.source_length                          = transfer_size;
-        job.operation                              = DML_OP_DIF_STRIP;
-        job.dif_config.source_reference_tag_seed   = src_ref_tag;
-        job.dif_config.source_application_tag_seed = src_app_tag;
-        job.dif_config.source_application_tag_mask = src_app_tag_mask;
-        job.dif_config.block_size = static_cast<dml_dif_block_size_t>(static_cast<operation_specific_flags_t>(dif_options) & 0b11);
-
-        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
-        job.dif_config.flags =
-            (static_cast<uint64_t>(static_cast<dif_flags_t>(dif_options)) << 16) | (static_cast<dif_flags_t>(dif_src_options));
-
-        job.flags = static_cast<flags_t>(options);
-
-        auto status = dml_legacy_dif_strip(&job);
-
-        // Unsupported operation
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::dif_control_error;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::dif_strip_result(*record);
-
-            result.dif_status()      = job.result;
-            result.bytes_completed() = job.offset;
-            // TODO: Should also write values for tags
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::dif_update_descriptor dsc) noexcept
-    {
-        const auto src              = reinterpret_cast<byte_t *>(dsc.source_address());
-        const auto dst              = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size    = dsc.transfer_size();
-        const auto options          = dif_update_options(dsc.flags());
-        const auto dif_options      = dif_additional_options(dsc.dif_flags());
-        const auto dif_src_options  = dif_additional_src_options(dsc.source_dif_flags());
-        const auto dif_dst_options  = dif_additional_src_options(dsc.destination_dif_flags());
-        const auto src_app_tag_mask = dsc.source_app_tag_mask();
-        const auto dst_app_tag_mask = dsc.destination_app_tag_mask();
-
-        auto src_ref_tag = dsc.source_ref_tag();
-        auto dst_ref_tag = dsc.destination_ref_tag();
-        auto src_app_tag = dsc.source_app_tag();
-        auto dst_app_tag = dsc.destination_app_tag();
-
-        dml_job_t job;
-        memset(&job, 0, sizeof(dml_job_t));
-        job.source_first_ptr                            = src;
-        job.destination_first_ptr                       = dst;
-        job.source_length                               = transfer_size;
-        job.operation                                   = DML_OP_DIF_UPDATE;
-        job.dif_config.source_reference_tag_seed        = src_ref_tag;
-        job.dif_config.source_application_tag_seed      = src_app_tag;
-        job.dif_config.source_application_tag_mask      = src_app_tag_mask;
-        job.dif_config.destination_reference_tag_seed   = dst_ref_tag;
-        job.dif_config.destination_application_tag_seed = dst_app_tag;
-        job.dif_config.destination_application_tag_mask = dst_app_tag_mask;
-        job.dif_config.block_size = static_cast<dml_dif_block_size_t>(static_cast<operation_specific_flags_t>(dif_options) & 0b11);
-
-        // Job API composes DIF flags into one 64-bit value via shifting, check dmldefs.h
-        job.dif_config.flags = (static_cast<uint64_t>(static_cast<dif_flags_t>(dif_options)) << 16) |
-                               (static_cast<dif_flags_t>(dif_dst_options) << 8) | (static_cast<dif_flags_t>(dif_src_options));
-
-        job.flags = static_cast<flags_t>(options);
-
-        auto status = dml_legacy_dif_update(&job);
-
-        // Unsupported operation
-        auto final_status = (status == DML_STATUS_OK) ? execution_status::success : execution_status::dif_control_error;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::dif_update_result(*record);
-
-            result.dif_status()      = job.result;
-            result.bytes_completed() = job.offset;
-            // TODO: Should also write values for tags
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::cache_flush_descriptor dsc) noexcept
-    {
-        const auto dst           = reinterpret_cast<byte_t *>(dsc.destination_address());
-        const auto transfer_size = dsc.transfer_size();
-        const auto options       = cache_flush_options(dsc.operation_specific_flags());
-
-        auto status = options.contains(cache_flush_option::cache_control) ? dmlc_copy_cache_to_memory_8u(dst, transfer_size)
-                                                                          : dmlc_move_cache_to_memory_8u(dst, transfer_size);
-
-        auto final_status = status == DML_STATUS_OK ? execution_status::success : execution_status::unexpected;
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::cache_flush_result(*record);
-
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-    static inline execution_status evaluate(views::batch_descriptor dsc) noexcept
-    {
-        const auto operations        = reinterpret_cast<descriptor *>(dsc.descriptor_list_address());
-        const auto descriptors_count = dsc.descriptors_count();
-
-        auto final_status = execution_status::success;
-        auto index        = transfer_size_t(0);
-        for (index = 0; index < descriptors_count; ++index)
-        {
-            auto &op_dsc = operations[index];
-
-            auto status = submit(op_dsc);
-
-            if (status != execution_status::success)
-            {
-                final_status = status;
-                break;
-            }
-        }
-
-        if (dsc.completion_record_address())
-        {
-            auto record = reinterpret_cast<completion_record *>(dsc.completion_record_address());
-            auto result = views::batch_result(*record);
-
-            result.descriptors_completed() = index;
-            write_status(final_status, result.status());
-        }
-
-        return final_status;
-    }
-
-}  // namespace dml::ml::core
diff --git a/sources/middle_layer/device.cpp b/sources/middle_layer/device.cpp
deleted file mode 100644
index 073e26a..0000000
--- a/sources/middle_layer/device.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include <dml/cpp/middle_layer/device.hpp>
-#include <hw_dispatcher.hpp>
-#include <numa.hpp>
-
-namespace dml::ml
-{
-#ifdef DML_HW
-    submission_status hardware::submit(descriptor &dsc, completion_record &record) noexcept
-    {
-        static auto                &dispatcher_instance = dispatcher::hw_dispatcher::get_instance();
-        static thread_local int32_t numa_id             = util::get_numa_id();
-
-        if (dispatcher_instance.is_hw_support())
-        {
-            const auto n_devices = std::distance(dispatcher_instance.begin(), dispatcher_instance.end());
-
-            // Initially set to "end" index
-            static auto last_device_idx = std::atomic(n_devices);
-
-            // Loop FROM the device after the one used for last submit
-            for (auto device_idx = last_device_idx.load() + 1; device_idx < n_devices; ++device_idx)
-            {
-                auto &device = *(dispatcher_instance.begin() + device_idx);
-                if (device.numa_id() != numa_id)
-                {
-                    continue;
-                }
-
-                auto view = views::any_descriptor(dsc);
-                view.flags() |=
-                    static_cast<flags_t>(flag::completion_record_address_valid) | static_cast<flags_t>(flag::request_completion_record);
-
-                // Use BlockOnFault on hardware, until page fault handling is implemented in software side
-                if (view.operation() != static_cast<operation_t>(operation::batch) &&
-                    view.operation() != static_cast<operation_t>(operation::drain) &&
-                    view.operation() != static_cast<operation_t>(operation::nop))
-                {
-                    view.flags() |= static_cast<flags_t>(flag::block_on_fault);
-                }
-
-                view.completion_record_address() = reinterpret_cast<address_t>(&record);
-                record.bytes[0]                  = 0;
-
-                auto status = device.enqueue_descriptor(reinterpret_cast<const dsahw_descriptor_t *>(&dsc));
-
-                if (status == DML_STATUS_OK)
-                {
-                    last_device_idx = device_idx;
-                    return submission_status::success;
-                }
-            }
-
-            // If the loop before didn't submit descriptor, then loop UNTIL the device that was used for last submit
-            for (auto device_idx = 0; device_idx <= last_device_idx; ++device_idx)
-            {
-                auto &device = *(dispatcher_instance.begin() + device_idx);
-                if (device.numa_id() != numa_id)
-                {
-                    continue;
-                }
-
-                auto view = views::any_descriptor(dsc);
-                view.flags() |=
-                    static_cast<flags_t>(flag::completion_record_address_valid) | static_cast<flags_t>(flag::request_completion_record);
-
-                // Use BlockOnFault on hardware, until page fault handling is implemented in software side
-                if (view.operation() != static_cast<operation_t>(operation::batch) &&
-                    view.operation() != static_cast<operation_t>(operation::drain) &&
-                    view.operation() != static_cast<operation_t>(operation::nop))
-                {
-                    view.flags() |= static_cast<flags_t>(flag::block_on_fault);
-                }
-
-                view.completion_record_address() = reinterpret_cast<address_t>(&record);
-                record.bytes[0]                  = 0;
-
-                auto status = device.enqueue_descriptor(reinterpret_cast<const dsahw_descriptor_t *>(&dsc));
-
-                if (status == DML_STATUS_OK)
-                {
-                    last_device_idx = device_idx;
-                    return submission_status::success;
-                }
-            }
-        }
-
-        return submission_status::failure;
-    }
-#endif
-}  // namespace dml::ml
diff --git a/sources/middle_layer/dispatcher/hw_device.cpp b/sources/middle_layer/dispatcher/hw_device.cpp
deleted file mode 100644
index 06acace..0000000
--- a/sources/middle_layer/dispatcher/hw_device.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifdef DML_HW
-
-#include <algorithm>
-
-#include "hw_device.hpp"
-#include "hardware_configuration_driver.h"
-#include "own_dsa_accel_constants.h"
-
-static inline bool own_search_device_name(const char *src_ptr,
-                                          const uint32_t name,
-                                          const uint32_t name_size) noexcept {
-    const uint8_t null_terminator = '\0';
-
-    for (size_t symbol_idx = 0u; null_terminator != src_ptr[symbol_idx + name_size]; symbol_idx++) {
-        const auto *candidate_ptr = reinterpret_cast<const uint32_t *>(src_ptr + symbol_idx);
-
-        // Convert the first 3 bytes to lower case and make the 4th 0xff
-        if (name == (*candidate_ptr | CHAR_MSK)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-namespace dml::ml::dispatcher {
-
-void hw_device::fill_hw_context(dsahw_context_t *const hw_context_ptr) const noexcept {
-    // Restore device properties
-    hw_context_ptr->gen_cap.block_on_fault_support       = hw_device::block_on_fault_support();
-    hw_context_ptr->gen_cap.overlapping_copy_support     = hw_device::overlapping_copy_support();
-    hw_context_ptr->gen_cap.memory_cache_control_support = hw_device::memory_cache_control_support();
-    hw_context_ptr->gen_cap.flush_cache_control_support  = hw_device::flush_cache_control_support();
-    hw_context_ptr->gen_cap.interrupt_handle_request     = hw_device::interrupt_handle_request();
-    hw_context_ptr->gen_cap.destination_readback_support = hw_device::destination_readback_support();
-    hw_context_ptr->gen_cap.descriptor_readback_support  = hw_device::descriptor_readback_support();
-    hw_context_ptr->gen_cap.max_transfer_size            = hw_device::max_transfer_size();
-    hw_context_ptr->gen_cap.max_batch_size               = hw_device::max_batch_size();
-    hw_context_ptr->gen_cap.message_size                 = hw_device::message_size();
-    hw_context_ptr->gen_cap.configuration_support        = hw_device::configuration_support();
-    hw_context_ptr->gen_cap.max_descriptors              = hw_device::max_descriptors();
-}
-
-auto hw_device::enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t {
-    const auto n_queues = std::distance(this->begin(), this->end());
-
-    // Initially set to "end" index
-    static auto last_wq_idx = std::atomic(n_queues);
-
-    // Loop FROM the queue after the one used for last submit
-    for (auto idx = last_wq_idx.load() + 1; idx < n_queues; ++idx)
-    {
-        auto &queue = *(this->begin() + idx);
-        auto status = queue.enqueue_descriptor(desc_ptr);
-
-        if (DML_STATUS_OK == status) {
-            last_wq_idx = idx;
-            return DML_STATUS_OK;
-        }
-    }
-
-    // If the loop before didn't submit descriptor, then loop UNTIL the queue that was used for last submit
-    for (auto idx = 0; idx <= last_wq_idx; ++idx)
-    {
-        auto &queue = *(this->begin() + idx);
-        auto status = queue.enqueue_descriptor(desc_ptr);
-
-        if (DML_STATUS_OK == status) {
-            last_wq_idx = idx;
-            return DML_STATUS_OK;
-        }
-    }
-
-    return DML_STATUS_INSTANCE_NOT_FOUND;
-}
-
-auto hw_device::block_on_fault_support() const noexcept -> uint8_t {
-    return GC_BLOCK_ON_FAULT_SUP(gen_cap_register_);
-}
-
-auto hw_device::overlapping_copy_support() const noexcept -> uint8_t {
-    return GC_OVERLAPPING_COPY_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::memory_cache_control_support() const noexcept -> uint8_t {
-    return GC_M_CACHE_CONTROL_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::flush_cache_control_support() const noexcept -> uint8_t {
-    return GC_F_CACHE_CONTROL_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::interrupt_handle_request() const noexcept -> uint8_t {
-    return GC_INTERRUPT_HANDLE_REQUEST(gen_cap_register_);
-}
-
-auto hw_device::destination_readback_support() const noexcept -> uint8_t {
-    return GC_DESTINATION_READBACK_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::descriptor_readback_support() const noexcept -> uint8_t {
-    return GC_DESCRIPTOR_READBACK_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::max_transfer_size() const noexcept -> uint32_t {
-    return GC_MAX_TRANSFER_SIZE(gen_cap_register_);
-}
-
-auto hw_device::max_batch_size() const noexcept -> uint32_t {
-    return GC_MAX_BATCH_SIZE(gen_cap_register_);
-}
-
-auto hw_device::message_size() const noexcept -> uint16_t {
-    return GC_MESSAGE_SIZE(gen_cap_register_);
-}
-
-auto hw_device::configuration_support() const noexcept -> uint8_t {
-    return GC_CONFIGURATION_SUPPORT(gen_cap_register_);
-}
-
-auto hw_device::max_descriptors() const noexcept -> uint8_t {
-    return GC_MAX_DESCRIPTORS(gen_cap_register_);
-}
-
-auto hw_device::initialize_new_device(descriptor_t *device_descriptor_ptr) noexcept -> dsahw_status_t {
-#if defined(linux)
-    // Device initialization stage
-    auto       *device_ptr   = reinterpret_cast<accfg_device *>(device_descriptor_ptr);
-    const auto *name_ptr     = dsa_device_get_name(device_ptr);
-    const bool is_dsa_device = own_search_device_name(name_ptr, DSA_DEVICE_ID, DEVICE_NAME_LENGTH);
-
-    if (!is_dsa_device || ACCFG_DEVICE_DISABLED == dsa_device_get_state(device_ptr)) {
-        return DML_STATUS_INSTANCE_NOT_FOUND;
-    }
-
-    gen_cap_register_ = dsa_device_get_gen_cap_register(device_ptr);
-    version_          = dsa_device_get_major_version(device_ptr);
-    numa_node_id_     = dsa_device_get_numa_node(device_ptr);
-
-    dsa_group_get_first(device_ptr);
-
-    // Working queues initialization stage
-    auto *wq_ptr = dsa_get_first_work_queue(device_ptr);
-    auto wq_it   = working_queues_.begin();
-
-    while (nullptr != wq_ptr) {
-        if (DML_STATUS_OK == wq_it->initialize_new_queue(wq_ptr, version_)) {
-            wq_it++;
-
-            std::push_heap(working_queues_.begin(), wq_it,
-                           [](const hw_queue &a, const hw_queue &b) -> bool {
-                               return a.priority() < b.priority();
-                           });
-        }
-
-        wq_ptr = dsa_work_queue_get_next(wq_ptr);
-    }
-
-    // Check number of working queues
-    queue_count_ = std::distance(working_queues_.begin(), wq_it);
-
-    if (queue_count_ > 1) {
-        auto begin = working_queues_.begin();
-        auto end   = begin + queue_count_;
-
-        std::sort_heap(begin, end, [](const hw_queue &a, const hw_queue &b) -> bool {
-            return a.priority() < b.priority();
-        });
-    }
-
-    if (queue_count_ == 0) {
-        return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
-    }
-
-    return DML_STATUS_OK;
-#else
-    return DML_STATUS_INSTANCE_NOT_FOUND;
-#endif
-}
-
-auto hw_device::size() const noexcept -> size_t {
-    return queue_count_;
-}
-
-auto hw_device::numa_id() const noexcept -> uint64_t {
-    return numa_node_id_;
-}
-
-auto hw_device::begin() const noexcept -> queues_container_t::const_iterator {
-    return working_queues_.cbegin();
-}
-
-auto hw_device::end() const noexcept -> queues_container_t::const_iterator {
-    return working_queues_.cbegin() + queue_count_;
-}
-
-}
-
-#endif
diff --git a/sources/middle_layer/dispatcher/hw_device.hpp b/sources/middle_layer/dispatcher/hw_device.hpp
deleted file mode 100644
index b364825..0000000
--- a/sources/middle_layer/dispatcher/hw_device.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
-#define DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
-
-#include <array>
-
-#include "dml/dmldefs.h"
-#include "hw_queue.hpp"
-
-#ifdef DML_HW
-#include "hardware_definitions.h"
-#include "own_dsa_accel_constants.h"
-
-namespace dml::ml::dispatcher {
-
-class hw_device final {
-
-    static constexpr uint32_t max_working_queues = MAX_WORK_QUEUE_COUNT;
-
-    using queues_container_t = std::array<hw_queue, max_working_queues>;
-
-public:
-    using descriptor_t = void;
-
-    hw_device() noexcept = default;
-
-    void fill_hw_context(dsahw_context_t *hw_context_ptr) const noexcept;
-
-    [[nodiscard]] auto enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t;
-
-    [[nodiscard]] auto initialize_new_device(descriptor_t *device_descriptor_ptr) noexcept -> dsahw_status_t;
-
-    [[nodiscard]] auto size() const noexcept -> size_t;
-
-    [[nodiscard]] auto numa_id() const noexcept -> uint64_t;
-
-    [[nodiscard]] auto begin() const noexcept -> queues_container_t::const_iterator;
-
-    [[nodiscard]] auto end() const noexcept -> queues_container_t::const_iterator;
-
-protected:
-    auto block_on_fault_support() const noexcept -> uint8_t;
-
-    auto overlapping_copy_support() const noexcept -> uint8_t;
-
-    auto memory_cache_control_support() const noexcept -> uint8_t;
-
-    auto flush_cache_control_support() const noexcept -> uint8_t;
-
-    auto interrupt_handle_request() const noexcept -> uint8_t;
-
-    auto destination_readback_support() const noexcept -> uint8_t;
-
-    auto descriptor_readback_support() const noexcept -> uint8_t;
-
-    auto max_transfer_size() const noexcept -> uint32_t;
-
-    auto max_batch_size() const noexcept -> uint32_t;
-
-    auto message_size() const noexcept -> uint16_t;
-
-    auto configuration_support() const noexcept -> uint8_t;
-
-    auto max_descriptors() const noexcept -> uint8_t;
-
-private:
-    queues_container_t working_queues_   = {};    /**< Set of available HW working queues */
-    uint32_t           queue_count_      = 0u;    /**< Number of working queues that are available */
-    uint64_t           gen_cap_register_ = 0u;    /**< GENCAP register content */
-    uint64_t           numa_node_id_     = 0u;    /**< NUMA node id of the device */
-    uint32_t           version_          = 0u;    /**< Version of discovered device */
-};
-
-}
-
-#endif
-#endif //DML_MIDDLE_LAYER_DISPATCHER_HW_DEVICE_HPP_
diff --git a/sources/middle_layer/dispatcher/hw_dispatcher.cpp b/sources/middle_layer/dispatcher/hw_dispatcher.cpp
deleted file mode 100644
index b4d7a12..0000000
--- a/sources/middle_layer/dispatcher/hw_dispatcher.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include "hw_dispatcher.hpp"
-
-#ifdef LOG_HW_INIT
-
-#include <iostream>
-
-#endif
-
-#if defined(DML_HW) && defined(linux)
-
-#include "libaccel_config.h"
-
-#endif
-
-// TODO should be removed at all
-#define DML_HWSTS_RET(expr, err_code) { if( expr ) { return( err_code ); }}
-
-namespace dml::ml::dispatcher {
-
-hw_dispatcher::hw_dispatcher() noexcept {
-#ifdef DML_HW
-    hw_init_status_ = hw_dispatcher::initialize_hw();
-    hw_support_     = hw_init_status_ == DML_STATUS_OK;
-#else
-    hw_support_ = false;
-#endif
-}
-
-#ifdef DML_HW
-
-auto hw_dispatcher::initialize_hw() noexcept -> dsahw_status_t {
-
-    accfg_ctx *ctx_ptr = nullptr;
-
-    dsahw_status_t status = dsa_initialize_accelerator_driver(&hw_driver_);
-    DML_HWSTS_RET(status != DML_STATUS_OK, status);
-
-    int32_t context_creation_status = dsa_driver_new_context(&ctx_ptr);
-    DML_HWSTS_RET(0u != context_creation_status, DML_STATUS_HARDWARE_CONNECTION_ERROR);
-
-    // Retrieve first device in the system given the passed in context
-    auto *dev_tmp_ptr = dsa_context_get_first_device(ctx_ptr);
-    auto device_it    = devices_.begin();
-
-    while (nullptr != dev_tmp_ptr) {
-        if (DML_STATUS_OK == device_it->initialize_new_device(dev_tmp_ptr)) {
-            device_it++;
-        }
-
-        // Retrieve the "next" device in the system based on given device
-        dev_tmp_ptr = dsa_device_get_next(dev_tmp_ptr);
-    }
-
-    device_count_ = std::distance(devices_.begin(), device_it);
-
-    if (device_count_ <= 0) {
-        return DML_STATUS_HARDWARE_CONNECTION_ERROR;
-    }
-
-#ifdef LOG_HW_INIT
-    std::cout << "--------------------------------\n";
-    std::cout << "Number of discovered devices: " << device_count_ << "\n";
-    std::cout << "--------------------------------\n";
-
-    for (size_t i = 0; i < device_count_; i++) {
-        std::cout << "Device #" << i << " : " << devices_[i].size() << " work queues\n";
-    }
-
-    std::cout << "--------------------------------\n" << std::endl;
-#endif
-
-    hw_context_.set_driver_context_ptr(ctx_ptr);
-
-    return DML_STATUS_OK;
-}
-#endif
-
-hw_dispatcher::~hw_dispatcher() noexcept {
-#ifdef DML_HW
-    // Variables
-    auto *context_ptr = hw_context_.get_driver_context_ptr();
-
-    if (context_ptr != nullptr) {
-        dsa_context_close(context_ptr);
-    }
-
-    dsa_finalize_accelerator_driver(&hw_driver_);
-
-    // Zeroing values
-    hw_context_.set_driver_context_ptr(nullptr);
-#endif
-}
-
-auto hw_dispatcher::get_instance() noexcept -> hw_dispatcher & {
-    static hw_dispatcher instance{};
-
-    return instance;
-}
-
-auto hw_dispatcher::is_hw_support() const noexcept -> bool {
-    return hw_support_;
-}
-
-#ifdef DML_HW
-
-void hw_dispatcher::fill_hw_context(dsahw_context_t *const hw_context_ptr) noexcept {
-
-#if defined(linux)
-    // Restore context
-    hw_context_ptr->dsa_context_ptr = hw_context_.get_driver_context_ptr();
-
-    // Restore device properties
-    // We take the first one as all configurations across the platform should be the same for all devices
-    devices_[0].fill_hw_context(hw_context_ptr);
-#endif
-}
-
-auto hw_dispatcher::get_hw_init_status() const noexcept -> dsahw_status_t {
-    return hw_init_status_;
-}
-
-#ifdef DML_HW
-
-auto hw_dispatcher::begin() const noexcept -> device_container_t::const_iterator {
-    return devices_.cbegin();
-}
-
-auto hw_dispatcher::end() const noexcept -> device_container_t::const_iterator {
-    return devices_.cbegin() + device_count_;
-}
-
-void hw_dispatcher::hw_context::set_driver_context_ptr(accfg_ctx *driver_context_ptr) noexcept {
-    driver_context_ptr_ = driver_context_ptr;
-}
-
-[[nodiscard]] auto hw_dispatcher::hw_context::get_driver_context_ptr() noexcept -> accfg_ctx * {
-    return driver_context_ptr_;
-}
-
-#endif
-
-#endif
-}
diff --git a/sources/middle_layer/dispatcher/hw_dispatcher.hpp b/sources/middle_layer/dispatcher/hw_dispatcher.hpp
deleted file mode 100644
index b41b886..0000000
--- a/sources/middle_layer/dispatcher/hw_dispatcher.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
-#define DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
-
-#include <array>
-#include <cstdint>
-
-#include "hw_device.hpp"
-#include "dml/dmldefs.h"
-
-#ifdef DML_HW
-#include "hardware_definitions.h"
-#include "hardware_configuration_driver.h"
-#include "own_dsa_accel_constants.h"
-#endif
-
-namespace dml::ml::dispatcher {
-
-class hw_dispatcher final {
-
-#ifdef DML_HW
-
-    static constexpr uint32_t max_devices = MAX_DEVICE_COUNT;
-
-    using device_container_t = std::array<hw_device, max_devices>;
-
-    class hw_context final {
-    public:
-        void set_driver_context_ptr(accfg_ctx *driver_context_ptr) noexcept;
-
-        [[nodiscard]] auto get_driver_context_ptr() noexcept -> accfg_ctx *;
-
-    private:
-        accfg_ctx *driver_context_ptr_ = nullptr; /**< DSA driver context */
-    };
-
-#endif
-
-public:
-
-    static auto get_instance() noexcept -> hw_dispatcher &;
-
-    [[nodiscard]] auto is_hw_support() const noexcept -> bool;
-
-#ifdef DML_HW
-
-    [[nodiscard]] auto get_hw_init_status() const noexcept -> dsahw_status_t;
-
-    void fill_hw_context(dsahw_context_t *hw_context_ptr) noexcept;
-
-    [[nodiscard]] auto begin() const noexcept -> device_container_t::const_iterator;
-
-    [[nodiscard]] auto end() const noexcept -> device_container_t::const_iterator;
-
-#endif
-
-    virtual ~hw_dispatcher() noexcept;
-
-protected:
-    hw_dispatcher() noexcept;
-
-#ifdef DML_HW
-    auto initialize_hw() noexcept -> dsahw_status_t;
-
-private:
-    hw_context         hw_context_;
-    hw_driver_t        hw_driver_{};
-    device_container_t devices_{};
-    size_t             device_count_      = 0;
-#endif
-
-    bool hw_support_;
-#ifdef DML_HW
-    dsahw_status_t hw_init_status_;
-#endif
-};
-
-}
-#endif //DML_MIDDLE_LAYER_DISPATCHER_HW_DISPATCHER_HPP_
diff --git a/sources/middle_layer/dispatcher/hw_queue.cpp b/sources/middle_layer/dispatcher/hw_queue.cpp
deleted file mode 100644
index 1a69cc7..0000000
--- a/sources/middle_layer/dispatcher/hw_queue.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifdef DML_HW
-
-#include <fcntl.h>
-
-#if defined( linux )
-
-#include <sys/mman.h>
-
-#endif
-
-#include "hw_queue.hpp"
-#include "hardware_configuration_driver.h"
-#include "own_dsa_accel_constants.h"
-
-#define DML_HWSTS_RET(expr, err_code) { if( expr ) { return( err_code ); }}
-#define DEC_BASE 10u         /**< @todo */
-#define DEC_CHAR_BASE ('0')  /**< @todo */
-#define DEC_MAX_INT_BUF 16u  /**< @todo */
-
-namespace dml::ml::dispatcher {
-
-static inline uint32_t own_int_to_str(char *path_ptr, uint32_t i, uint32_t max_path, uint32_t number) noexcept {
-    uint8_t  ch[DEC_MAX_INT_BUF];
-    uint32_t j = 0u;
-
-    // At least once - in case of 0
-    do {
-        // Convert digit to char
-        ch[j++] = DEC_CHAR_BASE + number % DEC_BASE;
-        // Next digit
-        number /= DEC_BASE;
-    } while (0u < number);
-    // Return max+1 if "path" buffer overflow
-    DML_HWSTS_RET((max_path < i + j), max_path + 1u);
-    do {
-        // Copy to "path" in correct order
-        path_ptr[i++] = ch[--j];
-    } while (0u < j);
-    // Success - return the next "free" char index in the "path"
-    return i;
-}
-
-static inline dsahw_status_t own_specify_path(char *path_ptr,
-                                              uint32_t max_path,
-                                              uint32_t major,
-                                              uint32_t minor) noexcept {
-    uint32_t i = 0u;
-
-    // Strlen analogue
-    while (('\0' != path_ptr[i]) && (i < max_path)) {
-        i++;
-    }
-    // i has index of terminating 0
-    // Check for buffer overflow
-    DML_HWSTS_RET((max_path < i), DML_STATUS_VERSION_DETECTION_ERROR);
-    // Need the next format: "/dev/char/major:minor"
-    i = own_int_to_str(path_ptr, i, max_path, major);
-    // Check for buffer overflow
-    DML_HWSTS_RET((max_path < i + 1u), DML_STATUS_VERSION_DETECTION_ERROR);
-    path_ptr[i++] = ':';
-    i = own_int_to_str(path_ptr, i, max_path, minor);
-    // Check for buffer overflow
-    DML_HWSTS_RET((max_path < i), DML_STATUS_VERSION_DETECTION_ERROR);
-    path_ptr[i] = '\0';
-    return DML_STATUS_OK;
-}
-
-hw_queue::hw_queue(hw_queue &&other) noexcept {
-    version_       = other.version_;
-    priority_      = other.priority_;
-    portal_mask_   = other.portal_mask_;
-    portal_ptr_    = other.portal_ptr_;
-    portal_offset_ = 0;
-
-    other.portal_ptr_ = nullptr;
-}
-
-auto hw_queue::operator=(hw_queue &&other) noexcept -> hw_queue & {
-    version_       = other.version_;
-    priority_      = other.priority_;
-    portal_mask_   = other.portal_mask_;
-    portal_ptr_    = other.portal_ptr_;
-    portal_offset_ = 0;
-
-    other.portal_ptr_ = nullptr;
-
-    return *this;
-}
-
-hw_queue::~hw_queue() {
-#if defined( linux )
-    // Freeing resources
-    if (portal_ptr_ != nullptr) {
-        munmap(portal_ptr_, 0x1000u);
-
-        portal_ptr_ = nullptr;
-    }
-#endif
-}
-
-void hw_queue::set_portal_ptr(void *value_ptr) noexcept {
-    portal_offset_ = reinterpret_cast<uintptr_t>(value_ptr) & OWN_PAGE_MASK;
-    portal_mask_   = reinterpret_cast<uintptr_t>(value_ptr) & (~OWN_PAGE_MASK);
-    portal_ptr_    = value_ptr;
-}
-
-auto hw_queue::get_portal_ptr() const noexcept -> void * {
-    uint64_t offset = portal_offset_++;
-    offset = (offset << 6) & OWN_PAGE_MASK;
-    return reinterpret_cast<void *>(offset | portal_mask_);
-}
-
-auto hw_queue::enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t {
-#if defined( linux )
-    uint8_t retry = 0u;
-
-    void *current_place_ptr = get_portal_ptr();
-    asm volatile("sfence\t\n"
-                 ".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02\t\n"
-                 "setz %0\t\n"
-    : "=r"(retry) : "a" (current_place_ptr), "d" (desc_ptr));
-
-    return static_cast<dsahw_status_t>(retry);
-#else
-    return DML_STATUS_INSTANCE_NOT_FOUND;
-#endif
-}
-
-auto hw_queue::initialize_new_queue(void *wq_descriptor_ptr, uint32_t major_version) noexcept -> dsahw_status_t {
-#if defined( linux )
-    auto *work_queue_ptr = reinterpret_cast<accfg_wq *>(wq_descriptor_ptr);
-    char path[64]        = "/dev/char/";
-
-    if (ACCFG_WQ_ENABLED != dsa_work_queue_get_state(work_queue_ptr) ||
-        ACCFG_WQ_SHARED != dsa_work_queue_get_mode(work_queue_ptr)) {
-        return DML_STATUS_WORK_QUEUES_NOT_AVAILABLE;
-    }
-
-    auto *group_ptr = dsa_work_queue_get_group(work_queue_ptr);
-    if (group_ptr == nullptr) {
-        return DML_STATUS_INTERNAL_ERROR;
-    }
-
-    version_     = dsa_work_queue_get_minor_version(work_queue_ptr);
-    priority_    = dsa_work_queue_get_priority(work_queue_ptr);
-    memory_type_ = dsa_group_get_traffic_class_b(group_ptr) ? supported_memory_type::durable
-                                                            : supported_memory_type::non_durable;
-
-    // Need the next format: "/dev/char/major:minor"
-#if defined(LIB_ACCEL_VERSION_3_2)
-    auto status = dsa_work_queue_get_device_path(work_queue_ptr, path, 64 - 1);
-#else
-    auto status = own_specify_path(path, sizeof(path) - 1u, major_version, version_);
-#endif
-    DML_HWSTS_RET((0 > status), DML_STATUS_INCORRECT_WORK_QUEUE_ID);
-
-    auto fd = open(path, O_RDWR);
-    DML_HWSTS_RET((0 > fd), DML_STATUS_WORK_QUEUE_CONNECTION_ERROR);
-
-    // Map portal for enqcmd
-    auto *region_ptr = mmap(nullptr, 0x1000u, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0u);
-    close(fd);
-
-    DML_HWSTS_RET((MAP_FAILED == region_ptr), DML_STATUS_PORTAL_CREATION_ERROR);
-
-    hw_queue::set_portal_ptr(region_ptr);
-
-    return DML_STATUS_OK;
-#else
-    return DML_STATUS_WORK_QUEUE_CONNECTION_ERROR;
-#endif
-}
-
-auto hw_queue::priority() const noexcept -> int32_t {
-    return priority_;
-}
-
-auto hw_queue::memory_type() const noexcept -> hw_queue::supported_memory_type {
-    return memory_type_;
-}
-
-}
-
-#endif
diff --git a/sources/middle_layer/dispatcher/hw_queue.hpp b/sources/middle_layer/dispatcher/hw_queue.hpp
deleted file mode 100644
index 368b932..0000000
--- a/sources/middle_layer/dispatcher/hw_queue.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#ifndef DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
-#define DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
-
-#include <atomic>
-
-#include "dml/dmldefs.h"
-
-#ifdef DML_HW
-
-#include "hardware_definitions.h"
-
-namespace dml::ml::dispatcher {
-
-class hw_queue {
-public:
-    enum class supported_memory_type {
-        durable,
-        non_durable
-    };
-
-    using descriptor_t = void;
-
-    hw_queue() noexcept = default;
-
-    hw_queue(const hw_queue &) noexcept = delete;
-
-    auto operator=(const hw_queue &other) noexcept -> hw_queue & = delete;
-
-    hw_queue(hw_queue &&other) noexcept;
-
-    auto operator=(hw_queue &&other) noexcept -> hw_queue &;
-
-    auto initialize_new_queue(descriptor_t *wq_descriptor_ptr, uint32_t major_version) noexcept -> dsahw_status_t;
-
-    [[nodiscard]] auto get_portal_ptr() const noexcept -> void *;
-
-    [[nodiscard]] auto enqueue_descriptor(const dsahw_descriptor_t *desc_ptr) const noexcept -> dsahw_status_t;
-
-    [[nodiscard]] auto priority() const noexcept -> int32_t;
-
-    [[nodiscard]] auto memory_type() const noexcept -> supported_memory_type;
-
-    void set_portal_ptr(void *portal_ptr) noexcept;
-
-    virtual ~hw_queue() noexcept;
-
-private:
-    uint32_t                       version_       = 0u;
-    int32_t                        priority_      = 0u;
-    supported_memory_type          memory_type_   = supported_memory_type::non_durable;
-    uint64_t                       portal_mask_   = 0u;      /**< Mask for incrementing portals */
-    mutable void                   *portal_ptr_   = nullptr;
-    mutable std::atomic<uintptr_t> portal_offset_ = 0u;      /**< Portal for enqcmd (mod page size)*/
-};
-
-}
-#endif
-
-#endif //DML_MIDDLE_LAYER_DISPATCHER_HW_QUEUE_HPP_
diff --git a/sources/middle_layer/dispatcher/numa.cpp b/sources/middle_layer/dispatcher/numa.cpp
deleted file mode 100644
index dbe13de..0000000
--- a/sources/middle_layer/dispatcher/numa.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include <array>
-#include <fstream>
-#include <unordered_map>
-
-#if defined(linux)
-    #include <x86intrin.h>
-#endif
-
-#include "numa.hpp"
-
-namespace dml::ml::util {
-
-static constexpr auto get_nodes_configuration() -> std::array<std::pair<const char *, int32_t>, 10> {
-    return {
-            {
-                    {R"(/sys/bus/node/devices/node0/cpulist)", 0},
-                    {R"(/sys/bus/node/devices/node1/cpulist)", 1},
-                    {R"(/sys/bus/node/devices/node2/cpulist)", 2},
-                    {R"(/sys/bus/node/devices/node3/cpulist)", 3},
-                    {R"(/sys/bus/node/devices/node4/cpulist)", 4},
-                    {R"(/sys/bus/node/devices/node5/cpulist)", 5},
-                    {R"(/sys/bus/node/devices/node6/cpulist)", 6},
-                    {R"(/sys/bus/node/devices/node7/cpulist)", 7},
-                    {R"(/sys/bus/node/devices/node8/cpulist)", 8},
-                    {R"(/sys/bus/node/devices/node9/cpulist)", 9}
-            }
-    };
-}
-
-static inline void get_region(std::ifstream &stream,
-                              uint32_t &begin,
-                              uint32_t &end,
-                              bool read_end_splitter = false) {
-    char splitter = '0';
-
-    stream >> begin;
-    stream >> splitter;
-    stream >> end;
-
-    if (read_end_splitter) {
-        stream >> splitter;
-    }
-}
-
-static inline void update_regions(std::unordered_map<uint32_t, int32_t> &regions,
-                                  uint32_t begin,
-                                  uint32_t end,
-                                  int32_t value) {
-    for (uint32_t i = begin; i <= end; i++) {
-        regions[i] = value;
-    }
-}
-
-class numa_configuration {
-public:
-    numa_configuration(const numa_configuration &other) = delete;
-
-    auto operator=(const numa_configuration &other) -> numa_configuration & = delete;
-
-    numa_configuration(numa_configuration &&other) = delete;
-
-    auto operator=(numa_configuration &&other) -> numa_configuration & = delete;
-
-    static auto get_instance() noexcept -> numa_configuration & {
-        static numa_configuration inst{};
-
-        return inst;
-    }
-
-    auto operator[](uint32_t cpu_id) noexcept -> int32_t {
-        if (mapping_.find(cpu_id) != mapping_.end()) {
-            return mapping_[cpu_id];
-        } else {
-            return 0;
-        }
-    }
-
-private:
-    numa_configuration() noexcept {
-        constexpr auto node_lists = get_nodes_configuration();
-
-        for (auto path : node_lists) {
-            std::ifstream file(path.first);
-
-            if (!file.is_open()) {
-                continue;
-            }
-
-            uint32_t begin = 0;
-            uint32_t end   = 0;
-
-            get_region(file, begin, end, true);
-            update_regions(mapping_, begin, end, path.second);
-
-            get_region(file, begin, end);
-            update_regions(mapping_, begin, end, path.second);
-        }
-    }
-
-    std::unordered_map<uint32_t, int32_t> mapping_;
-};
-
-auto get_cpu_id() -> uint32_t {
-    uint32_t cpu_id = -1;
-
-#if defined(linux)
-    __rdtscp(&cpu_id);
-#endif
-
-    return cpu_id;
-}
-
-int32_t get_numa_id() noexcept {
-#if defined(linux)
-    static auto &numa_config = numa_configuration::get_instance();
-
-    static thread_local auto cpu_id  = get_cpu_id();
-    static thread_local auto numa_id = numa_config[cpu_id];
-
-    return numa_id;
-#else
-    // Not supported in Windows yet
-    return -1;
-#endif
-}
-
-}
diff --git a/sources/middle_layer/hw_configuration_driver.c b/sources/middle_layer/hw_configuration_driver.c
deleted file mode 100644
index 5bacb32..0000000
--- a/sources/middle_layer/hw_configuration_driver.c
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-
-#include "hardware_configuration_driver.h"
-
-#include <fcntl.h>
-
-#if defined( linux )
-
-#include <sys/mman.h>
-#include <dlfcn.h>
-
-const static char *accelerator_configuration_driver_name = "/usr/lib64/libaccel-config.so";
-
-typedef int                     (*accfg_new_ptr)(struct accfg_ctx **ctx);
-
-typedef struct accfg_device *(*accfg_device_get_first_ptr)(struct accfg_ctx *ctx);
-
-typedef const char *(*accfg_device_get_devname_ptr)(struct accfg_device *device);
-
-typedef struct accfg_device *(*accfg_device_get_next_ptr)(struct accfg_device *device);
-
-typedef struct accfg_wq *(*accfg_wq_get_first_ptr)(struct accfg_device *device);
-
-typedef struct accfg_wq *(*accfg_wq_get_next_ptr)(struct accfg_wq *wq);
-
-typedef enum accfg_wq_state     (*accfg_wq_get_state_ptr)(struct accfg_wq *wq);
-
-typedef unsigned int            (*accfg_device_get_cdev_major_ptr)(struct accfg_device *device);
-
-typedef int                     (*accfg_wq_get_cdev_minor_ptr)(struct accfg_wq *wq);
-
-typedef enum accfg_device_state (*accfg_device_get_state_ptr)(struct accfg_device *device);
-
-typedef struct accfg_ctx *(*accfg_unref_ptr)(struct accfg_ctx *ctx);
-
-typedef enum accfg_wq_mode      (*accfg_wq_get_mode_ptr)(struct accfg_wq *wq);
-
-typedef unsigned long           (*accfg_device_get_gen_cap_ptr)(struct accfg_device *device);
-
-typedef int (*accfg_group_get_traffic_class_ptr)(struct accfg_group *group);
-
-typedef struct accfg_group *(*accfg_group_get_first_ptr)(struct accfg_device *device);
-
-typedef struct accfg_group *(*accfg_group_get_next_ptr)(struct accfg_group *group);
-
-typedef struct accfg_group *(*accfg_wq_get_group_ptr)(struct accfg_wq *wq);
-
-typedef int (*accfg_wq_get_group_id_ptr)(struct accfg_wq *wq);
-
-typedef int (*accfg_group_get_id_ptr)(struct accfg_group *group);
-
-typedef int (*accfg_wq_get_user_dev_path_ptr)(struct accfg_wq *wq, char *buf, size_t size);
-
-/**
- * @brief Table with functions required from accelerator configuration library
- */
-static dsa_desc_t functions_table[] = {
-        {NULL, "accfg_new"},
-        {NULL, "accfg_device_get_first"},
-        {NULL, "accfg_device_get_devname"},
-        {NULL, "accfg_device_get_next"},
-        {NULL, "accfg_wq_get_first"},
-        {NULL, "accfg_wq_get_next"},
-        {NULL, "accfg_wq_get_state"},
-        {NULL, "accfg_wq_get_mode"},
-        {NULL, "accfg_device_get_cdev_major"},
-        {NULL, "accfg_wq_get_cdev_minor"},
-        {NULL, "accfg_device_get_state"},
-        {NULL, "accfg_unref"},
-        {NULL, "accfg_device_get_gen_cap"},
-        {NULL, "accfg_device_get_numa_node"},
-        {NULL, "accfg_wq_get_priority"},
-        {NULL, "accfg_group_get_first"},
-        {NULL, "accfg_group_get_next"},
-        {NULL, "accfg_group_get_traffic_class_a"},
-        {NULL, "accfg_group_get_traffic_class_b"},
-        {NULL, "accfg_wq_get_group"},
-        {NULL, "accfg_wq_get_group_id"},
-        {NULL, "accfg_group_get_id"},
-#if defined(LIB_ACCEL_VERSION_3_2)
-        {NULL, "accfg_wq_get_user_dev_path"},
-#endif
-        // Terminate list/init
-        {NULL, NULL}
-};
-
-static inline dsahw_status_t own_load_accelerator_configuration_driver(void **driver_instance_pptr);
-
-static inline bool own_load_configuration_functions(void *driver_instance_ptr);
-
-#endif
-
-dsahw_status_t DML_HW_API(initialize_accelerator_driver)(hw_driver_t *driver_ptr) {
-#if defined( linux )
-    // Variables
-    driver_ptr->driver_instance_ptr = NULL;
-
-    // Load DLL
-    dsahw_status_t status = own_load_accelerator_configuration_driver(&driver_ptr->driver_instance_ptr);
-
-    // If DLL is loaded successfully
-    if (DML_STATUS_OK != status ||
-        !driver_ptr->driver_instance_ptr ||
-        !own_load_configuration_functions(driver_ptr->driver_instance_ptr)) {
-
-        // Free DLL
-        if (driver_ptr->driver_instance_ptr) {
-            dlclose(driver_ptr->driver_instance_ptr);
-        }
-
-        driver_ptr->driver_instance_ptr = NULL;
-    }
-
-    return status;
-#else
-    return DML_STATUS_DRIVER_NOT_FOUND;
-#endif
-}
-
-void DML_HW_API(finalize_accelerator_driver)(hw_driver_t *driver_ptr) {
-#if defined( linux )
-    if (driver_ptr->driver_instance_ptr) {
-        dlclose(driver_ptr->driver_instance_ptr);
-    }
-
-    driver_ptr->driver_instance_ptr = NULL;
-#endif
-}
-
-int32_t DML_HW_API(driver_new_context)(struct accfg_ctx **ctx) {
-#if defined( linux )
-    return ((accfg_new_ptr) functions_table[0].function)(ctx);
-#else
-    return DML_STATUS_DRIVER_NOT_FOUND;
-#endif
-}
-
-struct accfg_device *DML_HW_API(context_get_first_device)(struct accfg_ctx *ctx) {
-#if defined( linux )
-    return ((accfg_device_get_first_ptr) functions_table[1].function)(ctx);
-#else
-    return NULL;
-#endif
-}
-
-const char *DML_HW_API(device_get_name)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_devname_ptr) functions_table[2].function)(device);
-#else
-    return NULL;
-#endif
-}
-
-struct accfg_device *DML_HW_API(device_get_next)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_next_ptr) functions_table[3].function)(device);
-#else
-    return NULL;
-#endif
-}
-
-struct accfg_wq *DML_HW_API(get_first_work_queue)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_wq_get_first_ptr) functions_table[4].function)(device);
-#else
-    return NULL;
-#endif
-}
-
-struct accfg_wq *DML_HW_API(work_queue_get_next)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_next_ptr) functions_table[5].function)(wq);
-#else
-    return NULL;
-#endif
-}
-
-enum accfg_wq_state DML_HW_API(work_queue_get_state)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_state_ptr) functions_table[6].function)(wq);
-#else
-    return -1;
-#endif
-}
-
-enum accfg_wq_mode DML_HW_API(work_queue_get_mode)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_mode_ptr) functions_table[7].function)(wq);
-#else
-    return 2;
-#endif
-}
-
-uint32_t DML_HW_API(device_get_major_version)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_cdev_major_ptr) functions_table[8].function)(device);
-#else
-    return -1;
-#endif
-}
-
-int32_t DML_HW_API(work_queue_get_minor_version)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_cdev_minor_ptr) functions_table[9].function)(wq);
-#else
-    return -1;
-#endif
-}
-
-enum accfg_device_state DML_HW_API(device_get_state)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_state_ptr) functions_table[10].function)(device);
-#else
-    return -1;
-#endif
-}
-
-struct accfg_ctx *DML_HW_API(context_close)(struct accfg_ctx *ctx) {
-#if defined( linux )
-    return ((accfg_unref_ptr) functions_table[11].function)(ctx);
-#else
-    return NULL;
-#endif
-}
-
-uint64_t DML_HW_API(device_get_gen_cap_register)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_gen_cap_ptr) functions_table[12].function)(device);
-#else
-    return 0;
-#endif
-}
-
-uint64_t DML_HW_API(device_get_numa_node)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_device_get_gen_cap_ptr) functions_table[13].function)(device);
-#else
-    return -1;
-#endif
-}
-
-int32_t DML_HW_API(work_queue_get_priority)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_cdev_minor_ptr) functions_table[14].function)(wq);
-#else
-    return -1;
-#endif
-}
-
-struct accfg_group *DML_HW_API(group_get_first)(struct accfg_device *device) {
-#if defined( linux )
-    return ((accfg_group_get_first_ptr) functions_table[15].function)(device);
-#else
-    return NULL;
-#endif
-}
-
-struct accfg_group *DML_HW_API(group_get_next)(struct accfg_group *group) {
-#if defined( linux )
-    return ((accfg_group_get_next_ptr) functions_table[16].function)(group);
-#else
-    return NULL;
-#endif
-}
-
-int DML_HW_API(group_get_traffic_class_a)(struct accfg_group *group) {
-#if defined( linux )
-    return ((accfg_group_get_traffic_class_ptr) functions_table[17].function)(group);
-#else
-    return 0;
-#endif
-}
-
-int DML_HW_API(group_get_traffic_class_b)(struct accfg_group *group) {
-#if defined( linux )
-    return ((accfg_group_get_traffic_class_ptr) functions_table[18].function)(group);
-#else
-    return 0;
-#endif
-}
-
-struct accfg_group *DML_HW_API(work_queue_get_group)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_group_ptr) functions_table[19].function)(wq);
-#else
-    return NULL;
-#endif
-}
-
-int DML_HW_API(work_queue_get_group_id)(struct accfg_wq *wq) {
-#if defined( linux )
-    return ((accfg_wq_get_group_id_ptr) functions_table[20].function)(wq);
-#else
-    return -1;
-#endif
-}
-
-int DML_HW_API(group_get_id)(struct accfg_group *group) {
-#if defined( linux )
-    return ((accfg_group_get_id_ptr) functions_table[21].function)(group);
-#else
-    return -1;
-#endif
-}
-
-int DML_HW_API(work_queue_get_device_path)(struct accfg_wq *wq, char *buf, size_t size) {
-#if defined( linux ) && defined(LIB_ACCEL_VERSION_3_2)
-    return ((accfg_wq_get_user_dev_path_ptr) functions_table[22].function)(wq, buf, size);
-#else
-    return -1;
-#endif
-}
-
-#if defined( linux )
-
-/* ------ Internal functions implementation ------ */
-
-bool own_load_configuration_functions(void *driver_instance_ptr) {
-    uint32_t i = 0u;
-
-    // Clear error log
-    (void)dlerror();
-    while (functions_table[i].function_name) {
-        functions_table[i].function = (library_function) dlsym(driver_instance_ptr, functions_table[i].function_name);
-
-        char *err_message = dlerror();
-
-        if (err_message || !functions_table[i].function) {
-            return false;
-        }
-            
-        i++;
-    }
-
-    return true;
-}
-
-dsahw_status_t own_load_accelerator_configuration_driver(void **driver_instance_pptr) {
-
-    // Try to load the user interface library for IAX/DSA kernel driver
-    void *driver_instance_ptr = dlopen(accelerator_configuration_driver_name, RTLD_LAZY);
-
-    if (!driver_instance_ptr) {
-        // This is needed for error handle. We need to call dlerror
-        // for emptying error message. Otherwise we will receive error
-        // message during loading symbols from another library
-        dlerror();
-
-        return DML_STATUS_DRIVER_NOT_FOUND;
-    }
-
-    *driver_instance_pptr = driver_instance_ptr;
-
-    return DML_STATUS_OK;
-}
-
-#endif
diff --git a/sources/middle_layer/src/execution_path.cpp b/sources/middle_layer/src/execution_path.cpp
new file mode 100644
index 0000000..84b9049
--- /dev/null
+++ b/sources/middle_layer/src/execution_path.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/device.hpp>
+#include <dml/detail/ml/execution_path.hpp>
+
+#include "ml_utils.hpp"
+
+namespace dml::detail::ml::execution_path
+{
+    submission_status software::submit(operation& op, result& res) noexcept
+    {  // Forwards to the core software device; op/res are handed over as their core-layer types via as_core().
+        return core::software_device().submit(as_core(op), as_core(res));
+    }
+
+    submission_status hardware::submit(operation& op, result& res) noexcept
+    {  // Forwards to the core hardware device; op/res are handed over as their core-layer types via as_core().
+        return core::hardware_device().submit(as_core(op), as_core(res));
+    }
+}  // namespace dml::detail::ml::execution_path
diff --git a/sources/middle_layer/src/ml_utils.hpp b/sources/middle_layer/src/ml_utils.hpp
new file mode 100644
index 0000000..efe3e97
--- /dev/null
+++ b/sources/middle_layer/src/ml_utils.hpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#ifndef DML_ML_OWN_UTILS_HPP
+#define DML_ML_OWN_UTILS_HPP
+
+#include <core/types.hpp>
+#include <dml/detail/ml/operation.hpp>
+#include <dml/detail/ml/result.hpp>
+#include <new>
+
+namespace dml::detail::ml
+{
+    // Views a middle-layer result as core::completion_record (reinterpret_cast + std::launder; assumes both types share size/alignment - TODO confirm)
+    [[nodiscard]] static inline auto& as_core(result& res) noexcept
+    {
+        return *std::launder(reinterpret_cast<core::completion_record*>(&res));
+    }
+
+    // Views a middle-layer operation as core::descriptor (same layout assumption as above - TODO confirm)
+    [[nodiscard]] static inline auto& as_core(operation& op) noexcept
+    {
+        return *std::launder(reinterpret_cast<core::descriptor*>(&op));
+    }
+
+    // Views a core::completion_record as a middle-layer result (inverse of as_core(result&))
+    [[nodiscard]] static inline auto& as_ml(core::completion_record& res) noexcept
+    {
+        return *std::launder(reinterpret_cast<result*>(&res));
+    }
+
+    // Views a core::descriptor as a middle-layer operation (inverse of as_core(operation&))
+    [[nodiscard]] static inline auto& as_ml(core::descriptor& dsc) noexcept
+    {
+        return *std::launder(reinterpret_cast<operation*>(&dsc));
+    }
+}  // namespace dml::detail::ml
+
+#endif  // DML_ML_OWN_UTILS_HPP
diff --git a/sources/middle_layer/src/operation.cpp b/sources/middle_layer/src/operation.cpp
new file mode 100644
index 0000000..8ad94da
--- /dev/null
+++ b/sources/middle_layer/src/operation.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/descriptor_views.hpp>
+#include <core/operations.hpp>
+#include <core/types.hpp>
+#include <cstring>
+#include <dml/detail/ml/operation.hpp>
+
+#include "ml_utils.hpp"
+
+namespace dml::detail::ml
+{
+    operation make_nop_operation(const nop_options options) noexcept
+    {
+        // NOP descriptor: only the opcode and the caller's flags are written into a fresh core descriptor.
+        core::descriptor dsc{};
+        auto fields = core::nop_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::nop);
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_drain_operation(address_t                    readback_address_1,
+                                   address_t                    readback_address_2,
+                                   const drain_options          options,
+                                   const drain_specific_options specific_options) noexcept
+    {
+        // Drain descriptor: opcode, both readback addresses, then the generic and operation-specific flags.
+        core::descriptor dsc{};
+        auto fields = core::drain_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::drain);
+        fields.readback_address_1() = readback_address_1;
+        fields.readback_address_2() = readback_address_2;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.operation_specific_flags() = static_cast<operation_specific_flags_t>(specific_options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_mem_move_operation(const byte_t *const    src,
+                                      byte_t *const          dst,
+                                      const transfer_size_t  size,
+                                      const mem_move_options options) noexcept
+    {
+        // Memory-move descriptor: source and destination are stored as integer addresses next to the transfer size.
+        core::descriptor dsc{};
+        auto fields = core::mem_move_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::memory_move);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_fill_operation(const uint64_t        pattern,
+                                  byte_t *const         dst,
+                                  const transfer_size_t size,
+                                  const fill_options    options) noexcept
+    {
+        // Fill descriptor: 64-bit pattern, destination address, transfer size and the caller's flags.
+        core::descriptor dsc{};
+        auto fields = core::fill_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::fill);
+        fields.pattern() = pattern;
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_dualcast_operation(const byte_t *const             src,
+                                      byte_t *const                   dst1,
+                                      byte_t *const                   dst2,
+                                      const transfer_size_t           size,
+                                      const dualcast_options          options,
+                                      const dualcast_specific_options specific_options) noexcept
+    {
+        // Dualcast descriptor: one source address, two destination addresses, size and both flag sets.
+        core::descriptor dsc{};
+        auto fields = core::dualcast_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::dualcast);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_1_address() = reinterpret_cast<address_t>(dst1);
+        fields.destination_2_address() = reinterpret_cast<address_t>(dst2);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.operation_specific_flags() = static_cast<operation_specific_flags_t>(specific_options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_compare_operation(const byte_t *const   src1,
+                                     const byte_t *const   src2,
+                                     const transfer_size_t size,
+                                     const compare_options options,
+                                     const compare_result  expected_result) noexcept
+    {
+        // Compare descriptor: two source addresses, size, flags and the expected comparison result.
+        core::descriptor dsc{};
+        auto fields = core::compare_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::compare);
+        fields.source_1_address() = reinterpret_cast<address_t>(src1);
+        fields.source_2_address() = reinterpret_cast<address_t>(src2);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.expected_result() = static_cast<result_t>(expected_result);
+
+        return as_ml(dsc);
+    }
+
+    operation make_compare_pattern_operation(const uint64_t                pattern,
+                                             const byte_t                 *src,
+                                             const transfer_size_t         size,
+                                             const compare_pattern_options options,
+                                             const compare_result          expected_result) noexcept
+    {
+        // Compare-pattern descriptor: 64-bit pattern, source address, size, flags and the expected result.
+        core::descriptor dsc{};
+        auto fields = core::compare_pattern_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::compare_pattern);
+        fields.pattern() = pattern;
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.expected_result() = static_cast<result_t>(expected_result);
+
+        return as_ml(dsc);
+    }
+
+    operation make_crc_operation(const byte_t *const        src,
+                                 const transfer_size_t      size,
+                                 const crc_value_t          crc_seed,
+                                 const crc_options          options,
+                                 const crc_specific_options specific_options) noexcept
+    {
+        // CRC descriptor: source address, size, both flag sets and the seed the computation starts from.
+        core::descriptor dsc{};
+        auto fields = core::crc_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::crc);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.operation_specific_flags() = static_cast<operation_specific_flags_t>(specific_options);
+        fields.crc_seed() = crc_seed;
+
+        return as_ml(dsc);
+    }
+
+    operation make_copy_crc_operation(const byte_t *const             src,
+                                      byte_t *const                   dst,
+                                      const transfer_size_t           size,
+                                      const crc_value_t               crc_seed,
+                                      const copy_crc_options          options,
+                                      const copy_crc_specific_options specific_options) noexcept
+    {
+        // Copy-with-CRC descriptor: source and destination addresses, size, both flag sets and the CRC seed.
+        core::descriptor dsc{};
+        auto fields = core::copy_crc_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::copy_crc);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.operation_specific_flags() = static_cast<operation_specific_flags_t>(specific_options);
+        fields.crc_seed() = crc_seed;
+
+        return as_ml(dsc);
+    }
+
+    operation make_create_delta_operation(const byte_t *const        src1,
+                                          const byte_t *const        src2,
+                                          const transfer_size_t      size,
+                                          byte_t *const              delta_record,
+                                          const transfer_size_t      delta_max_size,
+                                          const create_delta_options options,
+                                          const create_delta_result  expected_result) noexcept
+    {
+        // Create-delta descriptor: two sources, the delta record buffer with its maximum size, flags and result mask.
+        core::descriptor dsc{};
+        auto fields = core::create_delta_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::create_delta);
+        fields.source_1_address() = reinterpret_cast<address_t>(src1);
+        fields.source_2_address() = reinterpret_cast<address_t>(src2);
+        fields.delta_record_address() = reinterpret_cast<address_t>(delta_record);
+        fields.transfer_size() = size;
+        fields.maximum_delta_record_size() = delta_max_size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.expected_result_mask() = static_cast<result_t>(expected_result);
+
+        return as_ml(dsc);
+    }
+
+    operation make_apply_delta_operation(const byte_t *const       delta_record,
+                                         const transfer_size_t     delta_size,
+                                         byte_t *const             dst,
+                                         const transfer_size_t     size,
+                                         const apply_delta_options options) noexcept
+    {
+        // Apply-delta descriptor: delta record address and size, destination address, transfer size and flags.
+        core::descriptor dsc{};
+        auto fields = core::apply_delta_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::apply_delta);
+        fields.delta_record_address() = reinterpret_cast<address_t>(delta_record);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = size;
+        fields.delta_record_size() = delta_size;
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_cache_flush_operation(byte_t *const dst, const transfer_size_t size, const cache_flush_options options) noexcept
+    {
+        // Cache-flush descriptor: destination address, transfer size and the caller's flags.
+        core::descriptor dsc{};
+        auto fields = core::cache_flush_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::cache_flush);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = size;
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+
+    operation make_dif_check_operation(const byte_t        *src,
+                                       transfer_size_t      transfer_size,
+                                       dif_parameters       src_parameters,
+                                       dif_check_options    options,
+                                       dif_specific_options specific_options,
+                                       dif_source_options   source_options) noexcept
+    {
+        // DIF-check descriptor: source address and size, the three flag sets, then the source-side tag values.
+        core::descriptor dsc{};
+        auto fields = core::dif_check_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::dif_check);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.transfer_size() = transfer_size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.dif_flags() = static_cast<dif_flags_t>(specific_options);
+        fields.source_dif_flags() = static_cast<dif_flags_t>(source_options);
+        fields.source_ref_tag() = src_parameters.ref_tag_seed;
+        fields.source_app_tag() = src_parameters.app_tag_seed;
+        fields.source_app_tag_mask() = src_parameters.app_tag_mask;
+
+        return as_ml(dsc);
+    }
+
+    operation make_dif_insert_operation(const byte_t           *src,
+                                        byte_t                 *dst,
+                                        transfer_size_t         transfer_size,
+                                        dif_parameters          dst_parameters,
+                                        dif_insert_options      options,
+                                        dif_specific_options    specific_options,
+                                        dif_destination_options destination_options) noexcept
+    {
+        // DIF-insert descriptor: addresses and size, the three flag sets, then the destination-side tag values.
+        core::descriptor dsc{};
+        auto fields = core::dif_insert_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::dif_insert);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = transfer_size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.dif_flags() = static_cast<dif_flags_t>(specific_options);
+        fields.destination_dif_flags() = static_cast<dif_flags_t>(destination_options);
+        fields.destination_ref_tag() = dst_parameters.ref_tag_seed;
+        fields.destination_app_tag() = dst_parameters.app_tag_seed;
+        fields.destination_app_tag_mask() = dst_parameters.app_tag_mask;
+
+        return as_ml(dsc);
+    }
+
+    operation make_dif_strip_operation(const byte_t        *src,
+                                       byte_t              *dst,
+                                       transfer_size_t      transfer_size,
+                                       dif_parameters       src_parameters,
+                                       dif_strip_options    options,
+                                       dif_specific_options specific_options,
+                                       dif_source_options   source_options) noexcept
+    {
+        // DIF-strip descriptor: addresses and size, the three flag sets, then the source-side tag values.
+        core::descriptor dsc{};
+        auto fields = core::dif_strip_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::dif_strip);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = transfer_size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.dif_flags() = static_cast<dif_flags_t>(specific_options);
+        fields.source_dif_flags() = static_cast<dif_flags_t>(source_options);
+        fields.source_ref_tag() = src_parameters.ref_tag_seed;
+        fields.source_app_tag() = src_parameters.app_tag_seed;
+        fields.source_app_tag_mask() = src_parameters.app_tag_mask;
+
+        return as_ml(dsc);
+    }
+
+    operation make_dif_update_operation(const byte_t           *src,
+                                        byte_t                 *dst,
+                                        transfer_size_t         transfer_size,
+                                        dif_parameters          src_parameters,
+                                        dif_parameters          dst_parameters,
+                                        dif_update_options      options,
+                                        dif_specific_options    specific_options,
+                                        dif_source_options      source_options,
+                                        dif_destination_options destination_options) noexcept
+    {
+        // DIF-update descriptor: common fields first, then the source-side and destination-side DIF settings.
+        core::descriptor dsc{};
+        auto fields = core::dif_update_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::dif_update);
+        fields.source_address() = reinterpret_cast<address_t>(src);
+        fields.destination_address() = reinterpret_cast<address_t>(dst);
+        fields.transfer_size() = transfer_size;
+        fields.flags() = static_cast<flags_t>(options);
+        fields.dif_flags() = static_cast<dif_flags_t>(specific_options);
+        // Source side: flags and tag values taken from src_parameters.
+        fields.source_dif_flags() = static_cast<dif_flags_t>(source_options);
+        fields.source_ref_tag() = src_parameters.ref_tag_seed;
+        fields.source_app_tag() = src_parameters.app_tag_seed;
+        fields.source_app_tag_mask() = src_parameters.app_tag_mask;
+        // Destination side: flags and tag values taken from dst_parameters.
+        fields.destination_dif_flags() = static_cast<dif_flags_t>(destination_options);
+        fields.destination_ref_tag() = dst_parameters.ref_tag_seed;
+        fields.destination_app_tag() = dst_parameters.app_tag_seed;
+        fields.destination_app_tag_mask() = dst_parameters.app_tag_mask;
+
+        return as_ml(dsc);
+    }
+
+    operation make_batch_operation(const operation *const src, const transfer_size_t length, const batch_options options) noexcept
+    {
+        // Batch descriptor: address of the first operation in the list, the number of entries and the caller's flags.
+        core::descriptor dsc{};
+        auto fields = core::batch_descriptor(dsc);
+        fields.operation() = to_underlying(core::operation::batch);
+        fields.descriptor_list_address() = reinterpret_cast<address_t>(src);
+        fields.descriptors_count() = length;
+        fields.flags() = static_cast<flags_t>(options);
+
+        return as_ml(dsc);
+    }
+}  // namespace dml::detail::ml
diff --git a/sources/middle_layer/src/result.cpp b/sources/middle_layer/src/result.cpp
new file mode 100644
index 0000000..faa2ca4
--- /dev/null
+++ b/sources/middle_layer/src/result.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2021 Intel Corporation.
+ *
+ * This software and the related documents are Intel copyrighted materials,
+ * and your use of them is governed by the express license under which they
+ * were provided to you ("License"). Unless the License provides otherwise,
+ * you may not use, modify, copy, publish, distribute, disclose or transmit
+ * this software or the related documents without Intel's prior written
+ * permission.
+ *
+ * This software and the related documents are provided as is, with no
+ * express or implied warranties, other than those that are expressly
+ * stated in the License.
+ *
+ */
+
+#include <core/completion_record_views.hpp>
+#include <core/descriptor_views.hpp>
+#include <dml/detail/ml/result.hpp>
+
+#include "ml_utils.hpp"
+
+#if defined(linux)
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <intrin.h>
+#endif
+
+namespace dml::detail::ml
+{
+    /**
+     * @brief Scope guard whose destructor blocks until a watched byte stops matching a given value
+     */
+    class awaiter final
+    {
+    public:
+        /**
+         * @brief Records what to watch; no waiting happens here
+         *
+         * @param address       location of the byte that is expected to be changed asynchronously
+         * @param initial_value value the byte holds for as long as the wait must continue
+         * @param period        clocks added to the current time for each timed wait (DML_EFFICIENT_WAIT only)
+         */
+        explicit awaiter(volatile void *address, uint8_t initial_value, uint32_t period = 200) noexcept:
+            address_ptr_(static_cast<volatile uint8_t *>(address)),
+            period_(period),
+            initial_value_(initial_value)
+        {
+            // Nothing else to do: the wait itself is performed by the destructor
+        }
+
+        /**
+         * @brief Blocks until the watched byte differs from the initial value
+         */
+        ~awaiter() noexcept
+        {
+            // The byte is read through a volatile pointer, so it is re-read on every iteration
+            while (*address_ptr_ == initial_value_)
+            {
+#ifdef DML_EFFICIENT_WAIT
+                // Helpers below are declared elsewhere; presumably umonitor/umwait based - TODO confirm
+                monitor_address(address_ptr_);
+
+                // Timed wait with a deadline of period_ clocks past the current time
+                wait_until(current_time() + period_, idle_state_);
+#else
+                // Busy-wait: one _mm_pause() per re-check of the byte
+                _mm_pause();
+#endif
+            }
+        }
+
+    private:
+        volatile uint8_t *address_ptr_;        /**<Byte that is watched for a change */
+        uint32_t          period_        = 0u; /**<Clocks added to the current time per timed wait */
+        uint8_t           initial_value_ = 0u; /**<Value that keeps the wait going */
+        uint32_t          idle_state_    = 0u; /**<State handed to wait_until() for CPU wait control */
+    };
+
+    void wait(result &res) noexcept
+    {  // The guard's destructor runs at scope exit and blocks while the first byte of `res` is still 0.
+        const awaiter completion_guard(&res, 0);
+    }
+
+    void bind(operation &op, result &res) noexcept
+    {
+        // Ask for a completion record and point the descriptor behind `op` at `res`.
+        const auto record_flags = static_cast<flags_t>(flag::completion_record_address_valid) | static_cast<flags_t>(flag::request_completion_record);
+        auto dsc_view = core::any_descriptor(as_core(op));
+        dsc_view.flags() |= record_flags;
+        dsc_view.completion_record_address() = reinterpret_cast<address_t>(&res);
+        // Zero the first byte of the record: wait() keeps spinning for as long as that byte stays 0.
+        res.bytes[0] = 0;
+    }
+
+    detail::execution_status get_status(result &res) noexcept
+    {  // Reads status() through the generic completion-record view of `res` and casts it to execution_status.
+        return static_cast<execution_status>(core::any_completion_record(as_core(res)).status());
+    }
+
+    detail::result_t get_result(result &res) noexcept
+    {  // Returns result() as exposed by the generic completion-record view of `res`.
+        return core::any_completion_record(as_core(res)).result();
+    }
+
+    detail::transfer_size_t get_bytes_completed(result &res) noexcept
+    {  // Returns bytes_completed() as exposed by the generic completion-record view of `res`.
+        return core::any_completion_record(as_core(res)).bytes_completed();
+    }
+
+    detail::transfer_size_t get_delta_record_size(result &res) noexcept
+    {  // Reads delta_record_size() via the create-delta view; presumably only meaningful after a create-delta operation - confirm with callers.
+        return core::create_delta_completion_record(as_core(res)).delta_record_size();
+    }
+
+    detail::transfer_size_t get_crc_value(result &res) noexcept
+    {  // Reads crc_value() via the CRC view. NOTE(review): returns transfer_size_t while CRC seeds use crc_value_t - confirm the types agree.
+        return core::crc_completion_record(as_core(res)).crc_value();
+    }
+
+}  // namespace dml::detail::ml
diff --git a/include/dml/cpp/middle_layer/validation.hpp b/sources/middle_layer/src/validation.cpp
similarity index 70%
rename from include/dml/cpp/middle_layer/validation.hpp
rename to sources/middle_layer/src/validation.cpp
index aa676b9..1528b08 100644
--- a/include/dml/cpp/middle_layer/validation.hpp
+++ b/sources/middle_layer/src/validation.cpp
@@ -14,14 +14,15 @@
  *
  */
 
-#ifndef DML_ML_VALIDATION_HPP
-#define DML_ML_VALIDATION_HPP
+#include <core/validation.hpp>
+#include <dml/detail/ml/validation.hpp>
 
-#include <dml/cpp/middle_layer/descriptor.hpp>
+#include "ml_utils.hpp"
 
-namespace dml::ml
+namespace dml::detail::ml
 {
-    [[nodiscard]] validation_status validate(descriptor &dsc) noexcept;
-}  // namespace dml::ml
-
-#endif  //DML_ML_VALIDATION_HPP
+    validation_status validate(operation& op) noexcept
+    {  // Thin wrapper: the operation is handed to core::validate() as its core-layer type via as_core().
+        return core::validate(as_core(op));
+    }
+}  // namespace dml::detail::ml
diff --git a/sources/middle_layer/validation.cpp b/sources/middle_layer/validation.cpp
deleted file mode 100644
index 6d29f3c..0000000
--- a/sources/middle_layer/validation.cpp
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright 2021 Intel Corporation.
- *
- * This software and the related documents are Intel copyrighted materials,
- * and your use of them is governed by the express license under which they
- * were provided to you ("License"). Unless the License provides otherwise,
- * you may not use, modify, copy, publish, distribute, disclose or transmit
- * this software or the related documents without Intel's prior written
- * permission.
- *
- * This software and the related documents are provided as is, with no
- * express or implied warranties, other than those that are expressly
- * stated in the License.
- *
- */
-
-#include <dml/cpp/middle_layer/descriptor_views.hpp>
-#include <dml/cpp/middle_layer/validation.hpp>
-#include <dml/cpp/middle_layer/values.hpp>
-
-#include "utils.hpp"
-
-namespace dml::ml
-{
-    static constexpr uint32_t dif_block_sizes[4] = { 512u, 520u, 4096u, 4104u };
-
-    [[nodiscard]] static validation_status validate(views::nop_descriptor nop) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::batch_descriptor batch) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::drain_descriptor drain) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::mem_move_descriptor mem_move) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::fill_descriptor fill) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::compare_descriptor compare) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::compare_pattern_descriptor compare_pattern) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::create_delta_descriptor create_delta) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::apply_delta_descriptor apply_delta) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::dualcast_descriptor dualcast) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::crc_descriptor crc) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::copy_crc_descriptor copy_crc) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::dif_check_descriptor dif_check) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::dif_insert_descriptor dif_insert) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::dif_strip_descriptor dif_strip) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::dif_update_descriptor dif_update) noexcept;
-
-    [[nodiscard]] static validation_status validate(views::cache_flush_descriptor cache_flush) noexcept;
-
-    validation_status validate(descriptor &dsc) noexcept
-    {
-        auto view = views::any_descriptor(dsc);
-
-        switch (static_cast<operation>(view.operation()))
-        {
-            case operation::nop:
-                return validate(views::nop_descriptor(dsc));
-            case operation::batch:
-                return validate(views::batch_descriptor(dsc));
-            case operation::drain:
-                return validate(views::drain_descriptor(dsc));
-            case operation::memory_move:
-                return validate(views::mem_move_descriptor(dsc));
-            case operation::fill:
-                return validate(views::fill_descriptor(dsc));
-            case operation::compare:
-                return validate(views::compare_descriptor(dsc));
-            case operation::compare_pattern:
-                return validate(views::compare_pattern_descriptor(dsc));
-            case operation::create_delta:
-                return validate(views::create_delta_descriptor(dsc));
-            case operation::apply_delta:
-                return validate(views::apply_delta_descriptor(dsc));
-            case operation::dualcast:
-                return validate(views::dualcast_descriptor(dsc));
-            case operation::crc:
-                return validate(views::crc_descriptor(dsc));
-            case operation::copy_crc:
-                return validate(views::copy_crc_descriptor(dsc));
-            case operation::dif_check:
-                return validate(views::dif_check_descriptor(dsc));
-            case operation::dif_insert:
-                return validate(views::dif_insert_descriptor(dsc));
-            case operation::dif_strip:
-                return validate(views::dif_strip_descriptor(dsc));
-            case operation::dif_update:
-                return validate(views::dif_update_descriptor(dsc));
-            case operation::cache_flush:
-                return validate(views::cache_flush_descriptor(dsc));
-            default:
-                return validation_status::unsupported_operation;
-        }
-    }
-
-    [[nodiscard]] static validation_status validate(views::nop_descriptor nop) noexcept
-    {
-        static_cast<void>(nop);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::drain_descriptor drain) noexcept
-    {
-        static_cast<void>(drain);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::mem_move_descriptor mem_move) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(mem_move.source_address(), mem_move.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(mem_move.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::fill_descriptor fill) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(fill.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(fill.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::compare_descriptor compare) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(compare.source_1_address(), compare.source_2_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(compare.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::compare_pattern_descriptor compare_pattern) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(compare_pattern.source_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(compare_pattern.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::create_delta_descriptor create_delta) noexcept
-    {
-        constexpr auto max_size = 0x80000;
-
-        RETURN_STATUS_IF(
-            any_equal_zero(create_delta.source_1_address(), create_delta.source_2_address(), create_delta.delta_record_address()),
-            validation_status::address_is_null);
-
-        RETURN_STATUS_IF(any_equal_zero(create_delta.transfer_size(), create_delta.maximum_delta_record_size()),
-                         validation_status::size_is_null);
-
-        RETURN_STATUS_IF(
-            any_misaligned<8u>(create_delta.source_1_address(), create_delta.source_2_address(), create_delta.delta_record_address()),
-            validation_status::address_is_misaligned);
-
-        RETURN_STATUS_IF(create_delta.transfer_size() % 8 != 0, validation_status::delta_input_size_is_wrong);
-
-        RETURN_STATUS_IF(create_delta.transfer_size() > max_size, validation_status::delta_input_size_overflow);
-
-        RETURN_STATUS_IF(create_delta.maximum_delta_record_size() % 10 != 0 || create_delta.maximum_delta_record_size() < 80,
-                         validation_status::delta_record_size_is_wrong);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::apply_delta_descriptor apply_delta) noexcept
-    {
-        constexpr auto max_size = 0x80000;
-
-        RETURN_STATUS_IF(any_equal_zero(apply_delta.destination_address(), apply_delta.delta_record_address()),
-                         validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(apply_delta.transfer_size(), apply_delta.delta_record_size()), validation_status::size_is_null);
-
-        RETURN_STATUS_IF(overlaps(apply_delta.delta_record_address(),
-                                  apply_delta.delta_record_size(),
-                                  apply_delta.destination_address(),
-                                  apply_delta.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        RETURN_STATUS_IF(any_misaligned<8u>(apply_delta.destination_address(), apply_delta.delta_record_address()),
-                         validation_status::address_is_misaligned);
-
-        RETURN_STATUS_IF(apply_delta.transfer_size() % 8 != 0, validation_status::delta_input_size_is_wrong);
-
-        RETURN_STATUS_IF(apply_delta.transfer_size() > max_size, validation_status::delta_input_size_overflow);
-
-        RETURN_STATUS_IF(apply_delta.delta_record_size() % 10 != 0, validation_status::delta_record_size_is_wrong);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::dualcast_descriptor dualcast) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(dualcast.source_address(), dualcast.destination_1_address(), dualcast.destination_2_address()),
-                         validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(dualcast.transfer_size()), validation_status::size_is_null);
-
-        RETURN_STATUS_IF((dualcast.destination_1_address() & 0xFFFu) != (dualcast.destination_2_address() & 0xFFFu),
-                         validation_status::dualcast_address_is_wrong);
-
-        RETURN_STATUS_IF(overlaps(dualcast.source_address(), dualcast.destination_1_address(), dualcast.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        RETURN_STATUS_IF(overlaps(dualcast.source_address(), dualcast.destination_2_address(), dualcast.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        RETURN_STATUS_IF(overlaps(dualcast.destination_1_address(), dualcast.destination_2_address(), dualcast.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::crc_descriptor crc) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(crc.source_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(crc.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::copy_crc_descriptor copy_crc) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(copy_crc.source_address(), copy_crc.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(copy_crc.transfer_size()), validation_status::size_is_null);
-
-        RETURN_STATUS_IF(overlaps(copy_crc.source_address(), copy_crc.destination_address(), copy_crc.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::cache_flush_descriptor cache_flush) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(cache_flush.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(cache_flush.transfer_size()), validation_status::size_is_null);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::dif_check_descriptor dif_check) noexcept
-    {
-        const auto block_size = dif_block_sizes[dif_check.dif_flags() & 0b11];
-
-        RETURN_STATUS_IF(any_equal_zero(dif_check.source_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(dif_check.transfer_size()), validation_status::size_is_null);
-        RETURN_STATUS_IF(dif_check.transfer_size() % (block_size + sizeof(uint64_t)) != 0, validation_status::dif_size_is_wrong);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::dif_insert_descriptor dif_insert) noexcept
-    {
-        const auto block_size = dif_block_sizes[dif_insert.dif_flags() & 0b11];
-
-        RETURN_STATUS_IF(any_equal_zero(dif_insert.source_address(), dif_insert.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(dif_insert.transfer_size()), validation_status::size_is_null);
-        RETURN_STATUS_IF(dif_insert.transfer_size() % block_size != 0, validation_status::dif_size_is_wrong);
-
-        const auto src_size = dif_insert.transfer_size();
-        const auto dst_size = (src_size / block_size) * (block_size + static_cast<transfer_size_t>(sizeof(uint64_t)));
-        RETURN_STATUS_IF(overlaps(dif_insert.source_address(), src_size, dif_insert.destination_address(), dst_size),
-                         validation_status::buffers_overlap);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::dif_strip_descriptor dif_strip) noexcept
-    {
-        const auto block_size = dif_block_sizes[dif_strip.dif_flags() & 0b11];
-
-        RETURN_STATUS_IF(any_equal_zero(dif_strip.source_address(), dif_strip.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(dif_strip.transfer_size()), validation_status::size_is_null);
-        RETURN_STATUS_IF(dif_strip.transfer_size() % (block_size + sizeof(uint64_t)) != 0, validation_status::dif_size_is_wrong);
-
-        const auto src_size = dif_strip.transfer_size();
-        const auto dst_size = (src_size / (block_size + static_cast<transfer_size_t>(sizeof(uint64_t)))) * block_size;
-        RETURN_STATUS_IF(overlaps(dif_strip.source_address(), src_size, dif_strip.destination_address(), dst_size),
-                         validation_status::buffers_overlap);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::dif_update_descriptor dif_update) noexcept
-    {
-        const auto block_size = dif_block_sizes[dif_update.dif_flags() & 0b11];
-
-        RETURN_STATUS_IF(any_equal_zero(dif_update.source_address(), dif_update.destination_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(any_equal_zero(dif_update.transfer_size()), validation_status::size_is_null);
-        RETURN_STATUS_IF(dif_update.transfer_size() % (block_size + sizeof(uint64_t)) != 0, validation_status::dif_size_is_wrong);
-        RETURN_STATUS_IF(overlaps(dif_update.source_address(), dif_update.destination_address(), dif_update.transfer_size()),
-                         validation_status::buffers_overlap);
-
-        return validation_status::success;
-    }
-
-    [[nodiscard]] static validation_status validate(views::batch_descriptor batch) noexcept
-    {
-        RETURN_STATUS_IF(any_equal_zero(batch.descriptor_list_address()), validation_status::address_is_null);
-        RETURN_STATUS_IF(batch.descriptors_count() < 4, validation_status::batch_size_is_wrong);
-
-        return validation_status::success;
-    }
-
-}  // namespace dml::ml