diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e42a62ef4..b577908fb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,9 +15,13 @@ on:
   push:
     branches:
       - 'main'
+      - 'develop'
+      - 'release/**'
   pull_request:
     branches:
       - 'main'
+      - 'develop'
+      - 'release/**'
 
   # Allow manually triggering of the workflow.
   workflow_dispatch: {}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 37ed5e48f..ef9152007 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -15,9 +15,13 @@ on:
   push:
     branches:
       - 'main'
+      - 'develop'
+      - 'release/**'      
   pull_request:
     branches:
       - 'main'
+      - 'develop'
+      - 'release/**'
 
   # Allow manually triggering of the workflow.
   workflow_dispatch: {}
@@ -36,7 +40,7 @@ jobs:
       
       - name: Build documentation
         run: |
-          docker run --rm -t -v ${{ github.workspace }}:/build -u "$(id -u):$(id -g)" -e REPO:/build -e PDF=1 ghcr.io/xmos/doc_builder:main
+          docker run --rm -t -u "$(id -u):$(id -g)" -v ${{ github.workspace }}:/build -e REPO:/build -e DOXYGEN_INCLUDE=/build/doc/Doxyfile.inc -e EXCLUDE_PATTERNS=/build/doc/exclude_patterns.inc -e DOXYGEN_INPUT=ignore -e PDF=1 ghcr.io/xmos/doc_builder:main
       
       - name: Save documentation artifacts
         uses: actions/upload-artifact@v2
diff --git a/.gitignore b/.gitignore
index 218c2becf..4b79dfed7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ filesystem_support/spanning-xn*
 filesystem_support/target-xn*
 
 # Python cruft
+**/.python-version
 **/*.pyc
 **/__pycache__/*
 
@@ -28,4 +29,6 @@ filesystem_support/target-xn*
 **/test_*.log
 
 # macOS cruft
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+**/*.swp
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..b3287a6c7
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "examples/bare-metal/shared_src/xscope_fileio"]
+	path = examples/bare-metal/shared_src/xscope_fileio
+	url = git@github.com:xmos/xscope_fileio
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 710eb34c5..6612488a3 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,7 +1,7 @@
 Avona Reference Design Change Log
 =================================
 
-In progress
------------
-
+0.1.0
+-----
 
+  * Initial version with support for AEC and AGC libraries.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..ce004463c
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,77 @@
+cmake_minimum_required(VERSION 3.11)
+
+## Disable in-source build.
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
+    message(FATAL_ERROR "In-source build is not allowed! Please specify a build folder.\n\tex:cmake -B build")
+endif()
+
+project(sw_avona)
+
+## With this hack, x86 builds work on both Linux and Darwin.
+if( NOT XCORE )
+    set(CMAKE_SYSTEM_NAME Linux)
+    set(CMAKE_SYSTEM_VERSION 0.0.1)
+endif()
+
+## Set up the build options
+include(etc/build_options.cmake)
+
+find_package( Python3 COMPONENTS Interpreter )
+
+## Compile flags for C/C++ for all libs and apps (for all platforms)
+list(APPEND   COMPILE_FLAGS        -Os -g -MMD   )
+list(APPEND   COMPILE_FLAGS        -Wno-format -Wall )
+
+## Platform-specific C/C++ compile flags
+list( APPEND   COMPILE_FLAGS_XCORE  -march=xs3a )
+list( APPEND   COMPILE_FLAGS_Linux  -DX86_BUILD=1 )
+
+list(APPEND   COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+## CMake configuration stuff
+
+message( STATUS "System name here ${CMAKE_SYSTEM_NAME}" )
+if( XCORE )
+    enable_language( C CXX XC ASM )
+else()
+    enable_language( C CXX ASM )
+endif()
+
+set( CMAKE_CXX_STANDARD           11  )
+set( CMAKE_CXX_STANDARD_REQUIRED  ON  )
+set( CMAKE_CXX_EXTENSIONS         OFF )
+
+add_compile_options( ${COMPILE_FLAGS} )
+
+## Get build dependencies
+#file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/deps)
+#include(${CMAKE_SOURCE_DIR}/etc/fetch_deps.cmake)
+
+# The path to the xcore_sdk must be set either as the CMake variable XCORE_SDK_PATH (for example
+# with -DXCORE_SDK_PATH=...) or as the environment variable XCORE_SDK_PATH; the former wins.
+if(NOT DEFINED XCORE_SDK_PATH)
+  set(XCORE_SDK_PATH "$ENV{XCORE_SDK_PATH}")
+endif()
+if("${XCORE_SDK_PATH}" STREQUAL "")
+  message(FATAL_ERROR "XCORE_SDK_PATH must be set as a CMake variable or an environment variable")
+endif()
+
+# Relative paths are resolved against the build directory. Quoted so paths with spaces survive.
+get_filename_component(XCORE_SDK_PATH "${XCORE_SDK_PATH}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+
+if(NOT EXISTS "${XCORE_SDK_PATH}")
+  message(FATAL_ERROR "xcore_sdk not present at ${XCORE_SDK_PATH}")
+endif()
+
+message(STATUS "xcore_sdk in ${XCORE_SDK_PATH}")
+
+## Add libs (always) and, depending on the BUILD_EXAMPLES / BUILD_TESTS options, apps and tests.
+add_subdirectory( modules )
+
+if( BUILD_EXAMPLES )  # variable name, not ${...}: avoids a second dereference of its value
+    add_subdirectory( examples )
+endif()
+
+if( BUILD_TESTS )
+  add_subdirectory( test )
+endif()
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 000000000..1b5b3f467
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,276 @@
+@Library('xmos_jenkins_shared_library@v0.16.2') _
+getApproval()
+
+pipeline {
+  agent none
+
+  parameters {
+    booleanParam(name: 'FULL_TEST_OVERRIDE',
+                 defaultValue: false,
+                 description: 'Force a full test.')
+  }
+  environment {
+    REPO = 'sw_avona'
+    VIEW = getViewName(REPO)
+    FULL_TEST = """${(params.FULL_TEST_OVERRIDE
+                    || env.BRANCH_NAME == 'develop'
+                    || env.BRANCH_NAME == 'main'
+                    || env.BRANCH_NAME ==~ 'release/.*') ? 1 : 0}"""
+  }
+  options {
+    skipDefaultCheckout()
+  }
+  stages {
+    stage('xcore.ai executables build') {
+      agent {
+        label 'x86_64 && brew'
+      }
+      environment {
+        XCORE_SDK_PATH = "${WORKSPACE}/xcore_sdk"
+      }
+      stages {
+        stage('Get view') {
+          steps {
+            xcorePrepareSandbox("${VIEW}", "${REPO}")
+            dir("${REPO}") {
+              viewEnv() {
+                withVenv {
+                  sh "git submodule update --init"
+                  sh "pip install -e ${env.WORKSPACE}/xtagctl"
+                  sh "pip install -e examples/bare-metal/shared_src/xscope_fileio"
+                }
+              }
+            }
+          }
+        }
+        stage('CMake') {
+          steps {
+            dir("${REPO}") {
+              sh "mkdir build"
+            }
+            dir("${REPO}/build") {
+              viewEnv() {
+                withVenv {
+                  sh "cmake --version"
+                  script {
+                      if (env.FULL_TEST == "1") {
+                        sh 'cmake -S.. -DCMAKE_TOOLCHAIN_FILE=../etc/xmos_toolchain.cmake -DPython3_FIND_VIRTUALENV="ONLY" -DBUILD_TESTS=ON'
+                      }
+                      else {
+                        sh 'cmake -S.. -DCMAKE_TOOLCHAIN_FILE=../etc/xmos_toolchain.cmake -DPython3_FIND_VIRTUALENV="ONLY" -DTEST_SPEEDUP_FACTOR=4 -DBUILD_TESTS=ON'
+                      }
+                  }
+                  sh "make -j8"
+                  sh 'rm CMakeCache.txt'
+                  sh 'cmake -S.. -DPython3_FIND_VIRTUALENV="ONLY" -DTEST_WAV_AEC_BUILD_CONFIG="1 2 2 10 5" -DBUILD_TESTS=ON'
+                  sh "make -j8"
+                }
+              }
+            }
+            dir("${REPO}") {
+              stash name: 'cmake_build', includes: 'build/**/*.xe, build/**/conftest.py, build/**/test_wav_aec_c_app'
+            }
+          }
+        }
+      }
+      post {
+        cleanup {
+          cleanWs()
+        }
+      }
+    }
+    stage('xcore.ai Verification') {
+      agent {
+        label 'xcore.ai'
+      }
+      stages{
+        stage('Get View') {
+          steps {
+            xcorePrepareSandbox("${VIEW}", "${REPO}")
+            dir("${REPO}") {
+              viewEnv() {
+                withVenv {
+                  sh "git submodule update --init"
+                  sh "pip install -e examples/bare-metal/shared_src/xscope_fileio"
+                  unstash 'cmake_build'
+                }
+              }
+            }
+          }
+        }
+        stage('Reset XTAGs'){
+          steps{
+            dir("${REPO}") {
+              sh 'rm -f ~/.xtag/acquired' //Hacky but ensure it always works even when previous failed run left lock file present
+              viewEnv() {
+                withVenv{
+                  sh "pip install -e ${env.WORKSPACE}/xtagctl"
+                  sh "xtagctl reset_all XCORE-AI-EXPLORER"
+                }
+              }
+            }
+          }
+        }
+        stage('Examples') {
+          steps {
+            dir("${REPO}/examples/bare-metal/aec_1_thread") {
+              viewEnv() {
+                withVenv {
+                  sh "python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/aec_1_thread/bin/aec_1_thread.xe --input ../shared_src/test_streams/aec_example_input.wav"
+                }
+              }
+            }
+            dir("${REPO}/examples/bare-metal/aec_2_threads") {
+              viewEnv() {
+                withVenv {
+                  sh "python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/aec_2_threads/bin/aec_2_threads.xe --input ../shared_src/test_streams/aec_example_input.wav"
+                  // Make sure 1 thread and 2 threads output is bitexact
+                  sh "diff output.wav ../aec_1_thread/output.wav"
+                }
+              }
+            }
+            dir("${REPO}/examples/bare-metal/pipeline_single_threaded") {
+              viewEnv() {
+                withVenv {
+                  sh "python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/pipeline_single_threaded/bin/pipeline_single_threaded.xe --input ../shared_src/test_streams/pipeline_example_input.wav"
+                }
+              }
+            }
+            dir("${REPO}/examples/bare-metal/pipeline_multi_threaded") {
+              viewEnv() {
+                withVenv {
+                  sh "python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/pipeline_multi_threaded/bin/pipeline_multi_threaded.xe --input ../shared_src/test_streams/pipeline_example_input.wav"
+                  // Make sure single thread and multi threads pipeline output is bitexact
+                  sh "diff output.wav ../pipeline_single_threaded/output.wav"
+                }
+              }
+            }
+            dir("${REPO}/examples/bare-metal/agc") {
+              viewEnv() {
+                withVenv {
+                  sh "python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/agc/bin/agc.xe --input ../shared_src/test_streams/agc_example_input.wav"
+                }
+              }
+            }
+          }
+        }
+        stage('AEC test_aec_enhancements') {
+          steps {
+            dir("${REPO}/test/lib_aec/test_aec_enhancements") {
+              viewEnv() {
+                withVenv {
+                  withMounts([["projects", "projects/hydra_audio", "hydra_audio_test_skype"]]) {
+                    withEnv(["hydra_audio_PATH=$hydra_audio_test_skype_PATH"]) {
+                      sh "./make_dirs.sh"
+                      sh "pytest -n 2 --junitxml=pytest_result.xml"
+                      junit "pytest_result.xml"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        stage('AEC test_delay_estimator') {
+          steps {
+            dir("${REPO}/test/lib_aec/test_delay_estimator") {
+              viewEnv() {
+                withVenv {
+                  sh 'mkdir -p ./input_wavs/'
+                  sh 'mkdir -p ./output_files/'
+                  sh "pytest -n 2 --junitxml=pytest_result.xml"
+                  junit "pytest_result.xml"
+                  runPython("python print_stats.py")
+                }
+              }
+            }
+          }
+        }
+        stage('AEC test_aec_profile') {
+          steps {
+            dir("${REPO}/test/lib_aec/test_aec_profile") {
+              viewEnv() {
+                withVenv {
+                  sh "pytest -n 1 --junitxml=pytest_result.xml"
+                  junit "pytest_result.xml"
+                }
+              }
+            }
+          }
+        }
+        stage('AEC aec_unit_tests') {
+          steps {
+            dir("${REPO}/test/lib_aec/aec_unit_tests") {
+              viewEnv() {
+                withVenv {
+                  sh "pytest -n 2 --junitxml=pytest_result.xml"
+                  junit "pytest_result.xml"
+                }
+              }
+            }
+          }
+        }
+        stage('AEC test_aec_spec') { // generate audio, process it, check output, then evaluate the parsed results
+          steps {
+            dir("${REPO}/test/lib_aec/test_aec_spec") {
+              viewEnv() {
+                withVenv {
+                  sh "./make_dirs.sh"
+                  script {
+                    if (env.FULL_TEST == "0") { // quick run: swap in the quick exclusion list
+                      sh 'mv excluded_tests_quick.txt excluded_tests.txt'
+                    }
+                  }
+                  sh "python generate_audio.py"
+                  sh "pytest -n 2 --junitxml=pytest_result.xml test_process_audio.py"
+                  sh "cp pytest_result.xml results_process.xml"
+                  catchError { // a failure here is recorded but the remaining steps still run
+                    sh "pytest --junitxml=pytest_result.xml test_check_output.py"
+                  }
+                  sh "cp pytest_result.xml results_check.xml"
+                  sh "python parse_results.py"
+                  sh "pytest --junitxml=pytest_result.xml test_evaluate_results.py" // same file name that is copied below
+                  sh "cp pytest_result.xml results_final.xml"
+                  junit "results_final.xml"
+                }
+              }
+            }
+          }
+        }
+        stage('AGC tests') {
+          steps {
+            dir("${REPO}/test/lib_agc/test_process_frame") {
+              viewEnv() {
+                withVenv {
+                  sh "pytest -n 2 --junitxml=pytest_result.xml"
+                  junit "pytest_result.xml"
+                }
+              }
+            }
+          }
+        }
+      }//stages
+      post {
+        always {
+          archiveArtifacts artifacts: "${REPO}/build/**/*", fingerprint: true
+          archiveArtifacts artifacts: "${REPO}/test/lib_aec/test_aec_profile/**/aec_prof*.log", fingerprint: true
+          archiveArtifacts artifacts: "${REPO}/test/lib_aec/test_aec_profile/**/profile_index_to_tag_mapping.log", fingerprint: true
+        }
+        cleanup {
+          cleanWs()
+        }
+      }
+    }//stage xcore.ai Verification
+    stage('Update view files') {
+      agent {
+        label 'x86_64&&brew'
+      }
+      when {
+        expression { return currentBuild.currentResult == "SUCCESS" }
+      }
+      steps {
+        updateViewfiles()
+      }
+    }
+  }
+}
diff --git a/applications/avona/filesystem_support/create_fs.bat b/applications/avona/filesystem_support/create_fs.bat
index f454ac280..9896dfc38 100644
--- a/applications/avona/filesystem_support/create_fs.bat
+++ b/applications/avona/filesystem_support/create_fs.bat
@@ -4,33 +4,25 @@
 
 @echo off
 
-:: Check for fat.fs already existing
-if exist "%~dp0\fat.fs" (
+:: Create directory for intended files and Copy renamed files into directory
+if exist "%temp%\fatmktmp\" (
     :: Exit with error
     echo.
-    echo fat.fs already exists!
+    echo fatmktmp\ directory already exists at %temp%
+    echo Please delete and retry.
     pause
 ) else (
-    :: Create directory for intended files and Copy renamed files into directory
-    if exist "%temp%\fatmktmp\" (
-        :: Exit with error
-        echo.
-        echo fatmktmp\ directory already exists at %temp%
-        echo Please delete and retry.
-        pause
-    ) else (
-        mkdir %temp%\fatmktmp
-        cp "%WW_PATH%\models\common\WR_250k.en-US.alexa.bin" %temp%\fatmktmp\250kenUS.bin
-        cp "%WW_PATH%\models\common\WS_50k.en-US.alexa.bin" %temp%\fatmktmp\50kenUS.bin
+    mkdir %temp%\fatmktmp\ww\
+    cp "%WW_PATH%\models\common\WR_250k.en-US.alexa.bin" %temp%\fatmktmp\ww\250kenUS.bin
+    cp "%WW_PATH%\models\common\WS_50k.en-US.alexa.bin" %temp%\fatmktmp\ww\50kenUS.bin
 
-        :: Run fatfs_mkimage.exe on the directory to create filesystem file
-        start ..\..\host\fatfs\fatfs_mkimage.exe --input=%temp%\fatmktmp --output=fat.fs
+    :: Run fatfs_mkimage.exe on the parent directory so the image contains the ww\ folder (as create_fs.sh does)
+    start ..\..\host\fatfs\fatfs_mkimage.exe --input=%temp%\fatmktmp --output=fat.fs
 
-        echo Filesystem created. Deleting temp files . . .
-        :: File fat.fs is also deleted in cleanup without this:
-        sleep 1
+    echo Filesystem created. Deleting temp files . . .
+    :: File fat.fs is also deleted in cleanup without this:
+    sleep 1
 
-        :: Cleanup
-        rm -rf %temp%\fatmktmp
-    )
-)
\ No newline at end of file
+    :: Cleanup
+    rm -rf %temp%\fatmktmp
+)
diff --git a/applications/avona/filesystem_support/create_fs.sh b/applications/avona/filesystem_support/create_fs.sh
index 687e97e60..b5a36113a 100755
--- a/applications/avona/filesystem_support/create_fs.sh
+++ b/applications/avona/filesystem_support/create_fs.sh
@@ -1,16 +1,14 @@
 #!/bin/sh
 
-if [ -e fat.fs ] ; then
-    echo "fat.fs already exists!"
-else
-    # Create directory for intended files and Copy renamed files into directory
-    tmp_dir=$(mktemp -d)
-    cp "$WW_PATH/models/common/WR_250k.en-US.alexa.bin" $tmp_dir/250kenUS.bin
-    cp "$WW_PATH/models/common/WS_50k.en-US.alexa.bin" $tmp_dir/50kenUS.bin
+# Create directory for intended files and Copy renamed files into directory
+tmp_dir=$(mktemp -d)
+ww_dir=$tmp_dir/ww
+mkdir -p $ww_dir
+cp "$WW_PATH/models/common/WR_250k.en-US.alexa.bin" $ww_dir/250kenUS.bin
+cp "$WW_PATH/models/common/WS_50k.en-US.alexa.bin" $ww_dir/50kenUS.bin
 
-    # Create env var for path to fatfs_mkimage?
-    FATFS_MKIMAGE_PATH=../../host/fatfs
+# Create env var for path to fatfs_mkimage?
+FATFS_MKIMAGE_PATH=../../host/fatfs
 
-    # Run fatfs_mkimage.exe on the directory to create filesystem file
-    $FATFS_MKIMAGE_PATH/fatfs_mkimage --input=$tmp_dir --output=fat.fs
-fi
+# Run fatfs_mkimage.exe on the directory to create filesystem file
+$FATFS_MKIMAGE_PATH/fatfs_mkimage --input=$tmp_dir --output=fat.fs
diff --git a/doc/Doxyfile.inc b/doc/Doxyfile.inc
new file mode 100644
index 000000000..11ca6e379
--- /dev/null
+++ b/doc/Doxyfile.inc
@@ -0,0 +1,17 @@
+# This file provides overrides to the Doxyfile configuration
+
+PROJECT_NAME = Avona
+PROJECT_BRIEF = "Avona Reference Design"
+
+# AEC module
+INPUT += ../modules/lib_aec/api ../examples/bare-metal/shared_src/aec/
+
+# AGC module
+INPUT += ../modules/lib_agc/api
+
+USE_MATHJAX            = YES
+MATHJAX_FORMAT         = HTML-CSS
+MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
+
+# Make short MATHJAX sequences prettier
+ALIASES += math{1}="@f$\1@f$"
diff --git a/doc/README.rst b/doc/README.rst
index a50a81783..860401595 100644
--- a/doc/README.rst
+++ b/doc/README.rst
@@ -1,21 +1,13 @@
-######################
-Building Documentation
-######################
-
-Instructions are given below to build the documentation.  The recommended method is using Docker, 
-however, alternative instructions are provided in case using Docker in not an option.
-
-To develop the content of this repository, it is recommended to launch a `sphinx-autobuild`
-server as per the instructions below. Once started, point a web-browser at
-http://127.0.0.1:8000. If running the server within a VM, remember to configure
-port forwarding.
+####################
+Documentation Source
+####################
 
-You can now edit the .rst documentation, and your web-browser content will automatically
-update.
+This folder contains source files for the **Avona Reference Design** documentation.  The sources do not render well in GitHub or an RST viewer.  In addition, some information 
+is not visible at all and some links will not be functional.
 
-************
-Using Docker
-************
+**********************
+Building Documentation
+**********************
 
 =============
 Prerequisites
@@ -33,52 +25,44 @@ Pull the docker container:
 Building
 ========
 
-Build documentation:
+To build the documentation, run the following command in the root of the repository:
 
 .. code-block:: console
 
-    $ docker run --rm -t -v $(pwd):/build -u "$(id -u):$(id -g)" -e REPO:/build ghcr.io/xmos/doc_builder:main
-
-********************
-Without Using Docker
-********************
-
-=============
-Prerequisites
-=============
-
-Install `Doxygen <https://www.doxygen.nl/index.html>`_.
+    $ docker run --rm -t -u "$(id -u):$(id -g)" -v $(pwd):/build -e REPO:/build -e DOXYGEN_INCLUDE=/build/doc/Doxyfile.inc -e EXCLUDE_PATTERNS=/build/doc/exclude_patterns.inc -e DOXYGEN_INPUT=ignore ghcr.io/xmos/doc_builder:main
 
-Install the required Python packages:
+HTML document output is saved in the ``doc/_build/latest/html`` folder.  Open ``index.html`` to preview the saved documentation.
 
-.. code-block:: console
+**********************
+Adding a New Component
+**********************
 
-    $ pip install -r requirements.txt
+Use the following steps to add a new component.
 
-========
-Building
-========
+- Add an entry for the new component's top-level document to the appropriate TOC in the documents tree.
+- If the new component uses `Doxygen`, append the appropriate path(s) to the INPUT variable in `Doxyfile.inc`.
+- If the new component includes `.rst` files that should **not** be part of the documentation build, append the appropriate pattern(s) to `exclude_patterns.inc`.
 
-Build documentation:
+***
+FAQ
+***
 
-.. code-block:: console
+Q: Is it possible to build just a subset of the documentation?
 
-    $ make html
+A: Yes, however it is not recommended at this time.
 
-Launch sphinx-autobuild server:
+Q: Is it possible to use the ``livehtml`` feature of Sphinx?
 
-.. code-block:: console
+A: No, but ``livehtml`` support may be added to the XMOS ``doc_builder`` Docker container in the future.
 
-    $ make livehtml
+Q: Where can I learn more about the XMOS ``doc_builder`` Docker container?
 
-Clean documentation:
+A: See the https://github.com/xmos/doc_builder repository.  See the ``doc_builder`` repository README for details on additional build options.  
 
-.. code-block:: console
+Q: How do I suggest enhancements to the XMOS ``doc_builder`` Docker container?
 
-    $ make clean
+A: Create a new issue here: https://github.com/xmos/doc_builder/issues
 
-Clean and build documentation with link check:
+Q: I don't need to run the link checking, can I disable that to make the build faster?
 
-.. code-block:: console
-    
-    $ make clean html linkcheck SPHINXOPTS="-W --keep-going"
+A: Yes, add ``-e SKIP_LINK=1`` to the ``docker run`` command line above.
diff --git a/doc/exclude_patterns.inc b/doc/exclude_patterns.inc
new file mode 100644
index 000000000..bc2a9a5bc
--- /dev/null
+++ b/doc/exclude_patterns.inc
@@ -0,0 +1,4 @@
+# The following patterns are to be excluded from the documentation build
+doc/README.rst
+tools
+test
diff --git a/doc/getting_started.rst b/doc/getting_started.rst
new file mode 100644
index 000000000..a3ce08b38
--- /dev/null
+++ b/doc/getting_started.rst
@@ -0,0 +1,58 @@
+###########
+QUICK START
+###########
+
+Requirements
+------------
+
+* XTC Tools 15.0.6 or higher
+* A clone of the `xcore_sdk <https://github.com/xmos/xcore_sdk/>`_, with its submodules initialised
+* CMake 3.18 or higher
+* Python 3.7 or higher
+
+
+Building
+--------
+
+The following instructions show how to build Avona and run one of the example applications. This
+procedure is currently supported on macOS and Linux only.
+
+#. Enter the clone of Avona and initialise submodules
+     .. code-block:: console
+
+       cd sw_avona
+       git submodule update --init
+
+#. Set the ``XCORE_SDK_PATH`` environment variable to the location of the clone of xcore_sdk
+     .. code-block:: console
+
+       export XCORE_SDK_PATH=/home/username/xcore_sdk
+
+#. Create a build directory
+     .. code-block:: console
+
+       mkdir build
+       cd build
+
+#. Run cmake to setup the build environment for the XMOS toolchain
+     .. code-block:: console
+
+       cmake -S.. -DCMAKE_TOOLCHAIN_FILE=../etc/xmos_toolchain.cmake
+
+#. Running make will then build the Avona libraries and example applications
+     .. code-block:: console
+
+       make
+
+#. Install dependencies
+     .. code-block:: console
+
+       cd ../examples/bare-metal/aec_1_thread
+       pip install -e ../shared_src/xscope_fileio
+
+#. Run the single-threaded AEC example
+     .. code-block:: console
+
+       python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/aec_1_thread/bin/aec_1_thread.xe --input ../shared_src/test_streams/aec_example_input.wav
+
+See :ref:`examples` for full details about the example applications.
diff --git a/doc/quick_start.rst b/doc/quick_start.rst
deleted file mode 100644
index 2338194a5..000000000
--- a/doc/quick_start.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-###########
-Quick Start
-###########
-
-TODO: No documentation yet
\ No newline at end of file
diff --git a/doc/requirements.txt b/doc/requirements.txt
deleted file mode 100644
index dff4bd6e7..000000000
--- a/doc/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-breathe
-furo
-Sphinx
-sphinx-copybutton
-sphinx-inline-tabs
-sphinx-autobuild
diff --git a/doc/substitutions.rst b/doc/substitutions.rst
new file mode 100644
index 000000000..a06ed94e8
--- /dev/null
+++ b/doc/substitutions.rst
@@ -0,0 +1,2 @@
+.. |I2C| replace:: I\ :sup:`2`\ C
+.. |I2S| replace:: I\ :sup:`2`\ S
diff --git a/doc/user_guide/audio_processing/index.rst b/doc/user_guide/audio_processing/index.rst
new file mode 100644
index 000000000..046781ea6
--- /dev/null
+++ b/doc/user_guide/audio_processing/index.rst
@@ -0,0 +1,22 @@
+################
+Audio Processing
+################
+
+At the core of the Avona Reference Design are high-performance audio processing algorithms.  The algorithms are connected in a pipeline that takes its input from a pair of microphones and executes a series of signal processing algorithms to extract a voice signal from a complex soundscape. The audio pipeline can accept a reference signal from a host system which is used to perform Acoustic Echo Cancellation (AEC) to remove audio being played by the host. The audio pipeline provides two different output channels - one that is optimized for Automatic Speech Recognition systems and the other for voice communications.
+
+A flexible audio signal routing infrastructure and a range of digital inputs and outputs enables the Avona Reference Design to be integrated into a wide range of system configurations, that can be configured at start up and during operation through a set of control registers.  In addition, all source code is provided to allow for full customization or the addition of other audio processing algorithms.
+
+AUDIO FEATURES
+
+.. toctree:: 
+   :maxdepth: 1
+
+   ../../../modules/lib_aec/doc/index
+   ../../../modules/lib_agc/doc/index
+
+EXAMPLES
+
+.. toctree:: 
+   :maxdepth: 1
+
+   ../../../examples/bare-metal/doc/index
diff --git a/doc/user_guide/index.rst b/doc/user_guide/index.rst
new file mode 100644
index 000000000..b84ea2625
--- /dev/null
+++ b/doc/user_guide/index.rst
@@ -0,0 +1,12 @@
+##########
+USER GUIDE
+##########
+
+The XMOS Avona Reference Design User Guide is written for system architects and engineers designing far-field voice systems using the xcore.ai processor. The document describes typical usage models, the processor architecture, key feature operation, and interface definitions. Together with the product datasheet, it provides all the information required for system design, from concept to production testing and verification.
+
+It is expected that this document is read in conjunction with the relevant datasheet and that the user is familiar with basic voice processing terminology.
+
+.. toctree::
+   :maxdepth: 1
+
+   audio_processing/index
\ No newline at end of file
diff --git a/etc/build_options.cmake b/etc/build_options.cmake
new file mode 100644
index 000000000..0c24004ef
--- /dev/null
+++ b/etc/build_options.cmake
@@ -0,0 +1,23 @@
+
+
+include(CMakeDependentOption)
+
+
+
+#### BUILD OPTIONS ####  
+
+## BUILD_TESTS / BUILD_EXAMPLES choose whether tests / examples become build targets; the rest tune the test builds
+set(BUILD_TESTS    OFF CACHE BOOL "Include tests as CMake targets." )
+set(BUILD_EXAMPLES ON CACHE BOOL "Include examples as CMake targets." )
+set( TEST_WAV_AEC_BUILD_CONFIG "2 2 2 10 5" CACHE STRING "AEC build configuration for test_wav_aec in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( TEST_AEC_ENHANCEMENTS_BUILD_CONFIG "2 2 2 10 5" CACHE STRING "AEC build configuration for test_aec_enhancements in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( TEST_DELAY_ESTIMATOR_BUILD_CONFIG "2 2 2 10 5" CACHE STRING "AEC build configuration for test_delay_estimator in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( TEST_AEC_SPEC_BUILD_CONFIG "2 1 1 20 10" CACHE STRING "AEC build configuration for test_aec_spec in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( TEST_AEC_PROFILE_BUILD_CONFIG "2 2 2 10 5" "1 2 2 10 5" CACHE STRING "AEC build configurations for test_aec_profile in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( AEC_UNIT_TESTS_BUILD_CONFIG "2 2 2 10 5" CACHE STRING "AEC build configuration for aec_unit_tests in <threads> <ychannels> <xchannels> <num_main_phases> <num_shadow_phases> format" )
+set( TEST_SPEEDUP_FACTOR "1" CACHE STRING "Test speedup factor." )
+
+#### PRINT OPTIONS ####
+
+message(STATUS "BUILD_TESTS:    ${BUILD_TESTS}")
+
diff --git a/etc/fetch_deps.cmake b/etc/fetch_deps.cmake
new file mode 100644
index 000000000..cf2d61aaa
--- /dev/null
+++ b/etc/fetch_deps.cmake
@@ -0,0 +1,56 @@
+## fetch dependencies
+include(FetchContent)
+
+if ( ${BUILD_TESTS} )
+  FetchContent_Declare(
+    xcore_sdk
+    GIT_REPOSITORY      git@github.com:xmos/xcore_sdk
+    GIT_TAG             origin/develop
+    GIT_SUBMODULES      modules/lib_xs3_math modules/lib_dsp
+    GIT_SHALLOW         TRUE
+    UPDATE_DISCONNECTED TRUE
+    SOURCE_DIR          ${CMAKE_BINARY_DIR}/deps/xcore_sdk
+  )
+  FetchContent_Populate(xcore_sdk)
+
+  FetchContent_Declare(
+    audio_test_tools
+    GIT_REPOSITORY      git@github.com:xmos/audio_test_tools
+    GIT_TAG             v4.5.1
+    GIT_SHALLOW         TRUE
+    UPDATE_DISCONNECTED TRUE
+    SOURCE_DIR          ${CMAKE_BINARY_DIR}/deps/audio_test_tools
+  )
+  FetchContent_Populate(audio_test_tools)
+
+  FetchContent_Declare(
+    unity
+    GIT_REPOSITORY      git@github.com:xmos/Unity
+    GIT_TAG             origin/develop
+    GIT_SHALLOW         TRUE
+    UPDATE_DISCONNECTED TRUE
+    SOURCE_DIR          ${CMAKE_BINARY_DIR}/deps/Unity
+  )
+  FetchContent_Populate(unity)
+
+  FetchContent_Declare(
+    xscope_fileio
+    GIT_REPOSITORY      git@github.com:xmos/xscope_fileio
+    GIT_TAG             v0.3.2
+    GIT_SHALLOW         TRUE
+    UPDATE_DISCONNECTED TRUE
+    SOURCE_DIR          ${CMAKE_BINARY_DIR}/deps/xscope_fileio
+  )
+  FetchContent_Populate(xscope_fileio)
+else()
+  FetchContent_Declare(
+    xcore_sdk
+    GIT_REPOSITORY      git@github.com:xmos/xcore_sdk
+    GIT_TAG             origin/develop
+    GIT_SUBMODULES      modules/lib_xs3_math
+    GIT_SHALLOW         TRUE
+    UPDATE_DISCONNECTED TRUE
+    SOURCE_DIR          ${CMAKE_BINARY_DIR}/deps/xcore_sdk
+  )
+  FetchContent_Populate(xcore_sdk)
+endif()
diff --git a/etc/xc/CMakeDetermineXCCompiler.cmake b/etc/xc/CMakeDetermineXCCompiler.cmake
new file mode 100644
index 000000000..09ceb8eef
--- /dev/null
+++ b/etc/xc/CMakeDetermineXCCompiler.cmake
@@ -0,0 +1,28 @@
+# Select the compiler used for the XC language.
+#
+# Lookup order:
+#   1. XMOS_TOOLS_PATH CMake variable - directory that contains xcc
+#   2. XMOS_TOOL_PATH  CMake variable - same meaning; this is the spelling
+#      etc/xmos_toolchain.cmake checks, so both are honoured
+#   3. XMOS_TOOL_PATH  environment variable - tools root, xcc is in bin/
+#   4. a compiler already chosen (e.g. by the toolchain file), else "xcc"
+if(DEFINED XMOS_TOOLS_PATH)
+    set(CMAKE_XC_COMPILER "${XMOS_TOOLS_PATH}/xcc")
+elseif(DEFINED XMOS_TOOL_PATH)
+    set(CMAKE_XC_COMPILER "${XMOS_TOOL_PATH}/xcc")
+elseif(NOT "$ENV{XMOS_TOOL_PATH}" STREQUAL "")
+    set(CMAKE_XC_COMPILER "$ENV{XMOS_TOOL_PATH}/bin/xcc")
+elseif(NOT CMAKE_XC_COMPILER)
+    set(CMAKE_XC_COMPILER "xcc")
+endif()
+mark_as_advanced(CMAKE_XC_COMPILER)
+
+set(CMAKE_XC_SOURCE_FILE_EXTENSIONS xc;XC CACHE INTERNAL "")
+set(CMAKE_XC_OUTPUT_EXTENSION .obj CACHE INTERNAL "")
+set(CMAKE_XC_COMPILER_ENV_VAR "XC" CACHE INTERNAL "")
+set(CMAKE_XC_LINKER_PREFERENCE 99 CACHE INTERNAL "")
+set(CMAKE_XC_LINKER_PREFERENCE_PROPAGATES 1 CACHE INTERNAL "")
+
+# Configure variables set in this file for fast reload later on
+configure_file(${CMAKE_CURRENT_LIST_DIR}/CMakeXCCompiler.cmake.in
+               ${CMAKE_PLATFORM_INFO_DIR}/CMakeXCCompiler.cmake)
diff --git a/etc/xc/CMakeTestXCCompiler.cmake b/etc/xc/CMakeTestXCCompiler.cmake
new file mode 100644
index 000000000..85d759342
--- /dev/null
+++ b/etc/xc/CMakeTestXCCompiler.cmake
@@ -0,0 +1,8 @@
+# Compiler check for the XC language.
+# No test compile is performed: only a compiler forced by the user
+# (CMAKE_XC_COMPILER_FORCED) is handled, and it is assumed to be fully configured.
+if(NOT CMAKE_XC_COMPILER_FORCED)
+  return()
+endif()
+
+set(CMAKE_XC_COMPILER_WORKS TRUE CACHE INTERNAL "")
diff --git a/etc/xc/CMakeXCCompiler.cmake.in b/etc/xc/CMakeXCCompiler.cmake.in
new file mode 100644
index 000000000..b2345607b
--- /dev/null
+++ b/etc/xc/CMakeXCCompiler.cmake.in
@@ -0,0 +1,7 @@
+set(CMAKE_XC_COMPILER "@CMAKE_XC_COMPILER@")
+set(CMAKE_XC_COMPILER_LOADED 1)
+set(CMAKE_XC_SOURCE_FILE_EXTENSIONS @CMAKE_XC_SOURCE_FILE_EXTENSIONS@)
+set(CMAKE_XC_OUTPUT_EXTENSION @CMAKE_XC_OUTPUT_EXTENSION@)
+set(CMAKE_XC_COMPILER_ENV_VAR "@CMAKE_XC_COMPILER_ENV_VAR@")
+# Linker value captured when configure_file() generated this file.
+set(CMAKE_LINKER "@CMAKE_LINKER@")
diff --git a/etc/xc/CMakeXCInformation.cmake b/etc/xc/CMakeXCInformation.cmake
new file mode 100644
index 000000000..fca5f9a8d
--- /dev/null
+++ b/etc/xc/CMakeXCInformation.cmake
@@ -0,0 +1,36 @@
+include(CMakeLanguageInformation)
+include(CMakeCommonLanguageInclude)
+# Reuse the C include-flag prefix unless an XC-specific one was provided.
+if(NOT CMAKE_INCLUDE_FLAG_XC)
+  set(CMAKE_INCLUDE_FLAG_XC ${CMAKE_INCLUDE_FLAG_C})
+endif()
+# Prepend the XCFLAGS environment variable to the initial XC flags.
+set(CMAKE_XC_FLAGS_INIT "$ENV{XCFLAGS} ${CMAKE_XC_FLAGS_INIT}")
+
+# Create a static archive incrementally for large object file counts.
+# If CMAKE_XC_CREATE_STATIC_LIBRARY is set it will override these.
+if(NOT DEFINED CMAKE_XC_ARCHIVE_CREATE)
+  set(CMAKE_XC_ARCHIVE_CREATE "<CMAKE_AR> qc <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_XC_ARCHIVE_APPEND)
+  set(CMAKE_XC_ARCHIVE_APPEND "<CMAKE_AR> q <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_XC_ARCHIVE_FINISH)
+  set(CMAKE_XC_ARCHIVE_FINISH "<CMAKE_RANLIB> <TARGET>")
+endif()
+
+# Compile an XC file into an object file (-x xc forces the source language)
+if(NOT CMAKE_XC_COMPILE_OBJECT)
+  set(CMAKE_XC_COMPILE_OBJECT
+    "<CMAKE_XC_COMPILER> <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -x xc -c <SOURCE>")
+endif()
+# Link an executable by invoking the XC compiler as the link driver
+if(NOT CMAKE_XC_LINK_EXECUTABLE)
+  set(CMAKE_XC_LINK_EXECUTABLE
+    "<CMAKE_XC_COMPILER> <FLAGS> <CMAKE_XC_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+endif()
+# NOTE(review): the shared-library rule calls the archiver without an operation and the module rule is a placeholder string - presumably shared objects are unsupported here; confirm.
+set(CMAKE_XC_CREATE_SHARED_LIBRARY "<CMAKE_AR> <TARGET> <LINK_FLAGS> <OBJECTS>")
+set(CMAKE_XC_CREATE_SHARED_MODULE "XC_NO_CREATE_SHARED_MODULE")
+
+set(CMAKE_XC_INFORMATION_LOADED 1)
diff --git a/etc/xmos_toolchain.cmake b/etc/xmos_toolchain.cmake
new file mode 100644
index 000000000..d318e6f15
--- /dev/null
+++ b/etc/xmos_toolchain.cmake
@@ -0,0 +1,39 @@
+# CMake toolchain file for building with the XMOS xcore tools (xcc / xmosar).
+#
+# Optional inputs:
+#   XMOS_TOOL_PATH  directory containing xcc and xmosar; when not defined the
+#                   tools are expected to be on PATH.
+#   XCORE_TARGET    hardware target name; defaults to XCORE-AI-EXPLORER.
+set(CMAKE_SYSTEM_NAME XCORE)
+set(CMAKE_SYSTEM_VERSION 0.0.1)
+
+# The XC language modules sit in xc/ next to this file. Resolving them from
+# this file's own location keeps the path valid even when CMAKE_SOURCE_DIR is
+# not the root of this repository (e.g. when used as a sub-project).
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/xc/")
+
+# "<dir>/" when XMOS_TOOL_PATH is given, empty when the tools come from PATH.
+if(DEFINED XMOS_TOOL_PATH)
+    set(XMOS_TOOL_PREFIX "${XMOS_TOOL_PATH}/")
+else()
+    set(XMOS_TOOL_PREFIX "")
+endif()
+
+# xcc compiles every language and xmosar archives for all of them. The
+# compilers are marked as forced so CMake does not try to probe them.
+foreach(xmos_lang C XC CXX ASM)
+    set(CMAKE_${xmos_lang}_COMPILER "${XMOS_TOOL_PREFIX}xcc")
+    set(CMAKE_${xmos_lang}_COMPILER_AR "${XMOS_TOOL_PREFIX}xmosar")
+    set(CMAKE_${xmos_lang}_COMPILER_FORCED TRUE)
+endforeach()
+set(CMAKE_AR "${XMOS_TOOL_PREFIX}xmosar" CACHE FILEPATH "Archiver") # has to be cached in windows
+set(CMAKE_RANLIB "")
+
+set( XCORE ON CACHE BOOL "Building for xCore" )
+
+if( NOT ( DEFINED XCORE_TARGET ) )
+  set( XCORE_TARGET "XCORE-AI-EXPLORER" CACHE STRING "xCore hardware target" )
+endif()
+
+message(STATUS "XCORE_TARGET is ${XCORE_TARGET}" )
+
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 000000000..09f6358d9
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Locations shared by the example applications below. They are anchored to this
+# directory rather than CMAKE_SOURCE_DIR so they stay correct when the
+# repository is not the top-level project.
+set( SHARED_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/bare-metal/shared_src )
+set( XSCOPE_FILEIO_PATH ${SHARED_SRC_PATH}/xscope_fileio/xscope_fileio )
+
+add_subdirectory( bare-metal )
diff --git a/examples/bare-metal/CMakeLists.txt b/examples/bare-metal/CMakeLists.txt
new file mode 100644
index 000000000..665ce975e
--- /dev/null
+++ b/examples/bare-metal/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Bare-metal example applications.
+set( DEPS_ROOT ${CMAKE_SOURCE_DIR}/.. )
+
+# The 2-thread AEC and multi-threaded pipeline examples build only on XCORE;
+# the remaining examples are added for every platform.
+set( EXAMPLE_DIRS aec_1_thread pipeline_single_threaded )
+if( XCORE )
+  list( APPEND EXAMPLE_DIRS aec_2_threads pipeline_multi_threaded )
+endif()
+list( APPEND EXAMPLE_DIRS agc )
+
+foreach( example_dir IN LISTS EXAMPLE_DIRS )
+  add_subdirectory( ${example_dir} )
+endforeach()
diff --git a/examples/bare-metal/aec_1_thread/CMakeLists.txt b/examples/bare-metal/aec_1_thread/CMakeLists.txt
new file mode 100644
index 000000000..5be09ff4d
--- /dev/null
+++ b/examples/bare-metal/aec_1_thread/CMakeLists.txt
@@ -0,0 +1,85 @@
+## Single-threaded AEC example.
+##
+## Platform-specific values are collected in variables suffixed with
+## CMAKE_SYSTEM_NAME (XCORE or Linux) and merged into the common lists.
+## SHARED_SRC_PATH and XSCOPE_FILEIO_PATH are expected from a parent directory.
+
+## App name
+set( APP_NAME  aec_1_thread )
+
+# Name the variable instead of expanding it: an unset Python3_FOUND would
+# otherwise expand to nothing and the condition would stop testing anything.
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found (required by the ${APP_NAME} example).")
+endif()
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+list( APPEND  DEP_LIBS_Linux  m   )
+
+list( APPEND  DEP_LIBS
+    lib_aec
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+## Sources
+file( GLOB_RECURSE SOURCES_C  src/*.c )
+file( GLOB SOURCES_SHARED_FILEIO ${SHARED_SRC_PATH}/file_utils/*.c )
+file( GLOB SOURCES_SHARED_AEC ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+set( SOURCES_SHARED ${SOURCES_SHARED_FILEIO} ${SOURCES_SHARED_AEC} )
+file( GLOB SOURCES_XC src/*.xc )
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES  ${XSCOPE_FILEIO_PATH}/src/*.c )
+
+# Common sources first, then the ones specific to the platform being built.
+set( SOURCES ${SOURCES_C} ${SOURCES_SHARED} )
+set( SOURCES_XCORE ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} )
+list( APPEND SOURCES ${SOURCES_${CMAKE_SYSTEM_NAME}} )
+
+## Includes
+set( INCLUDES src ${SHARED_SRC_PATH}/file_utils ${SHARED_SRC_PATH}/aec )
+set( INCLUDES_XCORE ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+list( APPEND INCLUDES ${INCLUDES_${CMAKE_SYSTEM_NAME}} )
+
+# Absolute path of config.xscope, resolved against this source directory.
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+set( COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+set( COMPILE_FLAGS_Linux  "" )
+
+## Linker flags
+set( LINKER_FLAGS "" )
+set( LINKER_FLAGS_XCORE
+    "-target=${XCORE_TARGET}"
+    "-report"
+    "${XSCOPE_CONFIG}"
+)
+set( LINKER_FLAGS_Linux "" )
+
+# The platform linker flags are deliberately passed to the compile step as well.
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} ${SOURCES_C_APP} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS} )
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# LINK_FLAGS expects one space-separated string, so flatten the CMake list first.
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+  set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/examples/bare-metal/aec_1_thread/config.xscope b/examples/bare-metal/aec_1_thread/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/examples/bare-metal/aec_1_thread/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/examples/bare-metal/aec_1_thread/src/aec_app.c b/examples/bare-metal/aec_1_thread/src/aec_app.c
new file mode 100644
index 000000000..26dcce56b
--- /dev/null
+++ b/examples/bare-metal/aec_1_thread/src/aec_app.c
@@ -0,0 +1,136 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "aec_api.h"
+
+#include "aec_config.h"
+#include "aec_memory_pool.h"
+#include "fileio.h"
+#include "wav_utils.h"
+
+
+extern void aec_process_frame_1thread(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+/* Reads a 32-bit wav of interleaved mic+ref channels, runs the AEC block by block and writes the main filter output to a wav file. */
+void aec_task(const char *input_file_name, const char *output_file_name) {
+    // Ensure configuration is a subset of the maximum configuration the library supports
+    assert(AEC_MAX_Y_CHANNELS <= AEC_LIB_MAX_Y_CHANNELS);
+    assert(AEC_MAX_X_CHANNELS <= AEC_LIB_MAX_X_CHANNELS);
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_MAIN_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_SHADOW_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+
+    file_t input_file, output_file;
+    // Open input wav file containing mic and ref channels of input data
+    int ret = file_open(&input_file, input_file_name, "rb");
+    if(ret) { printf("Failed to open file %s\n", input_file_name); _Exit(1); } // runtime check: still stops when assert() is compiled out
+    // Open output wav file that will contain the AEC output
+    ret = file_open(&output_file, output_file_name, "wb");
+    if(ret) { printf("Failed to open file %s\n", output_file_name); _Exit(1); } // runtime check: still stops when assert() is compiled out
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+    if(get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0){
+        printf("error in get_wav_header_details()\n");
+        _Exit(1);
+    }
+    file_seek(&input_file, input_header_size, SEEK_SET);
+    // Ensure 32bit wav file
+    if(input_header_struct.bit_depth != 32)
+     {
+         printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+         _Exit(1);
+     }
+    // Ensure input wav file contains correct number of channels
+    if(input_header_struct.num_channels != (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)){
+        printf("Error: wav num channels(%d) does not match aec(%d)\n", input_header_struct.num_channels, (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS));
+        _Exit(1);
+    }
+
+    // Number of sample frames in the wav file; only whole blocks of AEC_FRAME_ADVANCE frames are processed
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    unsigned block_count = frame_count / AEC_FRAME_ADVANCE;
+    wav_form_header(&output_header_struct,
+            input_header_struct.audio_format,
+            AEC_MAX_Y_CHANNELS,
+            input_header_struct.sample_rate,
+            input_header_struct.bit_depth,
+            block_count*AEC_FRAME_ADVANCE);
+
+    file_write(&output_file, (uint8_t*)(&output_header_struct),  WAV_HEADER_BYTES);
+
+    int32_t input_read_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS + AEC_MAX_X_CHANNELS)] = {0}; // Array for storing interleaved input read from wav file
+    int32_t output_write_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS)]; // Interleaved output of one block, as written to the wav file
+
+    int32_t DWORD_ALIGNED frame_y[AEC_MAX_Y_CHANNELS][AEC_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED frame_x[AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+
+    // Initialise AEC
+    uint8_t DWORD_ALIGNED aec_memory_pool[sizeof(aec_memory_pool_t)];
+    uint8_t DWORD_ALIGNED aec_shadow_filt_memory_pool[sizeof(aec_shadow_filt_memory_pool_t)];
+    aec_state_t DWORD_ALIGNED main_state;
+    aec_state_t DWORD_ALIGNED shadow_state;
+    aec_shared_state_t DWORD_ALIGNED aec_shared_state;
+
+    aec_init(&main_state, &shadow_state, &aec_shared_state,
+            &aec_memory_pool[0], &aec_shadow_filt_memory_pool[0],
+            AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS,
+            AEC_MAIN_FILTER_PHASES, AEC_SHADOW_FILTER_PHASES);
+
+    for(unsigned b=0;b<block_count;b++){
+        long input_location =  wav_get_frame_start(&input_header_struct, b * AEC_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t*)&input_read_buffer[0], bytes_per_frame* AEC_FRAME_ADVANCE);
+        // Deinterleave and copy y and x samples to their respective buffers
+        for(unsigned f=0; f<AEC_FRAME_ADVANCE; f++){
+            for(unsigned ch=0;ch<AEC_MAX_Y_CHANNELS;ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + ch;
+                frame_y[ch][f] = input_read_buffer[i];
+            }
+            for(unsigned ch=0;ch<AEC_MAX_X_CHANNELS;ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + AEC_MAX_Y_CHANNELS + ch;
+                frame_x[ch][f] = input_read_buffer[i];
+            }
+        }
+        // Call AEC functions to process AEC_FRAME_ADVANCE new samples of data
+        /* Reuse mic data memory for main filter output
+         * Reuse ref data memory for shadow filter output.
+         */
+        aec_process_frame_1thread(&main_state, &shadow_state, frame_y, frame_x, frame_y, frame_x);
+
+        // Create interleaved output that can be written to wav file
+        for (unsigned ch=0;ch<AEC_MAX_Y_CHANNELS;ch++){
+            for(unsigned i=0;i<AEC_FRAME_ADVANCE;i++){
+                output_write_buffer[i*(AEC_MAX_Y_CHANNELS) + ch] = frame_y[ch][i];
+            }
+        }
+
+        file_write(&output_file, (uint8_t*)(output_write_buffer), output_header_struct.bit_depth/8 * AEC_FRAME_ADVANCE * AEC_MAX_Y_CHANNELS);
+    }
+    file_close(&input_file);
+    file_close(&output_file);
+    shutdown_session();
+}
+
+
+#if X86_BUILD // entry point compiled only when X86_BUILD is set
+int main(int argc, char **argv) { // argv[1]: input wav file name, argv[2]: output wav file name
+    if(argc < 3) {
+        printf("Arguments missing. Expected: <input file name> <output file name>\n");
+        return 1; // leave with an error even when assert() is compiled out, instead of reading missing argv entries
+    }
+    aec_task(argv[1], argv[2]);
+    return 0;
+}
+#endif
diff --git a/examples/bare-metal/aec_1_thread/src/aec_config.h b/examples/bare-metal/aec_1_thread/src/aec_config.h
new file mode 100644
index 000000000..a5313decc
--- /dev/null
+++ b/examples/bare-metal/aec_1_thread/src/aec_config.h
@@ -0,0 +1,9 @@
+#ifndef aec_config_h_
+#define aec_config_h_
+/* Compile-time AEC configuration for this example. */
+#define AEC_MAX_Y_CHANNELS   (2) /* y channels: the mic channels of the input wav */
+#define AEC_MAX_X_CHANNELS   (2) /* x channels: the reference channels of the input wav */
+#define AEC_MAIN_FILTER_PHASES    (10) /* main filter phase count handed to aec_init() */
+#define AEC_SHADOW_FILTER_PHASES    (5) /* shadow filter phase count handed to aec_init() */
+
+#endif /* aec_config_h_ */
diff --git a/examples/bare-metal/aec_1_thread/src/main.xc b/examples/bare-metal/aec_1_thread/src/main.xc
new file mode 100644
index 000000000..2eb5358eb
--- /dev/null
+++ b/examples/bare-metal/aec_1_thread/src/main.xc
@@ -0,0 +1,40 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <xscope.h>
+#include <stdlib.h>
+#ifdef __XC__
+#define chanend_t chanend
+#else
+#include <xcore/chanend.h>
+#endif
+
+extern "C" {
+#include "xs3_math.h"
+void aec_task(const char *input_file_name, const char *output_file_name);
+#if TEST_WAV_XSCOPE
+    #include "xscope_io_device.h"
+#endif
+}
+
+#define IN_WAV_FILE_NAME    "input.wav" // input file name passed to aec_task()
+#define OUT_WAV_FILE_NAME   "output.wav" // output file name passed to aec_task()
+int main (void)
+{
+  chan xscope_chan; // connects xscope_host_data() with xscope_io_init() below
+  par
+  {
+#if TEST_WAV_XSCOPE
+    xscope_host_data(xscope_chan); // NOTE(review): presumably forwards host xSCOPE data onto xscope_chan - confirm against <xscope.h>
+#endif
+    on tile[0]: {
+#if TEST_WAV_XSCOPE
+        xscope_io_init(xscope_chan); // called before aec_task() opens its files
+#endif 
+        aec_task(IN_WAV_FILE_NAME, OUT_WAV_FILE_NAME);
+        _Exit(0); // end the program as soon as aec_task() returns
+    }
+  }
+  return 0;
+}
diff --git a/examples/bare-metal/aec_2_threads/CMakeLists.txt b/examples/bare-metal/aec_2_threads/CMakeLists.txt
new file mode 100644
index 000000000..a1bf6f399
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/CMakeLists.txt
@@ -0,0 +1,85 @@
+## Two-thread AEC example (added by the parent directory only for XCORE builds).
+##
+## Platform-specific values are collected in variables suffixed with
+## CMAKE_SYSTEM_NAME (XCORE or Linux) and merged into the common lists.
+## SHARED_SRC_PATH and XSCOPE_FILEIO_PATH are expected from a parent directory.
+
+## App name
+set( APP_NAME  aec_2_threads )
+
+# Name the variable instead of expanding it: an unset Python3_FOUND would
+# otherwise expand to nothing and the condition would stop testing anything.
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found (required by the ${APP_NAME} example).")
+endif()
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+list( APPEND  DEP_LIBS_Linux  m   )
+
+list( APPEND  DEP_LIBS
+    lib_aec
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+## Sources
+file( GLOB_RECURSE SOURCES_C  src/*.c )
+file( GLOB SOURCES_SHARED_FILEIO ${SHARED_SRC_PATH}/file_utils/*.c )
+file( GLOB SOURCES_SHARED_AEC ${SHARED_SRC_PATH}/aec/aec_process_frame_2threads.c )
+set( SOURCES_SHARED ${SOURCES_SHARED_FILEIO} ${SOURCES_SHARED_AEC} )
+file( GLOB SOURCES_XC src/*.xc )
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES  ${XSCOPE_FILEIO_PATH}/src/*.c )
+
+# Common sources first, then the ones specific to the platform being built.
+set( SOURCES ${SOURCES_C} ${SOURCES_SHARED} )
+set( SOURCES_XCORE ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} )
+list( APPEND SOURCES ${SOURCES_${CMAKE_SYSTEM_NAME}} )
+
+## Includes
+set( INCLUDES src ${SHARED_SRC_PATH}/file_utils ${SHARED_SRC_PATH}/aec )
+set( INCLUDES_XCORE ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+list( APPEND INCLUDES ${INCLUDES_${CMAKE_SYSTEM_NAME}} )
+
+# Absolute path of config.xscope, resolved against this source directory.
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+set( COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+set( COMPILE_FLAGS_Linux  "" )
+
+## Linker flags
+set( LINKER_FLAGS "" )
+set( LINKER_FLAGS_XCORE
+    "-target=${XCORE_TARGET}"
+    "-report"
+    "${XSCOPE_CONFIG}"
+)
+set( LINKER_FLAGS_Linux "" )
+
+# The platform linker flags are deliberately passed to the compile step as well.
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} ${SOURCES_C_APP} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS} )
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# LINK_FLAGS expects one space-separated string, so flatten the CMake list first.
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+  set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/examples/bare-metal/aec_2_threads/config.xscope b/examples/bare-metal/aec_2_threads/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/examples/bare-metal/aec_2_threads/src/aec_app.c b/examples/bare-metal/aec_2_threads/src/aec_app.c
new file mode 100644
index 000000000..186a7b285
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/src/aec_app.c
@@ -0,0 +1,138 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "aec_defines.h"
+#include "aec_api.h"
+
+#include "aec_config.h"
+#include "aec_memory_pool.h"
+#include "fileio.h"
+#include "wav_utils.h"
+
+
+
+extern void aec_process_frame_2threads(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+/* Reads a 32-bit wav of interleaved mic+ref channels, runs the 2-thread AEC block by block and writes the main filter output to a wav file. */
+void aec_task(const char *input_file_name, const char *output_file_name) {
+    // Ensure configuration is a subset of the maximum configuration the library supports
+    assert(AEC_MAX_Y_CHANNELS <= AEC_LIB_MAX_Y_CHANNELS);
+    assert(AEC_MAX_X_CHANNELS <= AEC_LIB_MAX_X_CHANNELS);
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_MAIN_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_SHADOW_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+
+    file_t input_file, output_file;
+    // Open input wav file containing mic and ref channels of input data
+    int ret = file_open(&input_file, input_file_name, "rb");
+    if(ret) { printf("Failed to open file %s\n", input_file_name); _Exit(1); } // runtime check: still stops when assert() is compiled out
+    // Open output wav file that will contain the AEC output
+    ret = file_open(&output_file, output_file_name, "wb");
+    if(ret) { printf("Failed to open file %s\n", output_file_name); _Exit(1); } // runtime check: still stops when assert() is compiled out
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+    if(get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0){
+        printf("error in get_wav_header_details()\n");
+        _Exit(1);
+    }
+    file_seek(&input_file, input_header_size, SEEK_SET);
+    // Ensure 32bit wav file
+    if(input_header_struct.bit_depth != 32)
+     {
+         printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+         _Exit(1);
+     }
+    // Ensure input wav file contains correct number of channels
+    if(input_header_struct.num_channels != (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)){
+        printf("Error: wav num channels(%d) does not match aec(%d)\n", input_header_struct.num_channels, (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS));
+        _Exit(1);
+    }
+
+    // Number of sample frames in the wav file; only whole blocks of AEC_FRAME_ADVANCE frames are processed
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    unsigned block_count = frame_count / AEC_FRAME_ADVANCE;
+    wav_form_header(&output_header_struct,
+            input_header_struct.audio_format,
+            AEC_MAX_Y_CHANNELS,
+            input_header_struct.sample_rate,
+            input_header_struct.bit_depth,
+            block_count*AEC_FRAME_ADVANCE);
+
+    file_write(&output_file, (uint8_t*)(&output_header_struct),  WAV_HEADER_BYTES);
+
+    int32_t input_read_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS + AEC_MAX_X_CHANNELS)] = {0}; // Array for storing interleaved input read from wav file
+    int32_t output_write_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS)]; // Interleaved output of one block, as written to the wav file
+
+    int32_t DWORD_ALIGNED frame_y[AEC_MAX_Y_CHANNELS][AEC_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED frame_x[AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+
+    // Initialise AEC
+    uint8_t DWORD_ALIGNED aec_memory_pool[sizeof(aec_memory_pool_t)];
+    uint8_t DWORD_ALIGNED aec_shadow_filt_memory_pool[sizeof(aec_shadow_filt_memory_pool_t)];
+    aec_state_t DWORD_ALIGNED main_state;
+    aec_state_t DWORD_ALIGNED shadow_state;
+    aec_shared_state_t DWORD_ALIGNED aec_shared_state;
+
+    aec_init(&main_state, &shadow_state, &aec_shared_state,
+            &aec_memory_pool[0], &aec_shadow_filt_memory_pool[0],
+            AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS,
+            AEC_MAIN_FILTER_PHASES, AEC_SHADOW_FILTER_PHASES);
+
+    for(unsigned b=0;b<block_count;b++){
+        long input_location =  wav_get_frame_start(&input_header_struct, b * AEC_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t*)&input_read_buffer[0], bytes_per_frame* AEC_FRAME_ADVANCE);
+        // Deinterleave and copy y and x samples to their respective buffers
+        for(unsigned f=0; f<AEC_FRAME_ADVANCE; f++){
+            for(unsigned ch=0;ch<AEC_MAX_Y_CHANNELS;ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + ch;
+                frame_y[ch][f] = input_read_buffer[i];
+            }
+            for(unsigned ch=0;ch<AEC_MAX_X_CHANNELS;ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + AEC_MAX_Y_CHANNELS + ch;
+                frame_x[ch][f] = input_read_buffer[i];
+            }
+        }
+        // Call AEC functions to process AEC_FRAME_ADVANCE new samples of data
+        /* Reuse mic data memory for main filter output
+         * Reuse ref data memory for shadow filter output.
+         */
+        aec_process_frame_2threads(&main_state, &shadow_state, frame_y, frame_x, frame_y, frame_x);
+
+        // Create interleaved output that can be written to wav file
+        for (unsigned ch=0;ch<AEC_MAX_Y_CHANNELS;ch++){
+            for(unsigned i=0;i<AEC_FRAME_ADVANCE;i++){
+                output_write_buffer[i*(AEC_MAX_Y_CHANNELS) + ch] = frame_y[ch][i];
+            }
+        }
+
+        file_write(&output_file, (uint8_t*)(output_write_buffer), output_header_struct.bit_depth/8 * AEC_FRAME_ADVANCE * AEC_MAX_Y_CHANNELS);
+    }
+    file_close(&input_file);
+    file_close(&output_file);
+    shutdown_session();
+}
+
+
+#if X86_BUILD // entry point compiled only when X86_BUILD is set
+int main(int argc, char **argv) { // argv[1]: input wav file name, argv[2]: output wav file name
+    if(argc < 3) {
+        printf("Arguments missing. Expected: <input file name> <output file name>\n");
+        return 1; // leave with an error even when assert() is compiled out, instead of reading missing argv entries
+    }
+    aec_task(argv[1], argv[2]);
+    return 0;
+}
+#endif
diff --git a/examples/bare-metal/aec_2_threads/src/aec_config.h b/examples/bare-metal/aec_2_threads/src/aec_config.h
new file mode 100644
index 000000000..a5313decc
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/src/aec_config.h
@@ -0,0 +1,9 @@
+#ifndef aec_config_h_
+#define aec_config_h_
+/* Compile-time AEC configuration for this example. */
+#define AEC_MAX_Y_CHANNELS   (2) /* y channels: the mic channels of the input wav */
+#define AEC_MAX_X_CHANNELS   (2) /* x channels: the reference channels of the input wav */
+#define AEC_MAIN_FILTER_PHASES    (10) /* main filter phase count handed to aec_init() */
+#define AEC_SHADOW_FILTER_PHASES    (5) /* shadow filter phase count handed to aec_init() */
+
+#endif /* aec_config_h_ */
diff --git a/examples/bare-metal/aec_2_threads/src/aec_task_distribution.c b/examples/bare-metal/aec_2_threads/src/aec_task_distribution.c
new file mode 100644
index 000000000..302581f07
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/src/aec_task_distribution.c
@@ -0,0 +1,25 @@
+#include "aec_task_distribution.h"
+
+/* Job schedule: outer index of every table is the hardware thread, inner index the pass on that thread */
+task_distribution_t tdist = {
+    .par_3_tasks_and_channels = {
+        {{.task = 0, .channel = 0, .is_active = 1}, {.task = 2, .channel = 0, .is_active = 1}, {.task = 1, .channel = 1, .is_active = 1}},
+        {{.task = 1, .channel = 0, .is_active = 1}, {.task = 0, .channel = 1, .is_active = 1}, {.task = 2, .channel = 1, .is_active = 1}},
+    },
+    .par_2_tasks_and_channels = {
+        {{.task = 0, .channel = 0, .is_active = 1}, {.task = 0, .channel = 1, .is_active = 1}},
+        {{.task = 1, .channel = 0, .is_active = 1}, {.task = 1, .channel = 1, .is_active = 1}},
+    },
+    .par_1_tasks_and_channels = {
+        {{.task = 0, .channel = 0, .is_active = 1}},
+        {{.task = 0, .channel = 1, .is_active = 1}},
+    },
+    .par_2_tasks = {
+        {{.task = 0, .is_active = 1}},
+        {{.task = 1, .is_active = 1}},
+    },
+    .par_3_tasks = {
+        {{.task = 0, .is_active = 1}, {.task = 2, .is_active = 1}},
+        {{.task = 1, .is_active = 1}, {.task = 0, .is_active = 0}},
+    },
+};
diff --git a/examples/bare-metal/aec_2_threads/src/aec_task_distribution.h b/examples/bare-metal/aec_2_threads/src/aec_task_distribution.h
new file mode 100644
index 000000000..33500ef13
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/src/aec_task_distribution.h
@@ -0,0 +1,78 @@
+#ifndef aec_task_distribution_h_
+#define aec_task_distribution_h_
+
+/**
+ * @file
+ * This header defines the data structures used when distributing tasks across hardware threads.
+ * The task distribution scheme distributes tasks across hardware threads for 2 scenarios.
+ *      1. Distribute multiple unique tasks across multiple HW threads. For example, for a 3 tasks, 2 threads configuration,
+ *         distribute [task0, task1, task2] across [Thread0, Thread1].
+ *      2. Distribute multiple (task, channel) pairs across multiple HW threads. For example, for a 3 tasks, 2 channels, 2 threads
+ *      configuration, distribute [(task0, ch0), (task0, ch1), (task1, ch0), (task1, ch1), (task2, ch0), (task2,
+ *      ch1)] across [Thread0, Thread1].
+ *      The number of channels used when defining the (task, channel) pairs is fixed to max(`AEC_MAX_Y_CHANNELS`,
+ *      `AEC_MAX_X_CHANNELS`).
+ */
+
+/**
+ * @brief One schedule entry used when distributing tasks across hardware threads.
+ */
+typedef struct {
+    /** Task index.*/
+    int task;
+    /** Flag indicating whether the task is active on that thread. The task is run on the thread only when is_active
+     * is set to 1. */
+    int is_active;
+}par_tasks_t;
+
+/**
+ * @brief One schedule entry used when distributing (task, channel) pairs across hardware threads.
+ */
+typedef struct {
+    /** Task index.*/
+    int task;
+    /** Channel index.*/
+    int channel;
+    /** Flag indicating whether the (task, channel) pair is active on that thread. The pair is run on the thread only
+     * when is_active is set to 1. */
+    int is_active;
+}par_tasks_and_channels_t;
+
+
+#define AEC_THREAD_COUNT   (2) ///< Number of hardware threads available
+
+/** Number of iterations run on a given thread when distributing 2 tasks across AEC_THREAD_COUNT threads*/
+#define AEC_2_TASKS_PASSES   (1)
+
+/** Number of iterations run on a given thread when distributing 3 tasks across AEC_THREAD_COUNT threads*/
+#define AEC_3_TASKS_PASSES   (2)
+
+/** Number of iterations run on a given thread when distributing 3 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_3_TASKS_AND_CHANNELS_PASSES   (3)
+
+/** Number of iterations run on a given thread when distributing 2 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_2_TASKS_AND_CHANNELS_PASSES   (2)
+
+/** Number of iterations run on a given thread when distributing 1 task, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_1_TASKS_AND_CHANNELS_PASSES   (1)
+/** @brief Schedule tables; each is indexed first by hardware thread and then by the pass run on that thread. */
+typedef struct {
+    /** task distribution definition for 3 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, scheduled across
+     * AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_3_tasks_and_channels[AEC_THREAD_COUNT][AEC_3_TASKS_AND_CHANNELS_PASSES];
+    /** task distribution definition for 2 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, scheduled across
+     * AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_2_tasks_and_channels[AEC_THREAD_COUNT][AEC_2_TASKS_AND_CHANNELS_PASSES];
+    /** task distribution definition for 1 task, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, scheduled across
+     * AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_1_tasks_and_channels[AEC_THREAD_COUNT][AEC_1_TASKS_AND_CHANNELS_PASSES];
+    /** task distribution definition for 2 tasks, scheduled across AEC_THREAD_COUNT threads */
+    par_tasks_t par_2_tasks[AEC_THREAD_COUNT][AEC_2_TASKS_PASSES];
+    /** task distribution definition for 3 tasks, scheduled across AEC_THREAD_COUNT threads */
+    par_tasks_t par_3_tasks[AEC_THREAD_COUNT][AEC_3_TASKS_PASSES];
+}task_distribution_t;
+
+#endif /* aec_task_distribution_h_ */
diff --git a/examples/bare-metal/aec_2_threads/src/main.xc b/examples/bare-metal/aec_2_threads/src/main.xc
new file mode 100644
index 000000000..2eb5358eb
--- /dev/null
+++ b/examples/bare-metal/aec_2_threads/src/main.xc
@@ -0,0 +1,40 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <xscope.h>
+#include <stdlib.h>
+#ifdef __XC__
+#define chanend_t chanend
+#else
+#include <xcore/chanend.h>
+#endif
+
+extern "C" {
+#include "xs3_math.h"
+void aec_task(const char *input_file_name, const char *output_file_name);
+#if TEST_WAV_XSCOPE
+    #include "xscope_io_device.h"
+#endif
+}
+
+#define IN_WAV_FILE_NAME    "input.wav"
+#define OUT_WAV_FILE_NAME   "output.wav"
+int main (void)
+{
+  chan xscope_chan;  // shared by xscope_host_data() and xscope_io_init() below
+  par
+  {
+#if TEST_WAV_XSCOPE
+    xscope_host_data(xscope_chan);
+#endif
+    on tile[0]: {
+#if TEST_WAV_XSCOPE
+        xscope_io_init(xscope_chan);
+#endif 
+        aec_task(IN_WAV_FILE_NAME, OUT_WAV_FILE_NAME);  // process the fixed-name input file into the output file
+        _Exit(0);  // exit as soon as aec_task() returns
+    }
+  }
+  return 0;
+}
diff --git a/examples/bare-metal/agc/CMakeLists.txt b/examples/bare-metal/agc/CMakeLists.txt
new file mode 100644
index 000000000..af3b3a67d
--- /dev/null
+++ b/examples/bare-metal/agc/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Bare-metal AGC example: builds the `agc` executable for xcore or for the host.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable(agc src/agc_app.c
+                   ${SHARED_SRC_PATH}/file_utils/fileio.c
+                   ${SHARED_SRC_PATH}/file_utils/wav_utils.c)
+
+target_include_directories(agc PRIVATE ${SHARED_SRC_PATH}/file_utils)
+
+# An executable has no consumers, so its dependencies are linked PRIVATE.
+target_link_libraries(agc PRIVATE lib_agc lib_xs3_math)
+
+if(XCORE)
+  # xcore build: add the XC entry point and the xscope file I/O device source.
+  target_sources(agc PRIVATE src/main.xc
+                             ${XSCOPE_FILEIO_PATH}/src/xscope_io_device.c)
+
+  target_include_directories(agc PRIVATE ${XSCOPE_FILEIO_PATH}
+                                         ${XSCOPE_FILEIO_PATH}/api)
+
+  # TEST_WAV_XSCOPE switches on the xscope file I/O setup in src/main.xc.
+  target_compile_options(agc PRIVATE "-DTEST_WAV_XSCOPE=1"
+                                     "-target=${XCORE_TARGET}")
+
+  target_link_options(agc PRIVATE "-target=${XCORE_TARGET}"
+                                  "-report"
+                                  "${CMAKE_CURRENT_SOURCE_DIR}/config.xscope")
+  set_target_properties(agc PROPERTIES SUFFIX ".xe")
+else()
+  # Host build: link the C math library.
+  target_link_libraries(agc PRIVATE m)
+endif()
diff --git a/examples/bare-metal/agc/config.xscope b/examples/bare-metal/agc/config.xscope
new file mode 100644
index 000000000..008845f3d
--- /dev/null
+++ b/examples/bare-metal/agc/config.xscope
@@ -0,0 +1,9 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
diff --git a/examples/bare-metal/agc/src/agc_app.c b/examples/bare-metal/agc/src/agc_app.c
new file mode 100644
index 000000000..586eaca2a
--- /dev/null
+++ b/examples/bare-metal/agc/src/agc_app.c
@@ -0,0 +1,97 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <assert.h>
+
+#include <agc_api.h>
+
+#include <fileio.h>
+#include <wav_utils.h>
+
+
+void agc_task(const char *input_file_name, const char *output_file_name) {
+    // Runs the AGC over a single-channel, 32-bit wav file one frame at a time and writes the result to a wav file.
+    // Open failures are checked explicitly rather than with assert() so they are still caught when NDEBUG is set.
+    file_t input_file, output_file;
+    if (file_open(&input_file, input_file_name, "rb") != 0 ||
+        file_open(&output_file, output_file_name, "wb") != 0) {
+        printf("Failed to open input or output file\n");
+        _Exit(1);
+    }
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+
+    if (get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0) {
+        printf("error in get_wav_header_details()\n");
+        _Exit(1);
+    }
+
+    file_seek(&input_file, input_header_size, SEEK_SET);
+
+    if (input_header_struct.bit_depth != 32) {
+        printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+        _Exit(1);
+    }
+
+    if (input_header_struct.num_channels != 1) {
+        printf("Error: wav num channels (%d) does not match expected (1)\n", input_header_struct.num_channels);
+        _Exit(1);
+    }
+
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    unsigned block_count = frame_count / AGC_FRAME_ADVANCE; // whole blocks only; a trailing partial block is dropped
+    wav_form_header(&output_header_struct,
+                    input_header_struct.audio_format,
+                    1,   // number of channels
+                    input_header_struct.sample_rate,
+                    input_header_struct.bit_depth,
+                    block_count * AGC_FRAME_ADVANCE);
+
+    file_write(&output_file, (uint8_t*)(&output_header_struct), WAV_HEADER_BYTES);
+
+    int32_t input_read_buffer[AGC_FRAME_ADVANCE];
+    int32_t output_write_buffer[AGC_FRAME_ADVANCE];
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+
+    // Initialise the AGC configuration using one of the pre-defined profiles in api/agc_profiles.h, and then
+    // make any alterations to the defaults. In this application, there is no VAD, so adapt_on_vad must be
+    // disabled. Also there is no AEC, so lc_enabled must be disabled.
+    agc_config_t conf = AGC_PROFILE_COMMS;
+    conf.adapt_on_vad = 0;
+    conf.lc_enabled = 0;
+
+    agc_state_t agc;
+    agc_init(&agc, &conf);
+
+    // Initialise the meta-data. Since this application has neither VAD nor AEC, the meta-data will be
+    // constant and can use these pre-defined values to make clear the absence of VAD and AEC.
+    agc_meta_data_t md = {AGC_META_DATA_NO_VAD, AGC_META_DATA_NO_AEC, AGC_META_DATA_NO_AEC};
+
+    for (unsigned bl = 0; bl < block_count; ++bl) {
+        long input_location =  wav_get_frame_start(&input_header_struct, bl * AGC_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t *)input_read_buffer, bytes_per_frame * AGC_FRAME_ADVANCE);
+
+        // Call the AGC function to process the input frame, writing the output into the output buffer.
+        agc_process_frame(&agc, output_write_buffer, input_read_buffer, &md);
+
+        file_write(&output_file, (uint8_t *)output_write_buffer, bytes_per_frame * AGC_FRAME_ADVANCE);
+    }
+
+    file_close(&input_file);
+    file_close(&output_file);
+    shutdown_session();
+}
+
+
+#if X86_BUILD
+int main(int argc, char **argv) { // Host entry point: <program> <input file name> <output file name>
+    if (argc < 3) {
+        printf("Arguments missing. Expected: <input file name> <output file name>\n");
+        return 1; // Bail out explicitly: with NDEBUG, assert(0) is a no-op and argv[1]/argv[2] would be read
+    }
+    agc_task(argv[1], argv[2]);
+    return 0;
+}
+#endif
diff --git a/examples/bare-metal/agc/src/main.xc b/examples/bare-metal/agc/src/main.xc
new file mode 100644
index 000000000..172bf8cea
--- /dev/null
+++ b/examples/bare-metal/agc/src/main.xc
@@ -0,0 +1,38 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <xscope.h>
+#include <stdlib.h>
+
+extern "C" {
+#if TEST_WAV_XSCOPE
+#include <xscope_io_device.h>
+#endif
+
+void agc_task(const char *input_file_name, const char *output_file_name);
+}
+
+#define chanend_t chanend
+
+#define IN_WAV_FILE_NAME "input.wav"
+#define OUT_WAV_FILE_NAME "output.wav"
+
+int main (void)
+{
+  chan xscope_chan;  // shared by xscope_host_data() and xscope_io_init() below
+  par
+  {
+#if TEST_WAV_XSCOPE
+    xscope_host_data(xscope_chan);
+#endif
+    on tile[0]: {
+#if TEST_WAV_XSCOPE
+        xscope_io_init(xscope_chan);
+#endif
+        agc_task(IN_WAV_FILE_NAME, OUT_WAV_FILE_NAME);  // process the fixed-name input file into the output file
+        _Exit(0);  // exit as soon as agc_task() returns
+    }
+  }
+  return 0;
+}
diff --git a/examples/bare-metal/doc/index.rst b/examples/bare-metal/doc/index.rst
new file mode 100644
index 000000000..1afdf2da6
--- /dev/null
+++ b/examples/bare-metal/doc/index.rst
@@ -0,0 +1,31 @@
+
+####################
+Example Applications
+####################
+
+Several examples are provided to demonstrate processing of audio using the audio processing algorithms individually as
+well as put together in a pipeline.
+
+Building Examples
+==================
+
+After configuring the CMake project (with the ``BUILD_EXAMPLES`` option enabled), all the examples can
+be built by using the ``make`` command within the build directory.  Individual examples can be built
+using ``make EXAMPLE_NAME``, where ``EXAMPLE_NAME`` is the example to build. 
+
+Running Examples
+==================
+
+In order to access binary files on the host from the XCore device over xscope, the examples make use of the
+xscope_fileio utility, which needs to be installed before running the example application. To install xscope_fileio, run
+the following command from the `examples/bare-metal/` directory in a terminal where XMOS XTC tools are sourced.
+
+::
+
+    pip install -e shared_src/xscope_fileio/
+
+
+.. toctree::
+   :maxdepth: 1
+    
+   src/examples
diff --git a/examples/bare-metal/doc/src/examples.rst b/examples/bare-metal/doc/src/examples.rst
new file mode 100644
index 000000000..73b2bce32
--- /dev/null
+++ b/examples/bare-metal/doc/src/examples.rst
@@ -0,0 +1,179 @@
+.. _examples:
+
+Example App: aec_1_thread
+=========================
+
+This example demonstrates how AEC functions are called on a single thread to process data through the AEC stage of a pipeline.
+
+In it, a 32-bit, 4 channel wav file input.wav is read and processed through the AEC stage frame by frame.
+AEC is configured for 2 mic input channels, 2 reference input channels, 10 phase main filter and a 5 phase shadow 
+filter.
+The input file input.wav has 2 channels of mic input followed by 2 channels of reference input.
+Echo cancelled version of the mic input is generated as the AEC output and written to the output.wav file.
+
+Building
+********
+
+After configuring the CMake project, the following commands can be used from the
+`sw_avona/examples/bare-metal/aec_1_thread` directory to build and run this example application using the XCORE-AI-EXPLORER board as a target:
+
+::
+    
+    cd ../../../build
+    make aec_1_thread
+    cd ../examples/bare-metal/aec_1_thread
+    python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/aec_1_thread/bin/aec_1_thread.xe --input ../shared_src/test_streams/aec_example_input.wav
+
+
+Output
+------
+
+The output file output.wav is generated in the `sw_avona/examples/bare-metal/aec_1_thread` directory. The input file
+input.wav is also present in the same directory. View output.wav and input.wav in Audacity to compare the echo cancelled
+output against the microphone input.
+
+
+Example App: aec_2_threads
+==========================
+
+This example demonstrates how AEC functions are called on 2 threads to process data through the AEC stage of a pipeline.
+
+In it, a 32-bit, 4 channel wav file input.wav is read and processed through the AEC stage frame by frame.
+AEC is configured for 2 mic input channels, 2 reference input channels, 10 phase main filter and a 5 phase shadow
+filter.
+
+The input file input.wav has 2 channels of mic input followed by 2 channels of reference input.
+Echo cancelled version of the mic input is generated as the AEC output and written to the output.wav file.
+
+Building
+********
+
+After configuring the CMake project, the following commands can be used from the
+`sw_avona/examples/bare-metal/aec_2_threads` directory to build and run this example application using the XCORE-AI-EXPLORER board as a target:
+
+::
+    
+    cd ../../../build
+    make aec_2_threads
+    cd ../examples/bare-metal/aec_2_threads
+    python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/aec_2_threads/bin/aec_2_threads.xe --input ../shared_src/test_streams/aec_example_input.wav
+
+Output
+------
+
+The output file output.wav is generated in the `sw_avona/examples/bare-metal/aec_2_threads` directory. The input file
+input.wav is also present in the same directory. View output.wav and input.wav in Audacity to compare the echo cancelled
+output against the microphone input.
+
+
+Example App: agc
+================
+
+This example demonstrates how AGC functions are called on a single thread to process data through the AGC stage of
+a pipeline. A single AGC instance is run using the profile that is tuned for communication with a human listener.
+
+Since this example application only demonstrates the AGC module, without a VAD or an AEC, adaption based on voice
+activity and the loss control feature are both disabled.
+
+The input is a single channel, 32-bit wav file, which is read and processed through the AGC frame-by-frame.
+
+Building
+********
+
+After configuring the CMake project, the following commands can be used from the `sw_avona/examples/bare-metal/agc`
+directory to build and run this example application using the XCORE-AI-EXPLORER board as a target:
+
+::
+
+    cd ../../../build
+    make agc
+    cd ../examples/bare-metal/agc
+    python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/agc/bin/agc.xe --input ../shared_src/test_streams/agc_example_input.wav
+
+Output
+------
+
+The output file output.wav is generated in the `sw_avona/examples/bare-metal/agc` directory. The provided
+input `agc_example_input.wav` is low-volume white-noise and the effect of the AGC can be heard in the output
+by listening to the two wav files.
+
+
+Example App: pipeline_single_threaded
+=====================================
+
+This example demonstrates how the audio processing stages are put together in a pipeline.
+
+In it, a 32-bit, 4 channel wav file input.wav is read and processed through the pipeline stages frame by frame. The
+example currently demonstrates a pipeline having AEC and AGC stages.
+
+AEC is configured for 2 mic input channels, 2 reference input channels, 10 phase main filter and a 5 phase shadow
+filter. AEC stage generates the echo cancelled version of the mic input that is then sent for processing through the
+AGC.
+
+AGC is configured for ASR engine suitable gain control on channel 0 and Comms suitable gain control on channel 1. The
+output of AGC stage is the pipeline output which is written into a 2 channel output wav file.
+
+The pipeline is run on a single thread.
+
+The input file input.wav has 2 channels of mic input followed by 2 channels of reference input. Output is written to the output.wav file.
+
+Building
+********
+
+After configuring the CMake project, the following commands can be used from the
+`sw_avona/examples/bare-metal/pipeline_single_threaded` directory to build and run this example application using the XCORE-AI-EXPLORER board as a target:
+
+::
+    
+    cd ../../../build
+    make pipeline_single_threaded
+    cd ../examples/bare-metal/pipeline_single_threaded
+    python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/pipeline_single_threaded/bin/pipeline_single_threaded.xe --input ../shared_src/test_streams/pipeline_example_input.wav
+
+Output
+------
+
+The output file output.wav is generated in the `sw_avona/examples/bare-metal/pipeline_single_threaded` directory. The
+input file input.wav is also present in the same directory. View output.wav and input.wav in Audacity to compare the
+pipeline output against the microphone input.
+
+Example App: pipeline_multi_threaded
+=====================================
+
+This example demonstrates how the audio processing stages are put together in a pipeline where stages are run in
+parallel on separate hardware threads.
+
+In it, a 32-bit, 4 channel wav file input.wav is read and processed through the pipeline stages frame by frame. The
+example currently demonstrates a pipeline having AEC and AGC stages.
+
+AEC is configured for 2 mic input channels, 2 reference input channels, 10 phase main filter and a 5 phase shadow
+filter. This example calls AEC functions using 2 threads to process a frame through the AEC stage. AEC stage generates
+the echo cancelled version of the mic input that is then sent for processing through the AGC.
+
+AGC is configured for ASR engine suitable gain control on channel 0 and Comms suitable gain control on channel 1. The
+output of AGC stage is the pipeline output which is written into a 2 channel output wav file.
+
+In total, the audio processing stages consume 3 hardware threads; 2 for AEC stage and 1 for AGC stage.
+
+The input file input.wav has 2 channels of mic input followed by 2 channels of reference input. Output is written to the output.wav file.
+
+Building
+********
+
+After configuring the CMake project, the following commands can be used from the
+`sw_avona/examples/bare-metal/pipeline_multi_threaded` directory to build and run this example application using the
+XCORE-AI-EXPLORER board as a target:
+
+::
+    
+    cd ../../../build
+    make pipeline_multi_threaded
+    cd ../examples/bare-metal/pipeline_multi_threaded
+    python ../shared_src/python/run_xcoreai.py ../../../build/examples/bare-metal/pipeline_multi_threaded/bin/pipeline_multi_threaded.xe --input ../shared_src/test_streams/pipeline_example_input.wav
+
+Output
+------
+
+The output file output.wav is generated in the `sw_avona/examples/bare-metal/pipeline_multi_threaded` directory. The
+input file input.wav is also present in the same directory. View output.wav and input.wav in Audacity to compare the
+pipeline output against the microphone input.
diff --git a/examples/bare-metal/pipeline_multi_threaded/CMakeLists.txt b/examples/bare-metal/pipeline_multi_threaded/CMakeLists.txt
new file mode 100644
index 000000000..a4d23c501
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/CMakeLists.txt
@@ -0,0 +1,89 @@
+## App name
+set( APP_NAME  pipeline_multi_threaded )
+
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found (required by ${APP_NAME}).")
+endif()
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+list( APPEND  DEP_LIBS_Linux  m   )
+
+list( APPEND  DEP_LIBS
+    lib_aec
+    lib_agc
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+## Sources
+file( GLOB_RECURSE SOURCES_C  src/*.c )
+file( GLOB SOURCES_SHARED_FILEIO ${SHARED_SRC_PATH}/file_utils/*.c )
+file( GLOB SOURCES_SHARED_AEC ${SHARED_SRC_PATH}/aec/aec_process_frame_2threads.c )
+unset( SOURCES_SHARED )
+list( APPEND SOURCES_SHARED ${SOURCES_SHARED_FILEIO} ${SOURCES_SHARED_AEC} )
+file( GLOB SOURCES_XC src/*.xc )
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES  ${XSCOPE_FILEIO_PATH}/src/*.c )
+
+unset( SOURCES )
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_SHARED})
+unset( SOURCES_XCORE )
+list( APPEND SOURCES_XCORE ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} )
+list( APPEND SOURCES ${SOURCES_${CMAKE_SYSTEM_NAME}} )
+
+## Includes
+unset( INCLUDES )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/file_utils ${SHARED_SRC_PATH}/aec )
+unset( INCLUDES_XCORE )
+list( APPEND INCLUDES_XCORE ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+list( APPEND INCLUDES ${INCLUDES_${CMAKE_SYSTEM_NAME}} )
+
+# Absolute path to the xscope probe configuration (a relative name resolves against the current source directory)
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+unset(COMPILE_FLAGS_Linux)
+list( APPEND  COMPILE_FLAGS_Linux   ""  )
+
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+unset(LINKER_FLAGS_Linux)
+list( APPEND  LINKER_FLAGS_Linux  "" )
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} ${SOURCES_C_APP} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS})
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# LINK_FLAGS takes one space-separated string, so flatten the ;-separated list before setting the property
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+  set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/examples/bare-metal/pipeline_multi_threaded/config.xscope b/examples/bare-metal/pipeline_multi_threaded/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.c b/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.c
new file mode 100644
index 000000000..302581f07
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.c
@@ -0,0 +1,25 @@
+#include "aec_task_distribution.h"
+
+/* Structure used for issuing jobs on cores: in each member, one row per thread and one entry per pass */
+task_distribution_t tdist = {
+    { /* par_3_tasks_and_channels: entries are {task, channel, is_active} */
+        {{0, 0, 1},{2, 0, 1},{1, 1, 1},},
+        {{1, 0, 1},{0, 1, 1},{2, 1, 1},},
+    },
+    { /* par_2_tasks_and_channels: entries are {task, channel, is_active} */
+        {{0, 0, 1},{0, 1, 1},},
+        {{1, 0, 1},{1, 1, 1},},
+    },
+    { /* par_1_tasks_and_channels: entries are {task, channel, is_active} */
+        {{0, 0, 1},},
+        {{0, 1, 1},},
+    },
+    { /* par_2_tasks: entries are {task, is_active} */
+        {{0, 1},},
+        {{1, 1},},
+    },
+    { /* par_3_tasks: entries are {task, is_active}; the last {0, 0} entry has is_active = 0 */
+        {{0, 1},{2, 1},},
+        {{1, 1},{0, 0},},
+    },
+};
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.h b/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.h
new file mode 100644
index 000000000..33500ef13
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/aec_task_distribution.h
@@ -0,0 +1,78 @@
+#ifndef aec_task_distribution_h_
+#define aec_task_distribution_h_
+
+/**
+ * @page This header defines the data structures used when distributing tasks across hardware threads.
+ *
+ * The task distribution scheme distributes tasks across hardware threads for 2 scenarios.
+ *      1. Distribute multiple unique tasks across multiple HW threads. For example, for a 3 tasks, 2 threads configuration,
+ *         distribute [task0, task1, task2] across [Thread0, Thread1].
+ *      2. Distribute multiple (task, channel) pairs across multiple HW threads. For example, for a 3 tasks, 2 channels, 2 threads
+ *      configuration, distribute [(task0, ch0), (task0, ch1), (task1, ch0), (task1, ch1), (task2, ch0), (task2,
+ *      ch1)] across [Thread0, Thread1].
+ *      Number of channels used when defining the (task, channel) pair is fixed to max(`AEC_MAX_Y_CHANNELS`,
+ *      `AEC_MAX_X_CHANNELS`).
+ */
+
+/**
+ * @brief Structure used when distributing tasks across hardware threads.
+ */ 
+typedef struct {
+    /** Task index.*/
+    int task;
+    /** Flag indicating whether the task is active on that core. The task is run on the core only when is_active is set
+     * to 1*/
+    int is_active;
+}par_tasks_t;
+
+/**
+ * @brief Structure used when distributing (task, channel) pairs across hardware threads.
+ */
+typedef struct {
+    /** Task index.*/
+    int task;
+    /** Channel index.*/
+    int channel;
+    /** Flag indicating whether the (task, channel) pair is active on that core. The (task, channel) pair is run on the
+     * core only when is_active is set to 1*/
+    int is_active;
+}par_tasks_and_channels_t;
+
+
+#define AEC_THREAD_COUNT   (2) ///< Number of hardware threads available
+
+/** Number of iterations run on a given thread when distributing 2 tasks across AEC_THREAD_COUNT threads*/
+#define AEC_2_TASKS_PASSES   (1)
+
+/** Number of iterations run on a given thread when distributing 3 tasks across AEC_THREAD_COUNT threads*/
+#define AEC_3_TASKS_PASSES   (2)
+
+/** Number of iterations run on a given thread when distributing 3 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_3_TASKS_AND_CHANNELS_PASSES   (3)
+
+/** Number of iterations run on a given thread when distributing 2 tasks, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_2_TASKS_AND_CHANNELS_PASSES   (2)
+
+/** Number of iterations run on a given thread when distributing 1 task, max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS)
+ * channels across AEC_THREAD_COUNT number of threads*/
+#define AEC_1_TASKS_AND_CHANNELS_PASSES   (1)
+
+typedef struct {
+    /** (task, channel) pair schedule for 3 tasks and max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, indexed as
+     * [thread][pass] over AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_3_tasks_and_channels[AEC_THREAD_COUNT][AEC_3_TASKS_AND_CHANNELS_PASSES];
+    /** (task, channel) pair schedule for 2 tasks and max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, indexed as
+     * [thread][pass] over AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_2_tasks_and_channels[AEC_THREAD_COUNT][AEC_2_TASKS_AND_CHANNELS_PASSES];
+    /** (task, channel) pair schedule for 1 task and max(AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS) channels, indexed as
+     * [thread][pass] over AEC_THREAD_COUNT threads */
+    par_tasks_and_channels_t par_1_tasks_and_channels[AEC_THREAD_COUNT][AEC_1_TASKS_AND_CHANNELS_PASSES];
+    /** task schedule for 2 tasks, indexed as [thread][pass] over AEC_THREAD_COUNT threads */
+    par_tasks_t par_2_tasks[AEC_THREAD_COUNT][AEC_2_TASKS_PASSES];
+    /** task schedule for 3 tasks, indexed as [thread][pass] over AEC_THREAD_COUNT threads */
+    par_tasks_t par_3_tasks[AEC_THREAD_COUNT][AEC_3_TASKS_PASSES];
+}task_distribution_t;
+
+#endif /* aec_task_distribution_h_ */
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/main.xc b/examples/bare-metal/pipeline_multi_threaded/src/main.xc
new file mode 100644
index 000000000..2ba862303
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/main.xc
@@ -0,0 +1,28 @@
+// Copyright 2017-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <stdio.h>
+#include <xscope.h>
+#include <stdlib.h>
+
+#include "xscope_io_device.h"
+
+extern "C" {
+    extern void main_tile0(const char *input_file_name, const char* output_file_name);
+}
+
+int main(){
+    chan xscope_chan;  // shared by xscope_host_data() and xscope_io_init() below
+
+    par {
+        xscope_host_data(xscope_chan);
+        on tile[0]: 
+        {
+          xscope_io_init(xscope_chan);
+          main_tile0("input.wav", "output.wav");  // fixed input/output file names
+          _Exit(0);  // exit as soon as main_tile0() returns
+        }
+    }
+    return 0;
+}
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/pipeline.c b/examples/bare-metal/pipeline_multi_threaded/src/pipeline.c
new file mode 100644
index 000000000..66e44bbac
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/pipeline.c
@@ -0,0 +1,123 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <string.h>
+#include <stdlib.h>
+#include <xcore/channel.h>
+#include <xcore/chanend.h>
+#include <xcore/channel_transaction.h>
+#include <xcore/port.h>
+#include <xcore/parallel.h>
+#include <xcore/assert.h>
+#include <xcore/hwtimer.h>
+
+#include "pipeline_config.h"
+#include "pipeline_state.h"
+
+#include "aec_api.h"
+#include "aec_memory_pool.h"
+#include "agc_api.h"
+
+extern void aec_process_frame_2threads(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+
+DECLARE_JOB(pipeline_stage_1, (chanend_t, chanend_t));
+DECLARE_JOB(pipeline_stage_2, (chanend_t, chanend_t));
+
+
+/// pipeline_stage_1: AEC stage. Receives mic + reference frames, sends metadata and echo-cancelled mic frames.
+void pipeline_stage_1(chanend_t c_frame_in, chanend_t c_frame_out) {
+    // Metadata sent downstream ahead of every output frame
+    pipeline_metadata_t metadata;
+    // AEC state and filter memory
+    aec_state_t DWORD_ALIGNED main_state;
+    aec_state_t DWORD_ALIGNED shadow_state;
+    aec_shared_state_t DWORD_ALIGNED shared_state;
+    uint8_t DWORD_ALIGNED main_mem_pool[sizeof(aec_memory_pool_t)];
+    uint8_t DWORD_ALIGNED shadow_mem_pool[sizeof(aec_shadow_filt_memory_pool_t)];
+
+    // Set up the AEC main and shadow filters
+    aec_init(&main_state, &shadow_state, &shared_state,
+            &main_mem_pool[0], &shadow_mem_pool[0],
+            AP_MAX_Y_CHANNELS, AP_MAX_X_CHANNELS,
+            AEC_MAIN_FILTER_PHASES, AEC_SHADOW_FILTER_PHASES);
+
+    int32_t DWORD_ALIGNED pcm[AP_MAX_X_CHANNELS + AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    for(;;) {
+        // Read one frame for every channel; the y channels occupy the first AP_MAX_Y_CHANNELS rows
+        chan_in_buf_word(c_frame_in, (uint32_t*)&pcm[0][0], ((AP_MAX_X_CHANNELS+AP_MAX_Y_CHANNELS) * AP_FRAME_ADVANCE));
+
+        /** AEC*/
+        // To save memory, no shadow filter output is requested and the main filter output overwrites the y rows
+        aec_process_frame_2threads(&main_state, &shadow_state, &pcm[0], NULL, &pcm[0], &pcm[AP_MAX_Y_CHANNELS]);
+
+        // Fill in the metadata for this frame
+        metadata.max_ref_energy = aec_calc_max_ref_energy(&pcm[AP_MAX_Y_CHANNELS], AP_MAX_X_CHANNELS);
+        for(int y_ch = 0; y_ch < AP_MAX_Y_CHANNELS; ++y_ch) {
+            metadata.aec_corr_factor[y_ch] = aec_calc_corr_factor(&main_state, y_ch);
+        }
+        // Send the metadata first...
+        chan_out_buf_byte(c_frame_out, (uint8_t*)&metadata, sizeof(pipeline_metadata_t));
+
+        // ...then the processed y channels
+        chan_out_buf_word(c_frame_out, (uint32_t*)&pcm[0][0], (AP_MAX_Y_CHANNELS * AP_FRAME_ADVANCE));
+    }
+}
+
+
+/// pipeline_stage_2: AGC stage. Applies gain control to each channel received from stage 1.
+void pipeline_stage_2(chanend_t c_frame_in, chanend_t c_frame_out) {
+    // Metadata received from the previous stage
+    pipeline_metadata_t stage1_md;
+    // AGC configuration: ASR profile for channel 0, comms profile for the remaining channels
+    agc_config_t asr_cfg = AGC_PROFILE_ASR;
+    agc_config_t comms_cfg = AGC_PROFILE_COMMS;
+    asr_cfg.adapt_on_vad = 0; // We don't have VAD yet
+    comms_cfg.adapt_on_vad = 0; // We don't have VAD yet
+    comms_cfg.lc_enabled = 1; // Enable loss control on comms
+
+    agc_state_t agc[AP_MAX_Y_CHANNELS];
+    agc_init(&agc[0], &asr_cfg);
+    for(int ch = 1; ch < AP_MAX_Y_CHANNELS; ++ch) {
+        agc_init(&agc[ch], &comms_cfg);
+    }
+
+    agc_meta_data_t agc_meta;
+    agc_meta.vad_flag = AGC_META_DATA_NO_VAD;
+
+    int32_t pcm[AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    for(;;) {
+        // Metadata arrives first
+        chan_in_buf_byte(c_frame_in, (uint8_t*)&stage1_md, sizeof(pipeline_metadata_t));
+        agc_meta.aec_ref_power = stage1_md.max_ref_energy;
+
+        // Then the frame itself
+        chan_in_buf_word(c_frame_in, (uint32_t*)&pcm[0][0], (AP_MAX_Y_CHANNELS * AP_FRAME_ADVANCE));
+
+        /** AGC*/
+        for(int ch = 0; ch < AP_MAX_Y_CHANNELS; ++ch) {
+            agc_meta.aec_corr_factor = stage1_md.aec_corr_factor[ch];
+            // In-place processing: the input row doubles as the AGC output buffer
+            agc_process_frame(&agc[ch], pcm[ch], pcm[ch], &agc_meta);
+        }
+
+        // Pass the gain-controlled frame on
+        chan_out_buf_word(c_frame_out, (uint32_t*)&pcm[0][0], (AP_MAX_Y_CHANNELS * AP_FRAME_ADVANCE));
+    }
+}
+
+
+/// Pipeline: runs the AEC and AGC stages as parallel jobs connected by an internal channel
+void pipeline(chanend_t c_pcm_in_b, chanend_t c_pcm_out_a) {
+    // Stage 1 (AEC) feeds stage 2 (AGC) over this channel
+    channel_t c_aec_to_agc = chan_alloc();
+
+    PAR_JOBS(
+        PJOB(pipeline_stage_1, (c_pcm_in_b, c_aec_to_agc.end_a)),
+        PJOB(pipeline_stage_2, (c_aec_to_agc.end_b, c_pcm_out_a))
+    );
+}
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/pipeline_config.h b/examples/bare-metal/pipeline_multi_threaded/src/pipeline_config.h
new file mode 100644
index 000000000..39897ba5d
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/pipeline_config.h
@@ -0,0 +1,13 @@
+#ifndef pipeline_config_h_
+#define pipeline_config_h_
+
+#define AP_MAX_Y_CHANNELS (2) // Number of mic (y) channels in each frame
+#define AP_MAX_X_CHANNELS (2) // Number of reference (x) channels in each frame
+#define AP_FRAME_ADVANCE (240) // Samples per channel in each frame passed through the pipeline
+
+#define AEC_MAX_Y_CHANNELS   (AP_MAX_Y_CHANNELS) // AEC channel counts follow the pipeline channel counts
+#define AEC_MAX_X_CHANNELS   (AP_MAX_X_CHANNELS)
+#define AEC_MAIN_FILTER_PHASES    (10) // Number of phases of the AEC main filter
+#define AEC_SHADOW_FILTER_PHASES    (5) // Number of phases of the AEC shadow filter
+
+#endif /* pipeline_config_h_ */
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/pipeline_state.h b/examples/bare-metal/pipeline_multi_threaded/src/pipeline_state.h
new file mode 100644
index 000000000..cbf0b9146
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/pipeline_state.h
@@ -0,0 +1,13 @@
+#ifndef AP_STAGE_A_STATE_H
+#define AP_STAGE_A_STATE_H
+
+#include "pipeline_config.h"
+#include "xs3_math.h"
+// Per-frame metadata: pipeline stage 2 receives one of these ahead of each frame and copies values from it into its AGC metadata
+typedef struct {
+    float_s32_t max_ref_energy; // copied to agc_meta_data_t::aec_ref_power by stage 2
+    float_s32_t aec_corr_factor[AP_MAX_Y_CHANNELS]; // one value per mic (y) channel, copied to agc_meta_data_t::aec_corr_factor
+    int32_t vad_flag; // NOTE(review): not read by the stage 2 code visible in this change; confirm intended use
+}pipeline_metadata_t;
+
+#endif
diff --git a/examples/bare-metal/pipeline_multi_threaded/src/test_wav.c b/examples/bare-metal/pipeline_multi_threaded/src/test_wav.c
new file mode 100644
index 000000000..e1afe8f9b
--- /dev/null
+++ b/examples/bare-metal/pipeline_multi_threaded/src/test_wav.c
@@ -0,0 +1,124 @@
+#include <xcore/channel.h>
+#include <xcore/chanend.h>
+#include <xcore/channel_transaction.h>
+#include <xcore/port.h>
+#include <xcore/parallel.h>
+#include <xcore/assert.h>
+#include <xcore/hwtimer.h>
+#include "xs3_math.h"
+#include "fileio.h"
+#include "wav_utils.h"
+
+#include "pipeline_config.h"
+#include "pipeline_state.h"
+
+DECLARE_JOB(tx, (chanend_t, chanend_t, const char*));
+DECLARE_JOB(pipeline, (chanend_t, chanend_t));
+DECLARE_JOB(rx, (chanend_t, chanend_t, const char*));
+
+/// tx: read the input wav file, send the output wav header to the rx thread, then
+/// stream de-interleaved frames of (AP_MAX_Y_CHANNELS + AP_MAX_X_CHANNELS) channels x
+/// AP_FRAME_ADVANCE samples over c_pcm_in_a. Exits the program on a malformed input file.
+void tx(chanend_t c_pcm_in_a, chanend_t c_wavheader_a, const char* input_file_name) {
+    file_t input_file;
+    // Open input wav file containing mic and ref channels of input data
+    int ret = file_open(&input_file, input_file_name, "rb");
+    assert((!ret) && "Failed to open file");
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+    if(get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0){
+        printf("error in get_wav_header_details()\n");
+        _Exit(1);
+    }
+
+    file_seek(&input_file, input_header_size, SEEK_SET);
+    // Ensure 32bit wav file
+    if(input_header_struct.bit_depth != 32){
+        printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+        _Exit(1);
+    }
+    // Ensure input wav file contains correct number of channels
+    if(input_header_struct.num_channels != (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS)){
+        printf("Error: wav num channels(%d) does not match aec(%u)\n", input_header_struct.num_channels, (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS));
+        _Exit(1);
+    }
+
+    // Number of whole AP_FRAME_ADVANCE-sample blocks in the wav file; a trailing partial block is not processed
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    unsigned block_count = frame_count / AP_FRAME_ADVANCE;
+    wav_form_header(&output_header_struct,
+            input_header_struct.audio_format,
+            AP_MAX_Y_CHANNELS,
+            input_header_struct.sample_rate,
+            input_header_struct.bit_depth,
+            block_count*AP_FRAME_ADVANCE);
+
+    // Send output_header_struct to the rx thread
+    chan_out_buf_byte(c_wavheader_a, (uint8_t*)&output_header_struct, sizeof(wav_header));
+
+    int32_t input_read_buffer[AP_FRAME_ADVANCE * (AP_MAX_Y_CHANNELS + AP_MAX_X_CHANNELS)] = {0}; // Array for storing interleaved input read from wav file
+    int32_t DWORD_ALIGNED frame[AP_MAX_X_CHANNELS + AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+
+    for(unsigned b=0;b<block_count;b++){
+        long input_location =  wav_get_frame_start(&input_header_struct, b * AP_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t*)&input_read_buffer[0], bytes_per_frame* AP_FRAME_ADVANCE);
+        // Deinterleave and copy to a [channels][AP_FRAME_ADVANCE] array
+        for(unsigned f=0; f<AP_FRAME_ADVANCE; f++){
+            // Cover every y AND x channel; the bound must match the first dimension of frame[][]
+            for(unsigned ch=0; ch<(AP_MAX_Y_CHANNELS + AP_MAX_X_CHANNELS); ch++){
+                unsigned i =(f * (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS)) + ch;
+                frame[ch][f] = input_read_buffer[i];
+            }
+        }
+        chan_out_buf_word(c_pcm_in_a, (uint32_t*)&frame[0][0], ((AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS) * AP_FRAME_ADVANCE)); // Transmit input frame over channel
+    }
+}
+
+/// rx: write the wav header received over c_wavheader_b, then every interleaved pipeline output frame, to the output file
+void rx(chanend_t c_pcm_out_b, chanend_t c_wavheader_b, const char* output_file_name) {
+    file_t output_file;
+    int ret = file_open(&output_file, output_file_name, "wb");
+    assert((!ret) && "Failed to open file");
+
+    // Block until the output header arrives, then store it at the start of the file
+    wav_header output_header_struct;
+    chan_in_buf_byte(c_wavheader_b, (uint8_t*)&output_header_struct, sizeof(wav_header));
+    file_write(&output_file, (uint8_t*)(&output_header_struct),  WAV_HEADER_BYTES);
+
+    // Frame as received ([channel][sample]) and its interleaved form as written to the file
+    int32_t DWORD_ALIGNED pipeline_output[AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    int32_t output_write_buffer[AP_FRAME_ADVANCE * (AP_MAX_Y_CHANNELS)];
+
+    // Number of frames to expect, derived from the data size announced in the header
+    int32_t num_frames = (output_header_struct.data_bytes) / (output_header_struct.num_channels * (output_header_struct.bit_depth/8) * AP_FRAME_ADVANCE);
+    for(int32_t n=0; n<num_frames; n++)
+    {
+        // Receive one output frame over channel
+        chan_in_buf_word(c_pcm_out_b, (uint32_t*)&pipeline_output[0][0], (AP_MAX_Y_CHANNELS * AP_FRAME_ADVANCE));
+        // Interleave the channels sample by sample so the frame can be written to the wav file
+        for(unsigned s=0; s<AP_FRAME_ADVANCE; s++){
+            for(unsigned c=0; c<AP_MAX_Y_CHANNELS; c++){
+                output_write_buffer[s*(AP_MAX_Y_CHANNELS) + c] = pipeline_output[c][s];
+            }
+        }
+        file_write(&output_file, (uint8_t*)(output_write_buffer), output_header_struct.bit_depth/8 * AP_FRAME_ADVANCE * AP_MAX_Y_CHANNELS);
+    }
+    shutdown_session();
+    _Exit(0);
+}
+
+// Runs tx, pipeline and rx as three parallel jobs connected by channels.
+void main_tile0(const char *input_file_name, const char* output_file_name) {
+    channel_t c_to_pipeline = chan_alloc(), c_from_pipeline = chan_alloc();
+    channel_t c_header = chan_alloc(); // carries the output wav header from tx to rx
+    PAR_JOBS(
+        PJOB(tx, (c_to_pipeline.end_a, c_header.end_a, input_file_name)),
+        PJOB(pipeline, (c_to_pipeline.end_b, c_from_pipeline.end_a)),
+        PJOB(rx, (c_from_pipeline.end_b, c_header.end_b, output_file_name))
+    );
+}
+
diff --git a/examples/bare-metal/pipeline_single_threaded/CMakeLists.txt b/examples/bare-metal/pipeline_single_threaded/CMakeLists.txt
new file mode 100644
index 000000000..d00dc14ba
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/CMakeLists.txt
@@ -0,0 +1,89 @@
+## App name
+set( APP_NAME  pipeline_single_threaded )
+# Test the bare variable name: with ${Python3_FOUND} an unset variable expands to the malformed `if( NOT )` instead of reaching the message below.
+if( NOT Python3_FOUND )  # presumably set by a find_package(Python3) call in a parent CMakeLists - confirm
+  message(FATAL_ERROR "Python3 not found; it is required for running ${APP_NAME}.")
+endif()
+
+## Libraries to link: lib_aec and lib_agc always, plus the per-platform list selected by CMAKE_SYSTEM_NAME
+list( APPEND  DEP_LIBS_XCORE  ""  )
+list( APPEND  DEP_LIBS_Linux  m   )
+
+list( APPEND  DEP_LIBS        
+    lib_aec
+    lib_agc
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+## Sources: this example's own files, the shared file utilities and the single-threaded AEC frame processing
+file( GLOB_RECURSE SOURCES_C  src/*.c )
+file( GLOB SOURCES_SHARED_FILEIO ${SHARED_SRC_PATH}/file_utils/*.c )
+file( GLOB SOURCES_SHARED_AEC ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+unset( SOURCES_SHARED )
+list( APPEND SOURCES_SHARED ${SOURCES_SHARED_FILEIO} ${SOURCES_SHARED_AEC} )
+file( GLOB SOURCES_XC src/*.xc )
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES ${XSCOPE_FILEIO_PATH}/src/*.c )
+
+unset( SOURCES )
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_SHARED})
+unset( SOURCES_XCORE )
+list( APPEND SOURCES_XCORE ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} )
+list( APPEND SOURCES ${SOURCES_${CMAKE_SYSTEM_NAME}} ) # adds SOURCES_XCORE when CMAKE_SYSTEM_NAME is XCORE; no SOURCES_Linux is set in this file
+
+## Includes
+unset( INCLUDES )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/file_utils ${SHARED_SRC_PATH}/aec )
+unset( INCLUDES_XCORE )
+list( APPEND INCLUDES_XCORE ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+list( APPEND INCLUDES ${INCLUDES_${CMAKE_SYSTEM_NAME}} )
+
+# Absolute path of config.xscope; a relative name is resolved against the current source directory
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+unset(COMPILE_FLAGS_Linux)
+list( APPEND  COMPILE_FLAGS_Linux   ""  )
+
+
+## Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+unset(LINKER_FLAGS_Linux)
+list( APPEND  LINKER_FLAGS_Linux  "" )
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} ) # the platform linker flags are also passed as compile options
+
+#########
+## Executable output directory: <current binary dir>/bin
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} ${SOURCES_C_APP} ) # NOTE(review): SOURCES_C_APP is not set in this file - confirm it comes from a parent scope
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} ${DEP_LIBS})
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# LINK_FLAGS takes a single space-separated string, so flatten the ;-separated list first (otherwise the set_target_properties command fails)
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE ) # xcore builds only: name the executable <APP_NAME>.xe and make linking depend on the xscope config file
+  set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/examples/bare-metal/pipeline_single_threaded/config.xscope b/examples/bare-metal/pipeline_single_threaded/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/examples/bare-metal/pipeline_single_threaded/src/main.xc b/examples/bare-metal/pipeline_single_threaded/src/main.xc
new file mode 100644
index 000000000..91d03d7bf
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/src/main.xc
@@ -0,0 +1,28 @@
+// Copyright 2017-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <stdio.h>
+#include <xscope.h>
+#include <stdlib.h>
+
+#include "xscope_io_device.h"
+
+extern "C" {
+    extern void pipeline_wrapper(const char *input_file_name, const char* output_file_name);
+}
+
+int main(){ // Runs xscope_host_data() in parallel with the example itself, which is placed on tile[1]
+    chan xscope_chan; // shared between xscope_host_data() and xscope_io_init()
+
+    par {
+        xscope_host_data(xscope_chan);
+        on tile[1]: 
+        {
+          xscope_io_init(xscope_chan);
+          pipeline_wrapper("input.wav", "output.wav"); // input and output file names are fixed in this build
+          _Exit(0); // end the program as soon as pipeline_wrapper() returns
+        }
+    }
+    return 0;
+}
diff --git a/examples/bare-metal/pipeline_single_threaded/src/pipeline.c b/examples/bare-metal/pipeline_single_threaded/src/pipeline.c
new file mode 100644
index 000000000..0e7407b86
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/src/pipeline.c
@@ -0,0 +1,62 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <string.h>
+#include <stdlib.h>
+
+#include "aec_api.h"
+#include "pipeline_config.h"
+#include "pipeline_state.h"
+
+extern void aec_process_frame_1thread(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+
+
+void pipeline_init(pipeline_state_t *state) {
+    // Start from an all-zero state before the individual modules are initialised
+    memset(state, 0, sizeof(*state));
+
+    // AEC: main and shadow filter state, the shared state and one memory pool per filter
+    aec_init(&state->aec_main_state, &state->aec_shadow_state, &state->aec_shared_state,
+            &state->aec_main_memory_pool[0], &state->aec_shadow_memory_pool[0],
+            AEC_MAX_Y_CHANNELS, AEC_MAX_X_CHANNELS,
+            AEC_MAIN_FILTER_PHASES, AEC_SHADOW_FILTER_PHASES);
+
+    // AGC: channel 0 uses the ASR profile
+    agc_config_t asr_cfg = AGC_PROFILE_ASR;
+    asr_cfg.adapt_on_vad = 0; // We don't have VAD yet
+    agc_init(&state->agc_state[0], &asr_cfg);
+
+    // AGC: every remaining channel uses the comms profile with loss control enabled
+    agc_config_t comms_cfg = AGC_PROFILE_COMMS;
+    comms_cfg.adapt_on_vad = 0; // We don't have VAD yet
+    comms_cfg.lc_enabled = 1;
+    for(int idx=1; idx<AP_MAX_Y_CHANNELS; idx++) {
+        agc_init(&state->agc_state[idx], &comms_cfg); }
+}
+
+void pipeline_process_frame(pipeline_state_t *state,
+        int32_t (*input_y_data)[AP_FRAME_ADVANCE],
+        int32_t (*input_x_data)[AP_FRAME_ADVANCE],
+        int32_t (*output_data)[AP_FRAME_ADVANCE])
+{
+    aec_state_t *aec_main = &state->aec_main_state;
+    // Stage 1: AEC. Only the main filter output is passed on to the AGC below.
+    int32_t main_out[AEC_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    int32_t shadow_out[AEC_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    aec_process_frame_1thread(aec_main, &state->aec_shadow_state, main_out, shadow_out, input_y_data, input_x_data);
+
+    // AGC metadata common to all channels: no VAD, reference power computed from the x input
+    agc_meta_data_t md;
+    md.vad_flag = AGC_META_DATA_NO_VAD;
+    md.aec_ref_power = aec_calc_max_ref_energy(input_x_data, AP_MAX_X_CHANNELS);
+    // Stage 2: AGC per channel, each with its own AEC correlation factor
+    for(int idx=0; idx<AP_MAX_Y_CHANNELS; idx++) {
+        md.aec_corr_factor = aec_calc_corr_factor(aec_main, idx);
+        agc_process_frame(&state->agc_state[idx], output_data[idx], main_out[idx], &md);
+    }
+}
diff --git a/examples/bare-metal/pipeline_single_threaded/src/pipeline_config.h b/examples/bare-metal/pipeline_single_threaded/src/pipeline_config.h
new file mode 100644
index 000000000..39897ba5d
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/src/pipeline_config.h
@@ -0,0 +1,13 @@
+#ifndef pipeline_config_h_
+#define pipeline_config_h_
+
+#define AP_MAX_Y_CHANNELS (2) // Number of mic (y) channels in each frame
+#define AP_MAX_X_CHANNELS (2) // Number of reference (x) channels in each frame
+#define AP_FRAME_ADVANCE (240) // Samples per channel in each frame passed through the pipeline
+
+#define AEC_MAX_Y_CHANNELS   (AP_MAX_Y_CHANNELS) // AEC channel counts follow the pipeline channel counts
+#define AEC_MAX_X_CHANNELS   (AP_MAX_X_CHANNELS)
+#define AEC_MAIN_FILTER_PHASES    (10) // Main filter phase count passed to aec_init()
+#define AEC_SHADOW_FILTER_PHASES    (5) // Shadow filter phase count passed to aec_init()
+
+#endif /* pipeline_config_h_ */
diff --git a/examples/bare-metal/pipeline_single_threaded/src/pipeline_state.h b/examples/bare-metal/pipeline_single_threaded/src/pipeline_state.h
new file mode 100644
index 000000000..4a29aedd9
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/src/pipeline_state.h
@@ -0,0 +1,21 @@
+#ifndef AP_STAGE_A_STATE_H
+#define AP_STAGE_A_STATE_H
+
+#include "pipeline_config.h"
+#include "aec_state.h"
+#include "aec_memory_pool.h"
+#include "agc_api.h"
+// All state of the single-threaded pipeline; zeroed and initialised by pipeline_init()
+typedef struct {
+    // AEC: main and shadow filter state, the shared state, and the byte pools handed to aec_init()
+    aec_state_t DWORD_ALIGNED aec_main_state;
+    aec_state_t DWORD_ALIGNED aec_shadow_state;
+    aec_shared_state_t DWORD_ALIGNED aec_shared_state;
+    uint8_t DWORD_ALIGNED aec_main_memory_pool[sizeof(aec_memory_pool_t)];
+    uint8_t DWORD_ALIGNED aec_shadow_memory_pool[sizeof(aec_shadow_filt_memory_pool_t)];
+
+    // AGC: one independent state per mic (y) channel
+    agc_state_t agc_state[AP_MAX_Y_CHANNELS];
+} pipeline_state_t;
+
+#endif
diff --git a/examples/bare-metal/pipeline_single_threaded/src/test_wav.c b/examples/bare-metal/pipeline_single_threaded/src/test_wav.c
new file mode 100644
index 000000000..35df92f0a
--- /dev/null
+++ b/examples/bare-metal/pipeline_single_threaded/src/test_wav.c
@@ -0,0 +1,112 @@
+#include "xs3_math.h"
+#include "fileio.h"
+#include "wav_utils.h"
+
+#include "pipeline_config.h"
+#include "pipeline_state.h"
+
+extern void pipeline_init(pipeline_state_t *state);
+extern void pipeline_process_frame(pipeline_state_t *state,
+    int32_t (*input_y_data)[AP_FRAME_ADVANCE],
+    int32_t (*input_x_data)[AP_FRAME_ADVANCE],
+    int32_t (*output_data)[AP_FRAME_ADVANCE]);
+
+
+void pipeline_wrapper(const char *input_file_name, const char* output_file_name)
+{ // Reads the input wav file block by block, runs each block through the pipeline and writes the result to the output wav file
+    file_t input_file, output_file;
+    // Open input wav file containing mic and ref channels of input data
+    int ret = file_open(&input_file, input_file_name, "rb");
+    assert((!ret) && "Failed to open file");
+    // Open output wav file that will contain the pipeline output
+    ret = file_open(&output_file, output_file_name, "wb");
+    assert((!ret) && "Failed to open file");
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+    if(get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0){
+        printf("error in get_wav_header_details()\n");
+        _Exit(1);
+    }
+
+    file_seek(&input_file, input_header_size, SEEK_SET);
+    // Ensure 32bit wav file
+    if(input_header_struct.bit_depth != 32)
+     {
+         printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+         _Exit(1);
+     }
+    // Ensure input wav file contains correct number of channels
+    if(input_header_struct.num_channels != (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS)){
+        printf("Error: wav num channels(%d) does not match aec(%u)\n", input_header_struct.num_channels, (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS));
+        _Exit(1);
+    }
+    
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    // Number of whole AP_FRAME_ADVANCE-sample blocks in the wav file; a trailing partial block is not processed
+    unsigned block_count = frame_count / AP_FRAME_ADVANCE;
+    wav_form_header(&output_header_struct,
+            input_header_struct.audio_format,
+            AP_MAX_Y_CHANNELS,
+            input_header_struct.sample_rate,
+            input_header_struct.bit_depth,
+            block_count*AP_FRAME_ADVANCE);
+
+    file_write(&output_file, (uint8_t*)(&output_header_struct),  WAV_HEADER_BYTES);
+
+    int32_t input_read_buffer[AP_FRAME_ADVANCE * (AP_MAX_Y_CHANNELS + AP_MAX_X_CHANNELS)] = {0}; // Array for storing interleaved input read from wav file
+    int32_t output_write_buffer[AP_FRAME_ADVANCE * (AP_MAX_Y_CHANNELS)]; // Interleaved pipeline output, as written to the wav file
+
+    int32_t DWORD_ALIGNED frame_y[AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED frame_x[AP_MAX_X_CHANNELS][AP_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED pipeline_output[AP_MAX_Y_CHANNELS][AP_FRAME_ADVANCE];
+
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+    
+    // Initialise pipeline
+    pipeline_state_t DWORD_ALIGNED pipeline_state;
+    pipeline_init(&pipeline_state);
+
+    for(unsigned b=0;b<block_count;b++){
+        long input_location =  wav_get_frame_start(&input_header_struct, b * AP_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t*)&input_read_buffer[0], bytes_per_frame* AP_FRAME_ADVANCE);
+        // Deinterleave and copy y and x samples to their respective buffers
+        for(unsigned f=0; f<AP_FRAME_ADVANCE; f++){
+            for(unsigned ch=0;ch<AP_MAX_Y_CHANNELS;ch++){
+                unsigned i =(f * (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS)) + ch;
+                frame_y[ch][f] = input_read_buffer[i];
+            }
+            for(unsigned ch=0;ch<AP_MAX_X_CHANNELS;ch++){
+                unsigned i =(f * (AP_MAX_Y_CHANNELS+AP_MAX_X_CHANNELS)) + AP_MAX_Y_CHANNELS + ch; // x channels follow the y channels within each interleaved sample group
+                frame_x[ch][f] = input_read_buffer[i];
+            }
+        }
+        
+        // Process a frame of data through the pipeline
+        pipeline_process_frame(&pipeline_state, frame_y, frame_x, pipeline_output);
+        
+        // Create interleaved output that can be written to wav file
+        for (unsigned ch=0;ch<AP_MAX_Y_CHANNELS;ch++){
+            for(unsigned i=0;i<AP_FRAME_ADVANCE;i++){
+                output_write_buffer[i*(AP_MAX_Y_CHANNELS) + ch] = pipeline_output[ch][i];
+            }
+        }
+
+        file_write(&output_file, (uint8_t*)(output_write_buffer), output_header_struct.bit_depth/8 * AP_FRAME_ADVANCE * AP_MAX_Y_CHANNELS);
+    }
+    file_close(&input_file);
+    file_close(&output_file);
+    shutdown_session();
+}
+
+#if X86_BUILD
+int main(int argc, char **argv) { // x86 entry point. Usage: <input file name> <output file name>; returns 1 when arguments are missing
+    if(argc < 3) {
+        printf("Arguments missing. Expected: <input file name> <output file name>\n");
+        return 1; // explicit failure: assert(0) is compiled out under NDEBUG, after which argv[1]/argv[2] would be read out of bounds
+    }
+    pipeline_wrapper(argv[1], argv[2]);
+    return 0;
+}
+#endif
diff --git a/examples/bare-metal/shared_src/aec/aec_memory_pool.h b/examples/bare-metal/shared_src/aec/aec_memory_pool.h
new file mode 100644
index 000000000..558cc817a
--- /dev/null
+++ b/examples/bare-metal/shared_src/aec/aec_memory_pool.h
@@ -0,0 +1,52 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_MEMORY_POOL_H
+#define AEC_MEMORY_POOL_H
+
+#include "xs3_math.h"
+
+// Memory pool layout for the AEC main filter and the shared state; applications reserve sizeof(aec_memory_pool_t) bytes and pass that buffer to aec_init()
+typedef struct {
+    /** Memory pointed to by aec_shared_state_t::y and aec_shared_state_t::Y. The extra 2 words make room for the AEC_PROC_FRAME_LENGTH/2 + 1 complex bins of the in-place spectrum. */
+    int32_t mic_input_frame[AEC_MAX_Y_CHANNELS][AEC_PROC_FRAME_LENGTH + 2];
+    /** Memory pointed to by aec_shared_state_t::x and aec_shared_state_t::X. Also reused for main filter
+     * aec_state_t::T */
+    int32_t ref_input_frame[AEC_MAX_X_CHANNELS][AEC_PROC_FRAME_LENGTH + 2];
+    /** Memory pointed to by aec_shared_state_t::prev_y */
+    int32_t mic_prev_samples[AEC_MAX_Y_CHANNELS][AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE];
+    /** Memory pointed to by aec_shared_state_t::prev_x */
+    int32_t ref_prev_samples[AEC_MAX_X_CHANNELS][AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE];
+    /** Memory pointed to by main filter aec_state_t::H_hat, aec_shared_state_t::X_fifo, main filter
+     * aec_state_t::X_fifo_1d and shadow filter aec_state_t::X_fifo_1d */
+    complex_s32_t phase_pool_H_hat_X_fifo[((AEC_MAX_Y_CHANNELS*AEC_MAX_X_CHANNELS*AEC_MAIN_FILTER_PHASES) + (AEC_MAX_X_CHANNELS*AEC_MAIN_FILTER_PHASES)) * (AEC_PROC_FRAME_LENGTH/2 + 1)];
+    /** Memory pointed to by main filter aec_state_t::Error and aec_state_t::error */
+    complex_s32_t Error[AEC_MAX_Y_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by main filter aec_state_t::Y_hat and aec_state_t::y_hat */
+    complex_s32_t Y_hat[AEC_MAX_Y_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by main filter aec_state_t::X_energy */
+    int32_t X_energy[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by aec_shared_state_t::sigma_XX */
+    int32_t sigma_XX[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by main filter aec_state_t::inv_X_energy */
+    int32_t inv_X_energy[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by main filter aec_state_t::overlap */
+    int32_t overlap[AEC_MAX_Y_CHANNELS][UNUSED_TAPS_PER_PHASE*2];
+}aec_memory_pool_t;
+
+typedef struct { // Memory pool layout for the AEC shadow filter; applications reserve sizeof(aec_shadow_filt_memory_pool_t) bytes and pass that buffer to aec_init()
+    /** Memory pointed to by shadow filter aec_state_t::H_hat */
+    complex_s32_t phase_pool_H_hat[AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_SHADOW_FILTER_PHASES * AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::Error and aec_state_t::error */
+    complex_s32_t Error[AEC_MAX_Y_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::Y_hat and aec_state_t::y_hat */
+    complex_s32_t Y_hat[AEC_MAX_Y_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::T */
+    complex_s32_t T[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::X_energy */
+    int32_t X_energy[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::inv_X_energy */
+    int32_t inv_X_energy[AEC_MAX_X_CHANNELS][AEC_FD_FRAME_LENGTH];
+    /** Memory pointed to by shadow filter aec_state_t::overlap */
+    int32_t overlap[AEC_MAX_Y_CHANNELS][UNUSED_TAPS_PER_PHASE*2];
+}aec_shadow_filt_memory_pool_t;
+#endif
diff --git a/examples/bare-metal/shared_src/aec/aec_process_frame_1thread.c b/examples/bare-metal/shared_src/aec/aec_process_frame_1thread.c
new file mode 100644
index 000000000..5fa784d90
--- /dev/null
+++ b/examples/bare-metal/shared_src/aec/aec_process_frame_1thread.c
@@ -0,0 +1,222 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+#include "aec_defines.h"
+#include "aec_api.h"
+
+/* This is an example of processing one frame of data through the AEC pipeline stage. The example runs on 1 thread and
+ * can be compiled for both bare metal and x86.
+ */
+static unsigned X_energy_recalc_bin = 0;
+static int framenum = 0;
+void aec_process_frame_1thread(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE])
+{
+    // Read number of mic and reference channels. These are specified as part of the configuration when aec_init() is called.
+    int num_y_channels = main_state->shared_state->num_y_channels; //Number of mic channels
+    int num_x_channels = main_state->shared_state->num_x_channels; //Number of reference channels
+    
+    // Set up the input BFP structures main_state->shared_state->y and main_state->shared_state->x to point to the new frame.
+    // Initialise some other BFP structures that need to be initialised at the beginning of each frame
+    aec_frame_init(main_state, shadow_state, y_data, x_data);
+    
+    // Calculate Exponential moving average (EMA) energy of the mic and reference input.
+    for(int ch=0; ch<num_y_channels; ch++) {
+        aec_calc_time_domain_ema_energy(&main_state->shared_state->y_ema_energy[ch], &main_state->shared_state->y[ch],
+                AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE, AEC_FRAME_ADVANCE, &main_state->shared_state->config_params);
+    }
+    for(int ch=0; ch<num_x_channels; ch++) {
+        aec_calc_time_domain_ema_energy(&main_state->shared_state->x_ema_energy[ch], &main_state->shared_state->x[ch],
+                AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE, AEC_FRAME_ADVANCE, &main_state->shared_state->config_params);
+    }
+
+    // Calculate mic input spectrum for all num_y_channels of mic input
+    /* The spectrum calculation is done in place. Taking mic input as example, after the aec_forward_fft() call
+     * main_state->shared_state->Y[ch].data and main_state->shared_state->y[ch].data point to the same memory address.
+     * The spectral representation of the input is used after this function. Time domain input
+     * BFP structure main_state->shared_state->y should not be used.
+     * main_state->shared_state->Y[ch].data points to AEC_PROC_FRAME_LENGTH/2 + 1 complex 32bit spectrum samples,
+     * which represent the spectrum samples from DC to Nyquist frequency.
+     * Same is true for reference spectrum samples pointed to by  main_state->shared_state->X[ch].data
+     * as well.
+     */
+    for(int ch=0; ch<num_y_channels; ch++) {
+        aec_forward_fft(&main_state->shared_state->Y[ch], &main_state->shared_state->y[ch]);
+    }
+    // Calculate reference input spectrum for all num_x_channels of reference input
+    for(int ch=0; ch<num_x_channels; ch++) {
+        aec_forward_fft(&main_state->shared_state->X[ch], &main_state->shared_state->x[ch]);
+    }
+
+    // Calculate sum of X energy over X FIFO phases for all num_x_channels reference channels
+    /* AEC data structures store a single copy of the X FIFO that is shared between the main and shadow filter.
+     * Since main filter phases main_state->num_phases are more than the shadow filter phases shadow_state->num_phases,
+     * X FIFO holds main_state->num_phases most recent frames of reference input spectrum, where the frames are ordered
+     * from most recent to least recent. For shadow filter operation, out of this shared X FIFO, the first shadow_state->num_phases
+     * frames are considered.
+     */
+    for(int ch=0; ch<num_x_channels; ch++) {
+        // Calculate sum of X energy for main filter
+        /* BFP struct main_state->X_energy[ch] points to AEC_PROC_FRAME_LENGTH/2 + 1 real 32bit values where value at index n is
+         * the nth X sample's energy summed over main_state->num_phases number of frames in the X FIFO.
+         */
+        aec_calc_X_fifo_energy(main_state, ch, X_energy_recalc_bin);
+        
+        // Calculate sum of X energy for shadow filter
+        /* BFP struct shadow_state->X_energy[ch] points to AEC_PROC_FRAME_LENGTH/2 + 1 real 32bit values where value at index n is
+         * the nth X sample's energy summed over shadow_state->num_phases number of frames in the X FIFO.
+         */
+        aec_calc_X_fifo_energy(shadow_state, ch, X_energy_recalc_bin);
+    }
+
+    // Increment X_energy_recalc_bin to the next sample index.
+    /* Passing X_energy_recalc_bin to aec_calc_X_fifo_energy() ensures that energy of sample at index X_energy_recalc_bin
+     * is recalculated without the speed optimisations so that quantisation error can be kept in check
+     */
+    X_energy_recalc_bin += 1;
+    if(X_energy_recalc_bin == (AEC_PROC_FRAME_LENGTH/2) + 1) { // Wrap around to 0 on completing one (AEC_PROC_FRAME_LENGTH/2) + 1 samples pass.
+        X_energy_recalc_bin = 0;
+    }
+
+    // Update X-FIFO and calculate sigma_XX.
+    /* Add the current X frame to the X FIFO and remove the oldest X frame from the X FIFO.
+     * Also, calculate state->shared_state->sigma_XX. sigma_XX is the EMA of current X frame energy.
+     * It is later used to time smooth the X_energy while calculating the normalisation spectrum
+     */
+    for(int ch=0; ch<num_x_channels; ch++) {
+        aec_update_X_fifo_and_calc_sigmaXX(main_state, ch);
+    }
+
+    // Copy state->shared_state->X_fifo BFP struct to main_state->X_fifo_1d and shadow_state->X_fifo_1d BFP structs
+    /* The updated state->shared_state->X_FIFO BFP structures are copied to an alternate set of BFP structs present in the 
+     * main and shadow filter state structure, that are used to efficiently access the X FIFO in the Error computation and filter
+     * update steps.
+     */
+    aec_update_X_fifo_1d(main_state);
+    aec_update_X_fifo_1d(shadow_state);
+
+    // Calculate error spectrum and estimated mic spectrum for main and shadow adaptive filters
+    for(int ch=0; ch<num_y_channels; ch++) {
+        // main_state->Error[ch] and main_state->Y_hat[ch] are updated
+        aec_calc_Error_and_Y_hat(main_state, ch);
+
+        // shadow_state->Error[ch] and shadow_state->Y_hat[ch] are updated
+        aec_calc_Error_and_Y_hat(shadow_state, ch);
+    }
+    
+    // Calculate time domain error and time domain estimated mic input from their spectrums calculated in the previous step.
+    /* The time domain estimated mic_input (y_hat) is used to calculate the average coherence between y and y_hat in aec_calc_coherence.
+     * Only the estimated mic input calculated using the main filter is needed for coherence calculation, so the y_hat calculation is
+     * done only for main filter.
+     */
+    for(int ch=0; ch<num_y_channels; ch++) {
+        aec_inverse_fft(&main_state->error[ch], &main_state->Error[ch]);
+        aec_inverse_fft(&shadow_state->error[ch], &shadow_state->Error[ch]);
+        aec_inverse_fft(&main_state->y_hat[ch], &main_state->Y_hat[ch]);
+    }
+
+    // Calculate average coherence and average slow moving coherence between mic and estimated mic time domain signals
+    for(int ch=0; ch<num_y_channels; ch++) {
+        // main_state->shared_state->coh_mu_state[ch].coh and main_state->shared_state->coh_mu_state[ch].coh_slow are updated
+        aec_calc_coherence(main_state, ch);
+    }
+
+    // Calculate AEC filter time domain output. This is the output sent to downstream pipeline stages
+    for(int ch=0; ch<num_y_channels; ch++) {
+        aec_calc_output(main_state, &output_main[ch], ch);
+        /* Application can choose to not generate AEC shadow filter output by passing NULL as output_shadow argument.
+         * Note that aec_calc_output() will still need to be called since this function also windows the error signal
+         * which is needed for subsequent processing of the shadow filter even when output is not generated.
+         */
+        if(output_shadow != NULL) {           
+            aec_calc_output(shadow_state, &output_shadow[ch], ch);
+        }
+        else {
+            aec_calc_output(shadow_state, NULL, ch);
+        }
+    }
+
+    // Calculate exponential moving average of main_filter time domain error.
+    /* The EMA error energy is used in ERLE calculations which are done only for the main filter,
+     * so not calling this function to calculate shadow filter error EMA energy.
+     */
+    for(int ch=0; ch<num_y_channels; ch++) {
+        //create a bfp_s32_t structure to point to output array
+        bfp_s32_t temp;
+        bfp_s32_init(&temp, &output_main[ch][0], -31, AEC_FRAME_ADVANCE, 1);
+        aec_calc_time_domain_ema_energy(&main_state->error_ema_energy[ch], &temp, 0, AEC_FRAME_ADVANCE, &main_state->shared_state->config_params);
+    }
+
+    // Convert shadow and main filters error back to frequency domain since subsequent AEC functions will use the error spectrum.
+    // The error spectrum is later used to compute T values which are then used while updating the adaptive filter.
+    for(int ch=0; ch<num_y_channels; ch++) {
+        // main_state->Error[ch] is updated
+        aec_forward_fft(&main_state->Error[ch], &main_state->error[ch]);
+        
+        // shadow_state->Error[ch] is updated
+        aec_forward_fft(&shadow_state->Error[ch], &shadow_state->error[ch]
+               );
+    }
+
+    // Calculate energies of mic input and error spectrum of main and shadow filters.
+    // These energy values are later used in aec_compare_filters_and_calc_mu() to estimate how well the filters are performing.
+    for(int ch=0; ch<num_y_channels; ch++) {
+        // main_state->overall_Error[ch] is updated
+        aec_calc_freq_domain_energy(&main_state->overall_Error[ch], &main_state->Error[ch]);
+        
+        // shadow_state->overall_Error[ch] is updated
+        aec_calc_freq_domain_energy(&shadow_state->overall_Error[ch], &shadow_state->Error[ch]);
+        
+        // main_state->shared_state->overall_Y[ch] is updated
+        aec_calc_freq_domain_energy(&main_state->shared_state->overall_Y[ch], &main_state->shared_state->Y[ch]);
+    }
+
+    // Compare and update filters. Calculate adaption step_size mu
+    /* At this point we're ready to check how well the filters are performing and update them if needed.
+     * 
+     * main_state->shared_state->shadow_filter_params are updated to indicate the current state of filter comparison algorithm.
+     * main_state->H_hat, main_state->Error, shadow_state->H_hat, shadow_state->Error are optionally updated depending on the update needed.
+     *
+     * After the filter comparison and update step, the adaption step size mu is calculated for main and shadow filter.
+     * main_state->mu and shadow_state->mu are updated.
+     */
+    aec_compare_filters_and_calc_mu(
+            main_state,
+            shadow_state);
+
+    // Calculate smoothed reference FIFO energy that is later used to scale the X FIFO in the filter update step.
+    // This calculation is done differently for main and shadow filters, so a flag indicating filter type is specified as one of the input arguments.
+    for(int ch=0; ch<num_x_channels; ch++) {
+        // main_state->inv_X_energy[ch] is updated.
+        aec_calc_normalisation_spectrum(main_state, ch, 0);
+
+        // shadow_state->inv_X_energy[ch] is updated.
+        aec_calc_normalisation_spectrum(shadow_state, ch, 1);
+    }
+
+    for(int ych=0; ych<num_y_channels; ych++) {
+        // Compute T values.
+        // T is a function of state->mu, state->Error and state->inv_X_energy.
+        for(int xch=0; xch<num_x_channels; xch++) {
+            // main_state->T[ch] is updated
+            aec_calc_T(main_state, ych, xch);
+
+            // shadow_state->T[ch] is updated
+            aec_calc_T(shadow_state, ych, xch);
+        }
+        // Update filters
+        
+        // Update main_state->H_hat
+        aec_filter_adapt(main_state, ych);
+
+        // Update shadow_state->H_hat
+        aec_filter_adapt(shadow_state, ych);
+    }
+    framenum++; 
+}
diff --git a/examples/bare-metal/shared_src/aec/aec_process_frame_2threads.c b/examples/bare-metal/shared_src/aec/aec_process_frame_2threads.c
new file mode 100644
index 000000000..42231301f
--- /dev/null
+++ b/examples/bare-metal/shared_src/aec/aec_process_frame_2threads.c
@@ -0,0 +1,478 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+#include "aec_task_distribution.h"
+
+#include "aec_defines.h"
+#include "aec_api.h"
+
+/* This is a bare-metal example of processing one frame of data through the AEC pipeline stage. This example
+ * demonstrates distributing AEC functions across 2 cores in parallel using lib_xcore PAR functionality.
+ * Task distribution across cores is done using the scheme defined in aec_task_distribution.h and
+ * aec_task_distribution.c
+ */
+enum e_td_ema {Y_EMA, X_EMA, ERROR_EMA}; // Selects which EMA energy calc_time_domain_ema_energy_task() updates: mic input, reference input or main filter error
+enum e_fft {Y_FFT, X_FFT, ERROR_FFT}; // Selects which forward FFT fft_task() runs: mic input, reference input or filter error
+// Each DECLARE_JOB below names a worker function defined later in this file, with its argument types, so it can be launched via PJOB
+#include <xcore/parallel.h>
+DECLARE_JOB(calc_time_domain_ema_energy_task, (par_tasks_and_channels_t*, aec_state_t *, int32_t*, int, int, enum e_td_ema));
+DECLARE_JOB(fft_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int, enum e_fft));
+DECLARE_JOB(update_X_energy_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int, int));
+DECLARE_JOB(update_X_fifo_task, (par_tasks_and_channels_t*, aec_state_t*, int, int));
+DECLARE_JOB(calc_Error_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int));
+DECLARE_JOB(ifft_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int));
+DECLARE_JOB(calc_coh_task, (par_tasks_and_channels_t*, aec_state_t*, int, int));
+DECLARE_JOB(calc_output_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int32_t*, int32_t*, int, int));
+DECLARE_JOB(calc_freq_domain_energy_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int));
+DECLARE_JOB(calc_normalisation_spectrum_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int));
+DECLARE_JOB(calc_T_task, (par_tasks_and_channels_t*, aec_state_t*, aec_state_t*, int, int, int));
+DECLARE_JOB(filter_adapt_task, (par_tasks_t*, aec_state_t*, aec_state_t*, int, int)); // Only job scheduled per task (par_tasks_t) rather than per task and channel
+// State that persists across calls to aec_process_frame_2threads()
+extern task_distribution_t tdist; // Task schedule rows handed to the jobs; defined outside this file (presumably in aec_task_distribution.c - confirm)
+static unsigned X_energy_recalc_bin = 0; // Bin index passed to update_X_energy_task(); advanced every frame and wrapped at AEC_PROC_FRAME_LENGTH/2 + 1
+static int framenum = 0; // Incremented once per processed frame; not read anywhere in this file
+/* Process one frame of mic and reference data through the AEC, splitting the work across 2 cores.
+ * Every PAR_JOBS block below launches two jobs in parallel; job n is handed row n of the matching task schedule in tdist.
+ * main_state/shadow_state: main and shadow filter states. y_data/x_data: new mic and reference frames, one row per channel.
+ * output_main/output_shadow: receive the main and shadow filter time domain output; output_shadow may be NULL.
+ */
+void aec_process_frame_2threads(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],    
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE])
+{
+    // Read number of mic and reference channels. These are specified as part of the configuration when aec_init() is called.
+    int num_y_channels = main_state->shared_state->num_y_channels; //Number of mic channels
+    int num_x_channels = main_state->shared_state->num_x_channels; //Number of reference channels
+
+    // Set up the input BFP structures main_state->shared_state->y and main_state->shared_state->x to point to the new frame.
+    // Initialise some other BFP structures that need to be initialised at the beginning of each frame
+    aec_frame_init(main_state, shadow_state, y_data, x_data);
+
+    // Calculate Exponential moving average (EMA) energy of the mic input. main_state->shared_state->y_ema_energy[ch] is updated.
+    PAR_JOBS(
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[0], main_state, NULL, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, Y_EMA)),
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[1], main_state, NULL, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, Y_EMA))
+        );
+    // Calculate EMA energy of the reference input. main_state->shared_state->x_ema_energy[ch] is updated.
+    PAR_JOBS(
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[0], main_state, NULL, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_EMA)),
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[1], main_state, NULL, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_EMA))
+        );
+
+    // Calculate mic input spectrum for all num_y_channels of mic input
+    /* The spectrum calculation is done in place. Taking mic input as example, after the aec_forward_fft() call
+     * main_state->shared_state->Y[ch].data and main_state->shared_state->y[ch].data point to the same memory address.
+     * Only the spectral representation of the input is used after this function; the time domain input
+     * BFP structure main_state->shared_state->y should not be used.
+     * main_state->shared_state->Y[ch].data points to AEC_PROC_FRAME_LENGTH/2 + 1 complex 32bit spectrum samples, which
+     * represent the spectrum from DC to Nyquist frequency. The same is true for main_state->shared_state->X[ch].data.
+     */
+    PAR_JOBS(
+        PJOB(fft_task, (tdist.par_1_tasks_and_channels[0], main_state, shadow_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, Y_FFT)),
+        PJOB(fft_task, (tdist.par_1_tasks_and_channels[1], main_state, shadow_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, Y_FFT))
+        );
+
+    // Calculate reference input spectrum for all num_x_channels of reference input
+    PAR_JOBS(
+        PJOB(fft_task, (tdist.par_1_tasks_and_channels[0], main_state, shadow_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_FFT)),
+        PJOB(fft_task, (tdist.par_1_tasks_and_channels[1], main_state, shadow_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_FFT))
+        );
+
+    // Calculate sum of X energy over X FIFO phases for all num_x_channels reference channels for main and shadow filter.   
+    /* AEC data structures store a single copy of the X FIFO that is shared between the main and shadow filter.
+     * Since main filter phases main_state->num_phases are more than the shadow filter phases shadow_state->num_phases,
+     * X FIFO holds main_state->num_phases most recent frames of reference input spectrum, where the frames are ordered
+     * from most recent to least recent. For shadow filter operation, out of this shared X FIFO, the first shadow_state->num_phases
+     * frames are considered.
+     * For main filter, X energy is stored in BFP struct main_state->X_energy[ch]. For shadow filter, X energy is stored
+     * in BFP structure shadow_state->X_energy[ch]. These BFP structures point to AEC_PROC_FRAME_LENGTH/2 + 1, real
+     * 32bit values where the value at index n is the nth X sample's energy summed over main_state->num_phases number
+     * of frames in the X FIFO.
+     */
+    PAR_JOBS(
+        PJOB(update_X_energy_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_energy_recalc_bin)),
+        PJOB(update_X_energy_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels, X_energy_recalc_bin))
+        );
+
+    // Increment X_energy_recalc_bin to the next sample index, wrapping back to 0 after the last bin (AEC_PROC_FRAME_LENGTH/2).
+    /* Passing X_energy_recalc_bin to aec_calc_X_fifo_energy() ensures that energy of sample at index X_energy_recalc_bin
+     * is recalculated without the speed optimisations so that quantisation error can be kept in check
+     */
+    X_energy_recalc_bin += 1;
+    if(X_energy_recalc_bin == (AEC_PROC_FRAME_LENGTH/2) + 1) {
+        X_energy_recalc_bin = 0;
+    }
+
+    // Update X-FIFO and calculate sigma_XX.
+    /* Add the current X frame to the X FIFO and remove the oldest X frame from the X FIFO.
+     * Also, calculate state->shared_state->sigma_XX. sigma_XX is the EMA of current X frame energy.
+     * It is later used to time smooth the X_energy while calculating the normalisation spectrum
+     */
+    PAR_JOBS(
+        PJOB(update_X_fifo_task, (tdist.par_1_tasks_and_channels[0], main_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels)),
+        PJOB(update_X_fifo_task, (tdist.par_1_tasks_and_channels[1], main_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_x_channels))
+        );
+
+    // Copy state->shared_state->X_fifo BFP struct to main_state->X_fifo_1d and shadow_state->X_fifo_1d BFP structs
+    /* The updated state->shared_state->X_FIFO BFP structures are copied to an alternate set of BFP structs present in the 
+     * main and shadow filter state structure, that are used to efficiently access the X FIFO in the Error computation and filter
+     * update steps. These two calls are made directly from the calling thread, not through PAR_JOBS.
+     */
+    aec_update_X_fifo_1d(main_state);
+    aec_update_X_fifo_1d(shadow_state);
+
+    // Calculate error spectrum and estimated mic spectrum for main and shadow adaptive filters
+    /* For main filter, main_state->Error[ch] and main_state->Y_hat[ch] are updated.
+     * For shadow filter, shadow_state->Error[ch] and shadow_state->Y_hat[ch] are updated. 
+     */
+    PAR_JOBS(
+        PJOB(calc_Error_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels)),
+        PJOB(calc_Error_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels))
+        );
+    
+    // Calculate time domain error and time domain estimated mic input from their spectrums calculated in the previous step.
+    /* The time domain estimated mic_input (y_hat) is used to calculate the average coherence between y and y_hat in aec_calc_coherence.
+     * Only the estimated mic input calculated using the main filter is needed for coherence calculation, so the y_hat calculation is
+     * done only for main filter.
+     */
+    PAR_JOBS(
+        PJOB(ifft_task, (tdist.par_3_tasks_and_channels[0], main_state, shadow_state, AEC_3_TASKS_AND_CHANNELS_PASSES, num_y_channels)),
+        PJOB(ifft_task, (tdist.par_3_tasks_and_channels[1], main_state, shadow_state, AEC_3_TASKS_AND_CHANNELS_PASSES, num_y_channels))
+        );
+
+    // Calculate average coherence and average slow moving coherence between mic and estimated mic time domain signals
+    // main_state->shared_state->coh_mu_state[ch].coh and main_state->shared_state->coh_mu_state[ch].coh_slow are updated
+    PAR_JOBS(
+        PJOB(calc_coh_task, (tdist.par_1_tasks_and_channels[0], main_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels)),
+        PJOB(calc_coh_task, (tdist.par_1_tasks_and_channels[1], main_state, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels))
+        );
+
+    // Calculate AEC filter time domain output. This is the output sent to downstream pipeline stages
+    /* Application can choose to not generate AEC shadow filter output by passing NULL as output_shadow argument.
+     * Note that aec_calc_output() will still need to be called since this function also windows the error signal
+     * which is needed for subsequent processing of the shadow filter even when output is not generated.
+     * The output arrays are passed as flat int32_t pointers; calc_output_task() casts them back to rows of AEC_FRAME_ADVANCE samples.
+     */
+    PAR_JOBS(
+        PJOB(calc_output_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, (int32_t*)output_main, (int32_t*)output_shadow, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels)),
+        PJOB(calc_output_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, (int32_t*)output_main, (int32_t*)output_shadow, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels))
+        );
+
+    // Calculate exponential moving average of main_filter time domain error.
+    /* The EMA error energy is used in ERLE calculations which are done only for the main filter,
+     * so not calling this function to calculate shadow filter error EMA energy.
+     */
+    PAR_JOBS(
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[0], main_state, (int32_t*)output_main, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, ERROR_EMA)),
+        PJOB(calc_time_domain_ema_energy_task, (tdist.par_1_tasks_and_channels[1], main_state, (int32_t*)output_main, AEC_1_TASKS_AND_CHANNELS_PASSES, num_y_channels, ERROR_EMA))
+        );
+
+    // Convert shadow and main filters error back to frequency domain since subsequent AEC functions will use the error spectrum.
+    /* The error spectrum is later used to compute T values which are then used while updating the adaptive filter.
+     * main_state->Error[ch] and shadow_state->Error[ch] are updated.
+     */
+    PAR_JOBS(
+        PJOB(fft_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels, ERROR_FFT)),
+        PJOB(fft_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_y_channels, ERROR_FFT))
+            );
+
+    // Calculate energies of mic input and error spectrum of main and shadow filters.
+    /* These energy values are later used in aec_compare_filters_and_calc_mu() to estimate how well the filters are performing.
+     * main_state->overall_Error[ch], shadow_state->overall_Error[ch] and main_state->shared_state->overall_Y[ch] are
+     * updated.
+     */
+    PAR_JOBS(
+        PJOB(calc_freq_domain_energy_task, (tdist.par_3_tasks_and_channels[0], main_state, shadow_state, AEC_3_TASKS_AND_CHANNELS_PASSES, num_y_channels)),
+        PJOB(calc_freq_domain_energy_task, (tdist.par_3_tasks_and_channels[1], main_state, shadow_state, AEC_3_TASKS_AND_CHANNELS_PASSES, num_y_channels))
+        );
+
+    // Compare and update filters. Calculate adaption step_size mu
+    /* At this point we're ready to check how well the filters are performing and update them if needed.
+     * main_state->shared_state->shadow_filter_params are updated to indicate the current state of filter comparison algorithm.
+     * main_state->H_hat, main_state->Error, shadow_state->H_hat, shadow_state->Error are optionally updated depending on the update needed.
+     * After the filter comparison and update step, the adaption step size mu is calculated for main and shadow filter.
+     * main_state->mu and shadow_state->mu are updated. This step is called directly, not through PAR_JOBS.
+     */
+    aec_compare_filters_and_calc_mu(
+            main_state,
+            shadow_state);
+
+    // Calculate smoothed reference FIFO energy that is later used to scale the X FIFO in the filter update step.
+    /* This calculation is done differently for main and shadow filters, so a flag indicating filter type is specified
+     * as one of the input arguments.
+     * main_state->inv_X_energy[ch] and shadow_state->inv_X_energy[ch] are updated.
+     */
+    PAR_JOBS(
+        PJOB(calc_normalisation_spectrum_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels)),
+        PJOB(calc_normalisation_spectrum_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels))
+        );
+
+    //Adapt H_hat, one mic channel at a time
+    for(int ych=0; ych<num_y_channels; ych++) {
+        // Compute T values for mic channel ych across all reference channels.
+        // T is a function of state->mu, state->Error and state->inv_X_energy.
+        // main_state->T[ch] and shadow_state->T[ch] are updated.
+        PAR_JOBS(
+            PJOB(calc_T_task, (tdist.par_2_tasks_and_channels[0], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels, ych)),
+            PJOB(calc_T_task, (tdist.par_2_tasks_and_channels[1], main_state, shadow_state, AEC_2_TASKS_AND_CHANNELS_PASSES, num_x_channels, ych))
+            );
+
+        // Update filters. This schedule is per task only (par_2_tasks), with no channel field.
+        // main_state->H_hat and shadow_state->H_hat are updated.
+        PAR_JOBS(
+            PJOB(filter_adapt_task, (tdist.par_2_tasks[0], main_state, shadow_state, AEC_2_TASKS_PASSES, ych)),
+            PJOB(filter_adapt_task, (tdist.par_2_tasks[1], main_state, shadow_state, AEC_2_TASKS_PASSES, ych))
+            );
+    }
+    framenum++; 
+}
+
+// Worker job: for each active entry of one schedule row, update the EMA energy selected by 'type' for that entry's channel.
+void calc_time_domain_ema_energy_task(par_tasks_and_channels_t* s, aec_state_t *state, int32_t *output, int passes, int channels, enum e_td_ema type) {
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        if((ch >= channels) || !s[pass].is_active) continue; // entry is for an unconfigured channel or is switched off
+        switch(type) {
+            case Y_EMA: // mic input: energy of the newest AEC_FRAME_ADVANCE samples of y[ch]
+                aec_calc_time_domain_ema_energy(&state->shared_state->y_ema_energy[ch], &state->shared_state->y[ch], AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE, AEC_FRAME_ADVANCE, &state->shared_state->config_params);
+                break;
+            case X_EMA: // reference input: energy of the newest AEC_FRAME_ADVANCE samples of x[ch]
+                aec_calc_time_domain_ema_energy(&state->shared_state->x_ema_energy[ch], &state->shared_state->x[ch], AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE, AEC_FRAME_ADVANCE, &state->shared_state->config_params);
+                break;
+            case ERROR_EMA: {
+                // 'output' is the flattened output array; wrap this channel's row in a temporary BFP struct with exponent -31
+                int32_t (*rows)[AEC_FRAME_ADVANCE] = (int32_t(*)[AEC_FRAME_ADVANCE])output;
+                bfp_s32_t row_bfp;
+                bfp_s32_init(&row_bfp, &rows[ch][0], -31, AEC_FRAME_ADVANCE, 1);
+                aec_calc_time_domain_ema_energy(&state->error_ema_energy[ch], &row_bfp, 0, AEC_FRAME_ADVANCE, &state->shared_state->config_params);
+                break;
+            }
+            default: assert(0); // unknown EMA selector
+        }
+    }
+}
+
+/* Worker job: run the forward FFT selected by 'type' for every active entry of one row of the task schedule.
+ *   Y_FFT     : mic input,       main_state->shared_state->y[ch] -> main_state->shared_state->Y[ch]
+ *   X_FFT     : reference input, main_state->shared_state->x[ch] -> main_state->shared_state->X[ch]
+ *   ERROR_FFT : filter error,    error[ch] -> Error[ch] of the main filter (task 0) or the shadow filter (task 1)
+ * s/passes: schedule row and its length. channels: number of valid channels; entries at or beyond it are ignored.
+ * A NULL shadow_state skips the shadow filter work, as the other task functions in this file do; that combination
+ * used to fall through to assert(0). ERROR_FFT with a task id other than 0 or 1, or an unknown type, still
+ * asserts because the schedule and this function then disagree.
+ */
+void fft_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels, enum e_fft type) {
+    for(int i=0; i<passes; i++) {
+        int task = s[i].task;
+        int ch = s[i].channel;
+        if((ch >= channels) || !s[i].is_active) continue;
+        // The mic and reference spectra live in the shared state, so the task id is not consulted for them
+        if(type == Y_FFT) {
+            aec_forward_fft(&main_state->shared_state->Y[ch], &main_state->shared_state->y[ch]);
+        }
+        else if(type == X_FFT) {
+            aec_forward_fft(&main_state->shared_state->X[ch], &main_state->shared_state->x[ch]);
+        }
+        else if((type==ERROR_FFT) && (task==0)) {
+            aec_forward_fft(&main_state->Error[ch], &main_state->error[ch]); //error -> Error
+        }
+        else if((type==ERROR_FFT) && (task==1)) {
+            // The shadow filter is optional: quietly skip its FFT when no shadow state was supplied
+            if(shadow_state != NULL) {
+                aec_forward_fft(&shadow_state->Error[ch], &shadow_state->error[ch]); //error_shad -> Error_shad
+            }
+        }
+        else{assert(0);}
+    }
+}
+
+/* Worker job: call aec_calc_X_fifo_energy() for every active entry of one row of the task schedule.
+ * Task id 0 selects the main filter; any other task id selects the shadow filter, which is skipped when
+ * shadow_state is NULL. Entries whose channel is >= channels are ignored. recalc_bin is passed through unchanged.
+ */
+void update_X_energy_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels, int recalc_bin) {
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        if((ch >= channels) || !s[pass].is_active) continue;
+
+        const int for_main = (s[pass].task == 0);
+        if(for_main) {
+            aec_calc_X_fifo_energy(main_state, ch, recalc_bin);
+        }
+        else if(shadow_state != NULL) {
+            aec_calc_X_fifo_energy(shadow_state, ch, recalc_bin);
+        }
+    }
+}
+
+// Worker job: call aec_update_X_fifo_and_calc_sigmaXX() for the channel of every active entry of one schedule row.
+void update_X_fifo_task(par_tasks_and_channels_t *s, aec_state_t *state, int passes, int channels) {
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        // Only entries that name a configured channel and are flagged active do any work
+        if((ch < channels) && s[pass].is_active) {
+            aec_update_X_fifo_and_calc_sigmaXX(state, ch);
+        }
+    }
+}
+
+/* Worker job: call aec_calc_Error_and_Y_hat() for every active entry of one row of the task schedule.
+ * Task id 0 selects the main filter; any other task id selects the shadow filter, which is skipped when
+ * shadow_state is NULL. Entries whose channel is >= channels are ignored.
+ */
+void calc_Error_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels) {
+    for(int pass=0; pass<passes; pass++) {
+        const par_tasks_and_channels_t *entry = &s[pass];
+        const int ch = entry->channel;
+        if((ch >= channels) || !entry->is_active) continue;
+        // Main filter work is unconditional; shadow filter work needs a shadow state
+        if(entry->task == 0) {
+            aec_calc_Error_and_Y_hat(main_state, ch);
+        }
+        else if(shadow_state != NULL) {
+            aec_calc_Error_and_Y_hat(shadow_state, ch);
+        }
+    }
+}
+
+/* Worker job: run one inverse FFT for every active entry of one row of the task schedule.
+ *   task 0     : main filter error spectrum,   main_state->Error[ch]   -> main_state->error[ch]
+ *   task 1     : main filter estimated mic,    main_state->Y_hat[ch]   -> main_state->y_hat[ch]
+ *   other task : shadow filter error spectrum, shadow_state->Error[ch] -> shadow_state->error[ch] (skipped if shadow_state is NULL)
+ */
+void ifft_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels)
+{
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        if((ch >= channels) || !s[pass].is_active) continue;
+        switch(s[pass].task) {
+            case 0:
+                aec_inverse_fft(&main_state->error[ch], &main_state->Error[ch]);
+                break;
+            case 1:
+                aec_inverse_fft(&main_state->y_hat[ch], &main_state->Y_hat[ch]);
+                break;
+            default:
+                if(shadow_state != NULL) aec_inverse_fft(&shadow_state->error[ch], &shadow_state->Error[ch]);
+        }
+    }
+}
+// Worker job: call aec_calc_coherence() for the channel of every active entry of one schedule row.
+void calc_coh_task(par_tasks_and_channels_t *s, aec_state_t *state, int passes, int channels) {
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        // Only entries that name a configured channel and are flagged active do any work
+        if((ch < channels) && s[pass].is_active) {
+            aec_calc_coherence(state, ch);
+        }
+    }
+}
+
+/* Worker job: call aec_calc_output() for every active entry of one row of the task schedule.
+ * Task id 0 writes the main filter output into row ch of output_main. Any other task id handles the shadow
+ * filter: it is skipped when shadow_state is NULL, and otherwise aec_calc_output() is still called with a
+ * NULL destination when output_shadow is NULL.
+ * output_main/output_shadow are the caller's int32_t[][AEC_FRAME_ADVANCE] arrays passed as flat pointers.
+ * Entries whose channel is >= channels are ignored.
+ */
+void calc_output_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int32_t *output_main, int32_t *output_shadow, int passes, int channels) {
+    // View the flat buffers again as arrays of AEC_FRAME_ADVANCE-sample rows, one row per channel
+    int32_t (*main_rows)[AEC_FRAME_ADVANCE] = (int32_t(*)[AEC_FRAME_ADVANCE])output_main;
+    int32_t (*shadow_rows)[AEC_FRAME_ADVANCE] = (int32_t(*)[AEC_FRAME_ADVANCE])output_shadow;
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        if((ch >= channels) || !s[pass].is_active) continue;
+        if(s[pass].task == 0) {
+            aec_calc_output(main_state, &main_rows[ch], ch);
+        }
+        else if(shadow_state != NULL) {
+            // Without a shadow output buffer the call is still made, with no destination row
+            int32_t (*shadow_out)[AEC_FRAME_ADVANCE] = NULL;
+            if(shadow_rows != NULL) shadow_out = &shadow_rows[ch];
+            aec_calc_output(shadow_state, shadow_out, ch);
+        }
+    }
+}
+
+/* Worker job: call aec_calc_freq_domain_energy() once for every active entry of one row of the task schedule.
+ *   task 0     : main filter error, main_state->Error[ch] -> main_state->overall_Error[ch]
+ *   task 1     : mic input, main_state->shared_state->Y[ch] -> main_state->shared_state->overall_Y[ch]
+ *   other task : shadow filter error, shadow_state->Error[ch] -> shadow_state->overall_Error[ch] (skipped if shadow_state is NULL)
+ */
+void calc_freq_domain_energy_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels)
+{
+    for(int pass=0; pass<passes; pass++) {
+        const int ch = s[pass].channel;
+        if((ch >= channels) || !s[pass].is_active) continue;
+        switch(s[pass].task) {
+            case 0:
+                aec_calc_freq_domain_energy(&main_state->overall_Error[ch], &main_state->Error[ch]);
+                break;
+            case 1:
+                aec_calc_freq_domain_energy(&main_state->shared_state->overall_Y[ch], &main_state->shared_state->Y[ch]);
+                break;
+            default:
+                if(shadow_state != NULL) aec_calc_freq_domain_energy(&shadow_state->overall_Error[ch], &shadow_state->Error[ch]);
+        }
+    }
+}
+
+/* Worker job: call aec_calc_normalisation_spectrum() for every active entry of one row of the task schedule.
+ * Task id 0 selects the main filter (filter-type flag 0); any other task id selects the shadow filter
+ * (filter-type flag 1), which is skipped when shadow_state is NULL. Entries whose channel is >= channels are ignored.
+ */
+void calc_normalisation_spectrum_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels) {
+    for(int pass=0; pass<passes; pass++) {
+        const par_tasks_and_channels_t *entry = &s[pass];
+        const int ch = entry->channel;
+        if((ch >= channels) || !entry->is_active) continue;
+        // The last argument tells the callee which filter it is working on: 0 = main, 1 = shadow
+        if(entry->task == 0) {
+            aec_calc_normalisation_spectrum(main_state, ch, 0);
+        }
+        else if(shadow_state != NULL) {
+            aec_calc_normalisation_spectrum(shadow_state, ch, 1);
+        }
+    }
+}
+
+/* Worker job: call aec_calc_T() for mic channel ych and the reference channel of every active schedule entry.
+ * The channel field of each entry is a reference (x) channel here; 'channels' is the number of reference
+ * channels and entries at or beyond it are ignored. Task id 0 selects the main filter; any other task id
+ * selects the shadow filter, which is skipped when shadow_state is NULL.
+ */
+void calc_T_task(par_tasks_and_channels_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int channels, int ych) {
+    for(int pass=0; pass<passes; pass++) {
+        const int xch = s[pass].channel;
+        if((xch >= channels) || !s[pass].is_active) continue;
+
+        if(s[pass].task == 0) {
+            aec_calc_T(main_state, ych, xch);
+        }
+        else if(shadow_state != NULL) {
+            aec_calc_T(shadow_state, ych, xch);
+        }
+    }
+}
+
+/* Worker job: call aec_filter_adapt() for mic channel ych for every active entry of one row of the task schedule.
+ * This schedule type (par_tasks_t) carries a task id and an active flag but no channel, so there is no
+ * channel filtering here.
+ * Task id 0 adapts the main filter; any other task id adapts the shadow filter, which is skipped when
+ * shadow_state is NULL.
+ */
+void filter_adapt_task(par_tasks_t *s, aec_state_t *main_state, aec_state_t *shadow_state, int passes, int ych) {
+    for(int pass=0; pass<passes; pass++) {
+        if(!s[pass].is_active) continue;
+        if(s[pass].task == 0) {
+            aec_filter_adapt(main_state, ych);
+        }
+        else if(shadow_state != NULL) {
+            aec_filter_adapt(shadow_state, ych);
+        }
+    }
+}
diff --git a/examples/bare-metal/shared_src/file_utils/fileio.c b/examples/bare-metal/shared_src/file_utils/fileio.c
new file mode 100644
index 000000000..1c436065b
--- /dev/null
+++ b/examples/bare-metal/shared_src/file_utils/fileio.c
@@ -0,0 +1,90 @@
+
+#include "fileio.h"
+
+// Open 'name' for reading ("rb") or writing ("wb") and store the handle in *fp. Returns 0 on success, -1 on failure.
+int file_open(file_t *fp, const char* name, const char *mode) {
+#if TEST_WAV_XSCOPE
+    fp->xscope_file = xscope_open_file(name, (char*)mode);
+#else
+    int flags; // open() flags equivalent to the fopen()-style mode string
+    if(!strcmp(mode, "rb")) {flags = O_RDONLY;}
+    // "wb" truncates an existing file, as fopen() does, so no bytes of a longer previous file are left behind
+    else if(!strcmp(mode, "wb")) {flags = O_WRONLY|O_CREAT|O_TRUNC;}
+    else {
+        assert((0) && "invalid file open mode specified. Only 'rb' and 'wb' modes supported");
+        return -1; // Reached only when assert() is compiled out (NDEBUG): report failure rather than success
+    }
+    fp->file = open(name, flags, 0644); // The permission argument is only used by open() when the file is created
+    if(fp->file == -1) {return -1;}
+#endif
+    return 0;
+}
+
+void file_seek(file_t *fp, long int offset, int origin) { // Move the file position by 'offset' relative to 'origin'; the result is not reported
+#if !TEST_WAV_XSCOPE
+    lseek(fp->file, offset, origin);
+#else
+    xscope_fseek(&fp->xscope_file, offset, origin);
+#endif
+}
+
+// Return the current position in the file as an int.
+int get_current_file_offset(file_t *fp) {
+#if !TEST_WAV_XSCOPE
+    return lseek(fp->file, 0, SEEK_CUR); // a zero-length seek from the current position just reports it
+#else
+    return xscope_ftell(&fp->xscope_file);
+#endif
+}
+
+/* Return the size of the file as an int.
+ * The size is found by seeking to the end of the file and reading back the offset; the position the file had
+ * on entry is restored before returning. None of the seek results are checked for errors.
+ */
+int get_file_size(file_t *fp) {
+    int saved_offset; //position on entry, restored before returning
+    int size;
+#if !TEST_WAV_XSCOPE
+    saved_offset = lseek(fp->file, 0, SEEK_CUR);
+    //lseek() returns the resulting offset, so seeking to the end yields the size directly
+    size = lseek(fp->file, 0, SEEK_END);
+    lseek(fp->file, saved_offset, SEEK_SET);
+#else
+    saved_offset = xscope_ftell(&fp->xscope_file);
+    xscope_fseek(&fp->xscope_file, 0, SEEK_END);
+    size = xscope_ftell(&fp->xscope_file);
+    xscope_fseek(&fp->xscope_file, saved_offset, SEEK_SET);
+#endif
+    return size;
+}
+
+void file_read(file_t *fp, void *buf, size_t count) { // Read up to 'count' bytes into buf; the number actually read is not reported
+#if !TEST_WAV_XSCOPE
+    read(fp->file, buf, count);
+#else
+    xscope_fread(&fp->xscope_file, (uint8_t*)buf, count);
+#endif
+}
+
+void file_write(file_t *fp, void *buf, size_t count) { // Write 'count' bytes from buf; the number actually written is not reported
+#if !TEST_WAV_XSCOPE
+    write(fp->file, buf, count);
+#else
+    xscope_fwrite(&fp->xscope_file, (uint8_t*)buf, count);
+#endif
+}
+
+void file_close(file_t *fp) { // Close the file descriptor; does nothing in xscope builds
+#if TEST_WAV_XSCOPE
+    //files are closed by a single call to xscope_close_all_files()
+#else
+    close(fp->file);
+#endif
+}
+
+void shutdown_session() {
+    //End the file I/O session: xscope builds close all files here (needed for XSCOPE_ID_HOST_QUIT in xscope_close_all_files()); otherwise a no-op
+#if TEST_WAV_XSCOPE
+    xscope_close_all_files();
+#endif
+}
diff --git a/examples/bare-metal/shared_src/file_utils/fileio.h b/examples/bare-metal/shared_src/file_utils/fileio.h
new file mode 100644
index 000000000..0d7fd62ee
--- /dev/null
+++ b/examples/bare-metal/shared_src/file_utils/fileio.h
@@ -0,0 +1,31 @@
+#ifndef FILEIO_H
+#define FILEIO_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#if TEST_WAV_XSCOPE
+#include "xscope_io_device.h"
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+
+typedef union {
+    int file;                   //POSIX file descriptor, used when TEST_WAV_XSCOPE is not set
+#if TEST_WAV_XSCOPE
+    xscope_file_t xscope_file;  //xscope_fileio handle, used when TEST_WAV_XSCOPE is set
+#endif
+}file_t;
+
+//File access layer: each call goes to xscope_fileio or to POSIX I/O depending on TEST_WAV_XSCOPE
+int file_open(file_t *fp, const char* name, const char *mode);
+void file_read(file_t *fp, void *buf, size_t count);
+void file_write(file_t *fp, void *buf, size_t count);
+void file_seek(file_t *fp, long int offset, int origin);
+void file_close(file_t *fp);
+void shutdown_session(void); //Needed for XSCOPE_ID_HOST_QUIT in xscope_close_all_files()
+
+int get_current_file_offset(file_t *fp);
+int get_file_size(file_t *fp);
+#endif
diff --git a/examples/bare-metal/shared_src/file_utils/wav_utils.c b/examples/bare-metal/shared_src/file_utils/wav_utils.c
new file mode 100644
index 000000000..c8665a4f3
--- /dev/null
+++ b/examples/bare-metal/shared_src/file_utils/wav_utils.c
@@ -0,0 +1,135 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "fileio.h"
+#include "wav_utils.h"
+
+#define RIFF_SECTION_SIZE (12)
+#define FMT_SUBCHUNK_MIN_SIZE (24)
+#define EXTENDED_FMT_GUID_SIZE (16)
+static const char wav_default_header[WAV_HEADER_BYTES] = {
+        'R',  'I',  'F',  'F',  // riff_header
+        0x00, 0x00, 0x00, 0x00, // wav_size, set by wav_form_header()
+        'W',  'A',  'V',  'E',  // wave_header
+        'f',  'm',  't',  ' ',  // fmt_header
+        0x10, 0x00, 0x00, 0x00, // fmt_chunk_size (16 when read as little-endian)
+        0x00, 0x00, 0x00, 0x00, // audio_format, num_channels
+        0x00, 0x00, 0x00, 0x00, // sample_rate
+        0x00, 0x00, 0x00, 0x00, // byte_rate
+        0x00, 0x00, 0x00, 0x00, // sample_alignment, bit_depth
+        'd',  'a',  't',  'a',  // data_header
+        0x00, 0x00, 0x00, 0x00, // data_bytes, set by wav_form_header()
+};
+
+int get_wav_header_details(file_t *input_file, wav_header *s, unsigned *header_size){
+  //Parse the RIFF/WAVE header of an already-open file into *s and store the file offset that follows
+  //the data subchunk size in *header_size. Returns 0 on success, 1 if the header is malformed or not PCM.
+  file_seek(input_file, 0, SEEK_SET);
+  //read riff header section (12 bytes)
+  file_read(input_file, (uint8_t*)(&s->riff_header[0]), RIFF_SECTION_SIZE);
+  if(memcmp(s->riff_header, "RIFF", sizeof(s->riff_header)) != 0) {
+    printf("Error: couldn't find RIFF: 0x%x, 0x%x, 0x%x, 0x%x\n", s->riff_header[0], s->riff_header[1], s->riff_header[2], s->riff_header[3]);
+    return 1;
+  }
+  if(memcmp(s->wave_header, "WAVE", sizeof(s->wave_header)) != 0) {
+    printf("couldn't find WAVE:, 0x%x, 0x%x, 0x%x, 0x%x\n", s->wave_header[0], s->wave_header[1], s->wave_header[2], s->wave_header[3]);
+    return 1;
+  }
+  //read the fixed part of the fmt subchunk (24 bytes): header, size and the first 16 bytes of the body
+  file_read(input_file, (uint8_t*)&s->fmt_header[0], FMT_SUBCHUNK_MIN_SIZE);
+  if(memcmp(s->fmt_header, "fmt ", sizeof(s->fmt_header)) != 0) {
+    printf("Error: couldn't find fmt: 0x%x, 0x%x, 0x%x, 0x%x\n", s->fmt_header[0], s->fmt_header[1], s->fmt_header[2], s->fmt_header[3]);
+    return 1;
+  }
+
+  //fmt_chunk_size doesn't include the fmt_header(4) and size(4) bytes
+  const unsigned fmt_fixed_body_size = FMT_SUBCHUNK_MIN_SIZE - sizeof(s->fmt_header) - sizeof(s->fmt_chunk_size);
+  if(s->fmt_chunk_size < (int)fmt_fixed_body_size) {
+    //a smaller size would make the unsigned subtraction below wrap around into a huge seek
+    printf("Error: fmt chunk size(%d) is too small\n", s->fmt_chunk_size);
+    return 1;
+  }
+  unsigned fmt_subchunk_remaining_size = (unsigned)s->fmt_chunk_size - fmt_fixed_body_size;
+
+  if(s->audio_format == (short)0xfffe) {
+    if(fmt_subchunk_remaining_size < EXTENDED_FMT_GUID_SIZE) {
+      printf("Error: extensible fmt chunk size(%d) has no room for the GUID\n", s->fmt_chunk_size);
+      return 1;
+    }
+    //seek to the end of fmt subchunk and rewind 16bytes to the beginning of GUID
+    file_seek(input_file, fmt_subchunk_remaining_size - EXTENDED_FMT_GUID_SIZE, SEEK_CUR);
+    //The first 2 bytes of GUID is the audio_format.
+    file_read(input_file, (uint8_t *)&s->audio_format, sizeof(s->audio_format));
+    //skip the rest of GUID
+    file_seek(input_file, EXTENDED_FMT_GUID_SIZE - sizeof(s->audio_format), SEEK_CUR);
+  } else {
+    //go to the end of fmt subchunk
+    file_seek(input_file, fmt_subchunk_remaining_size, SEEK_CUR);
+  }
+  if(s->audio_format != 1) {
+    printf("Error: audio format(%d) is not PCM\n", s->audio_format);
+    return 1;
+  }
+
+  //read header (4 bytes) for the next subchunk
+  file_read(input_file, (uint8_t*)&s->data_header[0], sizeof(s->data_header));
+  //if next subchunk is fact, read subchunk size and skip it
+  if(memcmp(s->data_header, "fact", sizeof(s->data_header)) == 0) {
+    uint32_t chunksize = 0; //stays 0 if the read delivers nothing
+    file_read(input_file, (uint8_t *)&chunksize, sizeof(chunksize));
+    file_seek(input_file, chunksize, SEEK_CUR);
+    file_read(input_file, (uint8_t*)(&s->data_header[0]), sizeof(s->data_header));
+  }
+  //only thing expected at this point is the 'data' subchunk. Throw error if not found.
+  if(memcmp(s->data_header, "data", sizeof(s->data_header)) != 0) {
+    printf("Error: couldn't find data: 0x%x, 0x%x, 0x%x, 0x%x\n", s->data_header[0], s->data_header[1], s->data_header[2], s->data_header[3]);
+    return 1;
+  }
+  //read data subchunk size.
+  file_read(input_file, (uint8_t *)&s->data_bytes, sizeof(s->data_bytes));
+  *header_size = get_current_file_offset(input_file); //total file size should be header_size + data_bytes
+  //No need to close file - handled by caller
+  return 0;
+}
+
+int wav_form_header(wav_header *header,
+        short audio_format,
+        short num_channels,
+        int sample_rate,
+        short bit_depth,
+        int num_frames){
+    //Start from the template so the chunk tags and fmt_chunk_size are already in place
+    memcpy((char*)header, wav_default_header, WAV_HEADER_BYTES);
+
+    //Size fields: the audio payload first, then the RIFF size derived from it
+    const int payload_bytes = num_frames * num_channels * (bit_depth/8);
+    header->data_bytes = payload_bytes;
+    header->wav_size = payload_bytes + WAV_HEADER_BYTES - 8;
+    //Format fields
+    header->audio_format = audio_format;
+    header->num_channels = num_channels;
+    header->sample_rate = sample_rate;
+    header->bit_depth = bit_depth;
+    header->byte_rate = sample_rate*bit_depth*num_channels/8;
+    header->sample_alignment = num_channels* (bit_depth/8);
+    return 0;
+}
+
+unsigned wav_get_num_bytes_per_frame(const wav_header *s){
+    //whole bytes per sample (bit_depth divided by CHAR_BIT, rounded down) times the channel count
+    return (unsigned)((s->bit_depth/CHAR_BIT) * s->num_channels);
+}
+
+int wav_get_num_frames(const wav_header *s){
+    const unsigned bytes_per_frame = wav_get_num_bytes_per_frame(s); //0 when bit_depth < 8 or num_channels is 0
+    return (bytes_per_frame == 0) ? 0 : (int)(s->data_bytes / bytes_per_frame); //frames in the data chunk; never divides by zero
+}
+
+long wav_get_frame_start(const wav_header *s, unsigned frame_number, uint32_t wavheader_size){
+    return (frame_number * wav_get_num_bytes_per_frame(s)) + wavheader_size; //frames skipped, in bytes, plus the header size
+}
diff --git a/examples/bare-metal/shared_src/file_utils/wav_utils.h b/examples/bare-metal/shared_src/file_utils/wav_utils.h
new file mode 100644
index 000000000..5003638cf
--- /dev/null
+++ b/examples/bare-metal/shared_src/file_utils/wav_utils.h
@@ -0,0 +1,45 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef WAV_UTILS_H
+#define WAV_UTILS_H
+
+#include <stdint.h>
+#include "fileio.h"
+
+#define WAV_HEADER_BYTES 44
+
+typedef struct wav_header {
+    // RIFF Header
+    char riff_header[4];    // Should be "RIFF"
+    int wav_size;           // File size - 8 = data_bytes + WAV_HEADER_BYTES - 8
+    char wave_header[4];    // Should be "WAVE"
+
+    // Format Subsection
+    char fmt_header[4];     // Should be "fmt "
+    int fmt_chunk_size;     // Size of the rest of this subchunk
+    short audio_format;
+    short num_channels;
+    int sample_rate;
+    int byte_rate;          // sample_rate * num_channels * (bit_depth/8)
+    short sample_alignment; // num_channels * (bit_depth/8)
+    short bit_depth;        // bits per sample
+
+    // Data Subsection
+    char data_header[4];    // Should be "data"
+    int data_bytes;         // frame count * num_channels * (bit_depth/8)
+} wav_header;
+
+// Reads the header of an already-open wav file into s and the header length into header_size. Returns 0 on success, 1 on error.
+int get_wav_header_details(file_t *input_file, wav_header *s, unsigned *header_size);
+
+int wav_form_header(wav_header *header,
+        short audio_format,
+        short num_channels,
+        int sample_rate,
+        short bit_depth,
+        int num_frames);
+
+unsigned wav_get_num_bytes_per_frame(const wav_header *s);
+int wav_get_num_frames(const wav_header *s);
+long wav_get_frame_start(const wav_header *s, unsigned frame_number, uint32_t wavheader_size);
+#endif
diff --git a/examples/bare-metal/shared_src/python/run_xcoreai.py b/examples/bare-metal/shared_src/python/run_xcoreai.py
new file mode 100644
index 000000000..93f22284f
--- /dev/null
+++ b/examples/bare-metal/shared_src/python/run_xcoreai.py
@@ -0,0 +1,77 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import xscope_fileio
+import argparse
+import shutil
+import subprocess
+
+def get_adapter_id():
+    try:
+        xrun_out = subprocess.check_output(['xrun', '-l'], text=True, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as e:
+        print('Error: %s' % e.output)
+        assert False
+
+    xrun_out = xrun_out.split('\n')
+    # Check that the first 4 lines of xrun_out match the expected lines
+    expected_header = ["", "Available XMOS Devices", "----------------------", ""]
+    if len(xrun_out) < len(expected_header):
+        raise RuntimeError(
+            f"Error: xrun output:\n{xrun_out}\n"
+            f"does not contain expected header:\n{expected_header}"
+        )
+
+    header_match = True
+    for i, expected_line in enumerate(expected_header):
+        if xrun_out[i] != expected_line:
+            header_match = False
+
+    if not header_match:
+        raise RuntimeError(
+            f"Error: xrun output header:\n{xrun_out[:4]}\n"
+            f"does not match expected header:\n{expected_header}"
+        )
+
+    try:
+        if "No Available Devices Found" in xrun_out[4]:
+            raise RuntimeError(f"Error: No available devices found\n")
+            return
+    except IndexError:
+        raise RuntimeError(f"Error: xrun output is too short:\n{xrun_out}\n")
+
+    for line in xrun_out[6:]:
+        if line.strip():
+            adapterID = line[26:34].strip()
+            status = line[34:].strip()
+        else:
+            continue
+    print("adapter_id = ",adapterID)
+    return adapterID
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("xe", nargs='?',
+                        help=".xe file to run")
+    parser.add_argument('--input', type=str, default="input.wav",
+                        help="input wav file. Default: input.wav")
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    assert args.xe is not None, "Specify vaild .xe file"
+    adapter_id = get_adapter_id()
+    print("Running on adapter_id ",adapter_id)
+    print(f"args.input = {args.input}")
+
+    try:
+        shutil.copy2(args.input, "input.wav")
+    except shutil.SameFileError as e:
+        pass
+    except IOError as e:
+         print('Error: %s' % e.strerror)
+         assert False, "Invalid input file"
+
+    xscope_fileio.run_on_target(adapter_id, args.xe)
+
diff --git a/examples/bare-metal/shared_src/test_streams/aec_example_input.wav b/examples/bare-metal/shared_src/test_streams/aec_example_input.wav
new file mode 100644
index 000000000..8daa88cc7
Binary files /dev/null and b/examples/bare-metal/shared_src/test_streams/aec_example_input.wav differ
diff --git a/examples/bare-metal/shared_src/test_streams/agc_example_input.wav b/examples/bare-metal/shared_src/test_streams/agc_example_input.wav
new file mode 100644
index 000000000..bb104990b
Binary files /dev/null and b/examples/bare-metal/shared_src/test_streams/agc_example_input.wav differ
diff --git a/examples/bare-metal/shared_src/test_streams/pipeline_example_input.wav b/examples/bare-metal/shared_src/test_streams/pipeline_example_input.wav
new file mode 100644
index 000000000..518e47ffb
Binary files /dev/null and b/examples/bare-metal/shared_src/test_streams/pipeline_example_input.wav differ
diff --git a/examples/bare-metal/shared_src/xscope_fileio b/examples/bare-metal/shared_src/xscope_fileio
new file mode 160000
index 000000000..c75386dba
--- /dev/null
+++ b/examples/bare-metal/shared_src/xscope_fileio
@@ -0,0 +1 @@
+Subproject commit c75386dbae61e96c711e69070b27110ee09d2900
diff --git a/index.rst b/index.rst
index ef98b359c..31e85156d 100644
--- a/index.rst
+++ b/index.rst
@@ -5,5 +5,6 @@ XMOS Avona Voice Reference Design
 .. toctree::
    :maxdepth: 2
 
-   ./doc/quick_start
+   ./doc/getting_started
+   ./doc/user_guide/index
    
diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
new file mode 100644
index 000000000..4e0034ed1
--- /dev/null
+++ b/modules/CMakeLists.txt
@@ -0,0 +1,5 @@
+# lib_xs3_math comes from the SDK at XCORE_SDK_PATH, so its binary directory is given explicitly
+add_subdirectory( "${XCORE_SDK_PATH}/modules/lib_xs3_math/lib_xs3_math" "${CMAKE_CURRENT_BINARY_DIR}/libs/" )
+
+add_subdirectory( lib_aec )
+add_subdirectory( lib_agc )
diff --git a/modules/lib_aec/CMakeLists.txt b/modules/lib_aec/CMakeLists.txt
new file mode 100644
index 000000000..435ddc149
--- /dev/null
+++ b/modules/lib_aec/CMakeLists.txt
@@ -0,0 +1,27 @@
+
+## Target name
+set( LIB_NAME   lib_aec  )
+
+## Export lib directory as variable
+set( AEC_PATH ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE )
+
+include(lib_aec.cmake)
+
+## Set library output directory. NOTE(review): a STATIC library is an ARCHIVE artifact, so this RUNTIME variable may not move it - confirm intent
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/libs)
+
+## Deliverable is a static library
+add_library( ${LIB_NAME}  STATIC      ${LIB_AEC_SOURCES} )
+
+target_include_directories( ${LIB_NAME} PUBLIC ${LIB_AEC_INCLUDES} )
+## PUBLIC because the lib_aec API headers include lib_xs3_math headers (bfp_math.h, xs3_math.h)
+target_link_libraries(${LIB_NAME} PUBLIC lib_xs3_math)
+
+set_target_properties( ${LIB_NAME} PROPERTIES   PREFIX      ""
+                                                OUTPUT_NAME ${LIB_NAME}
+                                                SUFFIX      ".a"        )
+
+target_compile_options( ${LIB_NAME} PRIVATE ${COMPILE_FLAGS} )
+## NOTE(review): DEBUGG looks like a typo for DEBUG - confirm which macro the sources test before changing it
+target_compile_options( ${LIB_NAME} PRIVATE "$<$<CONFIG:DEBUG>:-DDEBUGG=1>" )
+
diff --git a/modules/lib_aec/api/aec_api.h b/modules/lib_aec/api/aec_api.h
new file mode 100644
index 000000000..39ab0d53d
--- /dev/null
+++ b/modules/lib_aec/api/aec_api.h
@@ -0,0 +1,458 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_API_H
+#define AEC_API_H
+
+#include <stdio.h>
+#include <string.h>
+#include "bfp_math.h"
+#include "xs3_math.h"
+#include "aec_state.h"
+
+/**
+ * @page page_aec_api_h aec_api.h
+ * 
+ * lib_aec public functions API.
+ *
+ * @ingroup aec_header_file
+ */
+
+/**
+ * @defgroup aec_func     High Level API Functions
+ * @defgroup aec_low_level_func   Low Level API Functions (STILL WIP)
+ */ 
+
+/**
+ * @brief Initialise AEC data structures
+ *
+ * This function initializes AEC data structures for a given configuration.
+ * The configuration parameters num_y_channels, num_x_channels, num_main_filter_phases and num_shadow_filter_phases are
+ * passed in as input arguments.
+ *
+ * This function needs to be called at startup to first initialise the AEC and subsequently whenever the AEC configuration changes.
+ *
+ * @param[inout] main_state               AEC state structure for holding main filter specific state
+ * @param[inout] shadow_state             AEC state structure for holding shadow filter specific state
+ * @param[inout] shared_state             Shared state structure for holding state that is common to main and shadow filter
+ * @param[inout] main_mem_pool            Memory pool containing main filter memory buffers
+ * @param[inout] shadow_mem_pool          Memory pool containing shadow filter memory buffers
+ * @param[in] num_y_channels              Number of mic input channels
+ * @param[in] num_x_channels              Number of reference input channels
+ * @param[in] num_main_filter_phases      Number of phases in the main filter
+ * @param[in] num_shadow_filter_phases    Number of phases in the shadow filter
+ *
+ * `main_state`, `shadow_state` and shared_state structures must start at double word aligned addresses.
+ *
+ * main_mem_pool and shadow_mem_pool must point to memory buffers big enough to support main and shadow filter
+ * processing.  AEC state aec_state_t and shared state aec_shared_state_t structures contain only the BFP data
+ * structures used in the AEC. The memory these BFP structures will point to needs to be provided by the user in the
+ * main and shadow filter memory pools. An example memory pool structure is present in aec_memory_pool_t and
+ * aec_shadow_filt_memory_pool_t.
+ *
+ * main_mem_pool and shadow_mem_pool must also start at double word aligned addresses.
+ *
+ * @par Example
+ * @code{.c}
+ *      #include "aec_memory_pool.h"
+        aec_state_t DWORD_ALIGNED main_state;
+        aec_state_t DWORD_ALIGNED shadow_state;
+        aec_shared_state_t DWORD_ALIGNED aec_shared_state;
+        uint8_t DWORD_ALIGNED aec_mem[sizeof(aec_memory_pool_t)];
+        uint8_t DWORD_ALIGNED aec_shadow_mem[sizeof(aec_shadow_filt_memory_pool_t)];
+        unsigned y_chans = 2, x_chans = 2;
+        unsigned main_phases = 10, shadow_phases = 5;
+        // There is one main and one shadow filter per x-y channel pair, so for this example there will be 4 main and 4
+        // shadow filters. Each main filter will have 10 phases and each shadow filter will have 5 phases.
+        aec_init(&main_state, &shadow_state, &aec_shared_state, aec_mem, aec_shadow_mem, y_chans, x_chans, main_phases, shadow_phases);
+ * @endcode
+ *
+ * @ingroup aec_func
+ */
+void aec_init(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        aec_shared_state_t *shared_state,
+        uint8_t *main_mem_pool,
+        uint8_t *shadow_mem_pool,
+        unsigned num_y_channels,
+        unsigned num_x_channels,
+        unsigned num_main_filter_phases,
+        unsigned num_shadow_filter_phases);
+
+
+/**
+ * @brief Initialise AEC data structures for processing a new frame
+ *
+ * This is the first function that is called when a new frame is available for processing.
+ * It takes the new samples as input and combines the new samples and previous frame's history to create a processing block on which further processing happens.
+ * It also initialises some data structures that need to be initialised at the beginning of a frame.
+ *
+ * @param[inout] main_state main filter state
+ * @param[inout] shadow_state shadow filter state
+ * @param[in] y_data pointer to mic input buffer
+ * @param[in] x_data pointer to reference input buffer
+ *
+ * @note
+ * @parblock
+ * y_data and x_data buffers memory is free to be reused after this function call.
+ * @endparblock
+ *
+ * @ingroup aec_func
+ */
+void aec_frame_init(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+
+/**
+ * @brief Calculate energy in the spectrum
+ *
+ * This function calculates the energy of frequency domain data used in the AEC. Frequency domain data in AEC is in the form of complex 32bit vectors and energy is calculated as the squared magnitude of the input vector.
+ *
+ * @param[out] fd_energy energy of the input spectrum
+ * @param[in] input input spectrum BFP structure
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_freq_domain_energy(
+        float_s32_t *fd_energy,
+        const bfp_complex_s32_t *input);
+
+/**
+ * @brief Calculate exponential moving average (EMA) energy of a time domain (TD) vector
+ *
+ * This function calculates the EMA energy of AEC time domain data which is in the form of real 32bit vectors.
+ *
+ * This function can be called to calculate the EMA energy of subsets of the input vector as well. 
+ *
+ * @param[out] ema_energy   EMA energy of the input
+ * @param[in] input         time domain input BFP structure
+ * @param[in] start_offset  offset in the input vector from where to start calculating EMA energy
+ * @param[in] length        length over which to calculate EMA energy
+ * @param[in] conf          AEC configuration parameters.
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_time_domain_ema_energy(
+        float_s32_t *ema_energy,
+        const bfp_s32_t *input,
+        unsigned start_offset,
+        unsigned length,
+        const aec_config_params_t *conf);
+
+/**
+ * @brief Calculate Discrete Fourier Transform (DFT) spectrum of an input time domain vector.
+ *
+ * This function calculates the spectrum of a real 32bit time domain vector. It calculates an N point real DFT where N is the length of the input vector to output a complex N/2+1 length complex 32bit vector.
+ * The N/2+1 complex output values represent spectrum samples from DC up to the Nyquist frequency.
+ *
+ * The DFT calculation is done in place. After this function call the input and output BFP structures `data` fields point to the same memory.
+ * Since DFT is calculated in place, use of the input BFP struct is undefined after this function.
+ *
+ * @param[out] output    DFT output BFP structure
+ * @param[in] input     DFT input BFP structure
+ *
+ * To allow for inplace transform from N real 32bit values to N/2+1 complex 32bit values, the input vector should have 2 extra real 32bit samples worth of memory.
+ * This means that `input->data` should point to a buffer of length `input->length`+2
+ *
+ * After this function `input->data` and `output->data` point to the same memory address.
+ *
+ * @ingroup aec_func
+ */
+void aec_forward_fft(
+        bfp_complex_s32_t *output,
+        bfp_s32_t *input);
+
+/**
+ * @brief Calculate inverse Discrete Fourier Transform (DFT) of an input spectrum
+ *
+ * This function calculates an N point inverse real DFT of a complex 32bit vector, where N is 2*(length-1) and length is the length of the input vector.
+ * The output is a real 32bit vector of length N.
+ *
+ * The inverse DFT calculation is done in place. After this operation the input and the output BFP structures `data` fields point to the same memory.
+ * Since the calculation is done in place, use of input BFP struct after this function is undefined.
+ *
+ *  @param[out] output inverse DFT output BFP structure
+ *  @param[in] input inverse DFT input BFP structure
+ *
+ *  After this function `input->data` and `output->data` point to the same memory address.
+ *
+ * @ingroup aec_func
+ */
+void aec_inverse_fft(
+        bfp_s32_t *output,
+        bfp_complex_s32_t *input
+        );
+
+/**
+ * @brief Calculate total energy of the X FIFO
+ *
+ * `X FIFO` is a FIFO of the most recent `X` frames, where `X` is spectrum of one frame of reference input.
+ * There's a common X FIFO that is shared between main and shadow filters. It holds `num_main_filter_phases` most recent X frames and the shadow filter uses `num_shadow_filter_phases` most recent frames out of it.
+ *
+ * This function calculates the energy per X sample index summed across the X FIFO phases.
+ * This function also calculates the maximum energy across all samples indices of the output energy vector 
+ * 
+ * @param[inout] state  AEC state. state->X_energy[ch] and state->max_X_energy[ch] are updated 
+ * @param[in]    ch     channel index for which energy calculations are done 
+ * @param[in]    recalc_bin The sample index for which energy is recalculated to eliminate quantisation errors
+ * 
+ * @note
+ * @parblock
+ * This function implements some speed optimisations which introduce quantisation error. To stop quantisation error build up, in every call of this function, energy for one sample index, which is specified in the `recalc_bin` argument, is recalculated without the optimisations. There are a total of AEC_FD_FRAME_LENGTH samples in the energy vector, so recalc_bin keeps cycling through indexes 0 to AEC_PROC_FRAME_LENGTH/2.
+ * @endparblock
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_X_fifo_energy(
+        aec_state_t *state,
+        unsigned ch,
+        unsigned recalc_bin);
+
+/**
+ * @brief Update X FIFO with the current X frame
+ *
+ * This function updates the X FIFO by removing the oldest X frame from it and adding the current X frame to it. 
+ * This function also calculates sigmaXX which is the exponential moving average of the current X frame energy
+ *
+ * @param[inout] state AEC state structure. state->shared_state->X_fifo[ch] and state->shared_state->sigma_XX[ch] are updated.
+ * @param[in] ch X channel index for which to update X FIFO
+ *
+ * @ingroup aec_func
+ */
+void aec_update_X_fifo_and_calc_sigmaXX(
+        aec_state_t *state,
+        unsigned ch);
+
+/**
+ * @brief Calculate error spectrum and estimated mic signal spectrum 
+ *
+ * This function calculates the error spectrum (`Error`) and estimated mic input spectrum (`Y_hat`)
+ * `Y_hat` is calculated as the sum of all phases of the adaptive filter multiplied by the respective phases of the reference input spectrum.
+ * Error is calculated by subtracting `Y_hat` from the mic input spectrum `Y`
+ *
+ * @param[inout] state AEC state structure. state->Error[ch] and state->Y_hat[ch] are updated
+ * @param[in] ch mic channel index for which to compute Error and Y_hat
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_Error_and_Y_hat(
+        aec_state_t *state,
+        unsigned ch);
+
+/**
+ * @brief Calculate coherence
+ *
+ * This function calculates the average coherence between mic input signal (`y`) and estimated mic signal (`y_hat`).
+ * A metric is calculated using `y` and `y_hat`, and the moving average (`coh`) and a slow moving average (`coh_slow`) of that metric are calculated.
+ * The coherence values are used to distinguish between situations when filter adaption should continue or freeze and update mu accordingly.
+ *
+ * @param[inout] state AEC state structure. `state->shared_state->coh_mu_state[ch].coh` and `state->shared_state->coh_mu_state[ch].coh_slow` are updated
+ * @param[in] ch mic channel index for which to calculate average coherence
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_coherence(
+        aec_state_t *state,
+        unsigned ch);
+
+/**
+ * @brief Calculate AEC filter output signal
+ *
+ * This function is responsible for windowing the filter `error` signal and creating AEC filter output that can be propagated to downstream stages.
+ * `output` is calculated by overlapping and adding current frame's windowed error signal with the previous frame windowed error. This is done to smooth discontinuities in the output as the filter adapts.
+ *
+ * @param[inout] state AEC state structure. `state->error[ch]`
+ * @param[out] output pointer to the output buffer
+ * @param[in] ch mic channel index for which to calculate output
+ *
+ * @ingroup aec_func
+ *
+ */
+void aec_calc_output(
+        aec_state_t *state,
+        int32_t (*output)[AEC_FRAME_ADVANCE],
+        unsigned ch);
+
+/**
+ * @brief Calculate normalisation spectrum
+ *
+ * This function calculates the normalisation spectrum of the reference input signal. This normalised spectrum is later used during filter adaption to scale the adaption to the size of the input signal.
+ * The normalisation spectrum is calculated as a time and frequency smoothed energy of the reference input spectrum.
+ *
+ * The normalisation spectrum is calculated differently for main and shadow filter, so a flag indicating whether this calculation is being done for the main or shadow filter is passed as an input to the function
+ *
+ * @param[inout] state AEC state structure. state->inv_X_energy[ch] is updated
+ * @param[in] ch reference channel index for which to calculate normalisation spectrum
+ * @param[in] is_shadow flag indicating filter type. 0: Main filter, 1: Shadow filter
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_normalisation_spectrum(
+        aec_state_t *state,
+        unsigned ch,
+        unsigned is_shadow);
+
+/**
+ * @brief Compare and update filters. Calculate the adaption step size mu.
+ *
+ * This function has 2 responsibilities.
+ * First, it compares the energies in the error spectrums of the main and shadow filter with each other and with the mic input spectrum energy, and makes an estimate of how well the filters are performing. Based on this, it optionally modifies the filters by either resetting the filter coefficients or copying one filter into another.
+ * Second, it uses the coherence values calculated in aec_calc_coherence as well as information from filter comparison done in step 1 to calculate the adaption step size mu.
+ *
+ * @param[inout] main_state AEC state structure for the main filter
+ * @param[inout] shadow_state AEC state structure for the shadow filter
+ *
+ * @ingroup aec_func
+ */
+void aec_compare_filters_and_calc_mu(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state);
+
+/**
+ * @brief Calculate the parameter `T`
+ *
+ * This function calculates a parameter referred to as `T` that is later used to scale the reference input spectrum in the filter update step.
+ * `T` is a function of the adaption step size `mu`, normalisation spectrum `inv_X_energy` and the filter error spectrum `Error`.
+ * 
+ * @param[inout] state AEC state structure. `state->T[x_ch]` is updated
+ * @param[in] y_ch mic channel index
+ * @param[in] x_ch reference channel index
+ *
+ * @ingroup aec_func
+ */
+void aec_calc_T(
+        aec_state_t *state,
+        unsigned y_ch,
+        unsigned x_ch);
+
+/** @brief Update filter
+ *
+ * This function updates the adaptive filter spectrum (`H_hat`). It calculates the delta update that is applied to the filter by scaling the X FIFO with the T values computed in `aec_calc_T()` and applies the delta update to `H_hat`.
+ * A gradient constraint FFT is then applied to constrain the length of each phase of the filter to avoid wrapping when calculating `y_hat`
+ *
+ * @param[inout] state AEC state structure. `state->H_hat[y_ch]` is updated
+ * @param[in] y_ch mic channel index
+ *
+ * @ingroup aec_func
+ *
+ */
+void aec_filter_adapt(
+        aec_state_t *state,
+        unsigned y_ch);
+
+/** @brief Update the X FIFO alternate BFP structure
+ *
+ * The X FIFO BFP structure is maintained in 2 forms - as a 2 dimensional [x_channels][num_phases] and as a [x_channels * num_phases] 1 dimensional array.
+ * This is done in order to optimally access the X FIFO as needed in different functions.
+ * After the X FIFO is updated with the current X frame, this function is called in order to copy the 2 dimensional BFP structure into its 1 dimensional counterpart.
+ *
+ * @param[inout] state AEC state structure. `state->X_fifo_1d` is updated
+ *
+ * @ingroup aec_func
+ *
+ */
+void aec_update_X_fifo_1d(
+        aec_state_t *state);
+
+/** @brief Calculate a correlation metric between the microphone input and estimated microphone signal
+ *
+ * This function calculates a metric of resemblance between the mic input and the estimated mic signal. The correlation
+ * metric, along with reference signal energy is used to infer presence of near and far end signals in the AEC mic
+ * input.
+ *
+ * @param[in] state AEC state structure. `state->y` and `state->y_hat` are used to calculate the correlation metric
+ * @param[in] ch mic channel index for which to calculate the metric
+ * @returns correlation metric in float_s32_t format
+ *
+ * @ingroup aec_func
+ *
+ */
+float_s32_t aec_calc_corr_factor(
+        aec_state_t *state,
+        unsigned ch);
+
+/** @brief Calculate the energy of the reference input signal 
+ *
+ * This function calculates the sum of the energy across all samples of the time domain reference input channel and
+ * returns the maximum energy across all channels. 
+ *
+ * @param[in] x_data Pointer to the reference (x) data buffer. The input is assumed to be in Q1.31 fixed point format.
+ * @param[in] num_channels Number of reference input channels.
+ * @returns Maximum reference energy in float_s32_t format.
+ *
+ * @ingroup aec_func
+ *
+ */
+float_s32_t aec_calc_max_ref_energy(
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE],
+        int num_channels);
+
+/// Estimate delay
+/// NOTE(review): not yet documented. Presumably the estimate is derived from the `num_phases` phases of
+/// `H_hat`, with `de_state` holding the estimator state - confirm against the implementation.
+int aec_estimate_delay (
+        delay_estimator_params_t *de_state,
+        const bfp_complex_s32_t* H_hat, 
+        unsigned num_phases);
+
+
+
+//TODO pending documentation and examples for L2 APIs
+/**
+ * @brief Calculate Error and Y_hat for a channel over a range of bins.
+ *
+ * @ingroup aec_low_level_func
+ */
+void aec_l2_calc_Error_and_Y_hat(
+        bfp_complex_s32_t *Error,
+        bfp_complex_s32_t *Y_hat,
+        const bfp_complex_s32_t *Y,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases,
+        unsigned start_offset,
+        unsigned length,
+        int32_t bypass_enabled);
+
+/**
+ * @brief Adapt one phase of the adaptive filter
+ *
+ * @ingroup aec_low_level_func
+ */
+void aec_l2_adapt_plus_fft_gc(
+        bfp_complex_s32_t *H_hat_ph,
+        const bfp_complex_s32_t *X_fifo_ph,
+        const bfp_complex_s32_t *T_ph
+        );
+
+/**
+ * @brief Unify bfp_complex_s32_t chunks into a single exponent and headroom
+ *
+ * @ingroup aec_low_level_func
+ */
+void aec_l2_bfp_complex_s32_unify_exponent(
+        bfp_complex_s32_t *chunks,
+        int *final_exp,
+        int *final_hr,
+        const int *mapping,
+        int array_len,
+        int desired_index,
+        int min_headroom);
+
+/**
+ * @brief Unify bfp_s32_t chunks into a single exponent and headroom
+ *
+ * @ingroup aec_low_level_func
+ */
+void aec_l2_bfp_s32_unify_exponent(
+        bfp_s32_t *chunks,
+        int *final_exp,
+        int *final_hr,
+        const int *mapping,
+        int array_len,
+        int desired_index,
+        int min_headroom);
+#endif
diff --git a/modules/lib_aec/api/aec_defines.h b/modules/lib_aec/api/aec_defines.h
new file mode 100644
index 000000000..9d4b9178f
--- /dev/null
+++ b/modules/lib_aec/api/aec_defines.h
@@ -0,0 +1,98 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_DEFINES_H
+#define AEC_DEFINES_H
+
+/**
+ * @page page_aec_defines_h aec_defines.h
+ * 
+ * This header contains lib_aec public defines 
+ *
+ * @ingroup aec_header_file
+ */
+/**
+ * @defgroup aec_defines   AEC #define constants
+ */ 
+
+/** @brief Maximum number of microphone input channels supported in the library.
+ * Microphone input to the AEC refers to the input from the device's microphones from which AEC removes the echo
+ * created in the room by the device's loudspeakers.
+ *
+ * AEC functions follow the convention of using @math{y} and @math{Y} for referring to time domain and frequency domain
+ * representation of microphone input.
+ *
+ * The `num_y_channels` passed into aec_init() call should be less than or equal to AEC_LIB_MAX_Y_CHANNELS.
+ * This define is only used for defining data structures in the aec_state. The library code implementation uses only the
+ * num_y_channels aec is initialised for in the aec_init() call.  
+ *
+ * @ingroup aec_defines
+ */
+#define AEC_LIB_MAX_Y_CHANNELS (2)
+
+/** @brief Maximum number of reference input channels supported in the library.
+ * Reference input to the AEC refers to a copy of the device's speaker output audio that is also sent as an input to the
+ * AEC. It is used to model the echo characteristics between a mic-loudspeaker pair.
+ *
+ * AEC functions follow the convention of using @math{x} and @math{X} for referring to time domain and frequency domain
+ * representation of reference input.
+ *
+ * The `num_x_channels` passed into aec_init() call should be less than or equal to AEC_LIB_MAX_X_CHANNELS.
+ * This define is only used for defining data structures in the aec_state. The library code implementation uses only the
+ * num_x_channels aec is initialised for in the aec_init() call.  
+ *
+ * @ingroup aec_defines
+ */
+#define AEC_LIB_MAX_X_CHANNELS (2)
+
+/** @brief AEC frame size
+ * This is the number of samples of new data that the AEC works on every frame. 240 samples at 16kHz is 15msec. Every
+ * frame, the echo canceller takes in 15msec of mic and reference data and generates 15msec of echo cancelled output.
+ *
+ * @ingroup aec_defines
+ */
+#define AEC_FRAME_ADVANCE (240)
+
+/** Time domain samples block length used internally in AEC's block LMS algorithm
+ *
+ * @ingroup aec_defines
+ */
+#define AEC_PROC_FRAME_LENGTH (512)
+
+/** Number of bins of spectrum data computed when doing a DFT of a AEC_PROC_FRAME_LENGTH length time domain vector. The
+ * AEC_FD_FRAME_LENGTH spectrum values represent the bins from DC to Nyquist.
+ *
+ * @ingroup aec_defines
+ */   
+#define AEC_FD_FRAME_LENGTH ((AEC_PROC_FRAME_LENGTH / 2) + 1)
+
+/** @brief Maximum total number of phases supported in the AEC library 
+ * This is the maximum number of total phases supported in the AEC library. Total phases are calculated by summing
+ * phases across adaptive filters for all x-y pairs.
+ *
+ * For example, for a 2 y-channels, 2 x-channels, 10 phases per x channel configuration, there are 4 adaptive filters,
+ * H_hat<SUB>y0x0</SUB>, H_hat<SUB>y0x1</SUB>, H_hat<SUB>y1x0</SUB> and H_hat<SUB>y1x1</SUB>, each filter having 10
+ * phases, so the total number of phases is 40.
+ * When aec_init() is called to initialise the AEC, the num_y_channels, num_x_channels and num_main_filter_phases
+ * parameters passed in should be such that num_y_channels * num_x_channels * num_main_filter_phases is less than or
+ * equal to AEC_LIB_MAX_PHASES.
+ *
+ * This define is only used when defining data structures within the AEC state structure. The AEC algorithm
+ * implementation uses the num_main_filter_phases and num_shadow_filter_phases values that are passed into aec_init().
+ *
+ * @ingroup aec_defines
+ */
+#define AEC_LIB_MAX_PHASES (AEC_LIB_MAX_Y_CHANNELS * AEC_LIB_MAX_X_CHANNELS * 10)
+
+/** Overlap data length
+ *
+ * @ingroup aec_defines
+ */
+#define UNUSED_TAPS_PER_PHASE (16)
+
+
+#if !PROFILE_PROCESSING
+    #define prof(n, str)
+    #define print_prof(start, end, framenum)
+#endif
+
+#endif
diff --git a/modules/lib_aec/api/aec_state.h b/modules/lib_aec/api/aec_state.h
new file mode 100644
index 000000000..b71bed0d8
--- /dev/null
+++ b/modules/lib_aec/api/aec_state.h
@@ -0,0 +1,362 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_STATE_H
+#define AEC_STATE_H
+
+#include <stdio.h>
+#include <string.h>
+#include "aec_defines.h"
+#include "bfp_math.h"
+#include "xs3_math.h"
+
+/**
+ * @page page_aec_state_h aec_state.h
+ * 
+ * This header contains definitions for data structures and enums used in lib_aec.
+ *
+ * @ingroup aec_header_file
+ */
+
+/**
+ * @defgroup aec_types   AEC Data Structure and Enum Definitions
+ */ 
+
+/** @brief Filter adaption modes, selected through coherence_mu_config_params_t::adaption_config.
+ * @ingroup aec_types
+ */
+typedef enum {
+    AEC_ADAPTION_AUTO, ///< Compute filter adaption config every frame
+    AEC_ADAPTION_FORCE_ON, ///< Filter adaption always ON
+    AEC_ADAPTION_FORCE_OFF, ///< Filter adaption always OFF
+} aec_adaption_e;
+
+/** @brief Shadow filter status values, as held in shadow_filter_params_t::shadow_flag.
+ * @ingroup aec_types
+ */
+typedef enum {
+    LOW_REF = -4,    ///< Not much reference so no point in acting on AEC filter logic
+    ERROR = -3,      ///< Something has gone wrong, zero shadow filter
+    ZERO = -2,       ///< Shadow filter has been reset multiple times, zero shadow filter
+    RESET = -1,      ///< Copy main filter to shadow filter
+    EQUAL = 0,       ///< Main filter and shadow filter are similar
+    SIGMA = 1,       ///< Shadow filter a bit better than main, reset sigma_xx for faster convergence
+    COPY = 2,        ///< Shadow filter much better, copy to main
+}shadow_state_e;
+
+/** @brief Coherence mu related control parameters (aec_config_params_t::coh_mu_conf).
+ * @ingroup aec_types
+ */
+typedef struct {
+    /** Update rate of `coh`.*/
+    float_s32_t coh_alpha;
+    /** Update rate of `coh_slow`.*/
+    float_s32_t coh_slow_alpha;
+    /** Adaption is frozen if coh is below (coh_thresh_slow*coh_slow).*/
+    float_s32_t coh_thresh_slow;
+    /** Adaption is frozen if coh is below coh_thresh_abs.*/
+    float_s32_t coh_thresh_abs;
+    /** Scalefactor for scaling the calculated mu.*/
+    float_s32_t mu_scalar;
+    /** Parameter to avoid divide by 0 in coh calculation.*/
+    float_s32_t eps;
+    /** -20dB threshold.*/
+    float_s32_t thresh_minus20dB;
+    /** X_energy threshold used for determining if the signal has enough reference energy for sensible coherence mu calculation.*/
+    float_s32_t x_energy_thresh;
+    /** Number of frames the adaption is frozen for after low coherence.*/
+    unsigned mu_coh_time;
+    /** Number of frames the adaption is fast for after shadow filter use.*/
+    unsigned mu_shad_time;
+    /** Filter adaption mode: auto, force ON or force OFF (see aec_adaption_e).*/
+    aec_adaption_e adaption_config;
+    /** Fixed mu value used when filter adaption is forced ON.*/
+    int32_t force_adaption_mu_q30;
+} coherence_mu_config_params_t;
+
+/** @brief Shadow filter related control parameters (aec_config_params_t::shadow_filt_conf).
+ * @ingroup aec_types
+ */
+typedef struct {
+    /** Threshold for resetting sigma_XX.*/
+    float_s32_t shadow_sigma_thresh;
+    /** Threshold for copying shadow filter.*/
+    float_s32_t shadow_copy_thresh;
+    /** Threshold for resetting shadow filter.*/
+    float_s32_t shadow_reset_thresh;
+    /** Threshold for turning off shadow filter reset if reference delay is large.*/
+    float_s32_t shadow_delay_thresh;
+    /** X energy threshold used for deciding whether the system has enough reference energy for main and shadow filter
+     * comparison to make sense.*/
+    float_s32_t x_energy_thresh;
+    /** Fixed mu value used during shadow filter adaption.*/
+    float_s32_t shadow_mu;
+    /** Number of times shadow filter needs to be better before it gets copied to main filter.*/
+    int32_t shadow_better_thresh;
+    /** Number of times shadow filter is reset by copying the main filter to it before it gets zeroed.*/
+    int32_t shadow_zero_thresh;
+    /** Number of frames between zeroing resets of shadow filter.*/
+    int32_t shadow_reset_timer;
+}shadow_filt_config_params_t;
+
+/** @brief AEC control parameters other than the coherence mu and shadow filter ones (aec_config_params_t::aec_core_conf).
+ * @ingroup aec_types
+ */
+typedef struct {
+    /** Bypass AEC flag.*/
+    int bypass;
+    /** Parameter for deriving the gamma value that is used in normalisation spectrum calculation. gamma is calculated
+     * as 2^gamma_log2.*/
+    int gamma_log2;
+    /** Parameter used for deriving the alpha value used while calculating EMA of X_energy to calculate sigma_XX.*/
+    uint32_t sigma_xx_shift;
+    /** Delta value used in normalisation spectrum computation when adaption is forced as always ON.*/
+    float_s32_t delta_adaption_force_on;
+    /** Lower limit of delta computed using fractional regularisation.*/
+    float_s32_t delta_min;
+    /** Coefficient index used to track H_hat index when sending H_hat values over the host control interface.*/
+    uint32_t coeff_index;
+    /** Alpha used while calculating y_ema_energy, x_ema_energy and error_ema_energy.*/
+    fixed_s32_t ema_alpha_q30;
+}aec_core_config_params_t;
+
+/**
+ * @brief AEC control parameters.
+ *
+ * This structure contains control parameters that the user can modify at run time.
+ * @ingroup aec_types
+ */
+typedef struct {
+    /** Coherence mu related control params.*/
+    coherence_mu_config_params_t coh_mu_conf;
+    /** Shadow filter related control params.*/
+    shadow_filt_config_params_t shadow_filt_conf;
+    /** All AEC control params except those for coherence mu and shadow filter.*/
+    aec_core_config_params_t aec_core_conf;
+}aec_config_params_t;
+
+/** @brief Coherence mu calculation related state; aec_shared_state_t::coh_mu_state holds one per y channel.
+ * @ingroup aec_types
+ */
+typedef struct {
+    float_s32_t coh; ///< Moving average coherence
+    float_s32_t coh_slow; ///< Slow moving average coherence
+
+    int32_t mu_coh_count; ///< Counter for tracking the number of frames coherence has been low for
+    int32_t mu_shad_count; ///< Counter for tracking the number of frames shadow filter has been used in
+    float_s32_t coh_mu[AEC_LIB_MAX_X_CHANNELS]; ///< Coherence mu, one value per x channel
+}coherence_mu_params_t;
+
+
+/** @brief Shadow filter related state, with one entry per y channel in every array.
+ * @ingroup aec_types
+ */
+typedef struct {
+    int32_t shadow_flag[AEC_LIB_MAX_Y_CHANNELS]; ///< shadow_state_e enum indicating shadow filter status
+    int shadow_reset_count[AEC_LIB_MAX_Y_CHANNELS]; ///< Counter for tracking shadow filter resets
+    int shadow_better_count[AEC_LIB_MAX_Y_CHANNELS]; ///< Counter for tracking shadow filter copy to main filter
+}shadow_filter_params_t;
+
+/** @brief Delay estimator related state (aec_shared_state_t::delay_estimator_params).
+ * @ingroup aec_types
+ */
+typedef struct {
+    int32_t peak_power_phase_index; ///< H_hat phase index with the maximum energy
+    float_s32_t peak_phase_power; ///< Maximum energy across all H_hat phases
+    float_s32_t sum_phase_powers; ///< Sum of filter energy across all filter phases. Used in peak_to_average_ratio calculation.
+    float_s32_t peak_to_average_ratio; ///< Peak to average ratio of H_hat phase energy
+    float_s32_t phase_power[AEC_LIB_MAX_PHASES]; ///< Energy for every H_hat phase
+}delay_estimator_params_t;
+
+
+/**
+ * @brief AEC shared state structure.
+ *
+ * Data structures holding AEC persistent state that is common between main filter and shadow filter.
+ * aec_state_t::shared_state for both main and shadow filter point to the common aec_shared_state_t structure.
+ *
+ * @ingroup aec_types
+ */
+ //! [aec_shared_state_t]
+typedef struct {
+    /** BFP array pointing to the reference input spectrum phases. The term \b phase refers to the spectrum data for a
+     * frame. Multiple phases means multiple frames of data.
+     *
+     * For example, 10 phases would mean the 10 most recent frames of data.
+     * Each phase spectrum, pointed to by X_fifo[i][j].data is stored as a length AEC_FD_FRAME_LENGTH, complex 32bit
+     * array.
+     *
+     * The phases are ordered from most recent to least recent in the X_fifo. For example, for an AEC configuration of 2
+     * x-channels and 10 phases per x channel, 10 frames of X data spectrum is stored in the X_fifo. For a given x
+     * channel, say x channel 0, X_fifo[0][0] points to the most recent frame's X spectrum and X_fifo[0][9] points to
+     * the last phase, i.e the least recent frame's X spectrum.*/
+    bfp_complex_s32_t X_fifo[AEC_LIB_MAX_X_CHANNELS][AEC_LIB_MAX_PHASES];
+
+    /** BFP array pointing to reference input signal spectrum. The X data values are stored as a length
+     * AEC_FD_FRAME_LENGTH complex 32bit array per x channel.*/
+    bfp_complex_s32_t X[AEC_LIB_MAX_X_CHANNELS];
+
+    /** BFP array pointing to mic input signal spectrum. The Y data values are stored as a length
+     * AEC_FD_FRAME_LENGTH complex 32bit array per y channel.*/
+    bfp_complex_s32_t Y[AEC_LIB_MAX_Y_CHANNELS];
+    
+    /** BFP array pointing to time domain mic input processing block. The y data values are stored as length
+     * AEC_PROC_FRAME_LENGTH, 32bit integer array per y channel.*/
+    bfp_s32_t y[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to time domain reference input processing block. The x data values are stored as length
+     * AEC_PROC_FRAME_LENGTH, 32bit integer array per x channel.*/
+    bfp_s32_t x[AEC_LIB_MAX_X_CHANNELS];
+
+    /** BFP array pointing to time domain mic input values from the previous frame. These are put together with the new
+     * samples received in the current frame to make a AEC_PROC_FRAME_LENGTH processing block. The prev_y data values
+     * are stored as length (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE), 32bit integer array per y channel.*/
+    bfp_s32_t prev_y[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to time domain reference input values from the previous frame. These are put together with
+     * the new samples received in the current frame to make a AEC_PROC_FRAME_LENGTH processing block. The prev_x data
+     * values are stored as length (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE), 32bit integer array per x channel.*/
+    bfp_s32_t prev_x[AEC_LIB_MAX_X_CHANNELS];
+    
+    /** BFP array pointing to sigma_XX values which are the weighted average of the X_energy signal. The sigma_XX data
+     * is stored as 32bit integer array of length AEC_FD_FRAME_LENGTH*/
+    bfp_s32_t sigma_XX[AEC_LIB_MAX_X_CHANNELS];
+
+    /** Exponential moving average of the time domain mic signal energy. This is calculated by calculating energy
+     * per sample and summing across all samples. Stored in a y channels array
+     * with every value stored as a 32bit integer mantissa and exponent.*/
+    float_s32_t y_ema_energy[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** Exponential moving average of the time domain reference signal energy. This is calculated by calculating energy
+     * per sample and summing across all samples. Stored in a x channels array with every value stored as a 32bit
+     * integer mantissa and exponent.*/
+    float_s32_t x_ema_energy[AEC_LIB_MAX_X_CHANNELS];
+
+    /** Energy of the mic input spectrum. This is calculated by calculating the energy per bin and summing across all
+     * bins. Stored in a y channels array with every value stored as a 32bit integer mantissa and exponent.*/
+    float_s32_t overall_Y[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** Sum of the X_energy across all bins for a given x channel. Stored in a x channels array with every value stored
+     * as a 32bit integer mantissa and exponent.*/ 
+    float_s32_t sum_X_energy[AEC_LIB_MAX_X_CHANNELS]; 
+    
+    /** Structure containing coherence mu calculation related parameters.*/
+    coherence_mu_params_t coh_mu_state[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** Structure containing shadow filter related parameters.*/
+    shadow_filter_params_t shadow_filter_params;
+
+    /** Structure containing delay estimator related parameters.*/
+    delay_estimator_params_t delay_estimator_params;
+
+    /** Structure containing AEC control parameters. These are initialised to the default values and can be changed at
+     * runtime by the user.*/
+    aec_config_params_t config_params;
+
+    /** Number of mic input channels that the AEC is configured for. This is the input parameter num_y_channels that
+     * aec_init() gets called with.*/
+    unsigned num_y_channels;
+
+    /** Number of reference input channels that the AEC is configured for. This is the input parameter num_x_channels that
+     * aec_init() gets called with.*/
+    unsigned num_x_channels;
+}aec_shared_state_t;
+//! [aec_shared_state_t]
+
+/**
+ * @brief AEC state structure.
+ *
+ * Data structures holding AEC persistent state. There are 2 instances of aec_state_t maintained within AEC; one for
+ * main filter and one for shadow filter specific state.
+ *
+ * @ingroup aec_types
+ */
+//! [aec_state_t]
+typedef struct {
+    /** BFP array pointing to estimated mic signal spectrum. The Y_hat data values are stored as length
+     * AEC_FD_FRAME_LENGTH, complex 32bit array per y channel.*/
+    bfp_complex_s32_t Y_hat[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to adaptive filter error signal spectrum. The Error data is stored as length
+     * AEC_FD_FRAME_LENGTH, complex 32bit array per y channel.*/
+    bfp_complex_s32_t Error[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to the adaptive filter spectrum.
+     * The filter spectrum is stored as a num_y_channels x total_phases_across_all_x_channels array where each H_hat[i][j]
+     * entry points to the spectrum of a single phase.
+     *
+     * Number of phases in the filter refers to its tail length. A filter with more phases would be able to model a longer
+     * echo thereby causing better echo cancellation.
+     *
+     * For example, for a 2 y-channels, 3 x-channels, 10 phases per x channel configuration,
+     * the filter spectrum phases are stored in a 2x30 array. For a given y channel, say y channel 0, H_hat[0][0] to
+     * H_hat[0][9] points to 10 phases of H_hat<SUB>y0x0</SUB>, H_hat[0][10] to H_hat[0][19] points to 10 phases of
+     * H_hat<SUB>y0x1</SUB> and H_hat[0][20] to H_hat[0][29] points to 10 phases of H_hat<SUB>y0x2</SUB>.
+     *
+     * Each filter phase data which is pointed to by H_hat[i][j].data is stored as AEC_FD_FRAME_LENGTH complex 32bit
+     * array.*/
+    bfp_complex_s32_t H_hat[AEC_LIB_MAX_Y_CHANNELS][AEC_LIB_MAX_PHASES];
+
+    /** BFP array pointing to all phases of reference input spectrum across all x channels. Here, the reference input
+     * spectrum is saved in a 1 dimensional array of phases, with x channel 0 phases followed by x channel 1 phases and
+     * so on.  For example, for a 2 x-channels, 10 phases per x channel configuration, X_fifo_1d[0] to X_fifo_1d[9]
+     * points to the 10 phases for channel 0 and X_fifo_1d[10] to X_fifo_1d[19] points to the 10 phases for channel 1.
+     *
+     * Each X data spectrum phase pointed to by X_fifo_1d[i].data is stored as length AEC_FD_FRAME_LENGTH complex
+     * 32bit array.*/
+    bfp_complex_s32_t X_fifo_1d[AEC_LIB_MAX_PHASES];
+
+    /** BFP array pointing to T values which are stored as a length AEC_FD_FRAME_LENGTH, complex array per x channel.*/ 
+    bfp_complex_s32_t T[AEC_LIB_MAX_X_CHANNELS]; 
+
+    /** BFP array pointing to the normalisation spectrum which are stored as a length AEC_FD_FRAME_LENGTH, 32bit
+     * integer array per x channel.*/ 
+    bfp_s32_t inv_X_energy[AEC_LIB_MAX_X_CHANNELS];
+
+    /** BFP array pointing to the X_energy data which is the energy per bin of the X spectrum summed over all phases of
+     * the X data. X_energy data is stored as a length AEC_FD_FRAME_LENGTH, integer 32bit array per x channel.*/
+    bfp_s32_t X_energy[AEC_LIB_MAX_X_CHANNELS];
+
+    /** BFP array pointing to time domain overlap data values which are used in the overlap add operation done while
+     * calculating the echo canceller time domain output. Stored as a length 32, 32 bit integer array per y channel.*/
+    bfp_s32_t overlap[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to the time domain estimated mic signal. Stored as length AEC_PROC_FRAME_LENGTH, 32 bit
+     * integer array per y channel.*/ 
+    bfp_s32_t y_hat[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** BFP array pointing to the time domain adaptive filter error signal. Stored as length AEC_PROC_FRAME_LENGTH, 32 bit
+     * integer array per y channel.*/ 
+    bfp_s32_t error[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** mu values for every x-y pair stored as 32 bit integer mantissa and 32 bit integer exponent*/
+    float_s32_t mu[AEC_LIB_MAX_Y_CHANNELS][AEC_LIB_MAX_X_CHANNELS];
+
+    /** Exponential moving average of the time domain adaptive filter error signal energy. Stored in a y channels array
+     * with every value stored as a 32bit integer mantissa and exponent.*/
+    float_s32_t error_ema_energy[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** Energy of the adaptive filter error spectrum. Stored in a y channels array with every value stored as a 32bit
+     * integer mantissa and exponent.*/
+    float_s32_t overall_Error[AEC_LIB_MAX_Y_CHANNELS];
+
+    /** Maximum X energy across all values of X_energy for a given x channel. Stored in an x channels array with every
+     * value stored as a 32bit integer mantissa and exponent.*/
+    float_s32_t max_X_energy[AEC_LIB_MAX_X_CHANNELS];
+    
+    /** fractional regularisation scalefactor.*/
+    float_s32_t delta_scale;
+
+    /** delta parameter used in the normalisation spectrum calculation.*/
+    float_s32_t delta; 
+    
+    /** pointer to the state data shared between main and shadow filter.*/
+    aec_shared_state_t *shared_state;
+    
+    /** Number of filter phases per x-y pair that AEC filter is configured for. This is the input argument
+     * num_main_filter_phases or num_shadow_filter_phases, depending on which filter the aec_state_t is instantiated
+     * for, passed in aec_init() call.*/
+    unsigned num_phases; 
+}aec_state_t;
+//! [aec_state_t]
+
+#endif
diff --git a/modules/lib_aec/doc/.gitignore b/modules/lib_aec/doc/.gitignore
new file mode 100644
index 000000000..fca726c76
--- /dev/null
+++ b/modules/lib_aec/doc/.gitignore
@@ -0,0 +1,4 @@
+
+_doxygen/
+_build/
+_templates/
\ No newline at end of file
diff --git a/modules/lib_aec/doc/index.rst b/modules/lib_aec/doc/index.rst
new file mode 100755
index 000000000..a126024cd
--- /dev/null
+++ b/modules/lib_aec/doc/index.rst
@@ -0,0 +1,34 @@
+Acoustic Echo Canceller Library
+================================
+
+Introduction
+************
+
+``lib_aec`` is a library of functions for performing Acoustic Echo Cancellation on input data.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Contents:
+    
+   src/getting_started
+   src/overview
+   src/reference/index
+
+
+On GitHub
+---------
+
+``lib_aec`` is present as part of ``sw_avona``. Get the latest version of ``sw_avona`` from
+``https://github.com/xmos/sw_avona``. ``lib_aec`` is present within the ``modules/lib_aec`` directory in ``sw_avona``.
+
+API
+---
+
+To use the functions in this library in an application, include :ref:`aec_api_h` in the application source file
+
+
+Indices and tables
+******************
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/modules/lib_aec/doc/src/getting_started.rst b/modules/lib_aec/doc/src/getting_started.rst
new file mode 100644
index 000000000..5e23ce2f8
--- /dev/null
+++ b/modules/lib_aec/doc/src/getting_started.rst
@@ -0,0 +1,56 @@
+Getting Started
+===============
+
+Overview
+--------
+
+``lib_aec`` is a library which provides functions that can be put together to perform Acoustic Echo Cancellation (AEC)
+on input mic data using the input reference data to model the room echo characteristics. ``lib_aec`` library functions
+make use of functionality provided in ``lib_xs3_math`` to perform DSP operations. For more details refer to
+:ref:`aec_overview`.
+
+Repository Structure
+--------------------
+
+* ``modules/lib_aec`` - The actual ``lib_aec`` library directory within ``https://github.com/xmos/sw_avona/``. Within ``lib_aec``:
+
+  * ``api/`` - Headers containing the public API for ``lib_aec``.
+  * ``doc/`` - Library documentation source (for non-embedded documentation) and build directory.
+  * ``src/`` - Library source code.
+
+
+Requirements
+------------
+
+``lib_aec`` is included as part of the ``sw_avona`` github repository
+and all requirements for cloning and building ``sw_avona`` apply. ``lib_aec`` is compiled as a static library as part of
+overall ``sw_avona`` build. It depends on `lib_xs3_math <https://github.com/xmos/lib_xs3_math/>`_.
+
+API Structure
+-------------
+
+The API can be categorised into high level and low level functions.
+
+High level API has fewer input arguments and is simpler. However, it provides limited options for calling functions in parallel
+across multiple threads. Keeping API simplicity in mind, most of the high level API functions accept a pointer to the AEC state
+structure as an input and modify the relevant part of the AEC state. API and example documentation provides more
+details about the fields within the state modified when calling a given function. High level API functions allow
+2 levels of parallelism:
+
+* Single level of parallelism where for a given function, main and shadow filter processing can happen in parallel.
+* Two levels of parallelism where, for a given function, processing across multiple channels as well as main and shadow filter can be done in parallel.
+
+Low level API has more input arguments but allows more freedom for running in parallel across multiple threads. Low
+level API function names begin with an ``aec_l2_`` prefix.
+Depending on the low level API used, functions can be run in parallel to work over a range of bins or a range of phases.
+This API is still a work in progress and will be fully supported in the future.
+
+Getting and Building
+####################
+
+``lib_aec`` is obtained as part of the parent ``sw_avona`` repo clone. It is compiled as a static library as part of the
+``sw_avona`` compilation process.
+
+To include ``lib_aec`` in an application as a static library, link the generated ``lib_aec.a`` into the application.
+Be sure to also add ``lib_aec/api`` as an include directory for the application.
+
diff --git a/modules/lib_aec/doc/src/overview.rst b/modules/lib_aec/doc/src/overview.rst
new file mode 100644
index 000000000..9faeea0a8
--- /dev/null
+++ b/modules/lib_aec/doc/src/overview.rst
@@ -0,0 +1,64 @@
+.. _aec_overview:
+
+AEC Overview
+~~~~~~~~~~~~
+
+The lib_aec library provides functions that can be put together to
+perform Acoustic Echo Cancellation on input microphone data by using
+input reference data to model the echo characteristics of the room.
+
+The echo canceller takes in one or more channels of microphone (mic)
+input and one or more channels of reference input data. The mic input is
+the input captured by the device microphones. Reference input is the
+audio that is played out of the device speakers. The echo canceller uses
+the reference input to model the room echo characteristics for each
+mic-loudspeaker pair and outputs an echo cancelled version of the mic
+input. AEC uses adaptive filters, one per mic-speaker pair to constantly
+remove echo from the mic input. The filters continually adapt to the
+acoustic environment to accommodate changes in the room created by
+events such as doors opening or closing and people moving about.
+
+Echo cancellation is performed on a frame by frame basis. Each frame is
+made of 15msec chunks of data, which is 240 samples at 16kHz input
+sampling frequency, per input channel. For example, for a 2 mic channel
+and 2 reference channel input configuration, an input frame is made of
+2x240 samples of mic data and 2x240 samples of reference data. Input
+data is expected to be in fixed point 32bit 1.31 format. Further, in
+this example, there will be a total of 4 adaptive filters;
+:math:`\hat{H}_{y0x0}`, :math:`\hat{H}_{y0x1}`, :math:`\hat{H}_{y1x0}`
+and :math:`\hat{H}_{y1x1}`, monitoring the echo seen in mic channel 0
+from reference channel 0 and 1 and echo seen in mic channel 1 from
+reference channel 0 and 1.
+
+Microphone data is referred to as :math:`y` when in time domain and
+:math:`Y` when in frequency domain. In general throughout the code,
+names starting with lower case represent time domain and those beginning
+with upper case represent frequency domain. For example :math:`error` is
+the filter error and :math:`Error` is the spectrum of the filter error.
+Reference input is referred to as :math:`x` in time domain and :math:`X`
+when in frequency domain. Filter is referred to as :math:`\hat{h}` in
+time domain and :math:`\hat{H}` in frequency domain.
+
+A filter has multiple phases. The term phases refers to the tail length
+of the filter. A filter with more phases or a longer tail length will be
+able to model a more reverberant room response leading to better echo
+cancellation.
+
+There are 2 types of adaptive filters used in the AEC. These are
+referred to as main filter and shadow filter. The main filter as the
+name suggests is the main filter that is used to generate the echo
+cancelled output of the AEC. Shadow filter is a filter that is used to
+quickly detect and respond to changes in the room transfer function.
+There is one main filter and one shadow filter per :math:`x`-:math:`y`
+pair. Typically the main filter has more phases than the shadow filter.
+Fewer phases in the shadow filter enable it to rapidly detect and
+respond to changes while more phases in main filter lead to deeper
+convergence and hence better echo cancellation at the AEC output.
+
+Before starting AEC processing or every time there’s a configuration
+change, the user needs to call aec_init() to initialise the echo
+canceller for a desired configuration. Once the AEC is initialised, the
+library functions can be called in a logical order to perform echo
+cancellation on a frame by frame basis. Refer to the aec_1_thread and
+aec_2_threads examples to see how the functions are called to perform
+echo cancellation using one thread or 2 threads.
diff --git a/modules/lib_aec/doc/src/reference/api/high_level_api.rst b/modules/lib_aec/doc/src/reference/api/high_level_api.rst
new file mode 100644
index 000000000..35f657be3
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/api/high_level_api.rst
@@ -0,0 +1,8 @@
+.. _aec_func:
+
+AEC High Level API Functions
+=====================================
+
+.. doxygengroup:: aec_func 
+    :content-only:
+
diff --git a/modules/lib_aec/doc/src/reference/api/index.rst b/modules/lib_aec/doc/src/reference/api/index.rst
new file mode 100644
index 000000000..b78a7f13c
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/api/index.rst
@@ -0,0 +1,9 @@
+########################
+AEC API
+########################
+
+.. toctree::
+
+    high_level_api
+    low_level_api
+
diff --git a/modules/lib_aec/doc/src/reference/api/low_level_api.rst b/modules/lib_aec/doc/src/reference/api/low_level_api.rst
new file mode 100644
index 000000000..d459ae026
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/api/low_level_api.rst
@@ -0,0 +1,7 @@
+.. _aec_low_level_func:
+
+AEC Low Level API Functions (STILL WIP)
+=========================================
+
+.. doxygengroup:: aec_low_level_func 
+    :content-only:
diff --git a/modules/lib_aec/doc/src/reference/defines.rst b/modules/lib_aec/doc/src/reference/defines.rst
new file mode 100644
index 000000000..6e1182e46
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/defines.rst
@@ -0,0 +1,8 @@
+.. _aec_defines:
+
+AEC #define constants
+========================================
+
+.. doxygengroup:: aec_defines
+    :members:
+    :content-only:
diff --git a/modules/lib_aec/doc/src/reference/header_files.rst b/modules/lib_aec/doc/src/reference/header_files.rst
new file mode 100644
index 000000000..5b4322ed4
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/header_files.rst
@@ -0,0 +1,31 @@
+###########################
+`lib_aec` Header Files
+###########################
+
+.. _aec_defines_h:
+
+`aec_defines.h`
+---------------
+
+.. doxygenpage:: page_aec_defines_h
+  :content-only:
+
+.. _aec_state_h:
+
+`aec_state.h`
+-------------
+
+.. doxygenpage:: page_aec_state_h
+  :content-only:
+
+
+.. _aec_api_h:
+
+`aec_api.h`
+------------
+
+.. doxygenpage:: page_aec_api_h
+  :content-only:
+
+
+
diff --git a/modules/lib_aec/doc/src/reference/index.rst b/modules/lib_aec/doc/src/reference/index.rst
new file mode 100644
index 000000000..447a4929b
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/index.rst
@@ -0,0 +1,16 @@
+#############
+API Reference
+#############
+
+.. toctree::
+    :maxdepth: 2
+
+    types
+
+
+.. toctree::
+    :maxdepth: 1
+    
+    defines
+    api/index
+    header_files
diff --git a/modules/lib_aec/doc/src/reference/types.rst b/modules/lib_aec/doc/src/reference/types.rst
new file mode 100644
index 000000000..74eaf8215
--- /dev/null
+++ b/modules/lib_aec/doc/src/reference/types.rst
@@ -0,0 +1,8 @@
+.. _aec_types:
+
+AEC Data Structure and Enum Definitions
+========================================
+
+.. doxygengroup:: aec_types
+    :members:
+    :content-only:
diff --git a/modules/lib_aec/lib_aec.cmake b/modules/lib_aec/lib_aec.cmake
new file mode 100644
index 000000000..cac6cda2f
--- /dev/null
+++ b/modules/lib_aec/lib_aec.cmake
@@ -0,0 +1,9 @@
+## Source files (globbed relative to this file, so the module also works when include()d from another directory)
+file( GLOB_RECURSE    LIB_AEC_C_SOURCES       "${CMAKE_CURRENT_LIST_DIR}/src/*.c" )
+
+## set LIB_AEC_INCLUDES & LIB_AEC_SOURCES
+set( LIB_AEC_INCLUDES     "${CMAKE_CURRENT_LIST_DIR}/api"           )
+
+unset(LIB_AEC_SOURCES)
+list( APPEND  LIB_AEC_SOURCES   ${LIB_AEC_C_SOURCES}    )
+
diff --git a/modules/lib_aec/src/aec_delay_estimator.c b/modules/lib_aec/src/aec_delay_estimator.c
new file mode 100644
index 000000000..aae8aa4f2
--- /dev/null
+++ b/modules/lib_aec/src/aec_delay_estimator.c
@@ -0,0 +1,81 @@
+// Copyright 2019-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#include "aec_defines.h"
+#include "aec_api.h"
+
+#if 0
+int aec_estimate_delay (
+        aec_state_t *state)
+{
+    //Direct manipulation of mant/exp because double_to_float_s32(0.0) takes hundreds of cycles
+    const float_s32_t zero = {0, 0};
+    const float_s32_t one = {1, 0};
+
+    float_s32_t peak_fd_power = zero;
+    int32_t peak_power_phase_index = 0;
+    state->shared_state->delay_estimator_params.sum_phase_powers = zero;
+    
+    for(int ch=0; ch<1; ch++) { //estimate delay for the first y-channel
+        for(int ph=0; ph<state->num_phases; ph++) { //compute delay over 1 x-y pair phases
+            float_s32_t phase_power;
+            aec_calc_freq_domain_energy(&phase_power, &state->H_hat[ch][ph]);
+            state->shared_state->delay_estimator_params.phase_power[ph] = phase_power;
+            state->shared_state->delay_estimator_params.sum_phase_powers = float_s32_add(state->shared_state->delay_estimator_params.sum_phase_powers, phase_power);
+            if(float_s32_gt(phase_power, peak_fd_power)) {
+                peak_fd_power = phase_power;
+                peak_power_phase_index = ph;
+            }
+        }
+    }
+    state->shared_state->delay_estimator_params.peak_phase_power = peak_fd_power;
+    state->shared_state->delay_estimator_params.peak_power_phase_index = peak_power_phase_index;
+
+    if(float_s32_gt(state->shared_state->delay_estimator_params.sum_phase_powers, zero)){
+        float_s32_t num_phases_s32 = {state->num_phases, 0};
+        state->shared_state->delay_estimator_params.peak_to_average_ratio = 
+                float_s32_div(float_s32_mul(peak_fd_power, num_phases_s32), state->shared_state->delay_estimator_params.sum_phase_powers);
+    }else{
+        state->shared_state->delay_estimator_params.peak_to_average_ratio = one;
+    }
+
+    return AEC_FRAME_ADVANCE * peak_power_phase_index;
+}
+#endif
+
+int aec_estimate_delay (
+        delay_estimator_params_t *de_state,
+        const bfp_complex_s32_t* H_hat, 
+        unsigned num_phases)
+{   //Returns AEC_FRAME_ADVANCE times the index of the H_hat phase with the largest energy; per-phase statistics are stored in de_state
+    //Direct manipulation of mant/exp because double_to_float_s32(0.0) takes hundreds of cycles
+    const float_s32_t zero = {0, 0};
+    const float_s32_t one = {1, 0};
+
+    float_s32_t peak_fd_power = zero;
+    int32_t peak_power_phase_index = 0;
+    de_state->sum_phase_powers = zero;
+    
+    for(unsigned ph=0; ph<num_phases; ph++) { //compute delay over 1 x-y pair phases. Index is unsigned to match num_phases
+        float_s32_t phase_power;
+        aec_calc_freq_domain_energy(&phase_power, &H_hat[ph]);
+        de_state->phase_power[ph] = phase_power;
+        de_state->sum_phase_powers = float_s32_add(de_state->sum_phase_powers, phase_power);
+        if(float_s32_gt(phase_power, peak_fd_power)) { //strict compare: the first phase reaching the peak is kept
+            peak_fd_power = phase_power;
+            peak_power_phase_index = (int32_t)ph;
+        }
+    }
+    de_state->peak_phase_power = peak_fd_power;
+    de_state->peak_power_phase_index = peak_power_phase_index;
+
+    if(float_s32_gt(de_state->sum_phase_powers, zero)){ //ratio = peak * num_phases / sum, only when the sum is positive
+        float_s32_t num_phases_s32 = {(int32_t)num_phases, 0};
+        de_state->peak_to_average_ratio = 
+                float_s32_div(float_s32_mul(peak_fd_power, num_phases_s32), de_state->sum_phase_powers);
+    }else{
+        de_state->peak_to_average_ratio = one; //no energy in any phase: report a ratio of 1 instead of dividing by zero
+    }
+
+    return AEC_FRAME_ADVANCE * peak_power_phase_index;
+}
diff --git a/modules/lib_aec/src/aec_impl.c b/modules/lib_aec/src/aec_impl.c
new file mode 100644
index 000000000..e92bb64ef
--- /dev/null
+++ b/modules/lib_aec/src/aec_impl.c
@@ -0,0 +1,396 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "aec_defines.h"
+#include "aec_api.h"
+#include "aec_priv.h"
+
+void aec_init(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        aec_shared_state_t *shared_state,
+        uint8_t *main_mem_pool,
+        uint8_t *shadow_mem_pool,
+        unsigned num_y_channels,
+        unsigned num_x_channels,
+        unsigned num_main_filter_phases,
+        unsigned num_shadow_filter_phases) {
+
+    aec_priv_main_init(main_state, shared_state, main_mem_pool, num_y_channels, num_x_channels, num_main_filter_phases);
+    aec_priv_shadow_init(shadow_state, shared_state, shadow_mem_pool, num_shadow_filter_phases);
+}
+
+void aec_frame_init(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE])
+{
+    unsigned num_y_channels = main_state->shared_state->num_y_channels;
+    unsigned num_x_channels = main_state->shared_state->num_x_channels;
+
+    // y frame 
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        /* Create 512 samples frame */
+        // Copy previous y samples
+        memcpy(main_state->shared_state->y[ch].data, main_state->shared_state->prev_y[ch].data, (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE)*sizeof(int32_t));
+        // Copy current y samples
+        memcpy(&main_state->shared_state->y[ch].data[AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE], &y_data[ch][0], (AEC_FRAME_ADVANCE)*sizeof(int32_t));
+        // Update exp just in case
+        main_state->shared_state->y[ch].exp = -31;
+        // Update headroom
+        bfp_s32_headroom(&main_state->shared_state->y[ch]);
+
+        /* Update previous samples */
+        // Copy the last 32 samples to the beginning
+        memcpy(main_state->shared_state->prev_y[ch].data, &main_state->shared_state->prev_y[ch].data[AEC_FRAME_ADVANCE], (AEC_PROC_FRAME_LENGTH - (2*AEC_FRAME_ADVANCE))*sizeof(int32_t));
+        // Copy current frame to previous
+        memcpy(&main_state->shared_state->prev_y[ch].data[(AEC_PROC_FRAME_LENGTH - (2*AEC_FRAME_ADVANCE))], &y_data[ch][0], AEC_FRAME_ADVANCE*sizeof(int32_t));
+        // Update headroom
+        bfp_s32_headroom(&main_state->shared_state->prev_y[ch]);
+        // Update exp just in case
+        main_state->shared_state->prev_y[ch].exp = -31;
+    }
+    // x frame 
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        /* Create 512 samples frame */
+        // Copy previous x samples
+        memcpy(main_state->shared_state->x[ch].data, main_state->shared_state->prev_x[ch].data, (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE)*sizeof(int32_t));
+        // Copy current x samples
+        memcpy(&main_state->shared_state->x[ch].data[AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE], &x_data[ch][0], (AEC_FRAME_ADVANCE)*sizeof(int32_t));
+        // Update exp just in case
+        main_state->shared_state->x[ch].exp = -31;
+        // Update headroom
+        bfp_s32_headroom(&main_state->shared_state->x[ch]);
+
+        /* Update previous samples */
+        // Copy the last 32 samples to the beginning
+        memcpy(main_state->shared_state->prev_x[ch].data, &main_state->shared_state->prev_x[ch].data[AEC_FRAME_ADVANCE], (AEC_PROC_FRAME_LENGTH - (2*AEC_FRAME_ADVANCE))*sizeof(int32_t));
+        // Copy current frame to previous
+        memcpy(&main_state->shared_state->prev_x[ch].data[(AEC_PROC_FRAME_LENGTH - (2*AEC_FRAME_ADVANCE))], &x_data[ch][0], AEC_FRAME_ADVANCE*sizeof(int32_t));
+        // Update exp just in case
+        main_state->shared_state->prev_x[ch].exp = -31;
+        // Update headroom
+        bfp_s32_headroom(&main_state->shared_state->prev_x[ch]);
+    }
+
+    //Initialise T
+    //At the moment, there's only enough memory for storing num_x_channels and not num_y_channels*num_x_channels worth of T.
+    //So T calculation cannot be parallelised across Y channels
+    //Reuse X memory for calculating T
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_complex_s32_init(&main_state->T[ch], (complex_s32_t*)&main_state->shared_state->x[ch].data[0], 0, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+    }
+
+    //set Y_hat memory to 0 since it will be used in bfp_complex_s32_macc operation in aec_l2_calc_Error_and_Y_hat()
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        main_state->Y_hat[ch].exp = -1024;
+        main_state->Y_hat[ch].hr = 0;
+        memset(&main_state->Y_hat[ch].data[0], 0, ((AEC_PROC_FRAME_LENGTH/2)+1)*sizeof(complex_s32_t));
+    }
+    if(shadow_state != NULL) {
+        for(unsigned ch=0; ch<num_y_channels; ch++) {
+            shadow_state->Y_hat[ch].exp = -1024;
+            shadow_state->Y_hat[ch].hr = 0;
+            memset(&shadow_state->Y_hat[ch].data[0], 0, ((AEC_PROC_FRAME_LENGTH/2)+1)*sizeof(complex_s32_t));
+        }
+    }
+}
+
+void aec_calc_time_domain_ema_energy(
+        float_s32_t *ema_energy,
+        const bfp_s32_t *input,
+        unsigned start_offset,
+        unsigned length,
+        const aec_config_params_t *conf)
+{
+    if(!length) {
+        return;
+    }
+    bfp_s32_t input_chunk;
+    bfp_s32_init(&input_chunk, &input->data[start_offset], input->exp, length, 0);
+    input_chunk.hr = input->hr;
+    float_s64_t dot64 = bfp_s32_dot(&input_chunk, &input_chunk);
+    float_s32_t dot = float_s64_to_float_s32(dot64);
+    *ema_energy = float_s32_ema(*ema_energy, dot, conf->aec_core_conf.ema_alpha_q30);
+}
+
+float_s32_t aec_calc_max_ref_energy(const int32_t (*x_data)[AEC_FRAME_ADVANCE], int num_channels) {
+    bfp_s32_t ref;
+
+    bfp_s32_init(&ref, (int32_t*)&x_data[0][0], -31, AEC_FRAME_ADVANCE, 1);
+    float_s32_t max = float_s64_to_float_s32(bfp_s32_energy(&ref));
+    for(int ch=1; ch<num_channels; ch++) {
+        bfp_s32_init(&ref, (int32_t*)&x_data[ch][0], -31, AEC_FRAME_ADVANCE, 1);
+        float_s32_t current = float_s64_to_float_s32(bfp_s32_energy(&ref));
+        if(float_s32_gt(current, max)){max = current;}
+    }
+    return max;
+}
+
+void aec_forward_fft(
+        bfp_complex_s32_t *output,
+        bfp_s32_t *input)
+{
+    //Input bfp_s32_t structure will get overwritten since FFT is computed in-place. Keep a copy of input->length and assign it back after fft call.
+    //This is done to avoid having to call bfp_s32_init() on the input every frame
+    int32_t len = input->length; 
+    bfp_complex_s32_t *temp = bfp_fft_forward_mono(input);
+    
+    memcpy(output, temp, sizeof(bfp_complex_s32_t));
+    bfp_fft_unpack_mono(output);
+    input->length = len;
+    return;
+}
+
+//per x-channel
+//API: calculate X-energy (per x-channel)
+void aec_calc_X_fifo_energy(
+        aec_state_t *state,
+        unsigned ch,
+        unsigned recalc_bin) 
+{
+    if((state == NULL) || (!state->num_phases)) {
+        return;
+    }
+ 
+    bfp_s32_t *X_energy_ptr = &state->X_energy[ch];
+    bfp_complex_s32_t *X_ptr = &state->shared_state->X[ch];
+    float_s32_t *max_X_energy_ptr = &state->max_X_energy[ch];
+    aec_priv_update_total_X_energy(X_energy_ptr, max_X_energy_ptr, &state->shared_state->X_fifo[ch][0], X_ptr, state->num_phases, recalc_bin);
+    return;
+}
+//per x-channel
+void aec_update_X_fifo_and_calc_sigmaXX(
+        aec_state_t *state,
+        unsigned ch)
+{
+    bfp_s32_t *sigma_XX_ptr = &state->shared_state->sigma_XX[ch];
+    bfp_complex_s32_t *X_ptr = &state->shared_state->X[ch];
+    uint32_t sigma_xx_shift = state->shared_state->config_params.aec_core_conf.sigma_xx_shift;
+    float_s32_t *sum_X_energy_ptr = &state->shared_state->sum_X_energy[ch]; //This needs to be done only for main filter, so doing it here instead of in aec_calc_X_fifo_energy
+    aec_priv_update_X_fifo_and_calc_sigmaXX(&state->shared_state->X_fifo[ch][0], sigma_XX_ptr, sum_X_energy_ptr, X_ptr, state->num_phases, sigma_xx_shift);
+    return;
+}
+
+//per y-channel
+void aec_calc_Error_and_Y_hat(
+        aec_state_t *state,
+        unsigned ch)
+{
+    if(state == NULL) {
+        return;
+    }
+    bfp_complex_s32_t *Y_ptr = &state->shared_state->Y[ch];
+    bfp_complex_s32_t *Y_hat_ptr = &state->Y_hat[ch];
+    bfp_complex_s32_t *Error_ptr = &state->Error[ch];
+    int32_t bypass_enabled = state->shared_state->config_params.aec_core_conf.bypass;
+    aec_priv_calc_Error_and_Y_hat(Error_ptr, Y_hat_ptr, Y_ptr, state->X_fifo_1d, state->H_hat[ch], state->shared_state->num_x_channels, state->num_phases, bypass_enabled);
+}
+
+void aec_inverse_fft(
+        bfp_s32_t *output,
+        bfp_complex_s32_t *input)
+{
+    //Input bfp_complex_s32_t structure will get overwritten since IFFT is computed in-place. Keep a copy of input->length and assign it back after ifft call.
+    //This is done to avoid having to call bfp_complex_s32_init() on the input every frame
+    int32_t len = input->length;
+    bfp_fft_pack_mono(input);
+    bfp_s32_t *temp = bfp_fft_inverse_mono(input);
+    memcpy(output, temp, sizeof(bfp_s32_t));
+
+    input->length = len;
+    return;
+}
+
+float_s32_t aec_calc_corr_factor(
+        aec_state_t *state,
+        unsigned ch) {
+    // We need yhat[240:480-32] and y[240:480-32]
+    int frame_window = 32;
+
+    // y[240:480] is prev_y[0:240].
+    bfp_s32_t y_subset;
+    bfp_s32_init(&y_subset, state->shared_state->prev_y[ch].data, state->shared_state->prev_y[ch].exp, AEC_FRAME_ADVANCE-frame_window, 1);
+
+    bfp_s32_t yhat_subset;
+    bfp_s32_init(&yhat_subset, &state->y_hat[ch].data[AEC_FRAME_ADVANCE], state->y_hat[ch].exp, AEC_FRAME_ADVANCE-frame_window, 1);
+
+    float_s32_t corr_factor = aec_priv_calc_corr_factor(&y_subset, &yhat_subset);
+    return corr_factor;
+}
+
+void aec_calc_coherence(
+        aec_state_t *state,
+        unsigned ch)
+{
+    if(state->shared_state->config_params.aec_core_conf.bypass) {
+        return;
+    }
+    coherence_mu_params_t *coh_mu_state_ptr = &state->shared_state->coh_mu_state[ch];
+    //We need y_hat[240:480] and y[240:480]
+    bfp_s32_t y_hat_subset;
+    bfp_s32_init(&y_hat_subset, &state->y_hat[ch].data[AEC_FRAME_ADVANCE], state->y_hat[ch].exp, AEC_FRAME_ADVANCE, 0);
+    y_hat_subset.hr = state->y_hat[ch].hr;
+
+    //y[240:480] is prev_y[0:240]. Create a temporary bfp_s32_t to point to prev_y[0:240]
+    bfp_s32_t temp;
+    bfp_s32_init(&temp, state->shared_state->prev_y[ch].data, state->shared_state->prev_y[ch].exp, AEC_FRAME_ADVANCE, 0);
+    temp.hr = state->shared_state->prev_y[ch].hr;
+
+    aec_priv_calc_coherence(coh_mu_state_ptr, &temp, &y_hat_subset, &state->shared_state->config_params);
+    return;
+}
+
+void aec_calc_output(
+        aec_state_t *state,
+        int32_t (*output)[AEC_FRAME_ADVANCE],
+        unsigned ch)
+{
+    if(state == NULL) {
+        return;
+    }
+
+    bfp_s32_t output_struct;
+    if(output != NULL) {
+        bfp_s32_init(&output_struct, &output[0][0], -31, AEC_FRAME_ADVANCE, 0);
+    }
+    else {
+        bfp_s32_init(&output_struct, NULL, -31, AEC_FRAME_ADVANCE, 0);
+    }
+    bfp_s32_t *output_ptr = &output_struct;
+    bfp_s32_t *overlap_ptr = &state->overlap[ch];
+    bfp_s32_t *error_ptr = &state->error[ch];
+    aec_priv_create_output(output_ptr, overlap_ptr, error_ptr, &state->shared_state->config_params);
+    return;
+}
+
+void aec_calc_freq_domain_energy(
+        float_s32_t *fd_energy,
+        const bfp_complex_s32_t *input)
+{
+    int32_t DWORD_ALIGNED scratch_mem[AEC_PROC_FRAME_LENGTH/2 + 1];
+    assert(input->length <= AEC_PROC_FRAME_LENGTH/2 + 1);
+    bfp_s32_t scratch;
+    bfp_s32_init(&scratch, scratch_mem, 0, input->length, 0);
+    bfp_complex_s32_squared_mag(&scratch, input);
+
+    float_s64_t sum64 = bfp_s32_sum(&scratch);
+    *fd_energy = float_s64_to_float_s32(sum64);
+}
+
+void aec_calc_normalisation_spectrum(
+        aec_state_t *state,
+        unsigned ch,
+        unsigned is_shadow)
+{
+    if(state == NULL) {
+        return;
+    }
+    //frequency smoothing
+    //calc inverse energy
+    bfp_s32_t *sigma_XX_ptr = &state->shared_state->sigma_XX[ch];
+    bfp_s32_t *X_energy_ptr = &state->X_energy[ch];
+    aec_priv_calc_inv_X_energy(&state->inv_X_energy[ch], X_energy_ptr, sigma_XX_ptr, &state->shared_state->config_params, state->delta, is_shadow);
+    return;
+}
+
+void aec_filter_adapt(
+        aec_state_t *state,
+        unsigned y_ch)
+{
+    if(state == NULL) {
+        return;
+    }
+    if(state->shared_state->config_params.aec_core_conf.bypass) {
+        return;
+    }
+    bfp_complex_s32_t *T_ptr = &state->T[0];
+
+    aec_priv_filter_adapt(state->H_hat[y_ch], state->X_fifo_1d, T_ptr, state->shared_state->num_x_channels, state->num_phases);
+}
+
+void aec_calc_T(
+        aec_state_t *state,
+        unsigned y_ch,
+        unsigned x_ch)
+{
+    if(state == NULL) {
+        return;
+    }
+    bfp_complex_s32_t *T_ptr = &state->T[x_ch]; //Use the same memory as X to store T
+    bfp_complex_s32_t *Error_ptr = &state->Error[y_ch];
+    bfp_s32_t *inv_X_energy_ptr = &state->inv_X_energy[x_ch];
+    float_s32_t mu = state->mu[y_ch][x_ch];
+    aec_priv_compute_T(T_ptr, Error_ptr, inv_X_energy_ptr, mu);
+}
+
+void aec_compare_filters_and_calc_mu(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state)
+{
+    if(main_state->shared_state->config_params.aec_core_conf.bypass) {
+        return;
+    }
+    if(shadow_state != NULL) {
+        aec_priv_compare_filters(main_state, shadow_state);
+    }
+
+    coherence_mu_params_t *coh_mu_state_ptr = main_state->shared_state->coh_mu_state;
+    coherence_mu_config_params_t *coh_mu_conf_ptr = &main_state->shared_state->config_params.coh_mu_conf;
+    aec_priv_calc_coherence_mu(coh_mu_state_ptr, coh_mu_conf_ptr, main_state->shared_state->sum_X_energy,
+            main_state->shared_state->shadow_filter_params.shadow_flag, main_state->shared_state->num_y_channels, main_state->shared_state->num_x_channels);
+    
+    //calculate delta. Done here instead of aec_l2_calc_inv_X_energy_denom() since max_X_energy across all x-channels is needed in delta computation.
+    //aec_l2_calc_inv_X_energy_denom() is called per x channel
+    aec_priv_calc_delta(&main_state->delta, &main_state->max_X_energy[0], &main_state->shared_state->config_params, main_state->delta_scale, main_state->shared_state->num_x_channels);
+    if(shadow_state != NULL) {
+        aec_priv_calc_delta(&shadow_state->delta, &shadow_state->max_X_energy[0], &shadow_state->shared_state->config_params, shadow_state->delta_scale, shadow_state->shared_state->num_x_channels);
+    }
+    
+    //Update main and shadow filter mu
+    for(unsigned y_ch=0; y_ch<main_state->shared_state->num_y_channels; y_ch++) {
+        for(unsigned x_ch=0; x_ch<main_state->shared_state->num_x_channels; x_ch++) {
+            if(shadow_state != NULL) {
+                shadow_state->mu[y_ch][x_ch] = shadow_state->shared_state->config_params.shadow_filt_conf.shadow_mu;
+            }
+            main_state->mu[y_ch][x_ch] = coh_mu_state_ptr[y_ch].coh_mu[x_ch];
+        }
+    }
+}
+
+void aec_update_X_fifo_1d(
+        aec_state_t *state)
+{
+    if(state == NULL) {
+        return;
+    }
+    unsigned count = 0;
+    for(unsigned ch=0; ch<state->shared_state->num_x_channels; ch++) {
+        for(unsigned ph=0; ph<state->num_phases; ph++) {
+            state->X_fifo_1d[count] = state->shared_state->X_fifo[ch][ph];
+            count += 1;
+        }
+    }
+}
+
+#if 0
+#include <xclib.h>
+unsigned mk_mask(unsigned m){
+    //(1<<m)-1
+    asm volatile("mkmsk %0, %1":"=r"(m): "r"(m));
+    return m;
+}
+
+void bfp_s32_calculate_min_mask(
+        bfp_s32_t *input,
+        unsigned *min_mask)
+{
+    *min_mask = 0;
+    for(unsigned i=0; i<input->length; i++) {
+        *min_mask |= mk_mask(clz(input->data[i]));
+    }
+}
+#endif
diff --git a/modules/lib_aec/src/aec_l2_impl.c b/modules/lib_aec/src/aec_l2_impl.c
new file mode 100644
index 000000000..80d122291
--- /dev/null
+++ b/modules/lib_aec/src/aec_l2_impl.c
@@ -0,0 +1,114 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "aec_defines.h"
+#include "aec_api.h"
+
+//AEC level 2
+void aec_l2_calc_Error_and_Y_hat(
+        bfp_complex_s32_t *Error,
+        bfp_complex_s32_t *Y_hat,
+        const bfp_complex_s32_t *Y,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases,
+        unsigned start_offset,
+        unsigned length,
+        int32_t bypass_enabled)
+{   //Processes `length` bins starting at start_offset of Y, X_fifo and H_hat; Error and Y_hat are written from their own index 0
+    if(!length) { //empty bin range: leave Error and Y_hat untouched
+        //printf("0 length\n");
+        return;
+    }
+    if(bypass_enabled) { //Copy Y into Error. Set Y_hat to 0
+        memcpy(Error->data, &Y->data[start_offset], length*sizeof(complex_s32_t));
+        Error->exp = Y->exp;
+        Error->hr = Y->hr;
+
+        memset(Y_hat->data, 0, length*sizeof(complex_s32_t));
+        Y_hat->exp = -1024;
+        Y_hat->hr = 0;
+    }
+    else {
+        unsigned phases = num_x_channels * num_phases; //X_fifo and H_hat are walked as flat arrays of num_x_channels*num_phases entries
+        for(unsigned ph=0; ph<phases; ph++) {
+            //create input chunks
+            bfp_complex_s32_t X_chunk, H_hat_chunk;
+            bfp_complex_s32_init(&X_chunk, &X_fifo[ph].data[start_offset], X_fifo[ph].exp, length, 0);
+            X_chunk.hr = X_fifo[ph].hr; //reuse the headroom of the full vector rather than recomputing it for the chunk
+            bfp_complex_s32_init(&H_hat_chunk, &H_hat[ph].data[start_offset], H_hat[ph].exp, length, 0);
+            H_hat_chunk.hr = H_hat[ph].hr;
+            bfp_complex_s32_macc(Y_hat, &X_chunk, &H_hat_chunk); //accumulates into Y_hat, which aec_frame_init() zeroes every frame
+        }
+
+        bfp_complex_s32_t Y_chunk;
+        bfp_complex_s32_init(&Y_chunk, &Y->data[start_offset], Y->exp, length, 0);
+        Y_chunk.hr = Y->hr;
+        bfp_complex_s32_sub(Error, &Y_chunk, Y_hat);
+    }
+    return;
+}
+
+void aec_l2_adapt_plus_fft_gc(
+        bfp_complex_s32_t *H_hat_ph,
+        const bfp_complex_s32_t *X_fifo_ph,
+        const bfp_complex_s32_t *T_ph
+        )
+{   //Adapts a single filter phase in place from T_ph and X_fifo_ph, then applies the gradient constraint to it
+    bfp_complex_s32_conj_macc(H_hat_ph, T_ph, X_fifo_ph);
+    bfp_fft_pack_mono(H_hat_ph); //constraint is run on the packed spectrum; unpacked again below
+    bfp_complex_s32_gradient_constraint_mono(H_hat_ph, AEC_FRAME_ADVANCE); //was a literal 240; now follows the define in aec_defines.h
+    bfp_fft_unpack_mono(H_hat_ph);
+}
+
+void aec_l2_bfp_complex_s32_unify_exponent(
+        bfp_complex_s32_t *chunks,
+        int *final_exp, int *final_hr,
+        const int *mapping, int array_len,
+        int desired_index,
+        int min_headroom)
+{
+    *final_exp = INT_MIN; 
+    for(int i=0; i<array_len; i++) {
+        if(((mapping == NULL) || (mapping[i] == desired_index)) && (chunks[i].length > 0)) {
+            if((int32_t)(chunks[i].exp - chunks[i].hr + min_headroom) > *final_exp) {
+                *final_exp = chunks[i].exp - chunks[i].hr + min_headroom;
+            }
+        }
+    }
+    *final_hr = INT_MAX; //smallest hr
+    for(int i=0; i<array_len; i++) {
+        if(((mapping == NULL) || (mapping[i] == desired_index)) && (chunks[i].length > 0)) {
+           bfp_complex_s32_use_exponent(&chunks[i], *final_exp);
+           *final_hr = (chunks[i].hr < *final_hr) ? chunks[i].hr : *final_hr;
+        }
+    }
+}
+
+void aec_l2_bfp_s32_unify_exponent(
+        bfp_s32_t *chunks, int *final_exp,
+        int *final_hr,
+        const int *mapping,
+        int array_len,
+        int desired_index,
+        int min_headroom)
+{   //Same selection rule as aec_l2_bfp_complex_s32_unify_exponent(): empty chunks are always skipped; a non-NULL mapping keeps only chunks with mapping[i] == desired_index
+    *final_exp = INT_MIN; //find biggest exponent (fewest fraction bits)
+    for(int i=0; i<array_len; i++) {
+        if(((mapping == NULL) || (mapping[i] == desired_index)) && (chunks[i].length > 0)) {
+            if((int32_t)(chunks[i].exp - chunks[i].hr + min_headroom) > *final_exp) {
+                *final_exp = chunks[i].exp - chunks[i].hr + min_headroom;
+            }
+        }
+    }
+    *final_hr = INT_MAX; //smallest hr
+    for(int i=0; i<array_len; i++) {
+        if(((mapping == NULL) || (mapping[i] == desired_index)) && (chunks[i].length > 0)) {
+           bfp_s32_use_exponent(&chunks[i], *final_exp); //move every selected chunk onto the shared exponent
+           *final_hr = (chunks[i].hr < *final_hr) ? chunks[i].hr : *final_hr;
+        }
+    }
+}
diff --git a/modules/lib_aec/src/aec_priv.h b/modules/lib_aec/src/aec_priv.h
new file mode 100644
index 000000000..875c78622
--- /dev/null
+++ b/modules/lib_aec/src/aec_priv.h
@@ -0,0 +1,143 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_PRIV_H
+#define AEC_PRIV_H
+
+#include <stdio.h>
+#include <string.h>
+#include "bfp_math.h"
+#include "xs3_math.h"
+
+//private AEC functions
+void aec_priv_main_init(
+        aec_state_t *state,
+        aec_shared_state_t *shared_state,
+        uint8_t *mem_pool,
+        unsigned num_y_channels,
+        unsigned num_x_channels,
+        unsigned num_phases);
+
+void aec_priv_shadow_init(
+        aec_state_t *state,
+        aec_shared_state_t *shared_state,
+        uint8_t *mem_pool,
+        unsigned num_phases);
+void aec_priv_reset_filter(
+        bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases);
+
+void aec_priv_copy_filter(
+        bfp_complex_s32_t *H_hat_dst,
+        const bfp_complex_s32_t *H_hat_src,
+        unsigned num_x_channels,
+        unsigned num_dst_phases,
+        unsigned num_src_phases);
+
+void aec_priv_bfp_complex_s32_copy(
+        bfp_complex_s32_t *dst,
+        const bfp_complex_s32_t *src);
+
+void aec_priv_bfp_s32_reset(bfp_s32_t *a);
+
+void aec_priv_bfp_complex_s32_reset(bfp_complex_s32_t *a);
+
+void aec_priv_compare_filters(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state);
+
+void aec_priv_calc_coherence_mu( //invoked once per frame from aec_compare_filters_and_calc_mu()
+        coherence_mu_params_t *coh_mu_state,
+        const coherence_mu_config_params_t *coh_conf,
+        const float_s32_t *sum_X_energy,
+        const int32_t *shadow_flag,
+        unsigned num_y_channels,
+        unsigned num_x_channels);
+
+void aec_priv_update_total_X_energy(
+        bfp_s32_t *X_energy,
+        float_s32_t *max_X_energy,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *X_data,
+        unsigned num_phases,
+        unsigned recalc_bin);
+
+void aec_priv_update_X_fifo_and_calc_sigmaXX(
+        bfp_complex_s32_t *X_fifo,
+        bfp_s32_t *sigma_XX,
+        float_s32_t *sum_X_energy,
+        const bfp_complex_s32_t *X_data,
+        unsigned num_phases,
+        uint32_t sigma_xx_shift);
+
+void aec_priv_calc_Error_and_Y_hat(
+        bfp_complex_s32_t *Error,
+        bfp_complex_s32_t *Y_hat,
+        const bfp_complex_s32_t *Y,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases,
+        int32_t bypass_enabled);
+
+void aec_priv_calc_coherence(
+        coherence_mu_params_t *coh_mu_state,
+        const bfp_s32_t *y,
+        const bfp_s32_t *y_hat,
+        const aec_config_params_t *conf);
+
+void aec_priv_create_output(
+        bfp_s32_t *output,
+        bfp_s32_t *overlap,
+        bfp_s32_t *error,
+        const aec_config_params_t *conf);
+
+/// Calculate inverse X energy for a channel over a range of bins
+void aec_priv_calc_inverse(
+        bfp_s32_t *input);
+
+/// Calculate denominator that is used in the inv_X_energy = 1/denom calculation
+void aec_priv_calc_inv_X_energy_denom(
+        bfp_s32_t *inv_X_energy_denom,
+        const bfp_s32_t *X_energy,
+        const bfp_s32_t *sigma_XX,
+        const aec_config_params_t *conf,
+        float_s32_t delta,
+        unsigned is_shadow);
+
+void aec_priv_calc_inv_X_energy(
+        bfp_s32_t *inv_X_energy,
+        const bfp_s32_t *X_energy,
+        const bfp_s32_t *sigma_XX,
+        const aec_config_params_t *conf,
+        float_s32_t delta,
+        unsigned is_shadow);
+
+void aec_priv_filter_adapt(
+        bfp_complex_s32_t *H_hat,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *T,
+        unsigned num_x_channels,
+        unsigned num_phases);
+
+void aec_priv_compute_T(
+        bfp_complex_s32_t *T,
+        const bfp_complex_s32_t *Error,
+        const bfp_s32_t *inv_X_energy,
+        float_s32_t mu);
+
+void aec_priv_init_config_params(
+        aec_config_params_t *config_params);
+
+void aec_priv_calc_delta(
+        float_s32_t *delta, 
+        const float_s32_t *max_X_energy,
+        aec_config_params_t *conf,
+        float_s32_t scale,
+        int channels);
+
+float_s32_t aec_priv_calc_corr_factor(
+        bfp_s32_t *y,
+        bfp_s32_t *yhat);
+
+#endif
diff --git a/modules/lib_aec/src/aec_priv_impl.c b/modules/lib_aec/src/aec_priv_impl.c
new file mode 100644
index 000000000..4aa0d0374
--- /dev/null
+++ b/modules/lib_aec/src/aec_priv_impl.c
@@ -0,0 +1,983 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "aec_defines.h"
+#include "aec_api.h"
+#include "aec_priv.h"
+
+void aec_priv_main_init( //Initialise the main filter state and the shared state; all BFP buffers are carved out of mem_pool
+        aec_state_t *state, //main filter state; zeroed, then populated
+        aec_shared_state_t *shared_state, //shared state; zeroed, then populated. state->shared_state is pointed at it
+        uint8_t *mem_pool, //backing memory for every buffer assigned below; its size is not checked here
+        unsigned num_y_channels, //number of y channels to set up
+        unsigned num_x_channels, //number of x channels to set up
+        unsigned num_phases) //number of phases per x channel in the main filter
+{ 
+    memset(state, 0, sizeof(aec_state_t));
+    //reset shared_state. Only done in main_init()
+    memset(shared_state, 0, sizeof(aec_shared_state_t));
+
+    uint8_t *available_mem_start = (uint8_t*)mem_pool; //bump pointer: advanced past each buffer as it is assigned
+
+    state->shared_state = shared_state;
+    //Initialise number of y and x channels
+    state->shared_state->num_y_channels = num_y_channels;
+    state->shared_state->num_x_channels = num_x_channels;
+    //Initialise number of phases
+    state->num_phases = num_phases;
+
+    //y: one AEC_PROC_FRAME_LENGTH vector per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_s32_init(&state->shared_state->y[ch], (int32_t*)available_mem_start, -31, (AEC_PROC_FRAME_LENGTH), 0); //input data is 1.31 so initialising with exp -31
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH + 2)*sizeof(int32_t)); //2 extra samples of memory allocated. state->shared_state->y[ch].length is still AEC_PROC_FRAME_LENGTH though
+    }
+    //x: one AEC_PROC_FRAME_LENGTH vector per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->shared_state->x[ch], (int32_t*)available_mem_start, -31, (AEC_PROC_FRAME_LENGTH), 0); //input data is 1.31 so initialising with exp -31
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH + 2)*sizeof(int32_t)); //2 extra samples of memory allocated. state->shared_state->x[ch].length is still AEC_PROC_FRAME_LENGTH though
+    }
+    //prev_y: (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE) samples per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_s32_init(&state->shared_state->prev_y[ch], (int32_t*)available_mem_start, -31, (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE), 0); //input data is 1.31 so initialising with exp -31
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE)*sizeof(int32_t));
+    }
+    //prev_x: (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE) samples per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->shared_state->prev_x[ch], (int32_t*)available_mem_start, -31, (AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE), 0); //input data is 1.31 so initialising with exp -31
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH - AEC_FRAME_ADVANCE)*sizeof(int32_t));
+    }
+    
+    //H_hat: num_x_channels*num_phases spectra of (AEC_PROC_FRAME_LENGTH/2)+1 bins per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        for(unsigned ph=0; ph<(num_x_channels * num_phases); ph++) {
+            bfp_complex_s32_init(&state->H_hat[ch][ph], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+            available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+        }
+    }
+    //X_fifo: num_phases spectra per x channel, held in the shared state
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        for(unsigned ph=0; ph<num_phases; ph++) {
+            bfp_complex_s32_init(&state->shared_state->X_fifo[ch][ph], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+            available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+        }
+    }
+    //initialise Error: one spectrum per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_complex_s32_init(&state->Error[ch], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+    }
+    //Initialise Y_hat: one spectrum per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_complex_s32_init(&state->Y_hat[ch], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+    }
+
+    //X_energy: one real value per bin, per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->X_energy[ch], (int32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0); 
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(int32_t)); 
+    }
+    //sigma_XX: one real value per bin, per x channel, held in the shared state
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->shared_state->sigma_XX[ch], (int32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(int32_t)); 
+    }
+    //inv_X_energy: one real value per bin, per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->inv_X_energy[ch], (int32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(int32_t)); 
+    }
+
+    //overlap: 32 samples per y channel (the same length as the WOLA_window tables in this file)
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_s32_init(&state->overlap[ch], (int32_t*)available_mem_start, -1024, 32, 0);
+        available_mem_start += (32*sizeof(int32_t)); 
+    }
+    int memory_used = available_mem_start - (uint8_t*)mem_pool; //total bytes assigned above
+    memset(mem_pool, 0, memory_used); //zero the data of every buffer assigned above
+
+    //Initialise ema energy: only the exponents are set, the mantissas stay 0 from the memsets above
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        state->shared_state->y_ema_energy[ch].exp = -1024;
+        state->error_ema_energy[ch].exp = -1024;
+    }
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        state->shared_state->x_ema_energy[ch].exp = -1024;
+    }
+    //fractional regularisation scalefactor (aec_priv_shadow_init() uses 1e-3 for the shadow filter)
+    state->delta_scale = double_to_float_s32((double)1e-5);
+
+    //Initialise aec config params
+    aec_priv_init_config_params(&state->shared_state->config_params);
+
+    //Initialise coherence mu params
+    coherence_mu_params_t *coh_params = state->shared_state->coh_mu_state;
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        coh_params[ch].coh = double_to_float_s32(1.0);
+        coh_params[ch].coh_slow = double_to_float_s32(0.0);
+        coh_params[ch].mu_coh_count = 0;
+        coh_params[ch].mu_shad_count = 0;
+    }
+
+    //Initialise shadow filter params. Must follow aec_priv_init_config_params(), which provides shadow_reset_timer
+    shadow_filter_params_t *shadow_params = &state->shared_state->shadow_filter_params;
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        shadow_params->shadow_flag[ch] = EQUAL;
+        shadow_params->shadow_reset_count[ch] = -(state->shared_state->config_params.shadow_filt_conf.shadow_reset_timer); //negative start; aec_priv_compare_filters() counts it up while it is < 0
+        shadow_params->shadow_better_count[ch] = 0;
+    }
+}
+
+void aec_priv_shadow_init( //Initialise the shadow filter state; its BFP buffers are carved out of mem_pool
+        aec_state_t *state, //shadow filter state; if NULL the function returns without doing anything
+        aec_shared_state_t *shared_state, //shared state; read for the channel counts and NOT reset here
+        uint8_t *mem_pool, //backing memory for every buffer assigned below; its size is not checked here
+        unsigned num_phases) //number of phases per x channel in the shadow filter
+{
+    if(state == NULL) {
+        return;
+    }
+    memset(state, 0, sizeof(aec_state_t));
+    uint8_t *available_mem_start = (uint8_t*)mem_pool; //bump pointer: advanced past each buffer as it is assigned
+    
+    //initialise number of phases
+    state->num_phases = num_phases;
+
+    state->shared_state = shared_state;
+    unsigned num_y_channels = state->shared_state->num_y_channels; //channel counts are written by aec_priv_main_init()
+    unsigned num_x_channels = state->shared_state->num_x_channels;
+
+    //H_hat: num_x_channels*num_phases spectra of (AEC_PROC_FRAME_LENGTH/2)+1 bins per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        for(unsigned ph=0; ph<(num_x_channels * num_phases); ph++) {
+            bfp_complex_s32_init(&state->H_hat[ch][ph], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+            available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+        }
+    }
+    //initialise Error: one spectrum per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_complex_s32_init(&state->Error[ch], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+    }
+    //Initialise Y_hat: one spectrum per y channel
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_complex_s32_init(&state->Y_hat[ch], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+    }
+    //initialise T: one spectrum per x channel (aec_priv_main_init() assigns no memory to T)
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_complex_s32_init(&state->T[ch], (complex_s32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(complex_s32_t)); 
+    }
+
+    //X_energy: one real value per bin, per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+       bfp_s32_init(&state->X_energy[ch], (int32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0); 
+       available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(int32_t)); 
+    }
+    //inv_X_energy: one real value per bin, per x channel
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_s32_init(&state->inv_X_energy[ch], (int32_t*)available_mem_start, -1024, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+        available_mem_start += ((AEC_PROC_FRAME_LENGTH/2 + 1)*sizeof(int32_t)); 
+    }
+
+    //overlap: 32 samples per y channel (the same length as the WOLA_window tables in this file)
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        bfp_s32_init(&state->overlap[ch], (int32_t*)available_mem_start, -1024, 32, 0);
+        available_mem_start += (32*sizeof(int32_t)); 
+    }
+
+    int memory_used = available_mem_start - (uint8_t*)mem_pool; //total bytes assigned above
+    memset(mem_pool, 0, memory_used); //zero the data of every buffer assigned above
+
+    //Initialise ema energy: only the exponent is set, the mantissa stays 0 from the memset of state above
+    for(unsigned ch=0; ch<num_y_channels; ch++) {
+        state->error_ema_energy[ch].exp = -1024;
+    }
+    //fractional regularisation scalefactor (aec_priv_main_init() uses 1e-5 for the main filter)
+    state->delta_scale = double_to_float_s32((double)1e-3);
+}
+
+void aec_priv_bfp_complex_s32_copy(
+        bfp_complex_s32_t *dst,
+        const bfp_complex_s32_t *src)
+{   //Duplicate src into dst: exponent, headroom and dst->length mantissas. dst->length itself is left untouched,
+    //so both vectors are assumed to be the same length (src must hold at least dst->length elements)
+    dst->hr = src->hr;
+    dst->exp = src->exp;
+    memcpy(dst->data, src->data, sizeof(complex_s32_t) * dst->length);
+}
+
+void aec_priv_bfp_s32_reset(bfp_s32_t *a)
+{   //Put a real BFP vector back to its empty state: zeroed data, exp -1024 and hr 31. The length is unchanged
+    a->hr = 31;
+    a->exp = -1024;
+    memset(a->data, 0, sizeof(int32_t) * a->length);
+}
+
+void aec_priv_bfp_complex_s32_reset(bfp_complex_s32_t *a)
+{   //Put a complex BFP vector back to its empty state: zeroed data, exp -1024 and hr 31. The length is unchanged
+    a->hr = 31;
+    a->exp = -1024;
+    memset(a->data, 0, sizeof(complex_s32_t) * a->length);
+}
+
+void aec_priv_reset_filter(
+        bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases)
+{   //Reset all num_x_channels*num_phases phase spectra of one filter, walking H_hat from its first entry upwards
+    for(unsigned remaining=num_x_channels*num_phases; remaining>0; remaining--) {
+        aec_priv_bfp_complex_s32_reset(H_hat++);
+    }
+}
+
+void aec_priv_copy_filter(
+        bfp_complex_s32_t *H_hat_dst,
+        const bfp_complex_s32_t *H_hat_src,
+        unsigned num_x_channels,
+        unsigned num_dst_phases,
+        unsigned num_src_phases)
+{
+    /* Copy a filter between two states that may use a different number of phases per x channel.
+     * Phase ph of x channel ch sits at index ch*num_phases + ph in each filter. For every channel the first
+     * min(num_dst_phases, num_src_phases) phases are copied and any further destination phases are reset,
+     * so a longer destination is left with no stale phases.
+     * Counts and indices are unsigned, like the parameters, so no signed/unsigned comparisons are involved.
+     */
+    const unsigned phases_to_copy = (num_dst_phases < num_src_phases) ? num_dst_phases : num_src_phases;
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_complex_s32_t *dst = &H_hat_dst[ch * num_dst_phases];
+        const bfp_complex_s32_t *src = &H_hat_src[ch * num_src_phases];
+        //Copy the H_hat_src phases into H_hat_dst
+        for(unsigned ph=0; ph<phases_to_copy; ph++) {
+            aec_priv_bfp_complex_s32_copy(&dst[ph], &src[ph]);
+        }
+        //Zero the remaining H_hat_dst phases (only runs when dst has more phases than src)
+        for(unsigned ph=phases_to_copy; ph<num_dst_phases; ph++) {
+            aec_priv_bfp_complex_s32_reset(&dst[ph]);
+        }
+    }
+}
+
+void aec_priv_compare_filters( //Per y channel: compare shadow and main filter errors, set shadow_flag and copy/reset/zero filters accordingly
+        aec_state_t *main_state, //main filter state; its H_hat and Error may be overwritten from the shadow filter
+        aec_state_t *shadow_state) //shadow filter state; its H_hat, Error and overall_Error may be reset or overwritten
+{
+    aec_shared_state_t *shared_state = main_state->shared_state;
+    shadow_filt_config_params_t *shadow_conf = &shared_state->config_params.shadow_filt_conf;
+    shadow_filter_params_t *shadow_params = &shared_state->shadow_filter_params;
+    unsigned ref_low_all_xch = 1; //stays 1 only if sum_X_energy of every x channel is below x_energy_thresh
+    for(unsigned ch=0; ch<main_state->shared_state->num_x_channels; ch++) {
+        if(float_s32_gte(shared_state->sum_X_energy[ch], shadow_conf->x_energy_thresh)) {
+            ref_low_all_xch = 0;
+            break;
+        }
+    }
+    for(unsigned ch=0; ch<main_state->shared_state->num_y_channels; ch++) {
+        main_state->shared_state->overall_Y[ch].exp -= 1; //halves overall_Y in place (shared state is modified): Y_data is 512 samples, Errors are 272 (inc window), approx half the size
+        float_s32_t shadow_copy_thresh_x_Ov_Error = float_s32_mul(shadow_conf->shadow_copy_thresh, main_state->overall_Error[ch]);
+        float_s32_t shadow_sigma_thresh_x_Ov_Error = float_s32_mul(shadow_conf->shadow_sigma_thresh, main_state->overall_Error[ch]);
+        float_s32_t shadow_reset_thresh_x_Ov_Error = float_s32_mul(shadow_conf->shadow_reset_thresh, main_state->overall_Error[ch]);
+        //# check if shadow or reference filter will be used and flag accordingly
+        if(ref_low_all_xch) {
+            //# input level is low, so error is unreliable, do nothing
+            shadow_params->shadow_flag[ch] = LOW_REF;
+            continue;
+        }
+        //# if error way bigger than input, reset- should percolate through to main filter if better
+        //(this check is not part of the if/else-if chain below; that chain is still evaluated afterwards)
+        if(float_s32_gt(shadow_state->overall_Error[ch], shared_state->overall_Y[ch]) && shadow_params->shadow_reset_count[ch] >= 0)
+        {
+            shadow_params->shadow_flag[ch] = ERROR;
+            aec_priv_reset_filter(shadow_state->H_hat[ch], shadow_state->shared_state->num_x_channels, shadow_state->num_phases);
+            //Y -> shadow Error
+            aec_priv_bfp_complex_s32_copy(&shadow_state->Error[ch], &shared_state->Y[ch]);
+            shadow_state->overall_Error[ch] = shared_state->overall_Y[ch];
+            //# give the zeroed filter time to reconverge (or redeconverge)
+            shadow_params->shadow_reset_count[ch] = -(int)shadow_conf->shadow_reset_timer;
+        }
+        if(float_s32_gte(shadow_copy_thresh_x_Ov_Error, shadow_state->overall_Error[ch]) &&
+                (shadow_params->shadow_better_count[ch] > shadow_conf->shadow_better_thresh)) {
+            //# if shadow filter is much better, and has been for several frames,
+            //# copy to reference filter
+            shadow_params->shadow_flag[ch] = COPY;
+            shadow_params->shadow_reset_count[ch] = 0;
+            shadow_params->shadow_better_count[ch] += 1;
+            //shadow Error -> Error
+            aec_priv_bfp_complex_s32_copy(&main_state->Error[ch], &shadow_state->Error[ch]);
+            //shadow filter -> main filter
+            aec_priv_copy_filter(main_state->H_hat[ch], shadow_state->H_hat[ch], main_state->shared_state->num_x_channels, main_state->num_phases, shadow_state->num_phases);
+        }
+        else if(float_s32_gte(shadow_sigma_thresh_x_Ov_Error, shadow_state->overall_Error[ch]))
+        {
+            shadow_params->shadow_better_count[ch] += 1;
+            if(shadow_params->shadow_better_count[ch] > shadow_conf->shadow_better_thresh) {
+                //# if shadow is somewhat better, flag SIGMA; sigma_XX is reset at the end only if every y channel is flagged above EQUAL
+                shadow_params->shadow_flag[ch] = SIGMA;
+            }
+            else {
+                shadow_params->shadow_flag[ch] = EQUAL;
+            }
+        }
+        else if(float_s32_gte(shadow_state->overall_Error[ch], shadow_reset_thresh_x_Ov_Error) && 
+                shadow_params->shadow_reset_count[ch] >= 0)
+        {
+            //# if shadow filter is worse than reference, reset provided that
+            //# the delay is small and we're not letting the shadow filter reconverge after zeroing
+            shadow_params->shadow_reset_count[ch] += 1;
+            shadow_params->shadow_better_count[ch] = 0;
+            if(shadow_params->shadow_reset_count[ch] > shadow_conf->shadow_zero_thresh) {
+                //# if shadow filter has been reset several times in a row, reset to zeros
+                shadow_params->shadow_flag[ch] = ZERO;
+                aec_priv_reset_filter(shadow_state->H_hat[ch], shadow_state->shared_state->num_x_channels, shadow_state->num_phases);
+                aec_priv_bfp_complex_s32_copy(&shadow_state->Error[ch], &shared_state->Y[ch]);
+                //# give the zeroed filter time to reconverge (or redeconverge)
+                shadow_params->shadow_reset_count[ch] = -(int)shadow_conf->shadow_reset_timer;
+            }
+            else {
+                //debug_printf("Frame %d, main -> shadow filter copy.\n",frame_counter);
+                //# otherwise copy the main filter to the shadow filter
+                aec_priv_copy_filter(shadow_state->H_hat[ch], main_state->H_hat[ch], main_state->shared_state->num_x_channels, shadow_state->num_phases, main_state->num_phases);
+                aec_priv_bfp_complex_s32_copy(&shadow_state->Error[ch], &main_state->Error[ch]);
+                shadow_params->shadow_flag[ch] = RESET;
+            }
+        }
+        else {
+            //# shadow filter is comparable to main filter, 
+            //# or we're waiting for it to reconverge after zeroing
+            shadow_params->shadow_better_count[ch] = 0;
+            shadow_params->shadow_flag[ch] = EQUAL;
+            if(shadow_params->shadow_reset_count[ch] < 0) {
+                shadow_params->shadow_reset_count[ch] += 1; //count the negative hold-off timer back up towards 0
+            }
+        }
+    }
+    unsigned all_channels_shadow = 1; //cleared if any y channel's flag is <= EQUAL (relies on the numeric order of the flag values)
+    for(unsigned ch=0; ch<main_state->shared_state->num_y_channels; ch++) {
+        if(shadow_params->shadow_flag[ch] <= EQUAL) {
+            all_channels_shadow = 0;
+        }
+    }
+
+    if(all_channels_shadow) { //every y channel is flagged above EQUAL: reset sigma_XX of all x channels
+        for(unsigned ch=0; ch<main_state->shared_state->num_x_channels; ch++)
+        {
+            aec_priv_bfp_s32_reset(&shared_state->sigma_XX[ch]);
+        }
+    }
+}
+
+void aec_priv_calc_coherence_mu( //Update the hold-off counters and compute coh_mu for every (y channel, x channel) pair
+        coherence_mu_params_t *coh_mu_state, //per y channel state (array of num_y_channels); counters and coh_mu are updated
+        const coherence_mu_config_params_t *coh_conf, //thresholds, timers and the adaption mode
+        const float_s32_t *sum_X_energy, //reference energy per x channel (array of num_x_channels)
+        const int32_t *shadow_flag, //shadow filter flag per y channel (array of num_y_channels)
+        unsigned num_y_channels,
+        unsigned num_x_channels)
+{
+    //# If the coherence has been low within the last mu_coh_time frames, keep the count != 0
+    for(unsigned ch=0; ch<num_y_channels; ch++)
+    {
+        if(coh_mu_state[ch].mu_coh_count > 0) {
+            coh_mu_state[ch].mu_coh_count += 1;
+        }
+        if(coh_mu_state[ch].mu_coh_count > coh_conf->mu_coh_time) {
+            coh_mu_state[ch].mu_coh_count = 0;
+        }
+    }
+    //# If the shadow filter has been used within the last mu_shad_time frames, keep the count != 0
+    for(unsigned ch=0; ch<num_y_channels; ch++)
+    {
+        if(shadow_flag[ch] == COPY) {
+            coh_mu_state[ch].mu_shad_count = 1;
+        }
+        else if(coh_mu_state[ch].mu_shad_count > 0) {
+            coh_mu_state[ch].mu_shad_count += 1;
+        }
+        if(coh_mu_state[ch].mu_shad_count > coh_conf->mu_shad_time) {
+            coh_mu_state[ch].mu_shad_count = 0;
+        }
+    }
+    //# smallest coh_slow over all y channels
+    float_s32_t min_coh_slow = coh_mu_state[0].coh_slow;
+    for(unsigned ch=1; ch<num_y_channels; ch++)
+    {
+        if(float_s32_gt(min_coh_slow, coh_mu_state[ch].coh_slow)) {
+            min_coh_slow = coh_mu_state[ch].coh_slow;
+        }
+    }
+    //# threshold for coherence between y and y_hat
+    float_s32_t CC_thres = float_s32_mul(min_coh_slow, coh_conf->coh_thresh_slow);
+    for(unsigned ch=0; ch<num_y_channels; ch++)
+    {
+        if(shadow_flag[ch] >= SIGMA) {
+            //# if the shadow filter has triggered, override any drop in coherence
+            coh_mu_state[ch].mu_coh_count = 0;
+        }
+        else {
+            //# otherwise if the coherence is low start the count
+            if(float_s32_gt(coh_conf->coh_thresh_abs, coh_mu_state[ch].coh)) {
+                coh_mu_state[ch].mu_coh_count = 1;
+            }
+        }
+    }
+    if(coh_conf->adaption_config == AEC_ADAPTION_AUTO){
+        //# Order of priority for coh_mu:
+        //# 1) if the reference energy is low, don't converge (not enough SNR to be accurate)
+        //# 2) if shadow filter has triggered recently, converge fast
+        //# 3) if coherence has dropped recently, don't converge
+        //# 4) otherwise: 1 if coh > coh_slow, a squared ratio if coh > CC_thres, else 0 (see the branches below)
+        for(unsigned ch=0; ch<num_y_channels; ch++) {
+            if(coh_mu_state[ch].mu_shad_count >= 1)
+            {
+                for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                    coh_mu_state[ch].coh_mu[x_ch] = double_to_float_s32(1.0); //TODO profile double_to_float_s32
+                }
+            }
+            else if(coh_mu_state[ch].mu_coh_count > 0)
+            {
+                for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                    coh_mu_state[ch].coh_mu[x_ch] = double_to_float_s32(0);
+                }
+            }
+            else { //# if yy_hat coherence denotes absence of near-end/noise
+                if(float_s32_gt(coh_mu_state[ch].coh, coh_mu_state[ch].coh_slow)) {
+                    for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                        coh_mu_state[ch].coh_mu[x_ch] = double_to_float_s32(1.0);
+                    }
+                }
+                else if(float_s32_gt(coh_mu_state[ch].coh, CC_thres))
+                {
+                    //# scale coh_mu depending on how far above the threshold it is
+                    //self.mu[y_ch] = ((self.coh[y_ch]-CC_thres)/(self.coh_slow[y_ch]-CC_thres))**2
+                    float_s32_t s1 = float_s32_sub(coh_mu_state[ch].coh, CC_thres);
+                    float_s32_t s2 = float_s32_sub(coh_mu_state[ch].coh_slow, CC_thres); //here coh_slow >= coh > CC_thres, so s2 is positive
+                    float_s32_t s3 = float_s32_div(s1, s2);
+                    s3 = float_s32_mul(s3, s3);
+                    for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                        coh_mu_state[ch].coh_mu[x_ch] = s3;
+                    }
+                }
+                else {
+                    for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                        coh_mu_state[ch].coh_mu[x_ch] = double_to_float_s32(0);
+                    }
+                }
+            }
+        }
+        float_s32_t max_ref_energy = sum_X_energy[0]; //largest reference energy over all x channels
+        for(unsigned x_ch=1; x_ch<num_x_channels; x_ch++) {
+            if(float_s32_gt(sum_X_energy[x_ch], max_ref_energy)) {
+                max_ref_energy = sum_X_energy[x_ch];
+            }
+        }
+        //np.max(ref_energy_log)-20 is done as (max_ref_energy_not_log*(pow(10, -20/10)))
+        float_s32_t max_ref_energy_minus_20dB = float_s32_mul(max_ref_energy, coh_conf->thresh_minus20dB);
+        for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+            //if ref_energy_log[x_ch] <= ref_energy_thresh or ref_energy_log[x_ch] < np.max(ref_energy_log)-20: 
+            //        self.mu[:, x_ch] = 0
+            if(float_s32_gte(coh_conf->x_energy_thresh, sum_X_energy[x_ch]) ||
+                float_s32_gt(max_ref_energy_minus_20dB, sum_X_energy[x_ch])
+                )
+            {
+                for(unsigned y_ch=0; y_ch<num_y_channels; y_ch++) {
+                    coh_mu_state[y_ch].coh_mu[x_ch] = double_to_float_s32(0);
+                }
+            }
+        }
+        for(unsigned y_ch=0; y_ch<num_y_channels; y_ch++) { //finally scale every coh_mu by mu_scalar
+            for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                coh_mu_state[y_ch].coh_mu[x_ch] = float_s32_mul(coh_mu_state[y_ch].coh_mu[x_ch], coh_conf->mu_scalar);
+            }
+        }
+    }
+    else if(coh_conf->adaption_config == AEC_ADAPTION_FORCE_ON){ //fixed mu: force_adaption_mu_q30 used as mantissa with exponent -30
+        for(unsigned y_ch=0; y_ch<num_y_channels; y_ch++) {
+            for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                coh_mu_state[y_ch].coh_mu[x_ch].mant = coh_conf->force_adaption_mu_q30;
+                coh_mu_state[y_ch].coh_mu[x_ch].exp = -30;
+            }
+        }
+    }
+    else if(coh_conf->adaption_config == AEC_ADAPTION_FORCE_OFF){ //mu forced to 0 for every channel pair
+        for(unsigned y_ch=0; y_ch<num_y_channels; y_ch++) {
+            for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+                coh_mu_state[y_ch].coh_mu[x_ch] = double_to_float_s32(0);
+            }
+        }
+    }
+}
+
+void aec_priv_bfp_complex_s32_recalc_energy_one_bin(
+        bfp_s32_t *X_energy,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *X,
+        unsigned num_phases,
+        unsigned recalc_bin)
+{
+    //Recompute bin recalc_bin of X_energy from scratch as the sum of |X_fifo[ph]|^2 over the first
+    //(num_phases-1) X_fifo phases plus |X|^2, and store it in place of the value currently held for that bin.
+    //Single-element BFP views are used so that the ordinary BFP vector functions can do the arithmetic.
+    int32_t energy_acc_mant = 0; //running sum, starts as a zero mantissa with exponent -1024
+    int32_t sq_mag_mant;
+    bfp_s32_t energy_acc, sq_mag;
+    bfp_complex_s32_t bin_view;
+    bfp_s32_init(&energy_acc, &energy_acc_mant, -1024, 1, 0);
+    bfp_s32_init(&sq_mag, &sq_mag_mant, 0, 1, 0);
+
+    for(unsigned ph=0; ph<num_phases-1; ph++) {
+        const bfp_complex_s32_t *phase = &X_fifo[ph];
+        bfp_complex_s32_init(&bin_view, &phase->data[recalc_bin], phase->exp, 1, 0);
+        bin_view.hr = phase->hr; //take over the headroom recorded for the whole phase vector
+        bfp_complex_s32_squared_mag(&sq_mag, &bin_view);
+        bfp_s32_add(&energy_acc, &energy_acc, &sq_mag);
+    }
+    //Same again for X, which is passed separately from X_fifo
+    bfp_complex_s32_init(&bin_view, &X->data[recalc_bin], X->exp, 1, 0);
+    bin_view.hr = X->hr;
+    bfp_complex_s32_squared_mag(&sq_mag, &bin_view);
+    bfp_s32_add(&energy_acc, &energy_acc, &sq_mag);
+
+    //Express the sum with X_energy's exponent so that its mantissa can be stored directly
+    bfp_s32_use_exponent(&energy_acc, X_energy->exp);
+    //TODO manage headroom mismatch
+    X_energy->data[recalc_bin] = energy_acc.data[0];
+    if(X_energy->hr > energy_acc.hr) { X_energy->hr = energy_acc.hr; }
+}
+
+void aec_priv_update_total_X_energy(
+        bfp_s32_t *X_energy,
+        float_s32_t *max_X_energy,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *X,
+        unsigned num_phases,
+        unsigned recalc_bin)
+{
+    /* Slide the per-bin energy window by one phase: X_energy -= |oldest X_fifo phase|^2, X_energy += |X|^2,
+     * then recompute bin recalc_bin from scratch and report the largest bin through *max_X_energy.
+     * X_fifo is ordered from newest to oldest phase, so the oldest phase is X_fifo[num_phases-1].
+     */
+    int32_t DWORD_ALIGNED sq_mag_mem[AEC_PROC_FRAME_LENGTH/2 + 1];
+    bfp_s32_t sq_mag;
+    bfp_s32_init(&sq_mag, sq_mag_mem, 0, AEC_PROC_FRAME_LENGTH/2+1, 0);
+
+    const bfp_complex_s32_t *oldest_phase = &X_fifo[num_phases-1];
+    bfp_complex_s32_squared_mag(&sq_mag, oldest_phase);
+    bfp_s32_sub(X_energy, X_energy, &sq_mag); //subtract oldest phase
+    bfp_complex_s32_squared_mag(&sq_mag, X);
+    bfp_s32_add(X_energy, X_energy, &sq_mag); //add newest phase
+
+    aec_priv_bfp_complex_s32_recalc_energy_one_bin(X_energy, X_fifo, X, num_phases, recalc_bin);
+    *max_X_energy = bfp_s32_max(X_energy);
+
+    /* Guard against a divide by zero when inv_X_energy is later calculated in aec_priv_calc_inverse().
+     * Scenario 1: all X_energy bins are 0, so max_X_energy has a 0 mantissa, but the exponent can still be
+     * something reasonably big (like -34) while delta ends up as delta min (non-zero mantissa, exponent -97).
+     * inv_X_energy = 1/denom with denom = (0, -34) + (non_zero, -97) is then still calculated with a 0
+     * mantissa. Setting X_energy->exp far below delta min's exponent (-1024) makes the sum come out as
+     * (non_zero, -97) instead. The original author notes this situation could not be recreated.
+     * Scenario 2: only a few X_energy bins are 0, with a reasonably big exponent, and delta is delta min.
+     * This cannot be found by checking max_X_energy->mant == 0 and is addressed in
+     * aec_priv_calc_inv_X_energy_denom().
+     */
+    //Scenario 1 (All bins 0 mant) fix
+    if(0 == max_X_energy->mant) {
+        X_energy->exp = -1024;
+    }
+}
+
+void aec_priv_update_X_fifo_and_calc_sigmaXX(
+        bfp_complex_s32_t *X_fifo,
+        bfp_s32_t *sigma_XX,
+        float_s32_t *sum_X_energy,
+        const bfp_complex_s32_t *X,
+        unsigned num_phases,
+        uint32_t sigma_xx_shift)
+{
+    /* X_fifo update. Rather than keeping a separate index mapping, the bfp_complex_s32_t descriptors are
+     * rotated by one so that X_fifo[0] is always the newest phase and X_fifo[num_phases-1] the oldest.
+     * The buffer that held the oldest phase is recycled as slot 0 and overwritten with X.
+     */
+    bfp_complex_s32_t recycled = X_fifo[num_phases-1];
+    for(int n=num_phases-1; n>=1; n--) {
+        X_fifo[n] = X_fifo[n-1];
+    }
+    //Describe X in the recycled slot, then copy the data of X into its buffer
+    recycled.exp = X->exp;
+    recycled.hr = X->hr;
+    recycled.length = X->length;
+    X_fifo[0] = recycled;
+    memcpy(X_fifo[0].data, X->data, X->length*sizeof(complex_s32_t));
+
+    //Per-bin energy |X|^2 of the new phase; its sum over all bins is returned through sum_X_energy
+    int32_t DWORD_ALIGNED X_sq_mag_mem[AEC_PROC_FRAME_LENGTH/2 + 1];
+    bfp_s32_t X_sq_mag;
+    bfp_s32_init(&X_sq_mag, X_sq_mag_mem, 0, (AEC_PROC_FRAME_LENGTH/2)+1, 0);
+    bfp_complex_s32_squared_mag(&X_sq_mag, X);
+    *sum_X_energy = float_s64_to_float_s32(bfp_s32_sum(&X_sq_mag));
+
+    /* sigma_XX is an exponential moving average of |X|^2 with coefficient 2^-sigma_xx_shift:
+     *   sigma_XX = sigma_XX - sigma_XX*2^-sigma_xx_shift + |X|^2*2^-sigma_xx_shift
+     * Multiplying by 2^-sigma_xx_shift is done by lowering the exponent only.
+     */
+    bfp_s32_t sigma_XX_fraction = *sigma_XX; //same data buffer as sigma_XX, viewed with a lower exponent
+    sigma_XX_fraction.exp -= sigma_xx_shift;
+    X_sq_mag.exp -= sigma_xx_shift;
+
+    //sigma_XX - (sigma_XX * 2^-sigma_xx_shift)
+    bfp_s32_sub(sigma_XX, sigma_XX, &sigma_XX_fraction);
+    //... + (|X|^2 * 2^-sigma_xx_shift)
+    bfp_s32_add(sigma_XX, sigma_XX, &X_sq_mag);
+}
+
+void aec_priv_calc_Error_and_Y_hat(
+        bfp_complex_s32_t *Error,
+        bfp_complex_s32_t *Y_hat,
+        const bfp_complex_s32_t *Y,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *H_hat,
+        unsigned num_x_channels,
+        unsigned num_phases,
+        int32_t bypass_enabled)
+{   //Thin wrapper: forwards every argument to aec_l2_calc_Error_and_Y_hat() with the two extra arguments fixed to 0 and AEC_PROC_FRAME_LENGTH/2 + 1 (presumably start bin and number of bins, i.e. the whole spectrum - confirm against aec_api.h)
+    aec_l2_calc_Error_and_Y_hat(Error, Y_hat, Y, X_fifo, H_hat, num_x_channels, num_phases, 0, AEC_PROC_FRAME_LENGTH/2 + 1, bypass_enabled);
+}
+
+void aec_priv_calc_coherence(
+        coherence_mu_params_t *coh_mu_state,
+        const bfp_s32_t *y_subset,
+        const bfp_s32_t *y_hat_subset,
+        const aec_config_params_t *conf)
+{
+    /* Update the fast (coh) and slow (coh_slow) moving averages of the coherence between y and y_hat
+     * held in coh_mu_state. Formulae implemented below:
+     *   this_coh = abs(sigma_yyhat/(sqrt(sigma_yy)*sqrt(sigma_yhatyhat) + eps))
+     *   coh      = coh_alpha*coh + (1.0 - coh_alpha)*this_coh
+     *   coh_slow = coh_slow_alpha*coh_slow + (1.0 - coh_slow_alpha)*coh
+     */
+    const coherence_mu_config_params_t *cfg = &conf->coh_mu_conf;
+    const float_s32_t unity = double_to_float_s32(1.0); //TODO profile this call
+
+    //sigma_yy, sigma_yhatyhat and sigma_yyhat: dot products of the two input vectors
+    float_s32_t energy_y = float_s64_to_float_s32(bfp_s32_dot(y_subset, y_subset));
+    float_s32_t energy_y_hat = float_s64_to_float_s32(bfp_s32_dot(y_hat_subset, y_hat_subset));
+    float_s32_t cross_corr = float_s64_to_float_s32(bfp_s32_dot(y_subset, y_hat_subset));
+
+    //# Calculate coherence between y and y_hat
+    //The denominator is sqrt(sigma_yy*sigma_yhatyhat) + eps; should that sum still have a zero mantissa,
+    //eps alone is used instead
+    float_s32_t norm = float_s32_add(float_s32_sqrt(float_s32_mul(energy_y, energy_y_hat)), cfg->eps);
+    if(norm.mant == 0) {
+        norm = cfg->eps;
+    }
+    float_s32_t this_coh = float_s32_div(float_s32_abs(cross_corr), norm);
+
+    //# moving average coherence
+    float_s32_t prev_weighted = float_s32_mul(cfg->coh_alpha, coh_mu_state->coh);
+    float_s32_t new_weighted = float_s32_mul(float_s32_sub(unity, cfg->coh_alpha), this_coh);
+    coh_mu_state->coh = float_s32_add(prev_weighted, new_weighted);
+
+    //# update slow moving averages used for thresholding
+    //(the second term uses the value of coh that was just updated above)
+    float_s32_t slow_prev_weighted = float_s32_mul(cfg->coh_slow_alpha, coh_mu_state->coh_slow);
+    float_s32_t slow_new_weighted = float_s32_mul(float_s32_sub(unity, cfg->coh_slow_alpha), coh_mu_state->coh);
+    coh_mu_state->coh_slow = float_s32_add(slow_prev_weighted, slow_new_weighted);
+}
+
+float_s32_t aec_priv_calc_corr_factor(bfp_s32_t *y, bfp_s32_t *yhat) {
+    /* Correlation factor between y and yhat:
+     *   abs(sigma_yyhat)/(sigma_abs(y)abs(yhat))
+     * NOTE(review): the scratch buffers hold AEC_FRAME_ADVANCE samples but are initialised with y->length
+     * and yhat->length, so callers are presumably expected to pass vectors of at most AEC_FRAME_ADVANCE
+     * samples - confirm against the call sites.
+     */
+    int32_t DWORD_ALIGNED abs_y_mem[AEC_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED abs_yhat_mem[AEC_FRAME_ADVANCE];
+    bfp_s32_t abs_y, abs_yhat;
+    bfp_s32_init(&abs_y, abs_y_mem, 0, y->length, 0);
+    bfp_s32_init(&abs_yhat, abs_yhat_mem, 0, yhat->length, 0);
+    bfp_s32_abs(&abs_y, y);
+    bfp_s32_abs(&abs_yhat, yhat);
+
+    // sigma_abs(y)abs(yhat)
+    float_s32_t abs_corr = float_s64_to_float_s32(bfp_s32_dot(&abs_y, &abs_yhat));
+    if(abs_corr.mant == 0) {
+        /** A zero denominator implies sigma_abs(y)abs(yhat) is 0, which in turn means y or y_hat is 0.
+         * y 0 means no near end, y_hat 0 means no far end. For both of these we don't want AGC LC to
+         * apply extra attenuation, so the correlation factor is set to 0 */
+        return (float_s32_t){0, -31};
+    }
+
+    // sigma_yyhat
+    float_s32_t cross_corr = float_s64_to_float_s32(bfp_s32_dot(y, yhat));
+    // abs(sigma_yyhat)/sigma_abs(y)abs(yhat)
+    return float_s32_div(float_s32_abs(cross_corr), abs_corr);
+}
+
+// Rising 32-sample half of the Hanning window (used with exponent -31) applied to the filter error to remove discontinuities
+static const int32_t WOLA_window[32] = {
+       4861986,   19403913,   43494088,   76914346,  119362028,  170452721,  229723740,  296638317,
+     370590464,  450910459,  536870911,  627693349,  722555272,  820597594,  920932429, 1022651130,
+    1124832516, 1226551217, 1326886052, 1424928374, 1519790297, 1610612735, 1696573187, 1776893182,
+    1850845329, 1917759906, 1977030925, 2028121618, 2070569300, 2103989558, 2128079733, 2142621660
+};
+
+static const int32_t WOLA_window_flpd[32] = { // WOLA_window in reversed (flipped) order, i.e. the falling half
+    2142621660, 2128079733, 2103989558, 2070569300, 2028121618, 1977030925, 1917759906, 1850845329, 
+    1776893182, 1696573187, 1610612735, 1519790297, 1424928374, 1326886052, 1226551217, 1124832516, 
+    1022651130, 920932429, 820597594, 722555272, 627693349, 536870911, 450910459, 370590464, 
+    296638317, 229723740, 170452721, 119362028, 76914346, 43494088, 19403913, 4861986, 
+};
+
+void aec_priv_create_output(
+        bfp_s32_t *output,
+        bfp_s32_t *overlap,
+        bfp_s32_t *error,
+        const aec_config_params_t *conf) //conf is not referenced in this function
+{ //Window error in place, write one overlap-added output frame (only when output->data is non-NULL) and save the windowed tail as the next overlap
+    bfp_s32_t win, win_flpd;
+    bfp_s32_init(&win, (int32_t*)&WOLA_window[0], -31, 32, 0);
+    bfp_s32_init(&win_flpd, (int32_t*)&WOLA_window_flpd[0], -31, 32, 0);
+
+    //zero the first AEC_FRAME_ADVANCE samples. NOTE(review): the literal indices 240/480 below assume AEC_FRAME_ADVANCE is 240 - confirm
+    memset(error->data, 0, AEC_FRAME_ADVANCE*sizeof(int32_t));
+
+    bfp_s32_t chunks[2];
+    bfp_s32_init(&chunks[0], &error->data[240], error->exp, 32, 0); //240-272 fwd win
+    chunks[0].hr = error->hr;
+    bfp_s32_init(&chunks[1], &error->data[480], error->exp, 32, 0); //480-512 flpd win
+    chunks[1].hr = error->hr;
+
+    //window error
+    bfp_s32_mul(&chunks[0], &chunks[0], &win);
+    bfp_s32_mul(&chunks[1], &chunks[1], &win_flpd);
+    //Bring the windowed portions back to the format of the non-windowed region.
+    //Here, we're assuming that the window samples are less than 1 so that windowed region can be safely brought to format of non-windowed portion without risking saturation
+    bfp_s32_use_exponent(&chunks[0], error->exp);
+    bfp_s32_use_exponent(&chunks[1], error->exp);
+    int min_hr = (chunks[0].hr < chunks[1].hr) ? chunks[0].hr : chunks[1].hr;
+    min_hr = (min_hr < error->hr) ? min_hr : error->hr;
+    error->hr = min_hr;
+    
+    //copy error to output
+    if(output->data != NULL) {
+        memcpy(output->data, &error->data[AEC_FRAME_ADVANCE], AEC_FRAME_ADVANCE*sizeof(int32_t));
+        output->length = AEC_FRAME_ADVANCE;
+        output->exp = -31; //both chunks of output are rescaled to exponent -31 below, so report that exponent rather than error->exp
+        output->hr = error->hr;
+
+        //overlap add
+        //split output into 2 chunks. chunk[0] with first 32 samples of output. chunk[1] has rest of the 240-32 samples of output
+        bfp_s32_init(&chunks[0], &output->data[0], error->exp, 32, 0); //the copied samples are still in error's format here
+        chunks[0].hr = output->hr;
+        bfp_s32_init(&chunks[1], &output->data[32], error->exp, 240-32, 0);
+        chunks[1].hr = output->hr;
+
+        //Add previous frame's overlap to first 32 samples of output
+        bfp_s32_add(&chunks[0], &chunks[0], overlap);
+        bfp_s32_use_exponent(&chunks[0], -31); //bring the overlapped-added part back to 1.31
+        bfp_s32_use_exponent(&chunks[1], -31); //bring the rest of output to 1.31
+        output->hr = (chunks[0].hr < chunks[1].hr) ? chunks[0].hr : chunks[1].hr;
+    }
+    
+    //update overlap: keep the flipped-window tail (samples 480-512) for the next frame, in error's format
+    memcpy(overlap->data, &error->data[480], 32*sizeof(int32_t));
+    overlap->hr = error->hr;
+    overlap->exp = error->exp;
+    return;
+}
+
+void aec_priv_calc_inverse(
+        bfp_s32_t *input)
+{ //Invert input in place via bfp_s32_inverse(); the #else branch is a disabled, faster variant that needs voice_toolbox
+#if 1 //82204 cycles. 2 x-channels, single thread, but gets rid of voice_toolbox dependency
+    bfp_s32_inverse(input, input);
+#else //36323 cycles. 2 x-channels, single thread
+    int32_t min_element = xs3_vect_s32_min(
+                                input->data,
+                                input->length);
+ 
+    // HR_S32() gets headroom of a single int32_t
+    //old aec would calculate shr as HR_S32(min_element) + 2. Since VPU deals with only signed numbers, increase shr by 1 to account for sign bit in the result of the inverse function.
+    int input_shr = HR_S32(min_element) + 2 + 1;
+    //vtb_inv_X_energy
+    input->exp = (-input->exp - 32); //TODO work out this mysterious calculation
+    input->exp -= (32 - input_shr);
+    vtb_inv_X_energy_asm((uint32_t *)input->data, input_shr, input->length);
+    input->hr = 0;
+#endif
+}
+
+void aec_priv_calc_inv_X_energy_denom(
+        bfp_s32_t *inv_X_energy_denom,
+        const bfp_s32_t *X_energy,
+        const bfp_s32_t *sigma_XX,
+        const aec_config_params_t *conf,
+        float_s32_t delta,
+        unsigned is_shadow) {
+    //Writes the denominator to inv_X_energy_denom: frequency-smoothed (sigma_XX*2^gamma_log2 + X_energy) + delta when !is_shadow, else X_energy + delta
+    int gamma_log2 = conf->aec_core_conf.gamma_log2;
+    if(!is_shadow) { //frequency smoothing
+        int32_t norm_denom_buf[AEC_PROC_FRAME_LENGTH/2 + 1];
+        bfp_s32_t norm_denom;
+        bfp_s32_init(&norm_denom, &norm_denom_buf[0], 0, AEC_PROC_FRAME_LENGTH/2+1, 0);
+
+        bfp_s32_t sigma_times_gamma; //aliases sigma_XX->data; the multiply by 2^gamma_log2 is done by raising the exponent only
+        bfp_s32_init(&sigma_times_gamma, sigma_XX->data, sigma_XX->exp+gamma_log2, sigma_XX->length, 0);
+        sigma_times_gamma.hr = sigma_XX->hr;
+        bfp_s32_add(&norm_denom, &sigma_times_gamma, X_energy);
+
+        //self.taps = [0.5, 1, 1, 1, 0.5] 
+        fixed_s32_t taps_q30[5] = {0x20000000, 0x40000000, 0x40000000, 0x40000000, 0x20000000};
+        for(int i=0; i<5; i++) {
+            taps_q30[i] = taps_q30[i] >> 2;//This is equivalent to a divide by 4
+        }
+
+        bfp_s32_convolve_same(inv_X_energy_denom, &norm_denom, &taps_q30[0], 5, PAD_MODE_REFLECT);
+
+        bfp_s32_add_scalar(inv_X_energy_denom, inv_X_energy_denom, delta);
+    }
+    else
+    {
+        bfp_s32_add_scalar(inv_X_energy_denom, X_energy, delta);
+    }
+
+    /**Fix for divide by 0 scenario 2 discussed in a comment in aec_priv_update_total_X_energy()
+     * We have 2 options.
+     * Option 1: Clamp the denom values between max:(denom_max mant, denom->exp exp) and min (1 mant, denom->exp exp).
+     * This will change all the (0, exp) bins to (1, exp) while leaving other bins unchanged. This could be done without
+     * checking if (bfp_s32_min(denom))->mant is 0, since if there are no zero bins, the bfp_s32_clip() would change
+     * nothing in the denom vector.
+     * Option 2: Add a (1 mant, denom->exp) scalar to the denom vector. I'd do this after checking if
+     * (bfp_s32_min(denom))->mant is 0 to avoid adding an offset to the denom vector unnecessarily.
+     * Since this is not a recreatable scenario I'm not sure which option is better. Going with option 2 since it
+     * consumes fewer cycles.
+     */
+     //Option 1 (3220 cycles)
+     /*float_s32_t max = bfp_s32_max(inv_X_energy_denom);
+     bfp_s32_clip(inv_X_energy_denom, inv_X_energy_denom, 1, max.mant, inv_X_energy_denom->exp);*/
+
+     //Option 2 (1528 cycles for the bfp_s32_min() call. Haven't profiled when min.mant == 0 is true)
+     float_s32_t min = bfp_s32_min(inv_X_energy_denom);
+     if(min.mant == 0) {
+         /** The presence of delta even when it's zero in bfp_s32_add_scalar(inv_X_energy_denom, X_energy, delta); above
+          * ensures that bfp_s32_max(inv_X_energy_denom) always has a headroom of 1, making sure that t is not right shifted as part
+          * of bfp_s32_add_scalar() making t.mant 0*/
+         float_s32_t t = {1, inv_X_energy_denom->exp};
+         bfp_s32_add_scalar(inv_X_energy_denom, inv_X_energy_denom, t);
+     }
+}
+
+void aec_priv_calc_inv_X_energy(
+        bfp_s32_t *inv_X_energy,
+        const bfp_s32_t *X_energy,
+        const bfp_s32_t *sigma_XX,
+        const aec_config_params_t *conf,
+        float_s32_t delta,
+        unsigned is_shadow)
+{
+    //Step 1: build the denominator in inv_X_energy (the main and shadow filters use different formulas)
+    aec_priv_calc_inv_X_energy_denom(inv_X_energy, X_energy, sigma_XX, conf, delta, is_shadow);
+
+    //Step 2: invert the denominator in place, so that inv_X_energy = 1/denom
+    aec_priv_calc_inverse(inv_X_energy);
+}
+
+void aec_priv_filter_adapt(
+        bfp_complex_s32_t *H_hat,
+        const bfp_complex_s32_t *X_fifo,
+        const bfp_complex_s32_t *T,
+        unsigned num_x_channels,
+        unsigned num_phases)
+{
+    //H_hat and X_fifo are walked over all channels x phases; T is indexed per channel (index / num_phases)
+    const unsigned total_phases = num_x_channels * num_phases;
+    for(unsigned idx=0; idx<total_phases; idx++) {
+        aec_l2_adapt_plus_fft_gc(&H_hat[idx], &X_fifo[idx], &T[idx/num_phases]);
+    }
+}
+
+void aec_priv_compute_T(
+        bfp_complex_s32_t *T,
+        const bfp_complex_s32_t *Error,
+        const bfp_s32_t *inv_X_energy,
+        float_s32_t mu)
+{
+    //Reference: T[x_ch] = self.mu[y_ch, x_ch] * Inv_x_energy[x_ch] * Error[y_ch] / self.K. NOTE(review): no division by K is done in this function - confirm it is handled elsewhere
+
+    //The more optimal way to calculate T is temp = mu*inv_X_energy followed by T = Error*temp since
+    //this will require half the multiplies in mu*inv_X_energy stage, but this will require a temporary buffer
+    //of bfp_s32_t type, with length the same as inv_X_energy. So instead, I've done T = inv_X_energy * Error
+    //followed by T = T * mu
+    bfp_complex_s32_real_mul(T, Error, inv_X_energy);
+    bfp_complex_s32_real_scale(T, T, mu);
+
+    //Alternative ordering (not used): bfp_complex_s32_real_scale(T, Error, mu);
+    //followed by: bfp_complex_s32_real_mul(T, T, inv_X_energy);
+}
+#define Q1_30(f) ((int32_t)((double)(INT_MAX>>1) * (f))) //Scale f by (INT_MAX>>1); argument parenthesised so expressions expand correctly. TODO use lib_xs3_math use_exponent instead
+void aec_priv_init_config_params(
+        aec_config_params_t *config_params)
+{
+    //Fill config_params with the default values for the core, shadow filter and coherence-mu sections. TODO profile double_to_float_s32() calls
+    //aec_core_config_params_t
+    aec_core_config_params_t *core_conf = &config_params->aec_core_conf;
+    core_conf->sigma_xx_shift = 11;
+    core_conf->ema_alpha_q30 = Q1_30(0.98);
+    core_conf->gamma_log2 = 6;
+    core_conf->delta_adaption_force_on.mant = (unsigned)UINT_MAX >> 1;
+    core_conf->delta_adaption_force_on.exp = -32 - 6 + 1; //extra +1 to account for shr of 1 to the mant in order to store it as a signed number
+    core_conf->delta_min = double_to_float_s32((double)1e-20);
+    core_conf->bypass = 0;
+    core_conf->coeff_index = 0;
+
+    //shadow_filt_config_params_t
+    shadow_filt_config_params_t *shadow_cfg = &config_params->shadow_filt_conf;
+    shadow_cfg->shadow_sigma_thresh = double_to_float_s32(0.6); //# threshold for resetting sigma_xx
+    shadow_cfg->shadow_copy_thresh = double_to_float_s32(0.5); //# threshold for copying shadow filter
+    shadow_cfg->shadow_reset_thresh = double_to_float_s32(1.5);
+    shadow_cfg->shadow_delay_thresh = double_to_float_s32(0.5); //# will not reset if reference delay is large
+    shadow_cfg->x_energy_thresh = double_to_float_s32(pow(10, -40/10.0)); //10^-4, i.e. -40dB as a power ratio
+    shadow_cfg->shadow_better_thresh = 5; //# how many times better before copying
+    shadow_cfg->shadow_zero_thresh = 5;//# zero shadow filter every n resets
+    shadow_cfg->shadow_reset_timer = 20; //# number of frames between zeroing resets
+    shadow_cfg->shadow_mu = double_to_float_s32(1.0);
+
+    //coherence_mu_config_params_t 
+    coherence_mu_config_params_t *coh_cfg = &config_params->coh_mu_conf;
+    coh_cfg->coh_alpha = double_to_float_s32(0.0);
+    coh_cfg->coh_slow_alpha = double_to_float_s32(0.99);
+    coh_cfg->coh_thresh_slow = double_to_float_s32(0.9);
+    coh_cfg->coh_thresh_abs = double_to_float_s32(0.65);
+    coh_cfg->mu_scalar = double_to_float_s32(1.0);
+    coh_cfg->eps = double_to_float_s32((double)1e-100);
+    coh_cfg->thresh_minus20dB = double_to_float_s32(pow(10, -20/10.0)); //10^-2, i.e. -20dB as a power ratio
+    coh_cfg->x_energy_thresh = double_to_float_s32(pow(10, -40/10.0)); //10^-4, i.e. -40dB as a power ratio
+    coh_cfg->mu_coh_time = 2;
+    coh_cfg->mu_shad_time = 5;
+
+    coh_cfg->adaption_config = AEC_ADAPTION_AUTO;
+    coh_cfg->force_adaption_mu_q30 = Q1_30(1.0);
+}
+
+void aec_priv_calc_delta(
+        float_s32_t *delta, 
+        const float_s32_t *max_X_energy,
+        aec_config_params_t *conf,
+        float_s32_t scale,
+        int channels) {
+    if(conf->coh_mu_conf.adaption_config != AEC_ADAPTION_AUTO) {
+        //Not in auto adaption mode: use the fixed delta from the core config
+        *delta = conf->aec_core_conf.delta_adaption_force_on;
+        return;
+    }
+    //Auto mode: delta = max(scale * largest max_X_energy over all channels, delta_min)
+    float_s32_t peak = max_X_energy[0];
+    for(int ch=1; ch<channels; ch++) {
+        if(!float_s32_gt(peak, max_X_energy[ch])) { peak = max_X_energy[ch]; }
+    }
+    peak = float_s32_mul(peak, scale);
+    *delta = float_s32_gt(peak, conf->aec_core_conf.delta_min) ? peak : conf->aec_core_conf.delta_min;
+}
diff --git a/modules/lib_agc/CMakeLists.txt b/modules/lib_agc/CMakeLists.txt
new file mode 100644
index 000000000..7ed1b8661
--- /dev/null
+++ b/modules/lib_agc/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library(lib_agc STATIC src/agc_impl.c)
+
+target_include_directories(lib_agc PUBLIC api)
+
+target_link_libraries(lib_agc PUBLIC lib_xs3_math)  # PUBLIC: api/agc_api.h includes <xs3_math.h>, so consumers need it too
+
+set_target_properties(lib_agc PROPERTIES PREFIX ""
+                                         SUFFIX ".a")
diff --git a/modules/lib_agc/api/agc_api.h b/modules/lib_agc/api/agc_api.h
new file mode 100644
index 000000000..af9e780ed
--- /dev/null
+++ b/modules/lib_agc/api/agc_api.h
@@ -0,0 +1,227 @@
+// Copyright 2021-2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AGC_API_H
+#define AGC_API_H
+
+#include <xs3_math.h>
+#include <agc_profiles.h>
+
+/**
+ * @page page_agc_api_h agc_api.h
+ *
+ * This header should be included in application source code to gain access to the
+ * lib_agc public functions API.
+ */
+
+/**
+ * @defgroup agc_func   AGC API functions
+ * @defgroup agc_defs   AGC API structure definitions
+ */
+
+/**
+ * @brief Length of the frame of data on which the AGC will operate.
+ *
+ * @ingroup agc_defs
+ */
+#define AGC_FRAME_ADVANCE 240u
+
+/**
+ * @brief AGC configuration structure
+ *
+ * This structure contains configuration settings that can be changed to alter the
+ * behaviour of the AGC instance.
+ *
+ * Members with the "lc_" prefix are parameters for the Loss Control feature.
+ *
+ * @ingroup agc_defs
+ */
+typedef struct {
+    /** Boolean to enable AGC adaption; if enabled, the gain to apply will adapt based on the
+     *  peak of the input frame and the upper/lower threshold parameters. */
+    int adapt;
+    /** Boolean to enable adaption based on the VAD meta-data; if enabled, adaption will always
+     *  be performed when voice activity is detected. This must be disabled if the application
+     *  doesn't have a VAD. */
+    int adapt_on_vad;
+    /** Boolean to enable soft-clipping of the output frame. */
+    int soft_clipping;
+    /** The current gain to be applied, not including loss control. */
+    float_s32_t gain;
+    /** The maximum gain allowed when adaption is enabled. */
+    float_s32_t max_gain;
+    /** The minimum gain allowed when adaption is enabled. */
+    float_s32_t min_gain;
+    /** The upper limit for the gained peak of the frame when adaption is enabled. */
+    float_s32_t upper_threshold;
+    /** The lower limit for the gained peak of the frame when adaption is enabled. */
+    float_s32_t lower_threshold;
+    /** Factor by which to increase the gain during adaption. */
+    float_s32_t gain_inc;
+    /** Factor by which to decrease the gain during adaption. */
+    float_s32_t gain_dec;
+    /** Boolean to enable loss control. This must be disabled if the application doesn't have
+     *  an AEC. */
+    int lc_enabled;
+    /** Number of frames required to consider far-end audio active. */
+    int lc_n_frame_far;
+    /** Number of frames required to consider near-end audio active. */
+    int lc_n_frame_near;
+    /** Threshold for far-end correlation above which to indicate far-end activity only. */
+    float_s32_t lc_corr_threshold;
+    /** Gamma coefficient for estimating the power of the far-end background noise. */
+    float_s32_t lc_bg_power_gamma;
+    /** Factor by which to increase the loss control gain when less than target value. */
+    float_s32_t lc_gamma_inc;
+    /** Factor by which to decrease the loss control gain when greater than target value. */
+    float_s32_t lc_gamma_dec;
+    /** Delta multiplier used when only far-end activity is detected. */
+    float_s32_t lc_far_delta;
+    /** Delta multiplier used when only near-end activity is detected. */
+    float_s32_t lc_near_delta;
+    /** Delta multiplier used when both near-end and far-end activity is detected. */
+    float_s32_t lc_near_delta_far_active;
+    /** Loss control gain to apply when near-end activity only is detected. */
+    float_s32_t lc_gain_max;
+    /** Loss control gain to apply when double-talk is detected. */
+    float_s32_t lc_gain_double_talk;
+    /** Loss control gain to apply when silence is detected. */
+    float_s32_t lc_gain_silence;
+    /** Loss control gain to apply when far-end activity only is detected. */
+    float_s32_t lc_gain_min;
+} agc_config_t;
+
+/**
+ * @brief AGC state structure
+ *
+ * This structure holds the current state of the AGC instance and members are updated each
+ * time that `agc_process_frame()` runs. Many of these members are exponentially-weighted
+ * moving averages (EWMA) which influence the adaption of the AGC gain or the loss control
+ * feature. The user should not directly modify any of these members, except the config.
+ *
+ * @ingroup agc_defs
+ */
+typedef struct {
+    /** The current configuration of the AGC. Any member of this configuration structure can
+     * be modified and that change will take effect on the next run of `agc_process_frame()`. */
+    agc_config_t config;
+    /** EWMA of the frame peak, which is used to identify the overall trend of a rise or fall
+     * in the input signal. */
+    float_s32_t x_slow;
+    /** EWMA of the frame peak, which is used to identify a rise or fall in the peak of frame. */
+    float_s32_t x_fast;
+    /** EWMA of `x_fast`, which is used when adapting to the `agc_config_t::upper_threshold`. */
+    float_s32_t x_peak;
+    /** Timer counting down until enough frames with far-end activity have been processed. */
+    int lc_t_far;
+    /** Timer counting down until enough frames with near-end activity have been processed. */
+    int lc_t_near;
+    /** EWMA of estimates of the near-end power. */
+    float_s32_t lc_near_power_est;
+    /** EWMA of estimates of the far-end power. */
+    float_s32_t lc_far_power_est;
+    /** EWMA of estimates of the power of near-end background noise. */
+    float_s32_t lc_near_bg_power_est;
+    /** Loss control gain applied on top of the AGC gain in `agc_config_t`. */
+    float_s32_t lc_gain;
+    /** EWMA of estimates of the power of far-end background noise. */
+    float_s32_t lc_far_bg_power_est;
+    /** EWMA of the far-end correlation for detecting double-talk. */
+    float_s32_t lc_corr_val;
+} agc_state_t;
+
+/**
+ * @brief Initialise the AGC
+ *
+ * This function initialises the AGC state with the provided configuration. It must be called
+ * at startup to initialise the AGC before processing any frames, and can be called at any time
+ * after that to reset the AGC instance, returning the internal AGC state to its defaults.
+ *
+ * @param[out] agc       AGC state structure
+ * @param[in]  config    Initial configuration values
+ *
+ * @par Example with an unmodified profile
+ * @code{.c}
+ *      agc_state_t agc;
+ *      agc_init(&agc, &AGC_PROFILE_ASR);
+ * @endcode
+ *
+ * @par Example with modification to the profile
+ * @code{.c}
+ *      agc_config_t conf = AGC_PROFILE_FIXED_GAIN;
+ *      conf.gain = float_to_float_s32(100);
+ *      agc_state_t agc;
+ *      agc_init(&agc, &conf);
+ * @endcode
+ *
+ * @ingroup agc_func
+ */
+void agc_init(agc_state_t *agc, agc_config_t *config);
+
+/**
+ * @brief AGC meta data structure
+ *
+ * This structure holds meta-data about the current frame to be processed, and must be updated
+ * to reflect the current frame before calling `agc_process_frame()`.
+ *
+ * @ingroup agc_defs
+ */
+typedef struct {
+    /** Boolean to indicate the detection of voice activity in the current frame. */
+    int vad_flag;
+    /** The power of the most powerful reference channel. */
+    float_s32_t aec_ref_power;
+    /** Correlation factor between the microphone input and the AEC's estimated microphone
+     *  signal. */
+    float_s32_t aec_corr_factor;
+} agc_meta_data_t;
+
+/**
+ * If the application has no VAD, `adapt_on_vad` must be disabled in the configuration. This
+ * pre-processor definition can be assigned to the `vad_flag` in `agc_meta_data_t` in that
+ * situation to make it clear in the code that there is no VAD.
+ *
+ * @ingroup agc_defs
+ */
+#define AGC_META_DATA_NO_VAD 0u
+
+/**
+ * If the application has no AEC, `lc_enabled` must be disabled in the configuration. This
+ * pre-processor definition can be assigned to the `aec_ref_power` and `aec_corr_factor` in
+ * `agc_meta_data_t` in that situation to make it clear in the code that there is no AEC.
+ *
+ * @ingroup agc_defs
+ */
+#define AGC_META_DATA_NO_AEC (float_s32_t){0, 0}
+
+/**
+ * @brief Perform AGC processing on a frame of input data
+ *
+ * This function updates the AGC's internal state based on the input frame and meta-data, and
+ * returns an output containing the result of the AGC algorithm applied to the input.
+ *
+ * The `input` and `output` pointers can be equal to perform the processing in-place.
+ *
+ * @param[inout] agc      AGC state structure
+ * @param[out] output     Array to return the resulting frame of data
+ * @param[in] input       Array of frame data on which to perform the AGC
+ * @param[in] meta_data   Meta-data structure with VAD/AEC data
+ *
+ * @par Example
+ * @code{.c}
+ *      int32_t input[AGC_FRAME_ADVANCE];
+ *      int32_t output[AGC_FRAME_ADVANCE];
+ *      agc_meta_data_t md;
+ *      md.vad_flag = AGC_META_DATA_NO_VAD;
+ *      md.aec_ref_power = AGC_META_DATA_NO_AEC;
+ *      md.aec_corr_factor = AGC_META_DATA_NO_AEC;
+ *      agc_process_frame(&agc, output, input, &md);
+ * @endcode
+ *
+ * @ingroup agc_func
+ */
+void agc_process_frame(agc_state_t *agc,
+                       int32_t output[AGC_FRAME_ADVANCE],
+                       const int32_t input[AGC_FRAME_ADVANCE],
+                       agc_meta_data_t *meta_data);
+
+#endif
diff --git a/modules/lib_agc/api/agc_profiles.h b/modules/lib_agc/api/agc_profiles.h
new file mode 100644
index 000000000..b8b9d0181
--- /dev/null
+++ b/modules/lib_agc/api/agc_profiles.h
@@ -0,0 +1,118 @@
+// Copyright 2021-2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AGC_PROFILES_H
+#define AGC_PROFILES_H
+
+#include <xs3_math.h>
+
+/**
+ * @page page_agc_profiles_h agc_profiles.h
+ *
+ * This header contains pre-defined profiles for AGC configurations.
+ * These profiles can be used to initialise the `agc_config_t` data
+ * for use with `agc_init()`.
+ *
+ * This header is automatically included by `agc_api.h`.
+ */
+
+/**
+ * @defgroup agc_profiles   Pre-defined AGC configuration profiles
+ */
+
+/**
+ * @brief AGC profile tuned for Automatic Speech Recognition (ASR).
+ *
+ * @ingroup agc_profiles
+ */
+#define AGC_PROFILE_ASR (agc_config_t){ \
+    .adapt = 1, \
+    .adapt_on_vad = 1, \
+    .soft_clipping = 1, \
+    .gain = float_to_float_s32(500), \
+    .max_gain = float_to_float_s32(1000), \
+    .min_gain = float_to_float_s32(0), \
+    .upper_threshold = float_to_float_s32(0.7079), \
+    .lower_threshold = float_to_float_s32(0.1905), \
+    .gain_inc = float_to_float_s32(1.197), \
+    .gain_dec = float_to_float_s32(0.87), \
+    .lc_enabled = 0, \
+    .lc_n_frame_far = 0, \
+    .lc_n_frame_near = 0, \
+    .lc_corr_threshold = float_to_float_s32(0), \
+    .lc_bg_power_gamma = float_to_float_s32(0), \
+    .lc_gamma_inc = float_to_float_s32(0), \
+    .lc_gamma_dec = float_to_float_s32(0), \
+    .lc_far_delta = float_to_float_s32(0), \
+    .lc_near_delta = float_to_float_s32(0), \
+    .lc_near_delta_far_active = float_to_float_s32(0), \
+    .lc_gain_max = float_to_float_s32(0), \
+    .lc_gain_double_talk = float_to_float_s32(0), \
+    .lc_gain_silence = float_to_float_s32(0), \
+    .lc_gain_min = float_to_float_s32(0), \
+    }
+
+/**
+ * @brief AGC profile tuned for communication with a human listener.
+ *
+ * @ingroup agc_profiles
+ */
+#define AGC_PROFILE_COMMS (agc_config_t){ \
+    .adapt = 1, \
+    .adapt_on_vad = 1, \
+    .soft_clipping = 1, \
+    .gain = float_to_float_s32(500), \
+    .max_gain = float_to_float_s32(1000), \
+    .min_gain = float_to_float_s32(0), \
+    .upper_threshold = float_to_float_s32(0.4), \
+    .lower_threshold = float_to_float_s32(0.4), \
+    .gain_inc = float_to_float_s32(1.0034), \
+    .gain_dec = float_to_float_s32(0.98804), \
+    .lc_enabled = 0, \
+    .lc_n_frame_far = 17, \
+    .lc_n_frame_near = 34, \
+    .lc_corr_threshold = float_to_float_s32(0.993), \
+    .lc_bg_power_gamma = float_to_float_s32(1.002), \
+    .lc_gamma_inc = float_to_float_s32(1.005), \
+    .lc_gamma_dec = float_to_float_s32(0.995), \
+    .lc_far_delta = float_to_float_s32(300), \
+    .lc_near_delta = float_to_float_s32(50), \
+    .lc_near_delta_far_active = float_to_float_s32(100), \
+    .lc_gain_max = float_to_float_s32(1), \
+    .lc_gain_double_talk = float_to_float_s32(0.9), \
+    .lc_gain_silence = float_to_float_s32(0.1), \
+    .lc_gain_min = float_to_float_s32(0.022387), \
+    }
+
+/**
+ * @brief AGC profile tuned to apply a fixed gain.
+ *
+ * @ingroup agc_profiles
+ */
+#define AGC_PROFILE_FIXED_GAIN (agc_config_t){ \
+    .adapt = 0, \
+    .adapt_on_vad = 0, \
+    .soft_clipping = 0, \
+    .gain = float_to_float_s32(25), \
+    .max_gain = float_to_float_s32(0), \
+    .min_gain = float_to_float_s32(0), \
+    .upper_threshold = float_to_float_s32(0), \
+    .lower_threshold = float_to_float_s32(0), \
+    .gain_inc = float_to_float_s32(0), \
+    .gain_dec = float_to_float_s32(0), \
+    .lc_enabled = 0, \
+    .lc_n_frame_far = 0, \
+    .lc_n_frame_near = 0, \
+    .lc_corr_threshold = float_to_float_s32(0), \
+    .lc_bg_power_gamma = float_to_float_s32(0), \
+    .lc_gamma_inc = float_to_float_s32(0), \
+    .lc_gamma_dec = float_to_float_s32(0), \
+    .lc_far_delta = float_to_float_s32(0), \
+    .lc_near_delta = float_to_float_s32(0), \
+    .lc_near_delta_far_active = float_to_float_s32(0), \
+    .lc_gain_max = float_to_float_s32(0), \
+    .lc_gain_double_talk = float_to_float_s32(0), \
+    .lc_gain_silence = float_to_float_s32(0), \
+    .lc_gain_min = float_to_float_s32(0), \
+    }
+
+#endif
diff --git a/modules/lib_agc/doc/index.rst b/modules/lib_agc/doc/index.rst
new file mode 100644
index 000000000..60997824b
--- /dev/null
+++ b/modules/lib_agc/doc/index.rst
@@ -0,0 +1,35 @@
+Automatic Gain Control Library
+==============================
+
+Introduction
+************
+
+``lib_agc`` is a library of functions for performing Automatic Gain Control on input data. It can dynamically
+adapt to maintain a specified output level for the voice content.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Contents:
+
+   src/getting_started
+   src/overview
+   src/reference/index
+
+
+On GitHub
+---------
+
+``lib_agc`` is present as part of ``sw_avona``. Get the latest version of ``sw_avona`` from
+``https://github.com/xmos/sw_avona``. ``lib_agc`` is present within the ``modules/lib_agc`` directory in ``sw_avona``.
+
+API
+---
+
+To use the functions in this library in an application, include :ref:`agc_api_h` in the application source file.
+
+
+Indices and tables
+******************
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/modules/lib_agc/doc/src/getting_started.rst b/modules/lib_agc/doc/src/getting_started.rst
new file mode 100644
index 000000000..b2e31e6cf
--- /dev/null
+++ b/modules/lib_agc/doc/src/getting_started.rst
@@ -0,0 +1,37 @@
+Getting Started
+===============
+
+Overview
+--------
+
+``lib_agc`` is a library which performs Automatic Gain Control (AGC), with support for Loss Control.
+For more details, refer to :ref:`agc_overview`.
+
+
+Repository Structure
+--------------------
+
+* ``modules/lib_agc`` - The actual ``lib_agc`` library directory within ``https://github.com/xmos/sw_avona/``.
+  Within ``lib_agc``:
+
+  * ``api/`` - Headers containing the public API for ``lib_agc``.
+  * ``doc/`` - Library documentation source (for non-embedded documentation) and build directory.
+  * ``src/`` - Library source code.
+
+
+Requirements
+------------
+
+``lib_agc`` is included as part of the ``sw_avona`` github repository and all requirements for cloning
+and building ``sw_avona`` apply. ``lib_agc`` is compiled as a static library as part of the overall
+``sw_avona`` build. It depends on `lib_xs3_math <https://github.com/xmos/lib_xs3_math/>`_.
+
+
+Getting and Building
+--------------------
+
+This module is part of the parent ``sw_avona`` repository. It is compiled as a static library as part of
+the ``sw_avona`` compilation process.
+
+To use ``lib_agc`` in an application, link the generated ``lib_agc.a`` static library into the application
+and add ``lib_agc/api`` to the include directories when building the application.
diff --git a/modules/lib_agc/doc/src/overview.rst b/modules/lib_agc/doc/src/overview.rst
new file mode 100644
index 000000000..9ba0b8e1e
--- /dev/null
+++ b/modules/lib_agc/doc/src/overview.rst
@@ -0,0 +1,38 @@
+.. _agc_overview:
+
+AGC Overview
+~~~~~~~~~~~~
+
+The ``lib_agc`` library provides an API to implement Automatic Gain Control within
+an application. The goal of the AGC algorithm is to provide consistent output
+levels for voice audio.
+
+The gain control can adapt to maintain the amplitude of the peak of the frame
+within an upper and lower bound configured for the AGC instance. When used in an
+application with a Voice Activity Detector (VAD), the AGC will adapt only when
+voice activity is detected, so that speech in the input signal is amplified
+above other sounds.
+
+The AGC also has a Loss Control feature which can be used when the application
+has an Acoustic Echo Canceller (AEC). This feature uses data from the AEC to
+adjust the gain applied to reduce residual echoes by attenuating the audio when
+near-end speech is not present.
+
+The AGC takes as input a frame of data from an audio channel. This could be the
+microphone input or the output of another module in the application.
+
+Gain control is performed on a frame-by-frame basis. Each frame consists of 15ms
+of data, which is 240 samples at 16kHz input sampling frequency. Input data is
+expected to be in a fixed-point 32-bit 1.31 format.
+
+Before processing any frames, the application must configure and initialise the
+AGC instance by calling ``agc_init()``. Then for each frame,
+``agc_process_frame()`` will update the AGC instance's internal state and produce
+the output frame by applying the AGC algorithm to the input frame.
+
+The gain values in this module for AGC gain and Loss Control gain are
+multiplicative factors that are applied to scale the input frame. Therefore, a
+fixed gain value of 1.0 (without loss control) will create no change to the input.
+
+If multiple channels need to be processed by the application, or multiple outputs
+are required, an independent instance of the AGC must be run for each channel.
diff --git a/modules/lib_agc/doc/src/reference/api.rst b/modules/lib_agc/doc/src/reference/api.rst
new file mode 100644
index 000000000..2951c05d1
--- /dev/null
+++ b/modules/lib_agc/doc/src/reference/api.rst
@@ -0,0 +1,7 @@
+.. _agc_func:
+
+AGC API Functions
+=================
+
+.. doxygengroup:: agc_func
+    :content-only:
diff --git a/modules/lib_agc/doc/src/reference/defines.rst b/modules/lib_agc/doc/src/reference/defines.rst
new file mode 100644
index 000000000..908d6bf13
--- /dev/null
+++ b/modules/lib_agc/doc/src/reference/defines.rst
@@ -0,0 +1,8 @@
+.. _agc_defines:
+
+AGC API Structure Definitions
+=============================
+
+.. doxygengroup:: agc_defs
+    :members:
+    :content-only:
diff --git a/modules/lib_agc/doc/src/reference/header_files.rst b/modules/lib_agc/doc/src/reference/header_files.rst
new file mode 100644
index 000000000..96221204d
--- /dev/null
+++ b/modules/lib_agc/doc/src/reference/header_files.rst
@@ -0,0 +1,18 @@
+AGC Header Files
+================
+
+.. _agc_api_h:
+
+`agc_api.h`
+-----------
+
+.. doxygenpage:: page_agc_api_h
+  :content-only:
+
+.. _agc_profiles_h:
+
+`agc_profiles.h`
+----------------
+
+.. doxygenpage:: page_agc_profiles_h
+  :content-only:
diff --git a/modules/lib_agc/doc/src/reference/index.rst b/modules/lib_agc/doc/src/reference/index.rst
new file mode 100644
index 000000000..7aa3b7459
--- /dev/null
+++ b/modules/lib_agc/doc/src/reference/index.rst
@@ -0,0 +1,11 @@
+#############
+API Reference
+#############
+
+.. toctree::
+    :maxdepth: 1
+
+    api
+    profiles
+    defines
+    header_files
diff --git a/modules/lib_agc/doc/src/reference/profiles.rst b/modules/lib_agc/doc/src/reference/profiles.rst
new file mode 100644
index 000000000..848bbecc5
--- /dev/null
+++ b/modules/lib_agc/doc/src/reference/profiles.rst
@@ -0,0 +1,7 @@
+.. _agc_profiles:
+
+AGC Pre-Defined Profiles
+========================
+
+.. doxygengroup:: agc_profiles
+    :content-only:
diff --git a/modules/lib_agc/src/agc_defines.h b/modules/lib_agc/src/agc_defines.h
new file mode 100644
index 000000000..d42cc1577
--- /dev/null
+++ b/modules/lib_agc/src/agc_defines.h
@@ -0,0 +1,34 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AGC_DEFINES_H
+#define AGC_DEFINES_H
+
+#include <xs3_math.h>
+
+// The input and output frame data format is Q1.31
+#define FRAME_EXP -31
+
+// Pre-calculated values to avoid the cycles of float_to_float_s32()
+#define FLOAT_S32_ZERO (float_s32_t){0, -31}
+#define FLOAT_S32_ONE (float_s32_t){1073741824, -30}
+
+// Alphas for EMA calculations are in Q30 format for float_s32_ema()
+#define AGC_ALPHA_SLOW_RISE 952301632  // 0.8869
+#define AGC_ALPHA_SLOW_FALL 1035731392  // 0.9646
+#define AGC_ALPHA_FAST_RISE 409525120  // 0.3814
+#define AGC_ALPHA_FAST_FALL 952301632  // 0.8869
+#define AGC_ALPHA_PEAK_RISE 588410496  // 0.5480
+#define AGC_ALPHA_PEAK_FALL 1035731392  // 0.9646
+#define AGC_ALPHA_LC_EST_INC 588410496  // 0.5480
+#define AGC_ALPHA_LC_EST_DEC 748720192  // 0.6973
+#define AGC_ALPHA_LC_BG_POWER_EST_DEC 588410496  // 0.5480
+#define AGC_ALPHA_LC_CORR 1052267008  // 0.9800
+
+// Minimum value for the estimated far background power
+#define AGC_LC_FAR_BG_POWER_EST_MIN (float_s32_t){1407374848, -47}  //0.00001
+
+// Pre-calculated values for soft-clipping constants
+#define AGC_SOFT_CLIPPING_THRESH (float_s32_t){1073741824, -31}  // 0.5
+#define AGC_SOFT_CLIPPING_NUMERATOR (float_s32_t){1073741824, -32}  // 0.25; AGC_SOFT_CLIPPING_THRESH squared
+
+#endif
diff --git a/modules/lib_agc/src/agc_impl.c b/modules/lib_agc/src/agc_impl.c
new file mode 100644
index 000000000..3cbe874b3
--- /dev/null
+++ b/modules/lib_agc/src/agc_impl.c
@@ -0,0 +1,263 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <limits.h>
+
+#include <bfp_math.h>
+#include <agc_api.h>
+#include "agc_defines.h"
+
+void agc_init(agc_state_t *agc, agc_config_t *config)
+{
+    agc->config = *config;
+
+    agc->x_slow = float_to_float_s32(0);
+    agc->x_fast = float_to_float_s32(0);
+    agc->x_peak = float_to_float_s32(0);
+
+    agc->lc_t_far = 0;
+    agc->lc_t_near = 0;
+
+    agc->lc_near_power_est = float_to_float_s32(0.00001);
+    agc->lc_far_power_est = float_to_float_s32(0.01);
+    agc->lc_near_bg_power_est = float_to_float_s32(0.01);
+    agc->lc_gain = float_to_float_s32(1);
+    agc->lc_far_bg_power_est = float_to_float_s32(0.01);
+    agc->lc_corr_val = float_to_float_s32(0);
+}
+
+// Returns the mantissa for the input float shifted to an exponent of parameter exp
+static int32_t use_exp_float(float_s32_t fl, exponent_t exp)
+{
+    exponent_t exp_diff = fl.exp - exp;
+
+    if (exp_diff > 0) {
+        return fl.mant << exp_diff;
+    } else if (exp_diff < 0) {
+        // Shifting by 32 bits or more is undefined in C, so cap the right shift at 31
+        return fl.mant >> ((exp_diff < -31) ? 31 : -exp_diff);
+    }
+    return fl.mant;
+}
+
+// Returns the soft-clipped mantissa in terms of the original exponent; samples below AGC_SOFT_CLIPPING_THRESH in magnitude are returned unchanged
+static int32_t apply_soft_clipping(int32_t mant, exponent_t exp)
+{
+    float_s32_t sample = {mant, exp};
+    float_s32_t sample_abs = float_s32_abs(sample);
+
+    if (float_s32_gt(AGC_SOFT_CLIPPING_THRESH, sample_abs)) {
+        return mant;
+    }
+
+    // Division by zero is not possible: sample_abs is at least AGC_SOFT_CLIPPING_THRESH (0.5) after the test above
+    float_s32_t sample_limit = float_s32_div(AGC_SOFT_CLIPPING_NUMERATOR, sample_abs);
+    sample_limit = float_s32_sub(FLOAT_S32_ONE, sample_limit);
+
+    if (float_s32_gt(FLOAT_S32_ZERO, sample)) {
+        sample_limit = float_s32_sub(FLOAT_S32_ZERO, sample_limit);
+    }
+
+    return use_exp_float(sample_limit, exp);
+}
+
+void agc_process_frame(agc_state_t *agc,
+                       int32_t output[AGC_FRAME_ADVANCE],
+                       const int32_t input[AGC_FRAME_ADVANCE],
+                       agc_meta_data_t *meta_data)
+{
+    int vad_flag;
+
+    if (agc->config.adapt_on_vad == 0) {
+        vad_flag = 1;
+    } else {
+        vad_flag = meta_data->vad_flag;
+    }
+
+    bfp_s32_t input_bfp;
+    bfp_s32_init(&input_bfp, (int32_t *)input, FRAME_EXP, AGC_FRAME_ADVANCE, 1);
+
+    bfp_s32_t output_bfp;
+    bfp_s32_init(&output_bfp, (int32_t *)output, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    if (agc->config.adapt) {
+        // Get max absolute sample value by comparing the absolute values of the min and max.
+        // An alternative approach is to form a new vector with the absolute values and then find
+        // the max value, which took 48 fewer cycles but required an extra 760 bytes of memory.
+        float_s32_t max_sample = float_s32_abs(bfp_s32_max(&input_bfp));
+        float_s32_t min_sample = float_s32_abs(bfp_s32_min(&input_bfp));
+        float_s32_t max_abs_value;
+
+        if (float_s32_gte(max_sample, min_sample)) {
+            max_abs_value = max_sample;
+        } else {
+            max_abs_value = min_sample;
+        }
+
+        unsigned rising = float_s32_gte(max_abs_value, agc->x_slow);
+        if (rising) {
+            agc->x_slow = float_s32_ema(agc->x_slow, max_abs_value, AGC_ALPHA_SLOW_RISE);
+            agc->x_fast = float_s32_ema(agc->x_fast, max_abs_value, AGC_ALPHA_FAST_RISE);
+        } else {
+            agc->x_slow = float_s32_ema(agc->x_slow, max_abs_value, AGC_ALPHA_SLOW_FALL);
+            agc->x_fast = float_s32_ema(agc->x_fast, max_abs_value, AGC_ALPHA_FAST_FALL);
+        }
+
+        float_s32_t gained_max_abs_value = float_s32_mul(max_abs_value, agc->config.gain);
+        unsigned exceed_threshold = float_s32_gte(gained_max_abs_value, agc->config.upper_threshold);
+
+        if (exceed_threshold || vad_flag) {
+            unsigned peak_rising = float_s32_gte(agc->x_fast, agc->x_peak);
+            if (peak_rising) {
+                agc->x_peak = float_s32_ema(agc->x_peak, agc->x_fast, AGC_ALPHA_PEAK_RISE);
+            } else {
+                agc->x_peak = float_s32_ema(agc->x_peak, agc->x_fast, AGC_ALPHA_PEAK_FALL);
+            }
+
+            float_s32_t gained_pk = float_s32_mul(agc->x_peak, agc->config.gain);
+            unsigned near_only = (agc->lc_t_near != 0) && (agc->lc_t_far == 0);
+            if (float_s32_gte(gained_pk, agc->config.upper_threshold)) {
+                agc->config.gain = float_s32_mul(agc->config.gain_dec, agc->config.gain);
+            } else if (float_s32_gte(agc->config.lower_threshold, gained_pk) &&
+                       (agc->config.lc_enabled == 0 || near_only != 0)) {
+                agc->config.gain = float_s32_mul(agc->config.gain_inc, agc->config.gain);
+            }
+
+            if (float_s32_gte(agc->config.gain, agc->config.max_gain)) {
+                agc->config.gain = agc->config.max_gain;
+            }
+
+            if (float_s32_gte(agc->config.min_gain, agc->config.gain)) {
+                agc->config.gain = agc->config.min_gain;
+            }
+        }
+    }
+
+    float_s32_t frame_power = float_s64_to_float_s32(bfp_s32_energy(&input_bfp));
+    bfp_s32_scale(&output_bfp, &input_bfp, agc->config.gain);
+
+    // Update loss control state
+
+    if (float_s32_gte(agc->lc_far_power_est, meta_data->aec_ref_power)) {
+        agc->lc_far_power_est = float_s32_ema(agc->lc_far_power_est, meta_data->aec_ref_power, AGC_ALPHA_LC_EST_DEC);
+    } else {
+        agc->lc_far_power_est = float_s32_ema(agc->lc_far_power_est, meta_data->aec_ref_power, AGC_ALPHA_LC_EST_INC);
+    }
+
+    float_s32_t far_bg_power_est = float_s32_mul(agc->config.lc_bg_power_gamma, agc->lc_far_bg_power_est);
+    if (float_s32_gte(far_bg_power_est, agc->lc_far_power_est)) {
+        agc->lc_far_bg_power_est = agc->lc_far_power_est;
+    } else {
+        agc->lc_far_bg_power_est = far_bg_power_est;
+    }
+
+    if (float_s32_gte(AGC_LC_FAR_BG_POWER_EST_MIN, agc->lc_far_bg_power_est)) {
+        agc->lc_far_bg_power_est = AGC_LC_FAR_BG_POWER_EST_MIN;
+    }
+
+    if (float_s32_gte(agc->lc_near_power_est, frame_power)) {
+        agc->lc_near_power_est = float_s32_ema(agc->lc_near_power_est, frame_power, AGC_ALPHA_LC_EST_DEC);
+    } else {
+        agc->lc_near_power_est = float_s32_ema(agc->lc_near_power_est, frame_power, AGC_ALPHA_LC_EST_INC);
+    }
+
+    if (float_s32_gt(agc->lc_near_bg_power_est, agc->lc_near_power_est)) {
+        agc->lc_near_bg_power_est = float_s32_ema(agc->lc_near_bg_power_est, agc->lc_near_power_est, AGC_ALPHA_LC_BG_POWER_EST_DEC);
+    } else {
+        agc->lc_near_bg_power_est = float_s32_mul(agc->config.lc_bg_power_gamma, agc->lc_near_bg_power_est);
+    }
+
+    if (agc->config.lc_enabled) {
+        if (float_s32_gt(meta_data->aec_corr_factor, agc->lc_corr_val)) {
+            agc->lc_corr_val = meta_data->aec_corr_factor;
+        } else {
+            agc->lc_corr_val = float_s32_ema(agc->lc_corr_val, meta_data->aec_corr_factor, AGC_ALPHA_LC_CORR);
+        }
+
+        if (float_s32_gt(agc->lc_far_power_est, float_s32_mul(agc->config.lc_far_delta, agc->lc_far_bg_power_est))) {
+            agc->lc_t_far = agc->config.lc_n_frame_far;
+        } else {
+            if (agc->lc_t_far > 0) {
+                --agc->lc_t_far;
+            }
+        }
+
+        float_s32_t delta = (agc->lc_t_far > 0) ? agc->config.lc_near_delta_far_active : agc->config.lc_near_delta;
+
+        if (float_s32_gt(agc->lc_near_power_est, float_s32_mul(delta, agc->lc_near_bg_power_est))) {
+            if (agc->lc_t_far == 0 || (agc->lc_t_far > 0 &&
+                                       float_s32_gt(agc->config.lc_corr_threshold, agc->lc_corr_val))) {
+                // Near-end speech only or double talk
+                agc->lc_t_near = agc->config.lc_n_frame_near;
+            } else {
+                // Far-end speech only
+                // Do nothing
+            }
+        } else {
+            // Silence
+            if (agc->lc_t_near > 0) {
+                --agc->lc_t_near;
+            }
+        }
+
+        // Adapt loss control gain
+        float_s32_t lc_target_gain;
+        if (agc->lc_t_far <= 0 && agc->lc_t_near > 0) {
+            // Near-end only
+            lc_target_gain = agc->config.lc_gain_max;
+        } else if (agc->lc_t_far <= 0 && agc->lc_t_near <= 0) {
+            // Silence
+            lc_target_gain = agc->config.lc_gain_silence;
+        } else if (agc->lc_t_far > 0 && agc->lc_t_near <= 0) {
+            // Far-end only
+            lc_target_gain = agc->config.lc_gain_min;
+        } else {
+            // Double talk
+            lc_target_gain = agc->config.lc_gain_double_talk;
+        }
+
+        // When changing from one value of lc_target_gain to a different one, the change
+        // is applied gradually, sample-by-sample in the frame, using lc_gamma_inc/dec.
+        // The lc_scale array is initially set to the target value and then overwritten
+        // from the beginning as required to transition from the previous lc_gain value.
+        // This will create a BFP array representing the gradual scale changes which
+        // can be applied by multiplying element-wise using the VPU.
+        int32_t lc_scale[AGC_FRAME_ADVANCE];
+        bfp_s32_t lc_scale_bfp;
+        bfp_s32_init(&lc_scale_bfp, lc_scale, lc_target_gain.exp, AGC_FRAME_ADVANCE, 0);
+        bfp_s32_set(&lc_scale_bfp, lc_target_gain.mant, lc_target_gain.exp);
+        // Add some headroom to avoid changing the exponent when gradually transitioning from the previous lc_gain
+        // to lc_target_gain (32 bits of precision is unnecessary). NOTE(review): confirm bfp_s32_shl() preserves the value, i.e. also raises the exponent by 8.
+        bfp_s32_shl(&lc_scale_bfp, &lc_scale_bfp, -8);
+
+        for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+            if (float_s32_gt(agc->lc_gain, lc_target_gain)) {
+                agc->lc_gain = float_s32_mul(agc->lc_gain, agc->config.lc_gamma_dec);
+                if (float_s32_gt(lc_target_gain, agc->lc_gain)) {
+                    agc->lc_gain = lc_target_gain;
+                }
+                lc_scale[idx] = use_exp_float(agc->lc_gain, lc_target_gain.exp);
+            } else if (float_s32_gt(lc_target_gain, agc->lc_gain)) {
+                agc->lc_gain = float_s32_mul(agc->lc_gain, agc->config.lc_gamma_inc);
+                if (float_s32_gt(agc->lc_gain, lc_target_gain)) {
+                    agc->lc_gain = lc_target_gain;
+                }
+                lc_scale[idx] = use_exp_float(agc->lc_gain, lc_target_gain.exp);
+            } else {
+                break;
+            }
+        }
+
+        bfp_s32_mul(&output_bfp, &output_bfp, &lc_scale_bfp);
+    }
+
+    if (agc->config.soft_clipping) {
+        for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+            output[idx] = apply_soft_clipping(output[idx], output_bfp.exp);
+        }
+    }
+
+    // Clip to avoid over/underflow when changing to the output frame exponent
+    bfp_s32_clip(&output_bfp, &output_bfp, INT_MIN, INT_MAX, FRAME_EXP);
+
+    bfp_s32_use_exponent(&output_bfp, FRAME_EXP);
+}
diff --git a/modules/todo b/modules/todo
deleted file mode 100644
index e69de29bb..000000000
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..c93369bea
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,42 @@
+# python_version 3.7.6
+#
+# The parse_version_from_requirements() function in the installPipfile.groovy
+# file of the Jenkins Shared Library uses the python_version comment to set
+# the version of python used.
+
+# Distributed (released) dependencies
+#
+# The python modules listed below specify a known working combination required
+# by the python code in this repository.  The procedure used to set up a
+# suitable python environment for it installs the version of each module in
+# the list.  Using a specific version ensures a controlled infrastructure for
+# development, testing and release of this repository.
+#
+# Another repository might depend on python code defined in this one.  The
+# procedure to set up a suitable python environment for that repository may
+# pip-install this one as editable using this repository's setup.py file.  The
+# same modules should appear in the setup.py list as given below.
+matplotlib==3.3.1
+
+# Pin numpy to 1.18.5 due to tensorflow v2.1.1 hard pinning it to that version.
+numpy==1.18.5
+pytest==6.0.0
+pytest-xdist==1.34.0
+
+# Pin scipy to 1.4.1 due to tensorflow v2.1.1 hard pinning it to that version.
+scipy==1.4.1
+soundfile==0.10.3.post1
+
+# Development dependencies
+#
+# Each link listed below specifies the path to a setup.py file, which is
+# installed in editable mode with '-e $PATH' (without the quotes).
+#
+# If python code in this repository depends on python code under development
+# in another repository, then an entry for that other repository should
+# appear in this list instead of the released dependencies list.
+#
+# If this repository uses the setup functionality (e.g., script entry points)
+# of its own setup.py file, then this list must include an entry for that
+# setup.py file, e.g., '-e .' or '-e ./python' (without the quotes).
+-e ./../audio_test_tools/python
diff --git a/settings.json b/settings.json
index 4a960e733..be6e43997 100644
--- a/settings.json
+++ b/settings.json
@@ -1,5 +1,5 @@
 {
     "title": "Avona Voice Reference Design",
     "project": "Avona",
-    "version": "0.11.0"
-}
\ No newline at end of file
+    "version": "0.1.0"
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 000000000..25129b16c
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,10 @@
+#set(DEPS_ROOT ${CMAKE_BINARY_DIR}/deps)  ## For fetching dependencies using cmake
+set(DEPS_ROOT ${CMAKE_SOURCE_DIR}/..)
+set( SHARED_SRC_PATH ${CMAKE_SOURCE_DIR}/examples/bare-metal/shared_src )
+set( XSCOPE_FILEIO_PATH ${SHARED_SRC_PATH}/xscope_fileio/xscope_fileio )
+
+add_subdirectory( shared )
+
+add_subdirectory( lib_aec )
+
+add_subdirectory( lib_agc )
diff --git a/test/etc/config.xscope b/test/etc/config.xscope
new file mode 100644
index 000000000..fc920544a
--- /dev/null
+++ b/test/etc/config.xscope
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- ======================================================= -->
+<!-- The 'ioMode' attribute on the xSCOPEconfig              -->
+<!-- element can take the following values:                  -->
+<!--   "none", "basic", "timed"                              -->
+<!--                                                         -->
+<!-- The 'type' attribute on Probe                           -->
+<!-- elements can take the following values:                 -->
+<!--   "STARTSTOP", "CONTINUOUS", "DISCRETE", "STATEMACHINE" -->
+<!--                                                         -->
+<!-- The 'datatype' attribute on Probe                       -->
+<!-- elements can take the following values:                 -->
+<!--   "NONE", "UINT", "INT", "FLOAT"                        -->
+<!-- ======================================================= -->
+
+<xSCOPEconfig ioMode="basic" enabled="true">
+
+    <!-- For example: -->
+    <!-- <Probe name="Probe Name" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/> -->
+    <!-- From the target code, call: xscope_int(PROBE_NAME, value); -->
+    
+    <!--<Probe name="out_buffer_level"       type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!--<Probe name="GC_GAIN" type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/> -->   
+    <!-- <Probe name="out_buffer_level"       type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="samples_out"            type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="peak_association_time"  type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="start_bin"         type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="resort_time"       type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="peak_count"        type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="samples_out"       type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="frame_recv_time"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="frame_send_time"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="xcspe"       type="CONTINUOUS" datatype="INT" units="Value" enabled="false"/>  -->
+    <!-- <Probe name="gain"       type="CONTINUOUS" datatype="INT" units="Value" enabled="false"/>  -->
+    <!-- <Probe name="fit_count"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_application_task" type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_singlet_fit"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_speaker_model"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_fitter"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_resynth"    type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+    <!-- <Probe name="timing_td_detection"    type="CONTINUOUS" datatype="INT" units="Value" enabled="false"/>  -->
+    <!-- <Probe name="timing_kde" type="CONTINUOUS" datatype="INT" units="Value" enabled="true"/>  -->
+</xSCOPEconfig>
diff --git a/test/lib_aec/CMakeLists.txt b/test/lib_aec/CMakeLists.txt
new file mode 100644
index 000000000..faec87a9d
--- /dev/null
+++ b/test/lib_aec/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+## Defines for tests
+
+## The unit test apps
+
+add_subdirectory( test_wav_aec )
+if( XCORE )
+    add_subdirectory( test_aec_enhancements )
+    add_subdirectory( test_delay_estimator )
+    add_subdirectory( test_aec_spec )
+    add_subdirectory( test_aec_profile )
+    add_subdirectory( aec_unit_tests )
+endif()
diff --git a/test/lib_aec/aec_unit_tests/CMakeLists.txt b/test/lib_aec/aec_unit_tests/CMakeLists.txt
new file mode 100644
index 000000000..1d2f238d3
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/CMakeLists.txt
@@ -0,0 +1,145 @@
+## App name
+set( APP_NAME  aec_unit_tests )
+
+# Auto-generate task distribution scheme and top level config files
+
+if( NOT Python3_FOUND )  # pass the variable name, not ${...}: an unset value must evaluate to false
+  message(FATAL_ERROR "Python3 not found; it is required to generate the ${APP_NAME} sources.")
+endif()
+
+#copy conftest.py in the build directory since pytest_collect_file only collects tests from the directory tree where conftest.py is present
+configure_file( conftest.py conftest.py COPYONLY ) 
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen )
+set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+
+message(STATUS "${APP_NAME} aec build config:  ${AEC_UNIT_TESTS_BUILD_CONFIG}" )
+message( STATUS "Unit tests speedup factor ${TEST_SPEEDUP_FACTOR}" )
+set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+
+unset(GEN_SCHEDULE_SCRIPT_ARGS) 
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${AEC_UNIT_TESTS_BUILD_CONFIG})
+
+file(MAKE_DIRECTORY ${AUTOGEN_DIR})
+
+add_custom_command( OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+    COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+    DEPENDS ${GEN_SCHEDULE_SCRIPT}  # regenerate the outputs when the generator script changes
+    COMMENT "Generating AEC task distribution and top level config" )
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+
+list( APPEND  DEP_LIBS        
+    lib_xs3_math
+    lib_aec 
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+list( APPEND  DEP_LIBS ${DEP_LIBS_${CMAKE_SYSTEM_NAME}} )
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE -DUNITY_SUPPORT_64 -Wno-xcore-fptrgroup )
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} -DSPEEDUP_FACTOR=${TEST_SPEEDUP_FACTOR} )
+
+## Sources
+# Unity
+set( UNITY_PATH ${DEPS_ROOT}/Unity/src )
+file( GLOB UNITY_SOURCES ${UNITY_PATH}/*.c )
+
+# audio_test_tools
+set( AUDIO_TEST_TOOLS_PATH ${DEPS_ROOT}/audio_test_tools/audio_test_tools )
+list( APPEND AUDIO_TEST_TOOLS_SOURCES ${AUDIO_TEST_TOOLS_PATH}/src/testing.xc ${AUDIO_TEST_TOOLS_PATH}/src/floating_fft.xc)
+
+#lib_dsp
+set( LIB_DSP_PATH ${XCORE_SDK_PATH}/modules/lib_dsp/lib_dsp )
+list( APPEND LIB_DSP_SOURCES ${LIB_DSP_PATH}/src/dsp_math.c ${LIB_DSP_PATH}/src/bfp/dsp_bfp_cls.S )
+
+# All common sources
+list( APPEND  COMMON_SOURCES ${AUTOGEN_SOURCES} ${UNITY_SOURCES} ${AUDIO_TEST_TOOLS_SOURCES} ${LIB_DSP_SOURCES} )
+
+## Includes
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${AUTOGEN_DIR} ${UNITY_PATH} ${AUDIO_TEST_TOOLS_PATH}/api ${LIB_DSP_PATH}/api )
+
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+# Set unity runner generate script
+set( GEN_RUNNER_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/generate_unity_runner.py )
+
+# Create directory for runner files
+set( RUNNERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.runners )
+file( MAKE_DIRECTORY ${RUNNERS_DIR} )
+
+file( GLOB TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.xc )
+
+# For every source file in aec_unit_tests/src
+foreach( testfile ${TEST_SOURCES} )
+    unset( SOURCES )
+    list( APPEND SOURCES ${COMMON_SOURCES} )
+    # Get test name
+    #cmake_path( GET testfile STEM TESTNAME )
+    get_filename_component(TESTNAME ${testfile} NAME_WLE)
+    
+    # Create runner file directory
+    file( MAKE_DIRECTORY ${RUNNERS_DIR}/${TESTNAME} )
+
+    #########
+    ## Create runner file
+    set( RUNNER_FILE ${RUNNERS_DIR}/${TESTNAME}/${TESTNAME}_Runner.c )
+    set( GEN_RUNNER_SCRIPT_BYPRODUCTS ${RUNNER_FILE} )
+
+    unset(GEN_RUNNER_SCRIPT_ARGS) 
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --project-root ${DEPS_ROOT} )
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --source-file ${testfile} )
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --runner-file ${RUNNER_FILE})
+
+    ## Add command to generate runner file; it reruns when the test source or the generator script changes
+    add_custom_command( OUTPUT ${RUNNER_FILE}
+        COMMAND ${Python3_EXECUTABLE} ${GEN_RUNNER_SCRIPT} ${GEN_RUNNER_SCRIPT_ARGS}
+        DEPENDS ${testfile} ${GEN_RUNNER_SCRIPT}
+        COMMENT "Generating AEC runner" )
+    
+    if (${testfile} MATCHES "test_calc_max_ref_energy.xc")
+        set (SOURCES ${SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/src/calc_max_ref_energy_c_wrapper.c)
+    endif ()
+    if (${testfile} MATCHES "test_estimate_delay.xc")
+        set (SOURCES ${SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/src/test_calc_fd_frame_energy.xc)
+    endif()
+
+    #########
+    ## Add a build target
+    add_executable( ${TESTNAME} ${testfile} ${RUNNER_FILE} ${SOURCES} )
+
+    target_include_directories( ${TESTNAME} PRIVATE ${INCLUDES} )
+
+    target_link_libraries( ${TESTNAME} PRIVATE ${DEP_LIBS})  # explicit scope: test executables have no consumers
+
+    target_compile_options( ${TESTNAME} PRIVATE ${COMPILE_FLAGS} )
+    #(because otherwise the set_target_properties command fails)
+    string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+    set_target_properties( ${TESTNAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+    if ( XCORE )
+        set_target_properties( ${TESTNAME} PROPERTIES
+          SUFFIX ".xe"
+          )
+    endif()
+endforeach( testfile ${TEST_SOURCES} )
+
diff --git a/test/lib_aec/aec_unit_tests/conftest.py b/test/lib_aec/aec_unit_tests/conftest.py
new file mode 100644
index 000000000..32bc16fd8
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/conftest.py
@@ -0,0 +1,104 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import str
+import os.path
+import pytest
+import subprocess
+import xtagctl
+
+
+def pytest_collect_file(parent, path):
+    if(path.ext == ".xe"):
+        print('path = ',path)
+        return UnityTestSource.from_parent(parent, fspath=path)
+
+class UnityTestSource(pytest.File):
+    def collect(self):
+        # Find the binary built from the runner for this test file
+        #
+        # Assume the following directory layout:
+        # unit_tests/       <- Test root directory
+        # |-- bin/          <- Compiled binaries of the test runners
+        # |-- conftest.py   <- This file
+        # |-- runners/      <- Auto-generated buildable source of test binaries
+        # |-- src/          <- Unity test functions
+        # `-- wscript       <- Build system file used to generate/build runners
+        print("self.name ", self.fspath)
+        yield UnityTestExecutable.from_parent(self, fspath=self.fspath, name=self.name)
+
+
+class UnityTestExecutable(pytest.Item):
+    def __init__(self, fspath, name, parent):
+        super(UnityTestExecutable, self).__init__(name, parent)
+        self.fspath = fspath
+        self._nodeid = self.name  # Override the naming to suit C better
+
+    def runtest(self):
+        # Run the binary in the simulator
+        simulator_fail = False
+        test_output = None
+        try:
+            print("run xrun for executable ", self.fspath)
+            with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+                test_output = subprocess.check_output(['xrun', '--io', '--adapter-id', adapter_id, self.fspath], text=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            # Unity exits non-zero if an assertion fails
+            simulator_fail = True
+            test_output = e.output
+
+        # Parse the Unity output
+        unity_pass = False
+        test_output = test_output.split('\n')
+        for line in test_output:
+            if 'test' in line:
+                test_report = line.split(':')
+                # Unity output is as follows:
+                #   <test_source>:<line_number>:<test_case>:PASS
+                #   <test_source>:<line_number>:<test_case>:FAIL:<failure_reason>
+                test_source = test_report[0]
+                line_number = test_report[1]
+                test_case = test_report[2]
+                result = test_report[3]
+                failure_reason = None
+                print(('\n {}()'.format(test_case)), end=' ')
+                if result == 'PASS':
+                    unity_pass = True
+                    continue
+                if result == 'FAIL':
+                    failure_reason = test_report[4]
+                    print('')  # Insert line break after test_case print
+                    raise UnityTestException(self, {'test_source': test_source,
+                                                    'line_number': line_number,
+                                                    'test_case': test_case,
+                                                    'failure_reason':
+                                                        failure_reason})
+
+        if simulator_fail:
+            raise Exception(self, "Simulation failed.")
+        if not unity_pass:
+            raise Exception(self, "Unity test output not found.")
+        print('')  # Insert line break after final test_case which passed
+
+    def repr_failure(self, excinfo):
+        if isinstance(excinfo.value, UnityTestException):
+            return '\n'.join([str(self.parent).strip('<>'),
+                              '{}:{}:{}()'.format(
+                                    excinfo.value[1]['test_source'],
+                                    excinfo.value[1]['line_number'],
+                                    excinfo.value[1]['test_case']),
+                              'Failure reason:',
+                              excinfo.value[1]['failure_reason']])
+        else:
+            return str(excinfo.value)
+
+    def reportinfo(self):
+        # It's not possible to give sensible line number info for an executable
+        # so we return it as 0.
+        #
+        # The source line number will instead be recovered from the Unity print
+        # statements.
+        return self.fspath, 0, self.name
+
+
+class UnityTestException(Exception):
+    pass
diff --git a/test/lib_aec/aec_unit_tests/generate_unity_runner.py b/test/lib_aec/aec_unit_tests/generate_unity_runner.py
new file mode 100644
index 000000000..c4829b025
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/generate_unity_runner.py
@@ -0,0 +1,58 @@
+import glob
+import os.path
+import subprocess
+import sys
+import argparse
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--project-root", nargs='?', help="Project root directory")
+    parser.add_argument("--source-file", nargs='?', help="source file.")
+    parser.add_argument("--runner-file", nargs='?', help="runner file.")
+    args = parser.parse_args()
+    return args
+
+def get_ruby():
+    """
+    Check ruby is avaliable and return the command to invoke it.
+    """
+    interpreter_name = 'ruby'
+    try:
+        dev_null = open(os.devnull, 'w')
+        # Call the version command to check the interpreter can be run
+        subprocess.check_call([interpreter_name, '--version'],
+                              stdout=dev_null,
+                              close_fds=True)
+    except OSError as e:
+        print("Failed to run Ruby interpreter: {}".format(e), file=sys.stderr)
+        exit(1)  # TODO: Check this is the correct way to kill xwaf on error
+
+    return interpreter_name
+
+def get_unity_runner_generator(project_root_path):
+    """
+    Check the Unity generate_test_runner script is avaliable, and return the
+    path to it.
+    """
+    unity_runner_generator = os.path.join(
+        project_root_path, 'Unity', 'auto', 'generate_test_runner.rb')
+    if not os.path.exists(unity_runner_generator):
+        print("Unity repo not found in workspace", file=sys.stderr)
+        exit(1)  # TODO: Check this is the correct way to kill xwaf on error
+    return unity_runner_generator
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    print(f"in python: root {args.project_root}, source {args.source_file}, runner {args.runner_file}")
+
+    try:
+        subprocess.check_call([get_ruby(),
+                               get_unity_runner_generator(args.project_root),
+                               args.source_file,
+                               args.runner_file])
+    except OSError as e:
+        print("Ruby generator failed for {}\n\t{}".format(unity_test_path, e),
+              file=sys.stderr)
+        exit(1)  # TODO: Check this is the correct way to kill xwaf on error
+    
+
diff --git a/test/lib_aec/aec_unit_tests/pytest.ini b/test/lib_aec/aec_unit_tests/pytest.ini
new file mode 100644
index 000000000..75a2ca300
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = ../../../build/test/lib_aec/aec_unit_tests/
+#testpaths = bin
diff --git a/test/lib_aec/aec_unit_tests/src/aec_unit_tests.h b/test/lib_aec/aec_unit_tests/src/aec_unit_tests.h
new file mode 100644
index 000000000..7ac0eb443
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/aec_unit_tests.h
@@ -0,0 +1,36 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AEC_UNIT_TESTS_
+#define AEC_UNIT_TESTS_
+
+#include "unity.h"
+
+#ifdef __XC__
+
+#include <xs1.h>
+#include <string.h>
+#include <math.h>
+
+#include <xclib.h>
+
+#include "audio_test_tools.h"
+extern "C" {
+#include "aec_defines.h"
+}
+#include "aec_config.h"
+#include "aec_memory_pool.h"
+
+#define TEST_ASM 1
+#define MULTI_CORE 1
+
+// Define SPEEDUP_FACTOR as a power of 2 greater than 1 to speed up testing by that factor (F falls back to 1)
+#undef F
+#if SPEEDUP_FACTOR
+    #define F (SPEEDUP_FACTOR)
+#else
+    #define F 1
+#endif
+
+#endif // __XC__
+
+#endif /* AEC_UNIT_TESTS_ */
diff --git a/test/lib_aec/aec_unit_tests/src/calc_max_ref_energy_c_wrapper.c b/test/lib_aec/aec_unit_tests/src/calc_max_ref_energy_c_wrapper.c
new file mode 100644
index 000000000..58706a4e0
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/calc_max_ref_energy_c_wrapper.c
@@ -0,0 +1,9 @@
+
+#include "aec_config.h"
+#include "aec_api.h"
+
+float_s32_t aec_calc_max_ref_energy_c_wrapper(int32_t (*input)[AEC_FRAME_ADVANCE], int channels) {
+    // Thin C shim: lets callers pass a non-const input array to
+    // aec_calc_max_ref_energy(), which expects a pointer to const data.
+    return aec_calc_max_ref_energy(input, channels);
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_Error_and_Y_hat.xc b/test/lib_aec/aec_unit_tests/src/test_calc_Error_and_Y_hat.xc
new file mode 100644
index 000000000..9a009028c
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_Error_and_Y_hat.xc
@@ -0,0 +1,263 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define TEST_MAIN_PHASES (5)
+#define TEST_NUM_Y (1)
+#define TEST_NUM_X (2)
+#define TEST_SHADOW_PHASES (3)
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+
+void calc_Error_and_Y_hat_fp(
+        dsp_complex_fp (*Error)[NUM_BINS],
+        dsp_complex_fp (*Y_hat)[NUM_BINS],
+        dsp_complex_fp (*Y)[NUM_BINS],
+        dsp_complex_fp (*H_hat_main)[TEST_NUM_X*TEST_MAIN_PHASES][NUM_BINS],
+        dsp_complex_fp (*H_hat_shad)[TEST_NUM_X*TEST_SHADOW_PHASES][NUM_BINS],
+        dsp_complex_fp (*X_fifo)[TEST_MAIN_PHASES][NUM_BINS],
+        int y_channels,
+        int x_channels,
+        int phases,
+        int is_main,
+        int bypass) {
+    // Floating-point reference: Y_hat = sum(H_hat * X_fifo), Error = Y - Y_hat; H_hat_main is read when is_main is set, H_hat_shad otherwise.
+    if(bypass) { // bypass: Y_hat is zeroed and Error is a plain copy of Y
+        for(int ych=0; ych<y_channels; ych++) {
+            for(int bin=0; bin<NUM_BINS; bin++) {
+                Y_hat[ych][bin].re = 0.0;
+                Y_hat[ych][bin].im = 0.0;
+                Error[ych][bin].re = Y[ych][bin].re;
+                Error[ych][bin].im = Y[ych][bin].im;
+            }
+        }
+        return;
+    }
+    for(int ych=0; ych<y_channels; ych++) {
+        for(int bin=0; bin<NUM_BINS; bin++) {
+            Y_hat[ych][bin].re = 0.0;
+            Y_hat[ych][bin].im = 0.0;
+        }
+        for(int xch=0; xch<x_channels; xch++) { // accumulate the complex products over x-channels and phases
+            for(int ph=0; ph<phases; ph++) {
+                int tap = xch*phases + ph; // row of H_hat paired with X_fifo[xch][ph]
+                if(is_main) {
+                    for(int bin=0; bin<NUM_BINS; bin++) {
+                        Y_hat[ych][bin].re += ((H_hat_main[ych][tap][bin].re * X_fifo[xch][ph][bin].re) - (H_hat_main[ych][tap][bin].im * X_fifo[xch][ph][bin].im));
+                        Y_hat[ych][bin].im += ((H_hat_main[ych][tap][bin].re * X_fifo[xch][ph][bin].im) + (H_hat_main[ych][tap][bin].im * X_fifo[xch][ph][bin].re));
+                    }
+                } else {
+                    for(int bin=0; bin<NUM_BINS; bin++) {
+                        Y_hat[ych][bin].re += ((H_hat_shad[ych][tap][bin].re * X_fifo[xch][ph][bin].re) - (H_hat_shad[ych][tap][bin].im * X_fifo[xch][ph][bin].im));
+                        Y_hat[ych][bin].im += ((H_hat_shad[ych][tap][bin].re * X_fifo[xch][ph][bin].im) + (H_hat_shad[ych][tap][bin].im * X_fifo[xch][ph][bin].re));
+                    }
+                }
+            }
+        }
+        for(int bin=0; bin<NUM_BINS; bin++) { // Error = Y - Y_hat
+            Error[ych][bin].re = Y[ych][bin].re - Y_hat[ych][bin].re;
+            Error[ych][bin].im = Y[ych][bin].im - Y_hat[ych][bin].im;
+        }
+    }
+}
+
+void test_calc_Error_and_Y_hat() {
+    unsafe { // Randomised comparison of aec_calc_Error_and_Y_hat() and the chunked L2 API against calc_Error_and_Y_hat_fp()
+    unsigned num_y_channels = TEST_NUM_Y;
+    unsigned num_x_channels = TEST_NUM_X;
+    unsigned main_filter_phases = TEST_MAIN_PHASES;
+    unsigned shadow_filter_phases = TEST_SHADOW_PHASES;
+    
+    aec_state_t state, shadow_state;
+    aec_memory_pool_t aec_memory_pool;
+    aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+    aec_shared_state_t aec_shared_state;
+    
+    aec_init(&state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+    
+    //Floating-point copies of the DUT inputs (H_hat, X_fifo, Y) and the reference outputs (Y_hat, Error)
+    dsp_complex_fp H_hat_fp[TEST_NUM_Y][TEST_NUM_X*TEST_MAIN_PHASES][NUM_BINS];
+    dsp_complex_fp H_hat_shadow_fp[TEST_NUM_Y][TEST_NUM_X*TEST_SHADOW_PHASES][NUM_BINS];
+    dsp_complex_fp X_fifo_fp[TEST_NUM_X][TEST_MAIN_PHASES][NUM_BINS];
+    dsp_complex_fp Y_fp[TEST_NUM_Y][NUM_BINS];
+    dsp_complex_fp Y_hat_fp[TEST_NUM_Y][NUM_BINS];
+    dsp_complex_fp Error_fp[TEST_NUM_Y][NUM_BINS];
+
+    unsigned seed = 2;
+    int max_diff = 0;
+    for(int iter=0; iter<(1<<12)/F; iter++) {
+        int32_t new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+        unsigned is_main = att_random_uint32(seed) % 2;
+        aec_state_t *state_ptr;
+        if(is_main) {
+            state_ptr = &state;
+        }
+        else {
+            state_ptr = &shadow_state;
+        }
+        unsigned test_l2_api = att_random_uint32(seed) % 2;
+        //printf("is_main %d, test_l2_api %d\n", is_main, test_l2_api);
+        int bypass = att_random_uint32(seed) % 2;
+        state_ptr->shared_state->config_params.aec_core_conf.bypass = bypass;
+
+        aec_frame_init(&state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);        
+        for(int ch=0; ch<num_y_channels; ch++) {
+            //state_ptr->shared_state->Y is initialised in the y->Y fft aec_fft() call with state_ptr->shared_state->y as input. Initialising here for
+            //standalone testing.
+            bfp_complex_s32_init(&state_ptr->shared_state->Y[ch], (complex_s32_t*)&state_ptr->shared_state->y[ch].data[0], 0, NUM_BINS, 0);
+        }
+        //Generate H_hat
+        for(int ch=0; ch<num_y_channels; ch++) {
+            for(int ph=0; ph<num_x_channels*state_ptr->num_phases; ph++) {
+                state_ptr->H_hat[ch][ph].exp = sext(att_random_int32(seed), 6);
+                state_ptr->H_hat[ch][ph].hr = att_random_uint32(seed) % 3;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->H_hat[ch][ph].data[i].re = att_random_int32(seed) >> state_ptr->H_hat[ch][ph].hr;
+                    state_ptr->H_hat[ch][ph].data[i].im = att_random_int32(seed) >> state_ptr->H_hat[ch][ph].hr;
+                    if(is_main) {
+                        H_hat_fp[ch][ph][i].re = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].re, state_ptr->H_hat[ch][ph].exp);
+                        H_hat_fp[ch][ph][i].im = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].im, state_ptr->H_hat[ch][ph].exp);
+                    }
+                    else {
+                        H_hat_shadow_fp[ch][ph][i].re = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].re, state_ptr->H_hat[ch][ph].exp);
+                        H_hat_shadow_fp[ch][ph][i].im = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].im, state_ptr->H_hat[ch][ph].exp);
+                    }
+                }
+            }
+        }
+        //Generate X_fifo, (always for number of phases in main_state)
+        aec_state_t *main_state_ptr = &state;
+        for(int ch=0; ch<num_x_channels; ch++) {
+            for(int ph=0; ph<main_state_ptr->num_phases; ph++) {
+                state_ptr->shared_state->X_fifo[ch][ph].exp = sext(att_random_int32(seed), 6);
+                state_ptr->shared_state->X_fifo[ch][ph].hr = att_random_uint32(seed) % 3;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->shared_state->X_fifo[ch][ph].data[i].re = att_random_int32(seed) >> state_ptr->shared_state->X_fifo[ch][ph].hr;
+                    state_ptr->shared_state->X_fifo[ch][ph].data[i].im = att_random_int32(seed) >> state_ptr->shared_state->X_fifo[ch][ph].hr;
+
+                    X_fifo_fp[ch][ph][i].re = att_int32_to_double(state_ptr->shared_state->X_fifo[ch][ph].data[i].re, state_ptr->shared_state->X_fifo[ch][ph].exp);
+                    X_fifo_fp[ch][ph][i].im = att_int32_to_double(state_ptr->shared_state->X_fifo[ch][ph].data[i].im, state_ptr->shared_state->X_fifo[ch][ph].exp);
+                }
+            }
+        }
+        //aec init only initialises the 2d Xfifo. Since we're using the 1d fifo for error computation, call aec_update_X_fifo_1d()
+        //to update the 1d Fifo
+        aec_update_X_fifo_1d(state_ptr);
+        //Generate Y
+        for(int ch=0; ch<num_y_channels; ch++) {
+            state_ptr->shared_state->Y[ch].exp = sext(att_random_int32(seed), 6);
+            state_ptr->shared_state->Y[ch].hr = att_random_uint32(seed) % 3;                
+            for(int i=0; i<NUM_BINS; i++) {
+                state_ptr->shared_state->Y[ch].data[i].re = att_random_int32(seed) >> state_ptr->shared_state->Y[ch].hr;
+                state_ptr->shared_state->Y[ch].data[i].im = att_random_int32(seed) >> state_ptr->shared_state->Y[ch].hr;
+
+                Y_fp[ch][i].re = att_int32_to_double(state_ptr->shared_state->Y[ch].data[i].re, state_ptr->shared_state->Y[ch].exp);
+                Y_fp[ch][i].im = att_int32_to_double(state_ptr->shared_state->Y[ch].data[i].im, state_ptr->shared_state->Y[ch].exp);
+            }
+        }
+        //Calculate reference
+        if(is_main) {
+            calc_Error_and_Y_hat_fp(Error_fp, Y_hat_fp, Y_fp, H_hat_fp, NULL, X_fifo_fp, num_y_channels, num_x_channels, state_ptr->num_phases, is_main, bypass);
+        }
+        else {
+            calc_Error_and_Y_hat_fp(Error_fp, Y_hat_fp, Y_fp, NULL, H_hat_shadow_fp, X_fifo_fp, num_y_channels, num_x_channels, state_ptr->num_phases, is_main, bypass);
+        }
+        //Calculate DUT
+        if(!test_l2_api) {
+            for(int ch=0; ch<num_y_channels; ch++) {
+                aec_calc_Error_and_Y_hat(state_ptr, ch);
+            }
+        }
+        else { //test calling l2 API directly
+            #define NUM_CHUNKS_PER_Y (4) //spread each y-channel over 4 chunks
+            int mapping[TEST_NUM_Y*NUM_CHUNKS_PER_Y];
+            for(int i=0; i<TEST_NUM_Y*NUM_CHUNKS_PER_Y; i++) {
+                mapping[i] = -1;
+            }
+            bfp_complex_s32_t Error_par[TEST_NUM_Y*NUM_CHUNKS_PER_Y], Y_hat_par[TEST_NUM_Y*NUM_CHUNKS_PER_Y];
+            for(unsigned t=0; t<TEST_NUM_Y; t++) {
+                int remaining_length = NUM_BINS;
+                int start = 0;
+                for(unsigned j=0; j<NUM_CHUNKS_PER_Y; j++) { 
+                    unsigned ch = t;
+                    unsigned length = 0;
+                    unsigned start_offset = 0;
+                    if((j == NUM_CHUNKS_PER_Y - 1) || (remaining_length <= 1)) { //last chunk, or at most one bin left: take everything that remains
+                        length = remaining_length;
+                        start_offset = start;
+                        remaining_length = 0;
+                    }
+                    else if(remaining_length > 1) { //otherwise take a random length in [0, remaining_length), which may be 0
+                        length = att_random_uint32(seed) % remaining_length;
+                        start_offset = start;
+                        start += length;
+                        remaining_length -= length;
+                    }
+
+                    unsigned index = (t*NUM_CHUNKS_PER_Y) + j;
+                    mapping[index] = ch;
+                    //printf("ych %d, chunk %d, start_offset %d, length %d, mapping[%d]=%d\n",t, j, start_offset, length, index, mapping[index]);
+
+                    bfp_complex_s32_init(&Error_par[index], &state_ptr->Error[ch].data[start_offset], state_ptr->Error[ch].exp, length, 0);
+                    Error_par[index].hr = state_ptr->Error[ch].hr;
+                    bfp_complex_s32_init(&Y_hat_par[index], &state_ptr->Y_hat[ch].data[start_offset], state_ptr->Y_hat[ch].exp, length, 0);
+                    Y_hat_par[index].hr = state_ptr->Y_hat[ch].hr;
+
+                    aec_l2_calc_Error_and_Y_hat(&Error_par[index], &Y_hat_par[index], &state_ptr->shared_state->Y[ch], state_ptr->X_fifo_1d, state_ptr->H_hat[ch], num_x_channels, state_ptr->num_phases, start_offset, length, state_ptr->shared_state->config_params.aec_core_conf.bypass);
+                    //printf("Error: (%d, %d), Y_hat: (%d,%d)\n", Error_par[index].exp, Error_par[index].hr, Y_hat_par[index].exp, Y_hat_par[index].hr);
+                }
+            }    
+            //printf("\n");
+            //Unify: obtain one exponent/hr per y-channel from its chunks and store it in the state (presumably the chunks are rescaled to match - TODO confirm)
+            for(int ch=0; ch<num_y_channels; ch++) {
+                int final_exp, final_hr;
+                aec_l2_bfp_complex_s32_unify_exponent(Error_par, &final_exp, &final_hr, mapping, TEST_NUM_Y*NUM_CHUNKS_PER_Y, ch, 0);                
+                state_ptr->Error[ch].exp = final_exp;
+                state_ptr->Error[ch].hr = final_hr;
+                if(state_ptr->Error[ch].exp == INT_MIN)
+                {
+                    assert(0);
+                }
+                aec_l2_bfp_complex_s32_unify_exponent(Y_hat_par, &final_exp, &final_hr, mapping, TEST_NUM_Y*NUM_CHUNKS_PER_Y, ch, 0);                
+                state_ptr->Y_hat[ch].exp = final_exp;
+                state_ptr->Y_hat[ch].hr = final_hr;
+                if(state_ptr->Y_hat[ch].exp == INT_MIN)
+                {
+                    assert(0);
+                }
+                //printf("l2: ch %d, Error: (exp %d, hr %d), Y_hat: (exp %d, hr %d)\n",ch, state_ptr->Error[ch].exp, state_ptr->Error[ch].hr, state_ptr->Y_hat[ch].exp, state_ptr->Y_hat[ch].hr);
+            }
+        }
+
+        //compare results: the reference is requantised to the DUT exponent and the running max_diff must stay <= 1<<4
+        //printf("iter = %d\n",iter);
+        for(int ch=0; ch<num_y_channels; ch++) {
+            int32_t *dut_Error_ptr = (int32_t*)&state_ptr->Error[ch].data[0];
+            double *ref_Error_ptr = (double*)&Error_fp[ch][0];
+
+            int32_t *dut_Y_hat_ptr = (int32_t*)&state_ptr->Y_hat[ch].data[0];
+            double *ref_Y_hat_ptr = (double*)&Y_hat_fp[ch][0];
+            for(int i=0; i<NUM_BINS*2; i++) {
+                //Error
+                int32_t diff = att_double_to_int32(ref_Error_ptr[i], state_ptr->Error[ch].exp) - dut_Error_ptr[i];
+                diff = (diff < 0) ? -diff : diff;
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<4, max_diff, "Error diff too large.");
+                //Y_hat
+                diff = att_double_to_int32(ref_Y_hat_ptr[i], state_ptr->Y_hat[ch].exp) - dut_Y_hat_ptr[i];
+                diff = (diff < 0) ? -diff : diff;
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<4, max_diff, "Y_hat diff too large.");
+            }            
+        }
+        
+    }
+    printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_coherence.xc b/test/lib_aec/aec_unit_tests/src/test_calc_coherence.xc
new file mode 100644
index 000000000..55866d3f3
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_coherence.xc
@@ -0,0 +1,170 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+typedef struct { // Floating-point reference config and per-channel state; presumably mirrors coherence_mu_params_t - TODO confirm
+    double coh_alpha; // smoothing factor of the coh moving average
+    double coh_slow_alpha; // smoothing factor of the coh_slow moving average
+    double coh_thresh_slow;
+    double coh_thresh_abs;
+    double mu_scalar;
+    double eps; // added to the coherence denominator
+    double thresh_minus20dB; // NOTE(review): not assigned by init_coherence_mu_config_fp()
+    double x_energy_thresh;
+
+    unsigned mu_coh_time;
+    unsigned mu_shad_time;
+    aec_adaption_e adaption_config;
+    double force_adaption_mu;
+
+    double coh[AEC_MAX_Y_CHANNELS]; // per-channel moving average of the frame coherence
+    double coh_slow[AEC_MAX_Y_CHANNELS]; // per-channel slow moving average of coh
+    unsigned mu_coh_count[AEC_MAX_Y_CHANNELS];
+    unsigned mu_shad_count[AEC_MAX_Y_CHANNELS];
+} coherence_mu_params_fp;
+
+static void init_coherence_mu_config_fp(coherence_mu_params_fp *cfg, int channels) {
+    // Smoothing factors, thresholds and eps
+    cfg->eps = 1e-100;
+    cfg->coh_alpha = 0.0;
+    cfg->coh_slow_alpha = 0.99;
+    cfg->coh_thresh_abs = 0.65;
+    cfg->coh_thresh_slow = 0.9;
+    cfg->x_energy_thresh = -40.0;
+    // Adaption (mu) settings
+    cfg->adaption_config = AEC_ADAPTION_AUTO;
+    cfg->force_adaption_mu = 1.0;
+    cfg->mu_scalar = 1.0;
+    cfg->mu_shad_time = 30;
+    cfg->mu_coh_time = 2;
+    // Per-channel running state for the first `channels` entries
+    for(int ch=0; ch<channels; ch++) {
+        cfg->mu_shad_count[ch] = 0;
+        cfg->mu_coh_count[ch] = 0;
+        cfg->coh_slow[ch] = 0.0;
+        cfg->coh[ch] = 1.0;
+    }
+}
+
+void aec_calc_coherence_fp(
+        coherence_mu_params_fp *cfg,
+        double (*y)[AEC_PROC_FRAME_LENGTH],
+        double (*y_hat)[AEC_PROC_FRAME_LENGTH],
+        int channels,
+        int bypass) {
+    // Floating-point reference for the coherence update of each channel.
+    // Nothing is updated when bypass is set.
+    if(bypass) return;
+
+    for(int c=0; c<channels; c++) {
+        // Energies and cross-correlation over samples 240..479 of the frame
+        double e_y = 0.0;
+        double e_yhat = 0.0;
+        double xcorr = 0.0;
+        for(int n=240; n<480; n++) {
+            e_y += (y[c][n] * y[c][n]);
+            xcorr += (y[c][n] * y_hat[c][n]);
+            e_yhat += (y_hat[c][n] * y_hat[c][n]);
+        }
+        // coherence = |xcorr| / (sqrt(e_y)*sqrt(e_yhat) + eps)
+        double ratio = xcorr / ((sqrt(e_y) * sqrt(e_yhat)) + cfg->eps);
+        double coh_now = (ratio < 0.0) ? -ratio : ratio;
+
+        // Moving average: coh = coh_alpha*coh + (1 - coh_alpha)*coh_now
+        double prev_coh = cfg->coh[c];
+        cfg->coh[c] = (cfg->coh_alpha * prev_coh) + ((1.0 - cfg->coh_alpha) * coh_now);
+
+        // Slow moving average of coh, used for thresholding
+        double prev_slow = cfg->coh_slow[c];
+        cfg->coh_slow[c] = (cfg->coh_slow_alpha * prev_slow) + ((1.0 - cfg->coh_slow_alpha) * cfg->coh[c]);
+    }
+}
+
+void test_calc_coherence() {
+    unsafe {
+        // Randomised comparison of aec_calc_coherence() against the floating-point reference aec_calc_coherence_fp()
+        unsigned num_y_channels = 1;
+        unsigned num_x_channels = 1;
+        unsigned num_phases = AEC_MAIN_FILTER_PHASES - 1;
+
+        aec_memory_pool_t aec_memory_pool;
+        aec_state_t state;
+        aec_shared_state_t aec_shared_state;
+        aec_init(&state, NULL, &aec_shared_state, (uint8_t*)&aec_memory_pool, NULL, num_y_channels, num_x_channels, num_phases, 0);
+        
+        //Initialize floating point
+        coherence_mu_params_fp coh_params_fp;
+        init_coherence_mu_config_fp(&coh_params_fp, num_y_channels);
+        double y_fp[AEC_MAX_Y_CHANNELS][AEC_PROC_FRAME_LENGTH], y_hat_fp[AEC_MAX_Y_CHANNELS][AEC_PROC_FRAME_LENGTH];
+        
+        int32_t new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+        unsigned seed = 10;
+        int32_t max_diff = 0; 
+        for(int iter=0; iter<(1<<12)/F; iter++) {
+            state.shared_state->config_params.aec_core_conf.bypass = att_random_uint32(seed) % 2;
+            aec_frame_init(&state, NULL, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]); //frame init will copy y[240:480] into output
+            //state.y_hat is initialised as part of Y_hat -> y_hat ifft. Do it here for standalone testing
+            for(int ch=0; ch<num_y_channels; ch++) {
+                bfp_s32_init(&state.y_hat[ch], (int32_t*)&state.Y_hat[ch].data[0], 0, AEC_PROC_FRAME_LENGTH, 0);
+            }
+
+            for(int ch=0; ch<num_y_channels; ch++) {
+                state.shared_state->y[ch].exp = sext(att_random_int32(seed), 6);
+                state.shared_state->y[ch].hr = att_random_uint32(seed) % 4;
+
+                state.y_hat[ch].exp = sext(att_random_int32(seed), 6);
+                state.y_hat[ch].hr = att_random_uint32(seed) % 4;
+
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++) {
+                    state.shared_state->y[ch].data[i] = att_random_int32(seed) >> state.shared_state->y[ch].hr;
+                    y_fp[ch][i] = att_int32_to_double(state.shared_state->y[ch].data[i], state.shared_state->y[ch].exp);
+
+                    state.y_hat[ch].data[i] = att_random_int32(seed) >> state.y_hat[ch].hr;
+                    y_hat_fp[ch][i] = att_int32_to_double(state.y_hat[ch].data[i], state.y_hat[ch].exp);
+                }
+            }
+
+            //since state.shared_state->y is being initialised with a new frame after calling aec_frame_init(), we need to update state->shared_state->prev_y again since that's where y[240:480] is read from in aec_calc_coherence()
+            for(int ch=0; ch<num_y_channels; ch++) {
+                memcpy(state.shared_state->prev_y[ch].data, &state.shared_state->y[ch].data[AEC_FRAME_ADVANCE], (AEC_PROC_FRAME_LENGTH-AEC_FRAME_ADVANCE)*sizeof(int32_t));
+                state.shared_state->prev_y[ch].exp = state.shared_state->y[ch].exp;
+                state.shared_state->prev_y[ch].hr = state.shared_state->y[ch].hr;
+            }
+
+
+            aec_calc_coherence_fp(&coh_params_fp, y_fp, y_hat_fp, num_y_channels, state.shared_state->config_params.aec_core_conf.bypass);
+
+            for(int ch=0; ch<num_y_channels; ch++) {
+                //Run the DUT; its results are read back from state.shared_state->coh_mu_state[ch] below
+                aec_calc_coherence(&state, ch);
+                
+            }
+            for(int ch=0; ch<num_y_channels; ch++) {
+                coherence_mu_params_t *coh_mu_state_ptr = &state.shared_state->coh_mu_state[ch];
+
+                //printf("coh: %f, %f, (%d, %d)\n", coh_params_fp.coh[ch], ldexp(coh_mu_state_ptr->coh.mant, coh_mu_state_ptr->coh.exp), coh_mu_state_ptr->coh.mant, coh_mu_state_ptr->coh.exp);
+                int32_t dut_coh = coh_mu_state_ptr->coh.mant;
+                int32_t ref_coh = att_double_to_int32(coh_params_fp.coh[ch], coh_mu_state_ptr->coh.exp);
+                int diff = abs(ref_coh - dut_coh);
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<13, diff, "coh diff too large.");
+                if(diff > max_diff) max_diff = diff;
+
+                //printf("coh_slow: %f, %f, (%d, %d)\n", coh_params_fp.coh_slow[ch], ldexp(coh_mu_state_ptr->coh_slow.mant, coh_mu_state_ptr->coh_slow.exp), coh_mu_state_ptr->coh_slow.mant, coh_mu_state_ptr->coh_slow.exp);
+                int32_t dut_coh_slow = coh_mu_state_ptr->coh_slow.mant;
+                int32_t ref_coh_slow = att_double_to_int32(coh_params_fp.coh_slow[ch], coh_mu_state_ptr->coh_slow.exp);
+                diff = abs(ref_coh_slow - dut_coh_slow);
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<13, diff, "coh slow diff too large.");
+            }
+            
+        }
+        printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_corr_factor.xc b/test/lib_aec/aec_unit_tests/src/test_calc_corr_factor.xc
new file mode 100644
index 000000000..c7c427966
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_corr_factor.xc
@@ -0,0 +1,94 @@
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_api.h"
+}
+
+#define TEST_MAIN_PHASES (10)
+#define TEST_NUM_Y (2)
+#define TEST_NUM_X (2)
+#define TEST_SHADOW_PHASES (0)
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+
+double calc_corr_factor_fp(double *y_full, double *yhat_full) {
+    // Floating-point reference: |sum(y*yhat)| / sum(|y|*|yhat|), evaluated over
+    // the first AEC_FRAME_ADVANCE-32 samples that follow offset
+    // AEC_FRAME_ADVANCE in each input. Returns 0.0 when the denominator is 0.
+    double *y_win = &y_full[AEC_FRAME_ADVANCE];
+    double *yhat_win = &yhat_full[AEC_FRAME_ADVANCE];
+
+    double num = 0.0; // signed correlation sum(y*yhat)
+    double den = 0.0; // sum(|y|*|yhat|)
+    for(int n=0; n<AEC_FRAME_ADVANCE-32; n++) {
+        double y_mag = (y_win[n] < 0.0) ? -y_win[n] : y_win[n];
+        double yhat_mag = (yhat_win[n] < 0.0) ? -yhat_win[n] : yhat_win[n];
+        num += (y_win[n] * yhat_win[n]);
+        den += (y_mag * yhat_mag);
+    }
+    if(den == 0.0) {
+        return 0.0;
+    }
+    // magnitude of the correlation, normalised by den
+    return ((num < 0.0) ? -num : num) / den;
+}
+
+void test_calc_corr_factor() {
+    unsafe {
+    // Randomised comparison of aec_calc_corr_factor() against the floating-point reference calc_corr_factor_fp()
+    unsigned num_y_channels = TEST_NUM_Y;
+    unsigned num_x_channels = TEST_NUM_X;
+    unsigned main_filter_phases = TEST_MAIN_PHASES;
+    unsigned shadow_filter_phases = TEST_SHADOW_PHASES;
+    
+    aec_state_t state;
+    aec_memory_pool_t [[aligned(8)]] aec_memory_pool;
+    aec_shared_state_t aec_shared_state;
+    double y_fp[TEST_NUM_Y][AEC_PROC_FRAME_LENGTH], y_hat_fp[TEST_NUM_Y][AEC_PROC_FRAME_LENGTH];
+    
+    aec_init(&state, NULL, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)NULL, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+    
+    unsigned seed = 10345;
+    int32_t max_diff = 0; 
+    for(int iter=0; iter<(1<<12)/F; iter++) {
+        //state.y_hat is initialised as part of Y_hat -> y_hat ifft. Do it here for standalone testing
+        for(int ch=0; ch<num_y_channels; ch++) {
+            bfp_s32_init(&state.y_hat[ch], (int32_t*)&state.Y_hat[ch].data[0], 0, AEC_PROC_FRAME_LENGTH, 0);
+        }
+
+        for(int ch=0; ch<num_y_channels; ch++) {
+            state.shared_state->y[ch].exp = sext(att_random_int32(seed), 6);
+            state.shared_state->y[ch].hr = att_random_uint32(seed) % 4;
+
+            state.y_hat[ch].exp = sext(att_random_int32(seed), 6);
+            state.y_hat[ch].hr = att_random_uint32(seed) % 4;
+
+            for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++) {
+                state.shared_state->y[ch].data[i] = att_random_int32(seed) >> state.shared_state->y[ch].hr;
+                y_fp[ch][i] = att_int32_to_double(state.shared_state->y[ch].data[i], state.shared_state->y[ch].exp);
+
+                state.y_hat[ch].data[i] = att_random_int32(seed) >> state.y_hat[ch].hr;
+                y_hat_fp[ch][i] = att_int32_to_double(state.y_hat[ch].data[i], state.y_hat[ch].exp);
+            }
+        }
+
+        //refresh state->shared_state->prev_y from the newly generated y frame; presumably aec_calc_corr_factor() reads its y samples from prev_y - TODO confirm
+        for(int ch=0; ch<num_y_channels; ch++) {
+            memcpy(state.shared_state->prev_y[ch].data, &state.shared_state->y[ch].data[AEC_FRAME_ADVANCE], (AEC_PROC_FRAME_LENGTH-AEC_FRAME_ADVANCE)*sizeof(int32_t));
+            state.shared_state->prev_y[ch].exp = state.shared_state->y[ch].exp;
+            state.shared_state->prev_y[ch].hr = state.shared_state->y[ch].hr;
+        }
+
+        for(int ch=0; ch<num_y_channels; ch++) {
+            double ref_corr = calc_corr_factor_fp(y_fp[ch], y_hat_fp[ch]);
+            float_s32_t dut_corr = aec_calc_corr_factor(&state, ch); 
+            int32_t ref = att_double_to_int32(ref_corr, dut_corr.exp);
+            int32_t diff = abs(ref - dut_corr.mant);
+            TEST_ASSERT_LESS_OR_EQUAL_INT32_MESSAGE(1<<15, diff, "corr_factor diff too large.");
+            if(diff > max_diff) {max_diff = diff;}
+        } 
+    }
+    //printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_fd_frame_energy.xc b/test/lib_aec/aec_unit_tests/src/test_calc_fd_frame_energy.xc
new file mode 100644
index 000000000..436be8a33
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_fd_frame_energy.xc
@@ -0,0 +1,49 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+void calc_fd_frame_energy_fp(double *output, dsp_complex_fp *input, int length) {
+    double energy = 0.0; // running sum of re^2 + im^2 over the first `length` entries
+    for(int k=0; k<length; k++)
+        energy += ((input[k].re * input[k].re) + (input[k].im * input[k].im));
+    *output = energy;
+}
+
+#define TEST_LEN (AEC_PROC_FRAME_LENGTH/2 + 1)
+void test_calc_fd_frame_energy() {
+    unsafe {
+    // Randomised comparison of aec_calc_freq_domain_energy() against calc_fd_frame_energy_fp()
+    dsp_complex_fp ref_in[TEST_LEN];
+    complex_s32_t dut_storage[TEST_LEN];
+    bfp_complex_s32_t dut_in;
+    bfp_complex_s32_init(&dut_in, dut_storage, 0, TEST_LEN, 0);
+    unsigned seed = 34575;
+    for(int frame = 0; frame<(1<<12)/F; frame++) {
+        //Random exp and hr, then random mantissas shifted right by hr
+        dut_in.exp = sext(att_random_int32(seed), 6);
+        dut_in.hr = att_random_uint32(seed) % 4;
+        for(int bin=0; bin<TEST_LEN; bin++) {
+            dut_in.data[bin].re = att_random_int32(seed) >> dut_in.hr;
+            dut_in.data[bin].im = att_random_int32(seed) >> dut_in.hr;
+            //Mirror the same sample into the floating-point input
+            ref_in[bin].re = att_int32_to_double(dut_in.data[bin].re, dut_in.exp);
+            ref_in[bin].im = att_int32_to_double(dut_in.data[bin].im, dut_in.exp);
+        }
+        double ref_energy;
+        calc_fd_frame_energy_fp(&ref_energy, ref_in, TEST_LEN);
+        //this only works for input size AEC_PROC_FRAME_LENGTH/2 + 1 since there is a static allocation of scratch memory of this size within the function
+        float_s32_t dut_energy;
+        aec_calc_freq_domain_energy(&dut_energy, &dut_in);
+        //Requantise the reference to the DUT exponent and compare mantissas
+        int32_t ref_mant = att_double_to_int32(ref_energy, dut_energy.exp);
+        TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<2, ref_mant, dut_energy.mant, "Output delta is too large");
+    }
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_inv_X_energy.xc b/test/lib_aec/aec_unit_tests/src/test_calc_inv_X_energy.xc
new file mode 100644
index 000000000..260ed71ba
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_inv_X_energy.xc
@@ -0,0 +1,203 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+//Right shift by 'shift' bins, mirroring about the first input element (DC).
+static void shift_mirror_posi(double *output, const double *input, int shift, int length) {
+    //eg. [1, 2, 3, 4, 5] shifted by 2 gives [3, 2, 1(DC), 2, 3]
+
+    //Shifted copy: input[0] lands at output[shift] => [-, -, 1, 2, 3]
+    for(int dst=shift; dst<length; dst++) {
+        output[dst] = input[dst - shift];
+    }
+    //DC now sits at output[shift]; reflect the bins above it into the
+    //bins below it => [3, 2, -, -, -]
+    for(int k=1; k<=shift; k++) {
+        output[shift - k] = output[shift + k];
+    }
+}
+
+//Left shift by |shift| bins, mirroring about the last input element (nyquist).
+static void shift_mirror_negi(double *output, const double *input, int shift, int length) {
+    //The shift is negated before use, so callers pass a negative value
+    int offset = -shift;
+    //eg. [1, 2, 3, 4, 5] with an offset of 2 gives [3, 4, 5(nyquist), 4, 3]
+
+    //Index at which the last input element (the symmetry point) ends up
+    int pivot = length - offset - 1;
+    //Shifted copy => [3, 4, 5, -, -]
+    for(int dst=0; dst<=pivot; dst++) {
+        output[dst] = input[dst + offset];
+    }
+    //Reflect the bins below the pivot into the remaining bins above it.
+    //In the example this fills in [-, -, -, 4, 3]
+    for(int k=1; k<=offset; k++) {
+        output[pivot + k] = output[pivot - k];
+    }
+}
+
+//Weighted 5-point smoothing of norm_denom across neighbouring bins. taps[2]
+//weights the bin itself, taps[0]/taps[1] weight the shift_mirror_negi() copies
+//(shifts -2/-1) and taps[3]/taps[4] the shift_mirror_posi() copies (shifts 1/2).
+//The result is divided by the sum of the five taps.
+//'scratch' must hold input_length doubles and is overwritten.
+static void vect_smooth(double *output, double *scratch, const double *norm_denom, const double *taps, int input_length) {
+    //Neighbour shifts and the tap applied to each, in accumulation order
+    int shifts[4] = {-2, -1, 1, 2};
+    int tap_index[4] = {0, 1, 3, 4};
+
+    //Centre tap
+    for(int bin=0; bin<input_length; bin++) {
+        output[bin] = norm_denom[bin]*taps[2];
+    }
+    //Shifted and mirrored neighbours
+    for(int n=0; n<4; n++) {
+        if(shifts[n] < 0) {
+            shift_mirror_negi(scratch, norm_denom, shifts[n], input_length);
+        }
+        else {
+            shift_mirror_posi(scratch, norm_denom, shifts[n], input_length);
+        }
+        for(int bin=0; bin<input_length; bin++) {
+            output[bin] += scratch[bin]*taps[tap_index[n]];
+        }
+    }
+
+    //Normalise by the total tap weight
+    double tap_total = 0.0;
+    for(int n=0; n<5; n++) {
+        tap_total += taps[n];
+    }
+    for(int bin=0; bin<input_length; bin++) {
+        output[bin] = output[bin]/tap_total;
+    }
+}
+
+//Floating point model of the normalisation spectrum for NUM_BINS bins.
+//  shadow filter (is_shadow != 0): inv_X_energy = 1 / (X_energy + delta)
+//  main filter   (is_shadow == 0): inv_X_energy = 1 / (smooth(sigma_XX * 2^gamma_log2 + X_energy) + delta)
+void aec_calc_normalisation_spectrum_fp(double *inv_X_energy, double *X_energy, double *sigma_XX, double gamma_log2, double delta, int is_shadow) {
+    double norm_denom[NUM_BINS], scratch[NUM_BINS];
+    double gamma = pow(2.0, gamma_log2);
+    double taps[5] = {0.5, 1, 1, 1, 0.5};
+
+    if(is_shadow) {
+        for(int bin=0; bin<NUM_BINS; bin++) {
+            inv_X_energy[bin] = X_energy[bin] + delta;
+            inv_X_energy[bin] = 1.0 / inv_X_energy[bin];
+        }
+        return;
+    }
+
+    for(int bin=0; bin<NUM_BINS; bin++) {
+        norm_denom[bin] = sigma_XX[bin]*gamma + X_energy[bin];
+    }
+    vect_smooth(inv_X_energy, scratch, norm_denom, taps, NUM_BINS);
+    for(int bin=0; bin<NUM_BINS; bin++) {
+        inv_X_energy[bin] = inv_X_energy[bin] + delta;
+        inv_X_energy[bin] = 1.0 / inv_X_energy[bin];
+    }
+}
+
+//Exercises aec_calc_normalisation_spectrum() on the shadow filter state with
+//delta.mant == 0 and an X_energy spectrum in which randomly chosen bins are 0
+//and the rest are INT_MAX, so that 1/(X_energy + delta) is evaluated for
+//zero-valued denominators. The test makes no assertions on the result; it
+//only exercises the call.
+void test_aec_inv_X_energy_div_by_zero() {
+    unsafe {
+        unsigned num_y_channels = 1;
+        unsigned num_x_channels = 1;
+        unsigned main_filter_phases = 6;
+        unsigned shadow_filter_phases = 2;
+        
+        aec_state_t main_state, shadow_state;
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_shared_state_t aec_shared_state;
+
+        aec_init(&main_state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+        unsigned seed = 3507;
+
+        aec_state_t *state_ptr = &shadow_state;
+        for(int ch=0; ch<num_x_channels; ch++) {
+            state_ptr->X_energy[ch].exp = sext(att_random_int32(seed), 6);
+            state_ptr->X_energy[ch].hr = 0;
+            for(int i=0; i<NUM_BINS; i++) {
+                //Each bin is either exactly zero or the largest positive mantissa
+                unsigned is_zero = att_random_uint32(seed) % 2;
+                if(is_zero) {
+                    state_ptr->X_energy[ch].data[i] = 0;
+                }
+                else {
+                    state_ptr->X_energy[ch].data[i] = INT_MAX;
+                }
+            }
+        }
+        //Zero delta so that bins with zero energy give a zero denominator
+        state_ptr->delta.mant = 0;
+        state_ptr->delta.exp = -1024;
+        unsigned is_shadow = 1; // to just test inv_X_energy = 1/(X_energy + delta) 
+        for(int ch=0; ch<num_x_channels; ch++) {
+            //Pass the flag declared above rather than a bare literal
+            aec_calc_normalisation_spectrum(state_ptr, ch, is_shadow);
+        }
+    }
+}
+//Compares aec_calc_normalisation_spectrum() with the floating point model
+//aec_calc_normalisation_spectrum_fp() on random X_energy, sigma_XX and delta,
+//randomly choosing between the main and the shadow filter state each iteration.
+void test_aec_calc_normalisation_spectrum() {
+    unsafe {
+        unsigned num_y_channels = 1;
+        unsigned num_x_channels = 2;
+        unsigned main_filter_phases = 6;
+        unsigned shadow_filter_phases = 2;
+        
+        aec_state_t main_state, shadow_state;
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_shared_state_t aec_shared_state;
+
+        aec_init(&main_state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+        //declare floating point memory
+        double X_energy_fp[AEC_MAX_X_CHANNELS][NUM_BINS], sigma_XX_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+        double inv_X_energy_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+
+        unsigned seed = 46894;
+        
+        aec_state_t *state_ptr;
+        unsigned max_diff = 0;
+        for(int iter=0; iter<(1<<12)/F; iter++) {
+            unsigned is_shadow = att_random_uint32(seed) % 2;
+            if(is_shadow) {
+                state_ptr = &shadow_state;
+            }
+            else {
+                state_ptr = &main_state;
+            }
+
+            //Generate the inputs for every x channel before running anything
+            for(int ch=0; ch<num_x_channels; ch++) {
+                state_ptr->X_energy[ch].exp = sext(att_random_int32(seed), 6);
+                state_ptr->X_energy[ch].hr = att_random_uint32(seed) % 4;
+
+                state_ptr->shared_state->sigma_XX[ch].exp = sext(att_random_int32(seed), 6);
+                state_ptr->shared_state->sigma_XX[ch].hr = att_random_uint32(seed) % 4;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->X_energy[ch].data[i] = (att_random_int32(seed) & 0x7fffffff) >> state_ptr->X_energy[ch].hr; //energy is positive
+                    X_energy_fp[ch][i] = att_int32_to_double(state_ptr->X_energy[ch].data[i], state_ptr->X_energy[ch].exp);
+
+                    state_ptr->shared_state->sigma_XX[ch].data[i] = (att_random_int32(seed) & 0x7fffffff) >> state_ptr->shared_state->sigma_XX[ch].hr; //sigma_XX is positive
+                    sigma_XX_fp[ch][i] = att_int32_to_double(state_ptr->shared_state->sigma_XX[ch].data[i], state_ptr->shared_state->sigma_XX[ch].exp);
+                }
+                //delta belongs to the filter state, not to a channel. It is still
+                //drawn once per channel so that the random sequence is unchanged;
+                //the value drawn last is the one used below.
+                state_ptr->delta.exp = -32 - (att_random_uint32(seed) & 63);
+                state_ptr->delta.mant = att_random_int32(seed) & 0x7fffffff;
+                if(state_ptr->delta.mant == 0) {
+                    state_ptr->delta = state_ptr->shared_state->config_params.aec_core_conf.delta_min;
+                }
+            }
+
+            //Run the reference and the DUT once per channel, now that every
+            //channel holds this iteration's data
+            double delta_fp = att_int32_to_double(state_ptr->delta.mant, state_ptr->delta.exp);
+            for(int ch=0; ch<num_x_channels; ch++) {
+                aec_calc_normalisation_spectrum_fp(inv_X_energy_fp[ch], X_energy_fp[ch], sigma_XX_fp[ch], 6, delta_fp, is_shadow);
+            }
+            for(int ch=0; ch<num_x_channels; ch++) {
+                aec_calc_normalisation_spectrum(state_ptr, ch, is_shadow);
+            }
+
+            for(int ch=0; ch<num_x_channels; ch++) {
+                unsigned diff = att_bfp_vector_int32((int32_t*)&state_ptr->inv_X_energy[ch].data[0], state_ptr->inv_X_energy[ch].exp, (double*)inv_X_energy_fp[ch], 0, NUM_BINS);
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<16, diff, "inv_X_energy diff too large.");
+                
+                if(diff > max_diff) max_diff = diff;
+                //printf("iter %d diff %d\n",iter, diff);
+            }
+        }
+        //max_diff is unsigned
+        printf("max_diff %u\n", max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_calc_max_ref_energy.xc b/test/lib_aec/aec_unit_tests/src/test_calc_max_ref_energy.xc
new file mode 100644
index 000000000..03b24d167
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_calc_max_ref_energy.xc
@@ -0,0 +1,57 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_api.h"
+    float_s32_t aec_calc_max_ref_energy_c_wrapper(int32_t (*input)[AEC_FRAME_ADVANCE], int channels);
+}
+
+// Floating point model: returns the largest per-channel sum of squared samples
+// over the first 'channels' rows of 'input' (0.0 when channels <= 0).
+double calc_max_ref_energy_fp(double (*input)[AEC_FRAME_ADVANCE], int channels) {
+    double loudest = 0.0;
+    for(int ch=0; ch<channels; ch++) {
+        double energy = 0.0;
+        int n = 0;
+        while(n < AEC_FRAME_ADVANCE) {
+            energy += input[ch][n]*input[ch][n];
+            n++;
+        }
+        loudest = (energy > loudest) ? energy : loudest;
+    }
+    return loudest;
+}
+#define CHANNELS (4)
+// Compares aec_calc_max_ref_energy_c_wrapper() with calc_max_ref_energy_fp()
+// on random frames, using a separate random right shift for each channel.
+void test_calc_max_ref_energy() {
+    unsafe {
+    int32_t [[aligned(8)]] dut[CHANNELS][AEC_FRAME_ADVANCE];
+    double ref[CHANNELS][AEC_FRAME_ADVANCE];
+    float_s32_t dut_max;
+    double ref_max;
+
+    unsigned seed = 568762;
+    int max_diff = 0;
+    for(int iter=0; iter<(1<<12)/F; iter++) {
+        // Generate the input frame and its floating point copy
+        for(int ch=0; ch<CHANNELS; ch++) {
+            int hr = att_random_uint32(seed) % 12;
+            for(int n=0; n<AEC_FRAME_ADVANCE; n++) {
+                dut[ch][n] = att_random_int32(seed) >> hr;
+                ref[ch][n] = att_int32_to_double(dut[ch][n], -31);
+            }
+        }
+
+        ref_max = calc_max_ref_energy_fp(ref, CHANNELS);
+        // xc wouldn't allow passing as pointer to const data so added wrapper c file as quick workaround. This will all be
+        // cleaned up once unit tests are ported to c.
+        dut_max = aec_calc_max_ref_energy_c_wrapper(dut, CHANNELS);
+
+        // Compare mantissas at the exponent chosen by the DUT
+        int dut_mant = dut_max.mant;
+        int ref_mant = att_double_to_int32(ref_max, dut_max.exp);
+        //printf("ref 0x%x, dut 0x%x\n", ref_mant, dut_mant);
+        int32_t diff = ref_mant - dut_mant;
+        diff = (diff < 0) ? -diff : diff;
+        max_diff = (diff > max_diff) ? diff : max_diff;
+        TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<5, ref_mant, dut_mant, "Output delta is too large");
+    }
+    //printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_compare_filters_and_calc_mu.xc b/test/lib_aec/aec_unit_tests/src/test_compare_filters_and_calc_mu.xc
new file mode 100644
index 000000000..33e6def55
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_compare_filters_and_calc_mu.xc
@@ -0,0 +1,862 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define TEST_NUM_Y (2)
+#define TEST_NUM_X (2)
+#define TEST_MAIN_PHASES (3)
+#define TEST_SHADOW_PHASES (1)
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+
+//Floating point configuration read by calc_coherence_mu_fp() and calc_delta_fp().
+typedef struct {
+    double coh_alpha;        //not read by the functions in this part of the file
+    double coh_slow_alpha;   //not read by the functions in this part of the file
+    double coh_thresh_slow;  //multiplies the smallest coh_slow to give the coherence threshold CC_thres
+    double coh_thresh_abs;   //coh below this value starts the low-coherence count
+    double mu_scalar;        //scales coh_mu in AEC_ADAPTION_AUTO mode
+    double eps;              //not read by the functions in this part of the file
+    double thresh_minus20dB; //an x channel whose energy is below (max energy * this) gets coh_mu = 0
+    double x_energy_thresh;  //an x channel whose energy is <= this gets coh_mu = 0
+
+    unsigned mu_coh_time;    //mu_coh_count is reset to 0 once it exceeds this
+    unsigned mu_shad_time;   //mu_shad_count is reset to 0 once it exceeds this
+    aec_adaption_e adaption_config; //AEC_ADAPTION_AUTO, AEC_ADAPTION_FORCE_ON or AEC_ADAPTION_FORCE_OFF
+    double force_adaption_mu; //coh_mu value used in AEC_ADAPTION_FORCE_ON mode
+}coherence_mu_config_fp_t;
+
+//Floating point configuration read by compare_filter_fp() and
+//compare_filters_and_calc_mu_fp(). The *_thresh doubles are ratios applied to
+//the main filter's overall error before comparing with the shadow filter's.
+typedef struct {
+    double shadow_sigma_thresh; //shadow error <= this * main error counts as "shadow better"
+    double shadow_copy_thresh;  //shadow error <= this * main error allows a shadow -> main copy
+    double shadow_reset_thresh; //shadow error >= this * main error counts towards a shadow reset
+    double shadow_delay_thresh; //not read by the functions in this part of the file
+    double x_energy_thresh;     //all x channels below this energy => LOW_REF
+    double shadow_mu;           //value written to every shadow_filt_mu entry
+
+    int shadow_better_thresh;   //shadow_better_count must exceed this for COPY or SIGMA
+    int shadow_zero_thresh;     //shadow_reset_count above this zeroes the shadow filter
+    int shadow_reset_timer;     //shadow_reset_count is set to minus this after the shadow filter is zeroed
+}shadow_filt_config_fp_t;
+
+
+//Working state of the floating point reference model: per-channel counters and
+//flags, filter coefficients, spectra, energies and the computed mu and delta values.
+typedef struct {
+    //shadow filter
+    int shadow_flag[AEC_MAX_Y_CHANNELS];         //per y channel outcome of compare_filter_fp()
+    int shadow_reset_count[AEC_MAX_Y_CHANNELS];  //negative while a zeroed shadow filter is left to reconverge
+    int shadow_better_count[AEC_MAX_Y_CHANNELS]; //consecutive frames in which the shadow filter was better
+
+    //coherence mu
+    double coh[AEC_MAX_Y_CHANNELS];
+    double coh_slow[AEC_MAX_Y_CHANNELS];
+    int mu_coh_count[AEC_MAX_Y_CHANNELS];        //non-zero while a recent drop in coherence is remembered
+    int mu_shad_count[AEC_MAX_Y_CHANNELS];       //non-zero while a recent shadow -> main copy is remembered
+    double coh_mu[AEC_MAX_Y_CHANNELS][AEC_MAX_X_CHANNELS]; //output of calc_coherence_mu_fp()
+
+    //common
+    dsp_complex_fp H_hat[TEST_NUM_Y][TEST_NUM_X][TEST_MAIN_PHASES][NUM_BINS];
+    dsp_complex_fp H_hat_shadow[TEST_NUM_Y][TEST_NUM_X][TEST_SHADOW_PHASES][NUM_BINS];
+    dsp_complex_fp Y[TEST_NUM_Y][NUM_BINS];
+    dsp_complex_fp Error[TEST_NUM_Y][NUM_BINS];
+    dsp_complex_fp Error_shadow[TEST_NUM_Y][NUM_BINS];
+    double sigma_XX[TEST_NUM_X][NUM_BINS];
+    double main_filt_mu[AEC_MAX_Y_CHANNELS][AEC_MAX_X_CHANNELS];
+    double shadow_filt_mu[AEC_MAX_Y_CHANNELS][AEC_MAX_X_CHANNELS];
+    double overall_Error[AEC_MAX_Y_CHANNELS];
+    double overall_Error_shadow[AEC_MAX_Y_CHANNELS];
+    double overall_Y[AEC_MAX_Y_CHANNELS];        //halved in place by compare_filter_fp()
+    double sum_X_energy[AEC_MAX_X_CHANNELS];
+    double max_X_energy_main[AEC_MAX_X_CHANNELS];
+    double max_X_energy_shadow[AEC_MAX_X_CHANNELS];
+    double delta_min;                            //lower bound for delta_main and delta_shadow
+    double delta_adaption_force_on;              //delta used when adaption is not AEC_ADAPTION_AUTO
+    double delta_scale_main;
+    double delta_scale_shadow;
+    double delta_main;                           //output of calc_delta_fp()
+    double delta_shadow;                         //output of calc_delta_fp()
+    int main_phases;
+    int shadow_phases;
+    int y_channels;
+    int x_channels;
+}shadow_filt_params_fp_t;
+
+//Fills in the shadow filter configuration used by the floating point
+//reference model.
+static void init_shadow_config_fp(shadow_filt_config_fp_t *cfg) {
+    cfg->shadow_sigma_thresh = 0.6;
+    cfg->shadow_copy_thresh = 0.5;
+    cfg->shadow_reset_thresh = 1.5;
+    cfg->shadow_delay_thresh = 0.5;
+    //-40dB as a linear power ratio. The exponent is written in floating point
+    //so that it would not be truncated for a value that is not a multiple of 10.
+    cfg->x_energy_thresh = pow(10.0, -40.0/10.0);
+    cfg->shadow_mu = 1.0;
+    cfg->shadow_better_thresh = 5;
+    cfg->shadow_zero_thresh = 5;
+    cfg->shadow_reset_timer = 20;
+}
+
+//Fills in the coherence-mu configuration used by the floating point
+//reference model.
+static void init_coherence_mu_config_fp(coherence_mu_config_fp_t *cfg) {
+    //config
+    cfg->coh_alpha = 0.0;
+    cfg->coh_slow_alpha = 0.99;
+    cfg->coh_thresh_slow = 0.9;
+    cfg->coh_thresh_abs = 0.65;
+    cfg->mu_scalar = 1.0;
+    cfg->eps = (double)1e-100;
+    
+    //-20dB and -40dB as linear power ratios. The exponents are written in
+    //floating point so that they would not be truncated for a value that is
+    //not a multiple of 10.
+    cfg->thresh_minus20dB = pow(10.0, -20.0/10.0);
+    cfg->x_energy_thresh = pow(10.0, -40.0/10.0);
+    cfg->mu_coh_time = 2;
+    cfg->mu_shad_time = 5;
+    cfg->adaption_config = AEC_ADAPTION_AUTO;
+    cfg->force_adaption_mu = 1.0;
+}
+
+//Initialises the reference model state: channel and phase counts, the delta
+//constants and, for each y channel, the shadow filter and coherence counters.
+//coh_mu_cfg is accepted but not read.
+static void init_params_fp(
+        shadow_filt_params_fp_t *params,
+        const shadow_filt_config_fp_t *shadow_cfg,
+        const coherence_mu_config_fp_t *coh_mu_cfg,
+        unsigned num_y_channels,
+        unsigned num_x_channels,
+        unsigned main_filter_phases,
+        unsigned shadow_filter_phases)
+{
+    //Dimensions
+    params->y_channels = num_y_channels;
+    params->x_channels = num_x_channels;
+    params->main_phases = main_filter_phases;
+    params->shadow_phases = shadow_filter_phases;
+
+    //Delta constants
+    params->delta_min = (double)1e-20;
+    params->delta_adaption_force_on = ldexp(UINT_MAX, -32-6);
+    params->delta_scale_main = (double)1e-5;
+    params->delta_scale_shadow = (double)1e-3;
+
+    //Per y channel state
+    int ych = 0;
+    while(ych < params->y_channels) {
+        //coherence mu
+        params->coh[ych] = 1.0;
+        params->coh_slow[ych] = 0.0;
+        params->mu_coh_count[ych] = 0;
+        params->mu_shad_count[ych] = 0;
+
+        //shadow filter
+        params->shadow_flag[ych] = EQUAL;
+        params->shadow_better_count[ych] = 0;
+        params->shadow_reset_count[ych] = -shadow_cfg->shadow_reset_timer;
+        ych++;
+    }
+}
+
+//Zeroes every shadow filter coefficient (all x channels, all shadow phases,
+//all bins) belonging to y channel y_ch.
+void reset_shadow_filter_fp(shadow_filt_params_fp_t *params, int y_ch) {
+    for(int xch=0; xch<params->x_channels; xch++) {
+        for(int phase=0; phase<params->shadow_phases; phase++) {
+            int bin = 0;
+            while(bin < NUM_BINS) {
+                params->H_hat_shadow[y_ch][xch][phase][bin].im = 0.0;
+                params->H_hat_shadow[y_ch][xch][phase][bin].re = 0.0;
+                bin++;
+            }
+        }
+    }
+}
+
+//Zeroes every main filter coefficient (all x channels, all main phases,
+//all bins) belonging to y channel y_ch, after printing a trace line.
+void reset_main_filter_fp(shadow_filt_params_fp_t *params, int y_ch) {
+    printf("reset_main_filter_fp. ych %d\n",y_ch);
+    for(int xch=0; xch<params->x_channels; xch++) {
+        for(int phase=0; phase<params->main_phases; phase++) {
+            int bin = 0;
+            while(bin < NUM_BINS) {
+                params->H_hat[y_ch][xch][phase][bin].im = 0.0;
+                params->H_hat[y_ch][xch][phase][bin].re = 0.0;
+                bin++;
+            }
+        }
+    }
+}
+
+//Copies the shadow filter of y channel y_ch into the first shadow_phases
+//phases of the main filter and zeroes the remaining main filter phases.
+void shadow_to_main_filter_copy_fp(shadow_filt_params_fp_t *params, int y_ch) {
+    for(int xch=0; xch<params->x_channels; xch++) {
+        int phase;
+        //Phases present in both filters: copy
+        for(phase=0; phase<params->shadow_phases; phase++) {
+            for(int bin=0; bin<NUM_BINS; bin++) {
+                params->H_hat[y_ch][xch][phase][bin].re = params->H_hat_shadow[y_ch][xch][phase][bin].re;
+                params->H_hat[y_ch][xch][phase][bin].im = params->H_hat_shadow[y_ch][xch][phase][bin].im;
+            }
+        }
+        //Phases only the main filter has: clear
+        for(phase=params->shadow_phases; phase<params->main_phases; phase++) {
+            for(int bin=0; bin<NUM_BINS; bin++) {
+                params->H_hat[y_ch][xch][phase][bin].re = 0.0;
+                params->H_hat[y_ch][xch][phase][bin].im = 0.0;
+            }
+        }
+    }
+}
+
+//Overwrites the shadow filter of y channel y_ch with the first shadow_phases
+//phases of the main filter.
+void main_to_shadow_filter_copy_fp(shadow_filt_params_fp_t *params, int y_ch) {
+    for(int xch=0; xch<params->x_channels; xch++) {
+        for(int phase=0; phase<params->shadow_phases; phase++) {
+            int bin = 0;
+            while(bin < NUM_BINS) {
+                params->H_hat_shadow[y_ch][xch][phase][bin].re = params->H_hat[y_ch][xch][phase][bin].re;
+                params->H_hat_shadow[y_ch][xch][phase][bin].im = params->H_hat[y_ch][xch][phase][bin].im;
+                bin++;
+            }
+        }
+    }
+}
+
+#define NUM_MU_CHECKPOINTS (15)
+//Branch coverage flags for calc_coherence_mu_fp(): entry k is set to 1 the
+//first time the branch tagged checkpoints_mu[k] executes.
+int checkpoints_mu[NUM_MU_CHECKPOINTS] = {0};
+//Floating point reference for the coherence based step size (mu) calculation.
+//Reads shadow_flag, coh, coh_slow and sum_X_energy from params, updates the
+//mu_coh_count and mu_shad_count counters and writes coh_mu[y][x] for every
+//y/x channel pair according to cfg->adaption_config:
+//  AEC_ADAPTION_AUTO      - mu derived from the counters, the coherence and the reference energy
+//  AEC_ADAPTION_FORCE_ON  - mu = cfg->force_adaption_mu
+//  AEC_ADAPTION_FORCE_OFF - mu = 0
+void calc_coherence_mu_fp(
+        shadow_filt_params_fp_t *params,
+        const coherence_mu_config_fp_t *cfg)
+{
+    double *sum_X_energy = params->sum_X_energy;
+    //# If the coherence has been low within the last cfg->mu_coh_time frames, keep the count != 0
+    for(int ch=0; ch<params->y_channels; ch++) {
+        if(params->mu_coh_count[ch] > 0) {
+            checkpoints_mu[0] |= 1;
+            params->mu_coh_count[ch] += 1;
+        }
+        if(params->mu_coh_count[ch] > cfg->mu_coh_time) {
+            checkpoints_mu[1] |= 1;
+            params->mu_coh_count[ch] = 0;
+        }
+    }
+    //# If the shadow filter has been used within the last cfg->mu_shad_time frames, keep the count != 0
+    for(int ch=0; ch<params->y_channels; ch++) {
+        if(params->shadow_flag[ch] == COPY) {
+            checkpoints_mu[2] |= 1;
+            params->mu_shad_count[ch] = 1;
+        }
+        else if(params->mu_shad_count[ch] > 0) {
+            checkpoints_mu[3] |= 1;
+            params->mu_shad_count[ch] += 1;
+        }
+        if(params->mu_shad_count[ch] > cfg->mu_shad_time) {
+            checkpoints_mu[4] |= 1;
+            params->mu_shad_count[ch] = 0;
+        }
+    }
+    //# threshold for coherence between y and y_hat
+    //The threshold is derived from the smallest coh_slow over all y channels
+    double min_coh_slow = params->coh_slow[0];
+    for(int ch=1; ch<params->y_channels; ch++) {
+        if(params->coh_slow[ch] < min_coh_slow) min_coh_slow = params->coh_slow[ch];
+    }
+    double CC_thres = min_coh_slow * cfg->coh_thresh_slow;
+    for(int ch=0; ch<params->y_channels; ch++) {
+        if(params->shadow_flag[ch] >= SIGMA) {
+            checkpoints_mu[5] |= 1;
+            //# if the shadow filter has triggered, override any drop in coherence
+            params->mu_coh_count[ch] = 0;
+        }
+        else {
+            //# otherwise if the coherence is low start the count
+            if(params->coh[ch] < cfg->coh_thresh_abs) {
+                checkpoints_mu[6] |= 1;
+                params->mu_coh_count[ch] = 1;
+            }
+        }
+    }
+    if(cfg->adaption_config == AEC_ADAPTION_AUTO) {
+        //# Order of priority for mu:
+        //# 1) if the reference energy is low, don't converge (not enough SNR to be accurate)
+        //# 2) if shadow filter has triggered recently, converge fast
+        //# 3) if coherence has dropped recently, don't converge
+        //# 4) otherwise, converge fast.
+        for(int ch=0; ch<params->y_channels; ch++) {
+            if(params->mu_shad_count[ch] >= 1) {
+                //printf("here 1\n");
+                checkpoints_mu[7] |= 1;
+                for(int xch=0; xch<params->x_channels; xch++) {
+                    params->coh_mu[ch][xch] = 1.0;
+                }
+            }
+            else if(params->mu_coh_count[ch] > 0) {
+                //printf("here 2\n");
+                checkpoints_mu[8] |= 1;
+                for(int xch=0; xch<params->x_channels; xch++) {
+                    params->coh_mu[ch][xch] = 0.0;
+                }
+            }
+            else { //# if yy_hat coherence denotes absence of near-end/noise
+                //printf("here 3\n");
+                //printf("coh %f, coh_slow %f, CC_thres %f\n",params->coh[ch], params->coh_slow[ch], CC_thres);
+                if(params->coh[ch] > params->coh_slow[ch]) {
+                    checkpoints_mu[9] |= 1;
+                    for(int xch=0; xch<params->x_channels; xch++) {
+                        params->coh_mu[ch][xch] = 1.0;
+                    }
+                }
+                else if(params->coh[ch] > CC_thres) {
+                    checkpoints_mu[10] |= 1;
+                    //# scale mu depending on how far above the threshold it is
+                    //self.mu[y_ch] = ((self.coh[y_ch]-CC_thres)/(self.coh_slow[y_ch]-CC_thres))**2
+                    //Here CC_thres < coh <= coh_slow, so the denominator is positive
+                    double t = (params->coh[ch] - CC_thres) / (params->coh_slow[ch] - CC_thres);
+                    t = t * t;
+                    for(int xch=0; xch<params->x_channels; xch++) {
+                        params->coh_mu[ch][xch] = t;
+                    }
+                }
+                else { //# shouldn't go through here, but if it does coherence is low so don't adapt
+                    checkpoints_mu[11] |= 1;
+                    for(int xch=0; xch<params->x_channels; xch++) {
+                        params->coh_mu[ch][xch] = 0.0;
+                    }
+                }
+            }
+        }
+        //Priority 1 is applied last so that it overrides the values chosen above
+        double sum_X_energy_max = sum_X_energy[0]; 
+        for(int xch=1; xch<params->x_channels; xch++) {
+            if(sum_X_energy_max < sum_X_energy[xch]) sum_X_energy_max = sum_X_energy[xch];
+        }
+        for(int xch=0; xch<params->x_channels; xch++) {
+            //if ref_energy_log[x_ch] <= ref_energy_thresh or ref_energy_log[x_ch] < np.max(ref_energy_log)-20: 
+            if((sum_X_energy[xch] <= cfg->x_energy_thresh) || (sum_X_energy[xch] < (sum_X_energy_max * cfg->thresh_minus20dB))) {
+                checkpoints_mu[12] |= 1;
+                for(int ych=0; ych<params->y_channels; ych++) {
+                    params->coh_mu[ych][xch] = 0.0;
+                }
+            }
+        }
+        for(int ych=0; ych<params->y_channels; ych++) {
+            for(int xch=0; xch<params->x_channels; xch++) {
+                params->coh_mu[ych][xch] = params->coh_mu[ych][xch] * cfg->mu_scalar;
+            }
+        }
+    }
+    if(cfg->adaption_config == AEC_ADAPTION_FORCE_ON) {
+        checkpoints_mu[13] |= 1;
+        for(int ych=0; ych<params->y_channels; ych++) {
+            for(int xch=0; xch<params->x_channels; xch++) {
+                params->coh_mu[ych][xch] = cfg->force_adaption_mu;
+            }
+        }
+    }
+    else if(cfg->adaption_config == AEC_ADAPTION_FORCE_OFF) {
+        checkpoints_mu[14] |= 1;
+        for(int ych=0; ych<params->y_channels; ych++) {
+            for(int xch=0; xch<params->x_channels; xch++) {
+                params->coh_mu[ych][xch] = 0.0;
+            }
+        }
+    }
+}
+
+#define NUM_SHADOW_CHECKPOINTS (9)
+//Branch coverage flags for compare_filter_fp(): entry k is set to 1 the
+//first time the branch tagged checkpoints[k] executes.
+int checkpoints[NUM_SHADOW_CHECKPOINTS] = {0};
+//Floating point reference for the main/shadow filter comparison.
+//For every y channel this halves params->overall_Y[ch] in place, compares the
+//overall shadow filter error with the overall main filter error and the
+//(halved) input energy, and then sets params->shadow_flag[ch]. Depending on
+//the outcome it also updates shadow_reset_count / shadow_better_count, copies
+//or zeroes filter coefficients and copies Error / Error_shadow spectra.
+//Finally, if every y channel ended with a flag above EQUAL, sigma_XX is zeroed
+//for all x channels.
+void compare_filter_fp(
+        shadow_filt_params_fp_t *params,        
+        const shadow_filt_config_fp_t *cfg
+        )
+{
+    double *overall_Error = params->overall_Error;
+    double *overall_Error_shadow = params->overall_Error_shadow;
+    double *overall_Input = params->overall_Y;
+    double *sum_X_energy = params->sum_X_energy;
+
+    //# check if shadow or reference filter will be used and flag accordingly
+    //ref_low_all stays 1 only if every x channel is below the energy threshold
+    int ref_low_all = 1;
+    for(int i=0; i<params->x_channels; i++) {
+        if(sum_X_energy[i] >= cfg->x_energy_thresh) {
+            ref_low_all = 0;
+            break;
+        }
+    }
+    for(int ch=0; ch<params->y_channels; ch++) {
+        //Modifies params->overall_Y[ch]; this happens on every call, including the LOW_REF case
+        overall_Input[ch] = overall_Input[ch] / 2;
+        if(ref_low_all) {
+            //printf("checkpoint 0\n");
+            checkpoints[0] |= 1;
+            params->shadow_flag[ch] = LOW_REF;
+            continue;
+        }
+        //# if error way bigger than input, reset- should percolate through to main filter if better
+        //NOTE(review): this 'if' is not part of the if/else chain below, and every
+        //branch of that chain assigns shadow_flag again, so the ERROR value set
+        //here does not survive to the end of the iteration - confirm this matches
+        //the implementation under test.
+        if((overall_Error_shadow[ch] > overall_Input[ch]) && (params->shadow_reset_count[ch] >= 0)) {
+            //printf("REF checkpoint 1. ych %d\n", ch);
+            checkpoints[1] |= 1;
+            params->shadow_flag[ch] = ERROR;
+            reset_shadow_filter_fp(params, ch);
+            //With a zeroed shadow filter the shadow error is the input itself
+            for(int i=0; i<NUM_BINS; i++) {
+                params->Error_shadow[ch][i].re = params->Y[ch][i].re;
+                params->Error_shadow[ch][i].im = params->Y[ch][i].im;
+            }
+            overall_Error_shadow[ch] = overall_Input[ch]; 
+            //# give the zeroed filter time to reconverge (or redeconverge)
+            params->shadow_reset_count[ch] = -cfg->shadow_reset_timer;             
+        }
+        if((overall_Error_shadow[ch] <= (cfg->shadow_copy_thresh * overall_Error[ch])) &&
+            (params->shadow_better_count[ch] > cfg->shadow_better_thresh)) {
+            checkpoints[2] |= 1;
+            //printf("checkpoint 2\n");
+            //# if shadow filter is much better, and has been for several frames,
+            //# copy to reference filter                            
+            params->shadow_flag[ch] = COPY;
+            params->shadow_reset_count[ch] = 0;
+            params->shadow_better_count[ch] += 1;
+            for(int i=0; i<NUM_BINS; i++) {
+                params->Error[ch][i].re = params->Error_shadow[ch][i].re;
+                params->Error[ch][i].im = params->Error_shadow[ch][i].im;
+            }
+            shadow_to_main_filter_copy_fp(params, ch);
+        }
+        else if(overall_Error_shadow[ch] <= cfg->shadow_sigma_thresh*overall_Error[ch]) {
+            params->shadow_better_count[ch] += 1;
+            if(params->shadow_better_count[ch] > cfg->shadow_better_thresh) {
+                //# if shadow is somewhat better, reset sigma_xx if both channels are better
+                //printf("checkpoint 3\n");
+                checkpoints[3] |= 1;
+                params->shadow_flag[ch] = SIGMA;
+            }
+            else {
+                //printf("checkpoint 4\n");
+                checkpoints[4] |= 1;
+                params->shadow_flag[ch] = EQUAL;
+            }
+        }
+        else if((overall_Error_shadow[ch] >= cfg->shadow_reset_thresh * overall_Error[ch]) && 
+            (params->shadow_reset_count[ch] >= 0)) {
+            //# if shadow filter is worse than reference, reset provided that
+            //# the delay is small and we're not letting the shadow filter reconverge after zeroing
+            params->shadow_reset_count[ch] += 1;
+            params->shadow_better_count[ch] = 0;
+            if(params->shadow_reset_count[ch] > cfg->shadow_zero_thresh) {
+                //printf("checkpoint 5. ych %d\n", ch);
+                checkpoints[5] |= 1;
+                //# if shadow filter has been reset several times in a row, reset to zeros                
+                params->shadow_flag[ch] = ZERO;
+                reset_shadow_filter_fp(params, ch);
+                for(int i=0; i<NUM_BINS; i++) {
+                    params->Error_shadow[ch][i].re = params->Y[ch][i].re;
+                    params->Error_shadow[ch][i].im = params->Y[ch][i].im;
+                }
+                //# give the zeroed filter time to reconverge
+                params->shadow_reset_count[ch] = -cfg->shadow_reset_timer;                
+            }
+            else {
+                //printf("checkpoint 6\n");
+                checkpoints[6] |= 1;
+                //# otherwise copy the main filter to the shadow filter
+                main_to_shadow_filter_copy_fp(params, ch);
+                for(int i=0; i<NUM_BINS; i++) {
+                    params->Error_shadow[ch][i].re = params->Error[ch][i].re;                    
+                    params->Error_shadow[ch][i].im = params->Error[ch][i].im;                    
+                }
+                params->shadow_flag[ch] = RESET;
+            }
+        }
+        else {
+            //printf("checkpoint 7\n");
+            checkpoints[7] |= 1;
+            //# shadow filter is comparable to main filter, 
+            //# or we're waiting for it to reconverge after zeroing
+            params->shadow_better_count[ch] = 0;
+            params->shadow_flag[ch] = EQUAL;
+            //Count back up towards 0 while the zeroed filter reconverges
+            if(params->shadow_reset_count[ch] < 0) {
+                params->shadow_reset_count[ch] += 1;
+            }            
+        }
+    }
+    //# reset sigma_xx if both mics shadow filtered
+    //A y channel counts as shadow filtered when its flag is above EQUAL
+    int both_shadow_filtered = 1;
+    for(int ch=0; ch<params->y_channels; ch++) {
+        if(params->shadow_flag[ch] <= EQUAL) {
+            both_shadow_filtered = 0;
+            break;
+        }
+    }
+    if(both_shadow_filtered) {
+        //printf("checkpoint 8\n");
+        checkpoints[8] |= 1;
+        for(int ch=0; ch<params->x_channels; ch++) {
+            for(int i=0; i<NUM_BINS; i++) {
+                params->sigma_XX[ch][i] = 0.0;
+            }
+        }
+    }
+}
+
+//Floating point reference for the regularisation term delta.
+//In AEC_ADAPTION_AUTO mode delta is the largest per-channel max_X_energy
+//multiplied by the filter's delta_scale, bounded below by delta_min; this is
+//done separately for the main and shadow filters. In any other adaption mode
+//both deltas are set to delta_adaption_force_on.
+void calc_delta_fp(shadow_filt_params_fp_t *params, const coherence_mu_config_fp_t *coh_mu_cfg) {
+    if(coh_mu_cfg->adaption_config != AEC_ADAPTION_AUTO) {
+        params->delta_main = params->delta_adaption_force_on;
+        params->delta_shadow = params->delta_adaption_force_on;
+        return;
+    }
+
+    //main filter delta
+    double peak_main = params->max_X_energy_main[0];
+    for(int xch=1; xch<params->x_channels; xch++) {
+        if(params->max_X_energy_main[xch] > peak_main) {
+            peak_main = params->max_X_energy_main[xch];
+        }
+    }
+    double scaled_main = peak_main * params->delta_scale_main;
+    params->delta_main = (scaled_main > params->delta_min) ? scaled_main : params->delta_min;
+
+    //shadow filter delta
+    double peak_shadow = params->max_X_energy_shadow[0];
+    for(int xch=1; xch<params->x_channels; xch++) {
+        if(params->max_X_energy_shadow[xch] > peak_shadow) {
+            peak_shadow = params->max_X_energy_shadow[xch];
+        }
+    }
+    double scaled_shadow = peak_shadow * params->delta_scale_shadow;
+    params->delta_shadow = (scaled_shadow > params->delta_min) ? scaled_shadow : params->delta_min;
+}
+
+//Top level of the floating point reference model. Unless 'bypass' is set, runs
+//the filter comparison, the delta calculation and the coherence mu calculation
+//in that order, then writes the resulting mu values: main_filt_mu is taken
+//from coh_mu and shadow_filt_mu from shadow_cfg->shadow_mu.
+void compare_filters_and_calc_mu_fp(
+        shadow_filt_params_fp_t *params,
+        const shadow_filt_config_fp_t *shadow_cfg,
+        const coherence_mu_config_fp_t *coh_mu_cfg,
+        int bypass)
+{
+    if(!bypass) {
+        compare_filter_fp(params, shadow_cfg);
+
+        //TODO check if all paths executed
+        calc_delta_fp(params, coh_mu_cfg);
+
+        calc_coherence_mu_fp(params, coh_mu_cfg);
+
+        //update main and shadow filter mu
+        for(int y=0; y<params->y_channels; y++) {
+            for(int x=0; x<params->x_channels; x++) {
+                params->shadow_filt_mu[y][x] = shadow_cfg->shadow_mu;
+                params->main_filt_mu[y][x] = params->coh_mu[y][x];
+            }
+        }
+    }
+}
+
+
+// Unit test: compares the fixed point aec_compare_filters_and_calc_mu() against the double
+// precision reference compare_filters_and_calc_mu_fp() over (1<<11)/F frames of random input.
+//
+// Every frame the test randomises the filter state (H_hat, Error, Y and the overall_* energies
+// for the main and shadow filters, the shadow reset/better counters, coh, coh_slow, mu_scalar,
+// sigma_XX, sum_X_energy and max_X_energy), mirrors each value into the reference parameters,
+// runs both implementations and then checks, using att_bfp_vector_int32() for the BFP data:
+//   - difference of 0:  Error, Error_shadow, H_hat, H_hat_shadow and sigma_XX
+//   - equal integers:   shadow_flag, shadow_reset_count, shadow_better_count, mu_coh_count
+//                       and mu_shad_count
+//   - difference <= 1:  delta of the main and shadow filters
+//   - difference <= 64: coh_mu, main filter mu and shadow filter mu
+// Bypass and a forced adaption_config are switched in on some frames. After the last frame
+// every checkpoints[] and checkpoints_mu[] entry has to equal 1, otherwise the test asserts.
+void test_compare_filters_and_calc_mu() {
+    unsafe {
+        aec_state_t main_state, shadow_state;
+        aec_shared_state_t shared_state;
+        aec_memory_pool_t main_memory_pool;
+        aec_shadow_filt_memory_pool_t shadow_memory_pool;
+
+        unsigned num_y_channels = TEST_NUM_Y;
+        unsigned num_x_channels = TEST_NUM_X;
+        unsigned main_filter_phases = TEST_MAIN_PHASES;
+        unsigned shadow_filter_phases = TEST_SHADOW_PHASES;
+
+        //floating point arrays
+        shadow_filt_config_fp_t shadow_filt_cfg_fp;
+        coherence_mu_config_fp_t coh_mu_cfg_fp;
+        shadow_filt_params_fp_t shadow_filt_coh_mu_params_fp;
+
+        //Initialise fixed point
+        aec_init(&main_state, &shadow_state, &shared_state, (uint8_t*)&main_memory_pool, (uint8_t*)&shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+        //Initialise floating point
+        init_shadow_config_fp(&shadow_filt_cfg_fp);
+        init_coherence_mu_config_fp(&coh_mu_cfg_fp);
+        init_params_fp(
+                &shadow_filt_coh_mu_params_fp,
+                &shadow_filt_cfg_fp,
+                &coh_mu_cfg_fp,
+                num_y_channels,
+                num_x_channels,
+                main_filter_phases,
+                shadow_filter_phases);
+
+        unsigned seed = 35788;
+        int32_t new_frame[TEST_NUM_Y + TEST_NUM_X][AEC_FRAME_ADVANCE];
+        unsigned max_diff_coh_mu = 0;
+        for(int iter=0; iter<(1<<11)/F; iter++) {
+            //bypass is off by default; on every 200th frame it is drawn at random
+            main_state.shared_state->config_params.aec_core_conf.bypass = 0;
+            if((iter > 0) && !(iter % 200)) {
+                main_state.shared_state->config_params.aec_core_conf.bypass = att_random_uint32(seed) % 2;
+            }
+            //printf("iter %d\n",iter);
+            aec_frame_init(&main_state, &shadow_state, &new_frame[0], &new_frame[TEST_NUM_Y]);
+            for(int ch=0; ch<num_y_channels; ch++) {
+                //state_ptr->shared_state->Y is initialised in the y->Y fft aec_fft() call with state_ptr->shared_state->y as input. Initialising here for
+                //standalone testing.
+                bfp_complex_s32_init(&main_state.shared_state->Y[ch], (complex_s32_t*)&main_state.shared_state->y[ch].data[0], 0, NUM_BINS, 0);
+            }
+
+            shadow_filt_params_fp_t *params_fp = &shadow_filt_coh_mu_params_fp;
+            //generate input
+
+            for(int ych=0; ych<num_y_channels; ych++) {
+                //H_hat: flat [xch*num_phases + phase] in the fixed point state, [xch][phase] in the reference
+                for(int ph=0; ph<num_x_channels*main_state.num_phases; ph++) {
+                    int xch = ph/main_state.num_phases;
+                    int ph_xch = (main_state.num_phases == 1) ? 0 : ph % main_state.num_phases;
+                    main_state.H_hat[ych][ph].exp = sext(att_random_int32(seed), 6);
+                    main_state.H_hat[ych][ph].hr = att_random_uint32(seed) % 3;
+                    for(int i=0; i<NUM_BINS; i++) {
+                        main_state.H_hat[ych][ph].data[i].re = att_random_int32(seed) >> main_state.H_hat[ych][ph].hr;
+                        main_state.H_hat[ych][ph].data[i].im = att_random_int32(seed) >> main_state.H_hat[ych][ph].hr;
+                        params_fp->H_hat[ych][xch][ph_xch][i].re = att_int32_to_double(main_state.H_hat[ych][ph].data[i].re, main_state.H_hat[ych][ph].exp);
+                        params_fp->H_hat[ych][xch][ph_xch][i].im = att_int32_to_double(main_state.H_hat[ych][ph].data[i].im, main_state.H_hat[ych][ph].exp);
+                    }
+                }
+                //H_hat_shadow
+                for(int ph=0; ph<num_x_channels*shadow_state.num_phases; ph++) {
+                    int xch = ph/shadow_state.num_phases;
+                    int ph_xch = (shadow_state.num_phases == 1) ? 0 : ph % shadow_state.num_phases; //phase within the given xch
+                    shadow_state.H_hat[ych][ph].exp = sext(att_random_int32(seed), 6);
+                    shadow_state.H_hat[ych][ph].hr = att_random_uint32(seed) % 3;
+                    for(int i=0; i<NUM_BINS; i++) {
+                        shadow_state.H_hat[ych][ph].data[i].re = att_random_int32(seed) >> shadow_state.H_hat[ych][ph].hr;
+                        shadow_state.H_hat[ych][ph].data[i].im = att_random_int32(seed) >> shadow_state.H_hat[ych][ph].hr;
+                        params_fp->H_hat_shadow[ych][xch][ph_xch][i].re = att_int32_to_double(shadow_state.H_hat[ych][ph].data[i].re, shadow_state.H_hat[ych][ph].exp);
+                        params_fp->H_hat_shadow[ych][xch][ph_xch][i].im = att_int32_to_double(shadow_state.H_hat[ych][ph].data[i].im, shadow_state.H_hat[ych][ph].exp);
+                    }
+                }
+                //Error, Error_shadow, Y
+                main_state.Error[ych].exp = sext(att_random_int32(seed), 6);
+                main_state.Error[ych].hr = att_random_uint32(seed) % 3;
+                shadow_state.Error[ych].exp = sext(att_random_int32(seed), 6);
+                shadow_state.Error[ych].hr = att_random_uint32(seed) % 3;
+                main_state.shared_state->Y[ych].exp = sext(att_random_int32(seed), 6);
+                main_state.shared_state->Y[ych].hr = att_random_uint32(seed) % 3;
+                for(int i=0; i<NUM_BINS; i++) {
+                    //Error
+                    main_state.Error[ych].data[i].re = att_random_int32(seed) >> main_state.Error[ych].hr;
+                    main_state.Error[ych].data[i].im = att_random_int32(seed) >> main_state.Error[ych].hr;
+                    params_fp->Error[ych][i].re = att_int32_to_double(main_state.Error[ych].data[i].re, main_state.Error[ych].exp);
+                    params_fp->Error[ych][i].im = att_int32_to_double(main_state.Error[ych].data[i].im, main_state.Error[ych].exp);
+
+                    //Error_shadow
+                    shadow_state.Error[ych].data[i].re = att_random_int32(seed) >> shadow_state.Error[ych].hr;
+                    shadow_state.Error[ych].data[i].im = att_random_int32(seed) >> shadow_state.Error[ych].hr;
+                    params_fp->Error_shadow[ych][i].re = att_int32_to_double(shadow_state.Error[ych].data[i].re, shadow_state.Error[ych].exp);
+                    params_fp->Error_shadow[ych][i].im = att_int32_to_double(shadow_state.Error[ych].data[i].im, shadow_state.Error[ych].exp);
+
+                    //Y
+                    main_state.shared_state->Y[ych].data[i].re = att_random_int32(seed) >> main_state.shared_state->Y[ych].hr;
+                    main_state.shared_state->Y[ych].data[i].im = att_random_int32(seed) >> main_state.shared_state->Y[ych].hr;
+                    params_fp->Y[ych][i].re = att_int32_to_double(main_state.shared_state->Y[ych].data[i].re, main_state.shared_state->Y[ych].exp);
+                    params_fp->Y[ych][i].im = att_int32_to_double(main_state.shared_state->Y[ych].data[i].im, main_state.shared_state->Y[ych].exp);
+                }
+                //overall_Error
+                main_state.overall_Error[ych].exp = sext(att_random_int32(seed), 6);
+                main_state.overall_Error[ych].mant = att_random_uint32(seed) >> 1;
+                params_fp->overall_Error[ych] = att_int32_to_double(main_state.overall_Error[ych].mant, main_state.overall_Error[ych].exp);
+
+                //overall_Error_shadow
+                shadow_state.overall_Error[ych].exp = sext(att_random_int32(seed), 6);
+                shadow_state.overall_Error[ych].mant = att_random_uint32(seed) >> 1;
+                params_fp->overall_Error_shadow[ych] = att_int32_to_double(shadow_state.overall_Error[ych].mant, shadow_state.overall_Error[ych].exp);
+
+                //overall_Y
+                main_state.shared_state->overall_Y[ych].exp = sext(att_random_int32(seed), 6);
+                main_state.shared_state->overall_Y[ych].mant = att_random_uint32(seed) >> 1;
+                params_fp->overall_Y[ych] = att_int32_to_double(main_state.shared_state->overall_Y[ych].mant, main_state.shared_state->overall_Y[ych].exp);
+
+                //shadow_reset_count
+                main_state.shared_state->shadow_filter_params.shadow_reset_count[ych] = sext(att_random_int32(seed), 6); //between -31 and 32
+                params_fp->shadow_reset_count[ych] = main_state.shared_state->shadow_filter_params.shadow_reset_count[ych];
+                //shadow_better_count
+                main_state.shared_state->shadow_filter_params.shadow_better_count[ych] = att_random_uint32(seed)%8; //between 0 and 7
+                params_fp->shadow_better_count[ych] = main_state.shared_state->shadow_filter_params.shadow_better_count[ych];
+                //coh
+                main_state.shared_state->coh_mu_state[ych].coh.exp = sext(att_random_int32(seed), 3) - 31;
+                main_state.shared_state->coh_mu_state[ych].coh.mant = att_random_uint32(seed) & 0x7fffffff;
+                params_fp->coh[ych] = att_int32_to_double(main_state.shared_state->coh_mu_state[ych].coh.mant, main_state.shared_state->coh_mu_state[ych].coh.exp);
+                //coh_slow
+                main_state.shared_state->coh_mu_state[ych].coh_slow.exp = sext(att_random_int32(seed), 3) - 31;
+                main_state.shared_state->coh_mu_state[ych].coh_slow.mant = att_random_uint32(seed) & 0x7fffffff;
+                params_fp->coh_slow[ych] = att_int32_to_double(main_state.shared_state->coh_mu_state[ych].coh_slow.mant, main_state.shared_state->coh_mu_state[ych].coh_slow.exp);
+
+                //clear mu_coh_count and mu_shad_count every few frames for better code coverage in calc_coherence_mu_fp
+                if((iter > 0) && !(iter % (params_fp->mu_shad_count[ych] + 2))) {
+                    main_state.shared_state->coh_mu_state[ych].mu_coh_count = 0;
+                    params_fp->mu_coh_count[ych] = 0;
+                    main_state.shared_state->coh_mu_state[ych].mu_shad_count = 0;
+                    params_fp->mu_shad_count[ych] = 0;
+                }
+            }
+
+            //Set adaption_config to something other than AUTO once in a while
+            if((iter > 0) && !(iter % 100)) {
+                int force_on = att_random_uint32(seed) % 2;
+                if(force_on) {
+                    main_state.shared_state->config_params.coh_mu_conf.adaption_config = AEC_ADAPTION_FORCE_ON;
+                    coh_mu_cfg_fp.adaption_config = AEC_ADAPTION_FORCE_ON;
+                }
+                else {
+                    main_state.shared_state->config_params.coh_mu_conf.adaption_config = AEC_ADAPTION_FORCE_OFF;
+                    coh_mu_cfg_fp.adaption_config = AEC_ADAPTION_FORCE_OFF;
+                }
+                main_state.shared_state->config_params.coh_mu_conf.force_adaption_mu_q30 = att_random_uint32(seed) & 0x7fffffff;
+                coh_mu_cfg_fp.force_adaption_mu = att_int32_to_double(main_state.shared_state->config_params.coh_mu_conf.force_adaption_mu_q30, -30);
+            }
+            if((iter > 0) && !(iter % 15)) {
+                double min_coh_slow = params_fp->coh_slow[0];
+                for(int ch=1; ch<params_fp->y_channels; ch++) {
+                    if(params_fp->coh_slow[ch] < min_coh_slow) min_coh_slow = params_fp->coh_slow[ch];
+                }
+                double CC_thres = min_coh_slow * coh_mu_cfg_fp.coh_thresh_slow;
+                //set coh to a number between CC_thres and coh_slow for code coverage of an if condition
+                for(int ch=0; ch<params_fp->y_channels; ch++) {
+                    params_fp->coh[ch] = CC_thres + (params_fp->coh_slow[ch] - CC_thres)/(3.15 + params_fp->coh[ch]);
+                    main_state.shared_state->coh_mu_state[ch].coh = double_to_float_s32(params_fp->coh[ch]);
+                }
+            }
+            //mu_scalar
+            main_state.shared_state->config_params.coh_mu_conf.mu_scalar.exp = sext(att_random_int32(seed), 3) - 31;
+            main_state.shared_state->config_params.coh_mu_conf.mu_scalar.mant = att_random_uint32(seed) & 0x7fffffff;
+            coh_mu_cfg_fp.mu_scalar = att_int32_to_double(main_state.shared_state->config_params.coh_mu_conf.mu_scalar.mant, main_state.shared_state->config_params.coh_mu_conf.mu_scalar.exp);
+
+            for(int xch=0; xch<num_x_channels; xch++) {
+                //sigma_XX
+                main_state.shared_state->sigma_XX[xch].exp = sext(att_random_int32(seed), 6);
+                main_state.shared_state->sigma_XX[xch].hr = att_random_uint32(seed) % 3;
+                for(int i=0; i<NUM_BINS; i++) {
+                    main_state.shared_state->sigma_XX[xch].data[i] = att_random_int32(seed) >> main_state.shared_state->sigma_XX[xch].hr;
+                    params_fp->sigma_XX[xch][i] = att_int32_to_double(main_state.shared_state->sigma_XX[xch].data[i], main_state.shared_state->sigma_XX[xch].exp);
+                }
+
+                //sum_X_energy
+                main_state.shared_state->sum_X_energy[xch].exp = sext(att_random_int32(seed), 6) - 31;
+                main_state.shared_state->sum_X_energy[xch].mant = att_random_uint32(seed) >> 1;
+                params_fp->sum_X_energy[xch] = att_int32_to_double(main_state.shared_state->sum_X_energy[xch].mant, main_state.shared_state->sum_X_energy[xch].exp);
+
+                //max_X_energy
+                main_state.max_X_energy[xch].exp = -32 - (att_random_uint32(seed) & 63);
+                main_state.max_X_energy[xch].mant = att_random_uint32(seed) >> 1;
+                params_fp->max_X_energy_main[xch] = att_int32_to_double(main_state.max_X_energy[xch].mant, main_state.max_X_energy[xch].exp);
+                //Make shadow max_energy smaller since shadow scale is bigger and we want the scaled_max_energy < delta_min code path executed
+                shadow_state.max_X_energy[xch].exp = -40 - (att_random_uint32(seed) & 63);
+                shadow_state.max_X_energy[xch].mant = att_random_uint32(seed) >> 1;
+                params_fp->max_X_energy_shadow[xch] = att_int32_to_double(shadow_state.max_X_energy[xch].mant, shadow_state.max_X_energy[xch].exp);
+
+            }
+
+            //run the floating point reference first, then the fixed point implementation under test
+            compare_filters_and_calc_mu_fp(
+                    &shadow_filt_coh_mu_params_fp,
+                    &shadow_filt_cfg_fp,
+                    &coh_mu_cfg_fp,
+                    main_state.shared_state->config_params.aec_core_conf.bypass);
+
+            aec_compare_filters_and_calc_mu(
+                    &main_state,
+                    &shadow_state);
+
+            //check compare_filters outputs
+            for(int ych=0; ych<num_y_channels; ych++) {
+                //compare shadow_flag
+                if(params_fp->shadow_flag[ych] != main_state.shared_state->shadow_filter_params.shadow_flag[ych]) {
+                    printf("iter %d. shadow_flag (ref %d, dut %d) error\n", iter, params_fp->shadow_flag[ych], main_state.shared_state->shadow_filter_params.shadow_flag[ych]);
+                    assert(0);
+                }
+
+                //compare shadow_reset_count
+                if(params_fp->shadow_reset_count[ych] != main_state.shared_state->shadow_filter_params.shadow_reset_count[ych]) {
+                    printf("iter %d. shadow_reset_count (ref %d, dut %d) error\n", iter, params_fp->shadow_reset_count[ych], main_state.shared_state->shadow_filter_params.shadow_reset_count[ych]);
+                    assert(0);
+                }
+
+                //compare shadow_better_count
+                if(params_fp->shadow_better_count[ych] != main_state.shared_state->shadow_filter_params.shadow_better_count[ych]) {
+                    printf("iter %d. shadow_better_count (ref %d, dut %d) error\n", iter, params_fp->shadow_better_count[ych], main_state.shared_state->shadow_filter_params.shadow_better_count[ych]);
+                    assert(0);
+                }
+                //compare Error
+                unsigned diff_Error = att_bfp_vector_int32((int32_t*)&main_state.Error[ych].data[0], main_state.Error[ych].exp, (double*)params_fp->Error[ych], 0, 2*NUM_BINS);
+                if(diff_Error > 0) {printf("iter %d. diff_Error %d too large\n",iter, diff_Error); assert(0);}
+
+                unsigned diff_Error_shadow = att_bfp_vector_int32((int32_t*)&shadow_state.Error[ych].data[0], shadow_state.Error[ych].exp, (double*)params_fp->Error_shadow[ych], 0, 2*NUM_BINS);
+
+                //compare Error_Shadow
+                if(diff_Error_shadow > 0) {printf("iter %d. diff_Error_shadow %d too large\n",iter, diff_Error_shadow); assert(0);}
+
+                //Compare H_hat and H_hat_shadow
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    for(int ph=0; ph<main_state.num_phases; ph++) {
+                        unsigned diff_H_hat = att_bfp_vector_int32((int32_t*)&main_state.H_hat[ych][xch*main_state.num_phases + ph].data[0], main_state.H_hat[ych][xch*main_state.num_phases + ph].exp, (double*)&params_fp->H_hat[ych][xch][ph][0], 0, 2*NUM_BINS);
+                        if(diff_H_hat > 0){printf("iter %d, ych %d, xch %d, ph %d, shadow_flag %d. diff_H_hat %d too large\n",iter, ych, xch, ph, params_fp->shadow_flag[ych], diff_H_hat); assert(0);}
+                    }
+
+                    for(int ph=0; ph<shadow_state.num_phases; ph++) {
+                        unsigned diff_H_hat_shadow = att_bfp_vector_int32((int32_t*)&shadow_state.H_hat[ych][xch*shadow_state.num_phases + ph].data[0], shadow_state.H_hat[ych][xch*shadow_state.num_phases + ph].exp, (double*)&params_fp->H_hat_shadow[ych][xch][ph][0], 0, 2*NUM_BINS);
+                        if(diff_H_hat_shadow > 0){printf("iter %d, ych %d, xch %d, ph %d, shadow_flag %d. diff_H_hat_shadow %d too large\n",iter, ych, xch, ph, params_fp->shadow_flag[ych], diff_H_hat_shadow); assert(0);}
+                    }
+                }
+            }
+
+            //compare sigma_XX
+            for(int xch=0; xch<num_x_channels; xch++) {
+                unsigned diff_sigma_XX = att_bfp_vector_int32((int32_t*)&main_state.shared_state->sigma_XX[xch].data[0], main_state.shared_state->sigma_XX[xch].exp, &params_fp->sigma_XX[xch][0], 0, NUM_BINS);
+                if(diff_sigma_XX > 0) {printf("iter %d. diff_sigma_XX %d too large\n",iter, diff_sigma_XX); assert(0);}
+            }
+
+            //compare delta (a difference of 1 is tolerated)
+            unsigned delta_diff = att_bfp_vector_int32((int32_t*)&main_state.delta.mant, main_state.delta.exp, &params_fp->delta_main, 0, 1);
+            if(delta_diff > 1) {printf("iter %d. delta_diff_main %d too large\n",iter, delta_diff); assert(0);}
+            delta_diff = att_bfp_vector_int32((int32_t*)&shadow_state.delta.mant, shadow_state.delta.exp, &params_fp->delta_shadow, 0, 1);
+            if(delta_diff > 1) {printf("iter %d. delta_diff_shadow %d too large\n",iter, delta_diff); assert(0);}
+
+
+            //check calc_mu outputs
+            for(int ych=0; ych<num_y_channels; ych++) {
+
+                //compare mu_coh_count
+                if(params_fp->mu_coh_count[ych] != main_state.shared_state->coh_mu_state[ych].mu_coh_count) {
+                    printf("iter %d. mu_coh_count mismatch. (ref %d, dut %d)\n",iter, params_fp->mu_coh_count[ych], main_state.shared_state->coh_mu_state[ych].mu_coh_count); assert(0);}
+
+                //compare mu_shad_count
+                if(params_fp->mu_shad_count[ych] != main_state.shared_state->coh_mu_state[ych].mu_shad_count) {printf("iter %d. mu_shad_count mismatch. (ref %d, dut %d)\n",iter, params_fp->mu_shad_count[ych], main_state.shared_state->coh_mu_state[ych].mu_shad_count); assert(0);}
+
+                //compare coh_mu
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    unsigned diff_coh_mu = att_bfp_vector_int32((int32_t*)&main_state.shared_state->coh_mu_state[ych].coh_mu[xch].mant, main_state.shared_state->coh_mu_state[ych].coh_mu[xch].exp, (double*)&params_fp->coh_mu[ych][xch], 0, 1);
+                    //coh_mu only has to agree to within 64, as reported by att_bfp_vector_int32()
+                    if(diff_coh_mu > 64) {
+                        printf("iter %d. diff_coh_mu[%d][%d] %d too large\n",iter, ych, xch, diff_coh_mu);
+                        assert(0);
+                    }
+                    if(diff_coh_mu > max_diff_coh_mu) {max_diff_coh_mu = diff_coh_mu;}
+                }
+
+                //Finally compare main and shadow filter mu
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    unsigned diff_main_filt_mu = att_bfp_vector_int32((int32_t*)&main_state.mu[ych][xch].mant, main_state.mu[ych][xch].exp, (double*)&params_fp->main_filt_mu[ych][xch], 0, 1);
+                    if(diff_main_filt_mu > 64) {
+                        printf("iter %d. diff_main_filt_mu[%d][%d] %d too large\n",iter, ych, xch, diff_main_filt_mu);
+                        assert(0);
+                    }
+                }
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    unsigned diff_shadow_filt_mu = att_bfp_vector_int32((int32_t*)&shadow_state.mu[ych][xch].mant, shadow_state.mu[ych][xch].exp, (double*)&params_fp->shadow_filt_mu[ych][xch], 0, 1);
+                    if(diff_shadow_filt_mu > 64) {
+                        printf("iter %d. diff_shadow_filt_mu[%d][%d] %d too large\n",iter, ych, xch, diff_shadow_filt_mu);
+                        assert(0);
+                    }
+                }
+            }
+        }
+        //checkpoints[] and checkpoints_mu[] are updated outside this function; every entry must be 1 by now
+        for(int i=0; i<NUM_SHADOW_CHECKPOINTS; i++) {
+            if(checkpoints[i] != 1) {printf("checkpoint %d not tested\n",i); assert(0);}
+            //printf("checkpoints[%d] = %d\n",i, checkpoints[i]);
+        }
+        printf("max_diff_coh_mu = %d\n",max_diff_coh_mu);
+        for(int i=0; i<NUM_MU_CHECKPOINTS; i++) {
+            if(checkpoints_mu[i] != 1) {printf("checkpoint_mu %d not tested\n",i); assert(0);}
+            //printf("checkpoints_mu[%d] = %d\n",i, checkpoints_mu[i]);
+        }
+    }
+
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_compute_T.xc b/test/lib_aec/aec_unit_tests/src/test_compute_T.xc
new file mode 100644
index 000000000..abf029483
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_compute_T.xc
@@ -0,0 +1,148 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+ 
+//Double precision reference: T[y][x][k] = Error[y][k] * inv_X_energy[x][k] * mu[y][x] (re and im)
+void aec_calc_T_fp(
+        dsp_complex_fp (*T)[AEC_MAX_X_CHANNELS][NUM_BINS],
+        dsp_complex_fp (*Error)[NUM_BINS],
+        double (*inv_X_energy)[NUM_BINS],
+        double (*mu)[AEC_MAX_X_CHANNELS],
+        int y_channels, int x_channels) {
+    for(int y=0; y<y_channels; y++) {
+        for(int x=0; x<x_channels; x++) {
+            const double step = mu[y][x]; //same for every bin of this (y, x) pair
+            for(int k=0; k<NUM_BINS; k++) {
+                T[y][x][k].re = Error[y][k].re * inv_X_energy[x][k] * step;
+                T[y][x][k].im = Error[y][k].im * inv_X_energy[x][k] * step;
+            }
+        }
+    }
+}
+
+// Unit test for aec_calc_T(): compares the fixed point T with the aec_calc_T_fp() reference.
+//
+// Every iteration picks the main or the shadow filter state at random, fills its Error,
+// inv_X_energy and mu with random block floating point values, mirrors them into double
+// arrays and evaluates the reference. aec_calc_T() is then run per (ych, xch) pair and the
+// real and imaginary part of every bin of state_ptr->T[xch] has to be within 0.02% of the
+// reference magnitude plus 1e-8, otherwise the test prints the failing position and asserts.
+void test_calc_T() {
+    unsafe {
+        unsigned num_y_channels = 2;
+        unsigned num_x_channels = 2;
+        unsigned main_filter_phases = AEC_MAIN_FILTER_PHASES - 1;
+        unsigned shadow_filter_phases = AEC_MAIN_FILTER_PHASES - 1;
+
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_state_t state, shadow_state;
+        aec_shared_state_t aec_shared_state;
+        aec_init(&state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+
+        //initialise float arrays
+        dsp_complex_fp Error_fp[AEC_MAX_Y_CHANNELS][NUM_BINS];
+        double inv_X_energy_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+        double mu_fp[AEC_MAX_Y_CHANNELS][AEC_MAX_X_CHANNELS];
+        dsp_complex_fp T_fp[AEC_MAX_Y_CHANNELS][AEC_MAX_X_CHANNELS][NUM_BINS];
+
+        unsigned seed = 45;
+        //largest relative error seen so far, in percent (tracked but not reported by this test)
+        double max_diff_percentage = 0.0;
+        for(int iter=0; iter<(1<<11)/F; iter++) {
+            int32_t new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+            aec_frame_init(&state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);
+            aec_state_t *state_ptr;
+            //randomly pick which filter state (main or shadow) this iteration exercises
+            int is_main_filter = att_random_uint32(seed) % 2;
+            if(is_main_filter) {
+                state_ptr = &state;
+            }
+            else {
+                state_ptr = &shadow_state;
+            }
+
+            for(int ch=0; ch<num_y_channels; ch++) {
+                state_ptr->Error[ch].exp = sext(att_random_int32(seed), 3) - 31;
+                state_ptr->Error[ch].hr = att_random_uint32(seed) % 3;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->Error[ch].data[i].re = att_random_int32(seed) >> state_ptr->Error[ch].hr;
+                    state_ptr->Error[ch].data[i].im = att_random_int32(seed) >> state_ptr->Error[ch].hr;
+
+                    Error_fp[ch][i].re = att_int32_to_double(state_ptr->Error[ch].data[i].re, state_ptr->Error[ch].exp);
+                    Error_fp[ch][i].im = att_int32_to_double(state_ptr->Error[ch].data[i].im, state_ptr->Error[ch].exp);
+                }
+            }
+            //initialise inv_X_energy
+            for(int ch=0; ch<num_x_channels; ch++) {
+                state_ptr->inv_X_energy[ch].exp = sext(att_random_int32(seed), 3) - 31;
+                state_ptr->inv_X_energy[ch].hr = (att_random_uint32(seed) % 3);
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->inv_X_energy[ch].data[i] = att_random_int32(seed) >> state_ptr->inv_X_energy[ch].hr;
+                    inv_X_energy_fp[ch][i] = att_int32_to_double(state_ptr->inv_X_energy[ch].data[i], state_ptr->inv_X_energy[ch].exp);
+                }
+            }
+            //initialise mu
+            //mu is indexed [ych][xch], so the inner loop has to cover the x channels
+            for(int ych=0; ych<num_y_channels; ych++) {
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    state_ptr->mu[ych][xch].exp = sext(att_random_int32(seed), 3) - 31;
+                    int hr = att_random_uint32(seed) % 3;
+                    state_ptr->mu[ych][xch].mant = att_random_int32(seed) >> hr;
+
+                    mu_fp[ych][xch] = att_int32_to_double(state_ptr->mu[ych][xch].mant, state_ptr->mu[ych][xch].exp);
+                }
+            }
+
+            aec_calc_T_fp(T_fp, Error_fp, inv_X_energy_fp, mu_fp, num_y_channels, num_x_channels);
+
+            for(int ych=0; ych<num_y_channels; ych++) {
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    aec_calc_T(state_ptr, ych, xch);
+                }
+                //Since T memory will be overwritten when computing for next y-channel, do error checking now
+                for(int xch=0; xch<num_x_channels; xch++) {
+                    for(int i=0; i<NUM_BINS; i++) {
+                        //real part
+                        double ref_fp = T_fp[ych][xch][i].re;
+                        double dut_fp = att_int32_to_double(state_ptr->T[xch].data[i].re, state_ptr->T[xch].exp);
+                        double diff = (ref_fp - dut_fp);
+                        if(diff < 0.0) diff = -diff;
+                        double diff_percentage = (diff / (ref_fp < 0.0 ? -ref_fp : ref_fp)) * 100;
+                        if(diff_percentage > max_diff_percentage) max_diff_percentage = diff_percentage;
+                        //tolerance: 0.02% of |ref| plus an absolute allowance of 1e-8
+                        if(diff > 0.0002*(ref_fp < 0.0 ? -ref_fp : ref_fp) + pow(10, -8))
+                        {
+                            printf("Re fail: iter %d, ych %d, xch %d, bin %d\n",iter, ych, xch, i);
+                            assert(0);
+                        }
+
+                        //imaginary part
+                        ref_fp = T_fp[ych][xch][i].im;
+                        dut_fp = att_int32_to_double(state_ptr->T[xch].data[i].im, state_ptr->T[xch].exp);
+                        diff = (ref_fp - dut_fp);
+                        if(diff < 0.0) diff = -diff;
+                        //NOTE: never pass diff through abs(): it takes an int and would truncate the error to 0
+                        diff_percentage = (diff / (ref_fp < 0.0 ? -ref_fp : ref_fp)) * 100;
+                        if(diff_percentage > max_diff_percentage) max_diff_percentage = diff_percentage;
+                        //tolerance: 0.02% of |ref| plus an absolute allowance of 1e-8
+                        if(diff > 0.0002*(ref_fp < 0.0 ? -ref_fp : ref_fp) + pow(10, -8))
+                        {
+                            printf("Im fail: iter %d, ych %d, xch %d, bin %d\n",iter, ych, xch, i);
+                            assert(0);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_create_output.xc b/test/lib_aec/aec_unit_tests/src/test_create_output.xc
new file mode 100644
index 000000000..666fff076
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_create_output.xc
@@ -0,0 +1,158 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+static const double WOLA_window_fp[32] = { //32-entry rising window used by aec_calc_output_fp(): applied as-is after the frame advance and reversed on the last 32 samples
+        0.0022640387134577056, 0.009035651368646647, 0.020253513192751316,
+        0.03581603349196372, 0.05558227567253826, 0.0793732335844094,
+        0.10697345262860625, 0.1381329809474649, 0.1725696330273574,
+        0.20997154521440087, 0.24999999999999994, 0.2922924934990568,
+        0.3364660183412892, 0.38212053224528636, 0.42884258086335736,
+        0.4762090420881288, 0.5237909579118711, 0.5711574191366425,
+        0.6178794677547135, 0.6635339816587108, 0.7077075065009433,
+        0.7499999999999999, 0.790028454785599, 0.8274303669726425,
+        0.861867019052535, 0.8930265473713938, 0.9206267664155905,
+        0.9444177243274616, 0.9641839665080363, 0.9797464868072486,
+        0.9909643486313533, 0.9977359612865423,}; //entries are complementary: WOLA_window_fp[i] + WOLA_window_fp[31-i] is approximately 1
+
+//Floating-point model of the output stage exercised by test_create_output(): windows error[] in place,
+//builds one output frame with the previous overlap added, stores the new overlap and clamps to 1.31 range.
+void aec_calc_output_fp(double *output, double *error, double *overlap) {
+    const int taps = 32; //entries in WOLA_window_fp, and samples carried over between frames
+    const int tail = AEC_PROC_FRAME_LENGTH - taps; //index where the trailing overlap region starts
+    int n;
+    //clear the leading AEC_FRAME_ADVANCE samples of the error frame
+    for(n=0; n<AEC_FRAME_ADVANCE; n++) {
+        error[n] = 0.0;
+    }
+    //rising window on the first retained samples, mirrored (falling) window on the frame tail
+    for(n=0; n<taps; n++) {
+        error[AEC_FRAME_ADVANCE + n] *= WOLA_window_fp[n];
+        error[tail + n] *= WOLA_window_fp[taps - 1 - n];
+    }
+    //copy the retained error samples into output
+    for(n=0; n<AEC_FRAME_ADVANCE; n++) {
+        output[n] = error[AEC_FRAME_ADVANCE + n];
+    }
+    //add the overlap saved by the previous call, then replace it with this frame's windowed tail
+    for(n=0; n<taps; n++) {
+        output[n] += overlap[n];
+        overlap[n] = error[tail + n];
+    }
+    //saturate output to the range representable as 1.31 fixed point
+    const double hi = ldexp(0x7fffffff, -31);
+    const double lo = ldexp(((double)(int32_t)0x80000000), -31);
+    for(n=0; n<AEC_FRAME_ADVANCE; n++) {
+        double v = output[n];
+        if(v < lo) v = lo;
+        if(v >= hi) v = hi;
+        output[n] = v;
+    }
+}
+
+//Return the magnitude of a: -a when a is negative, a otherwise.
+//Note: -0.0 and NaN fail the (a < 0.0) test and are returned unchanged.
+static inline double abs_double(double a) {
+    double magnitude = (a < 0.0) ? -a : a;
+    return magnitude;
+}
+
+double max_diff = 0.0; //Largest |ref - dut| recorded by check_error(); printed at the end of test_create_output()
+//Abort via assert(0) when |ref - att_int32_to_double(dut, dut_exp)| > rtol*|ref| + atol; ch, iter and error_string only label the report.
+static inline void check_error(double ref, int32_t dut, int dut_exp, double rtol, double atol, int ch, int iter, const char *error_string) {
+    double dut_double = att_int32_to_double(dut, dut_exp);
+    double diff = abs_double(ref - dut_double);
+    double tolerance = rtol*abs_double(ref) + atol;
+    if(diff > max_diff) max_diff = diff; //update the running maximum; diff itself must stay intact for the check below
+    if(diff > tolerance) {
+        printf("diff %.15f, %.15f\n",diff, tolerance);
+        printf("%s, iter %d, ch %d: diff %.15f, . ref %.15f, dut %.15f\n",error_string, iter, ch, diff, ref, dut_double);
+        assert(0);
+    }
+}
+
+//Unit test for aec_calc_output(): feeds random error frames to a randomly chosen filter state and compares the
+//windowed error, the 1.31 output frame and the saved overlap with the aec_calc_output_fp() reference.
+void test_create_output() {
+    unsafe {
+        unsigned num_y_channels = 1;
+        unsigned num_x_channels = 1;
+        unsigned num_phases = AEC_MAIN_FILTER_PHASES - 1; //given to aec_init() for both the main and shadow filter
+
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_state_t state, shadow_state;
+        aec_shared_state_t aec_shared_state;
+        aec_init(&state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, num_phases, num_phases);
+        //Initialise floating point arrays
+        double error_fp[AEC_MAX_Y_CHANNELS][AEC_PROC_FRAME_LENGTH];
+        double output_fp[AEC_MAX_Y_CHANNELS][AEC_FRAME_ADVANCE];
+        double overlap_fp[AEC_MAX_Y_CHANNELS][32];
+        for(int i=0; i<num_y_channels; i++) {
+            for(int j=0; j<32; j++) {
+                overlap_fp[i][j] = 0.0;
+            }
+        }
+        unsigned seed = 2;
+        //Generate error data
+        for(unsigned iter=0; iter<(1<<12)/F; iter++) {
+            int32_t new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+            aec_frame_init(&state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);
+            //Exercise either the main or the shadow filter state, chosen at random
+            int is_main_filter = att_random_uint32(seed) % 2;
+            aec_state_t *state_ptr = is_main_filter ? &state : &shadow_state;
+            //NOTE(review): overlap_fp is a single reference history while the DUT overlap is read through state_ptr,
+            //which alternates between state and shadow_state -- confirm both states share one overlap buffer.
+            //state_ptr->error is initialised in the Error->error IFFT call. Initialise here for standalone testing
+            for(int ch=0; ch<num_y_channels; ch++) {
+                bfp_s32_init(&state_ptr->error[ch], (int32_t*)&state_ptr->Error[ch].data[0], 0, AEC_PROC_FRAME_LENGTH, 0);
+            }
+
+            //Random error frame: 6-bit signed exponent, 0..2 bits of headroom, mantissas shifted down by the headroom
+            for(int ch=0; ch<num_y_channels; ch++) {
+                bfp_s32_t *error_ptr = &state_ptr->error[ch];
+                error_ptr->exp = sext(att_random_int32(seed), 6);
+                error_ptr->hr = (att_random_uint32(seed) % 3);
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++) {
+                    error_ptr->data[i] = att_random_int32(seed) >> error_ptr->hr;
+                    error_fp[ch][i] = att_int32_to_double(error_ptr->data[i], error_ptr->exp);
+                }
+            }
+            //DUT
+            int32_t output[AEC_MAX_Y_CHANNELS][AEC_FRAME_ADVANCE];
+            for(int ch=0; ch<state_ptr->shared_state->num_y_channels; ch++) {
+                aec_calc_output(state_ptr, &output[ch], ch);
+            }
+            //Reference
+            for(int ch=0; ch<state_ptr->shared_state->num_y_channels; ch++) {
+                aec_calc_output_fp(output_fp[ch], error_fp[ch], overlap_fp[ch]);
+            }
+
+            for(int ch=0; ch<state_ptr->shared_state->num_y_channels; ch++) {
+                //check error (the reference windows error_fp in place, so the DUT error is compared after windowing)
+                bfp_s32_t *error_ptr = &state_ptr->error[ch];
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++) {
+                    check_error(error_fp[ch][i], error_ptr->data[i], error_ptr->exp, 0.0000002, pow(10, -8), ch, iter, "error wrong");
+                }
+
+                //check output (DUT output is interpreted with a fixed exponent of -31)
+                for(int i=0; i<AEC_FRAME_ADVANCE; i++) {
+                    check_error(output_fp[ch][i], output[ch][i], -31, 0.0000002, pow(10, -8), ch, iter, "output wrong");
+                }
+
+                //check overlap
+                for(int i=0; i<32; i++) {
+                    check_error(overlap_fp[ch][i], state_ptr->overlap[ch].data[i], state_ptr->overlap[ch].exp, 0.0000002, pow(10, -8), ch, iter, "overlap wrong");
+                }
+            }
+        }
+        printf("max_diff = %.15f\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_estimate_delay.xc b/test/lib_aec/aec_unit_tests/src/test_estimate_delay.xc
new file mode 100644
index 000000000..6633588b6
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_estimate_delay.xc
@@ -0,0 +1,148 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+//Note: this is larger than AEC_LIB_MAIN_FILTER_PHASES, but AEC_LIB_MAX_Y_CHANNELS and AEC_LIB_MAX_X_CHANNELS are both 2, so it still works,
+//i.e. 30 <= 10 * 2 * 2
+#define NUM_PHASES_DELAY_EST    30
+#define PHASE_CMPLX_AIR_LEN     257
+
+
+//From test_cal_fd_frame_energy
+extern void calc_fd_frame_energy_fp(double *output, dsp_complex_fp *input, int length);
+
+
+int aec_estimate_delay_fp(  dsp_complex_fp H_hat[1][NUM_PHASES_DELAY_EST][PHASE_CMPLX_AIR_LEN], int32_t num_phases, int32_t len_phase, 
+                            double *sum_phase_powers, double phase_powers[NUM_PHASES_DELAY_EST], double *peak_to_average_ratio,
+                            double *peak_phase_power, int32_t *peak_power_phase_index){
+    //Floating-point reference delay estimator: stores the energy of each phase in phase_powers[], reports their sum,
+    //the peak energy, its phase index and the peak-to-average ratio, and returns AEC_FRAME_ADVANCE * peak index.
+    double best_power = 0.0;   //largest per-phase energy found so far
+    double total_power = 0.0;  //running sum of all per-phase energies
+    int32_t best_phase = 0;    //phase index at which best_power was found
+    //Only the first y-channel (H_hat[0]) is examined
+    for(int ph=0; ph<num_phases; ph++) {
+        double energy;
+        calc_fd_frame_energy_fp(&energy, H_hat[0][ph], len_phase);
+        phase_powers[ph] = energy;
+        total_power += energy;
+        if(energy > best_power){ //strict '>' keeps the earliest phase when energies tie
+            best_power = energy;
+            best_phase = ph;
+        }
+    }
+    *sum_phase_powers = total_power;
+    *peak_phase_power = best_power;
+    *peak_power_phase_index = best_phase;
+    //peak energy relative to the mean phase energy; reported as 1.0 when there is no energy at all
+    if(total_power > 0){
+        *peak_to_average_ratio = (best_power * num_phases) / total_power;
+    }else{
+        *peak_to_average_ratio = 1.0;
+    }
+    return AEC_FRAME_ADVANCE * best_phase;
+}
+
+#undef DWORD_ALIGNED
+#define DWORD_ALIGNED [[aligned(8)]]
+
+#define TEST_LEN (AEC_PROC_FRAME_LENGTH/2 + 1)
+//Unit test for aec_estimate_delay(): puts random energy into one H_hat phase at a time and checks the reported delay
+//and the peak/sum/ratio statistics against aec_estimate_delay_fp(); then checks the all-zero H_hat corner case.
+void test_delay_estimate() {
+    unsafe {
+    uint8_t DWORD_ALIGNED aec_memory_pool[sizeof(aec_memory_pool_t)];
+    aec_state_t DWORD_ALIGNED state;
+    aec_shared_state_t DWORD_ALIGNED shared_state;
+
+    //FP version of phase coeffs
+    dsp_complex_fp H_hat[1][NUM_PHASES_DELAY_EST][PHASE_CMPLX_AIR_LEN] = {{{{0.0}}}};
+
+    const unsigned num_phases = 30;
+    unsigned seed = 34575;
+    unsigned ch = 0;
+
+    //Populate selected phase with energy to see if we can read peak
+    for(unsigned ph = 0; ph < num_phases; ph++){
+        aec_init(&state, NULL, &shared_state, aec_memory_pool, NULL, 1, 1, num_phases, 0);
+        memset(H_hat, 0, sizeof(H_hat));
+
+        unsigned length = state.H_hat[ch][ph].length;
+        TEST_ASSERT_EQUAL_INT32_MESSAGE(PHASE_CMPLX_AIR_LEN, length, "Phase length assumption wrong");
+
+        state.H_hat[ch][ph].exp = att_random_int32(seed) % 40; //Between +39 -39
+        for(unsigned i = 0; i < length; i++){
+            state.H_hat[ch][ph].data[i].re = att_random_int32(seed);
+            state.H_hat[ch][ph].data[i].im = att_random_int32(seed);
+
+            H_hat[ch][ph][i].re = att_int32_to_double(state.H_hat[ch][ph].data[i].re, state.H_hat[ch][ph].exp);
+            H_hat[ch][ph][i].im = att_int32_to_double(state.H_hat[ch][ph].data[i].im, state.H_hat[ch][ph].exp);
+
+        }
+        int measured_delay = aec_estimate_delay(&state.shared_state->delay_estimator_params, state.H_hat[0], state.num_phases);
+
+        double sum_phase_powers;
+        double phase_powers[NUM_PHASES_DELAY_EST];
+        double peak_to_average_ratio;
+        double peak_phase_power;
+        int32_t peak_power_phase_index;
+        int measured_delay_fp = aec_estimate_delay_fp(H_hat, NUM_PHASES_DELAY_EST, PHASE_CMPLX_AIR_LEN,
+                                    &sum_phase_powers, phase_powers, &peak_to_average_ratio, &peak_phase_power, &peak_power_phase_index);
+
+        int actual_delay = ph * AEC_FRAME_ADVANCE;
+        // printf("test_delay_estimate: %d (%d), fin\n", measured_delay, actual_delay);
+
+        //Now check some things. First actual delay estimate vs expected (Unity takes the expected value first)
+        TEST_ASSERT_EQUAL_INT32_MESSAGE(actual_delay, measured_delay, "DUT Delay estimate incorrect");
+        TEST_ASSERT_EQUAL_INT32_MESSAGE(actual_delay, measured_delay_fp, "REF Delay estimate incorrect");
+
+        //Now check accuracy
+        double dut_peak_phase_power_fp = att_int32_to_double(state.shared_state->delay_estimator_params.peak_phase_power.mant, state.shared_state->delay_estimator_params.peak_phase_power.exp);
+        double dut_sum_phase_powers_fp = att_int32_to_double(state.shared_state->delay_estimator_params.sum_phase_powers.mant, state.shared_state->delay_estimator_params.sum_phase_powers.exp);
+        double dut_peak_to_average_ratio_fp = att_int32_to_double(state.shared_state->delay_estimator_params.peak_to_average_ratio.mant, state.shared_state->delay_estimator_params.peak_to_average_ratio.exp);
+
+        double sum_phase_powers_ratio = sum_phase_powers / dut_sum_phase_powers_fp;
+        double peak_phase_power_ratio = peak_phase_power / dut_peak_phase_power_fp;
+        double peak_to_average_ratio_ratio = peak_to_average_ratio / dut_peak_to_average_ratio_fp;
+
+        // printf("exponent: %d\n", state.H_hat[ch][ph].exp);
+        // printf("sum_phase_powers ref: %lf dut: %lf, ratio: %lf\n", sum_phase_powers, dut_sum_phase_powers_fp, sum_phase_powers_ratio);
+        // printf("peak_phase_power ref: %lf dut: %lf, ratio: %lf\n", peak_phase_power, dut_peak_phase_power_fp, peak_phase_power_ratio);
+        // printf("peak_to_average_ratio ref: %lf dut: %lf, ratio: %lf\n", peak_to_average_ratio, dut_peak_to_average_ratio_fp, peak_to_average_ratio_ratio);
+        //Delta is 0.0, so each ref/DUT ratio must equal 1.0 exactly once cast to float
+        TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, 1.0, (float)sum_phase_powers_ratio, "sum_phase_powers_ratio incorrect");
+        TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, 1.0, (float)peak_phase_power_ratio, "peak_phase_power_ratio incorrect");
+        TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, 1.0, (float)peak_to_average_ratio_ratio, "peak_to_average_ratio_ratio incorrect");
+        TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, (float)NUM_PHASES_DELAY_EST, (float)dut_peak_to_average_ratio_fp, "dut_peak_to_average_ratio_fp incorrect"); //one phase holds all the energy, so peak == sum
+    }
+
+    //Now try a few corner cases
+    //Corner case: no energy in any phase (reference H_hat zeroed, DUT state re-initialised)
+    aec_init(&state, NULL, &shared_state, aec_memory_pool, NULL, 1, 1, num_phases, 0);
+    memset(H_hat, 0, sizeof(H_hat));
+
+    double sum_phase_powers;
+    double phase_powers[NUM_PHASES_DELAY_EST];
+    double peak_to_average_ratio;
+    double peak_phase_power;
+    int32_t peak_power_phase_index;
+    int measured_delay_fp = aec_estimate_delay_fp(H_hat, NUM_PHASES_DELAY_EST, PHASE_CMPLX_AIR_LEN,
+                                &sum_phase_powers, phase_powers, &peak_to_average_ratio, &peak_phase_power, &peak_power_phase_index);
+    int measured_delay = aec_estimate_delay(&state.shared_state->delay_estimator_params, state.H_hat[0], state.num_phases);
+    double dut_peak_to_average_ratio_fp = att_int32_to_double(state.shared_state->delay_estimator_params.peak_to_average_ratio.mant, state.shared_state->delay_estimator_params.peak_to_average_ratio.exp);
+    printf("peak_to_average_ratio ref: %lf dut: %lf\n", peak_to_average_ratio, dut_peak_to_average_ratio_fp);
+
+    //Even though zero, should come out to 1 as no energy in H
+    TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, 1.0, (float)dut_peak_to_average_ratio_fp, "dut_peak_to_average_ratio_fp incorrect");
+    TEST_ASSERT_FLOAT_WITHIN_MESSAGE(0.0, 1.0, (float)peak_to_average_ratio, "peak_to_average_ratio incorrect");
+
+
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_fft.xc b/test/lib_aec/aec_unit_tests/src/test_fft.xc
new file mode 100644
index 000000000..96e548478
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_fft.xc
@@ -0,0 +1,111 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+double sine_lut[AEC_PROC_FRAME_LENGTH / 4 + 1]; //sine table handed to the reference FFT; filled by att_make_sine_table() in test_fft()
+//In-place N point complex FFT of input[0..length-1]; test_fft() uses it as the floating-point reference for aec_forward_fft()
+void aec_forward_fft_fp(complex_double_t *input, int length) {
+    att_bit_reverse((dsp_complex_fp*)input, length); //reorders input in place before the transform (presumably into bit-reversed index order, going by the helper's name)
+    att_forward_fft((dsp_complex_fp*)input, length, sine_lut); //transforms in place; the casts assume complex_double_t and dsp_complex_fp share a layout -- TODO confirm
+}
+
+//Unit test for aec_forward_fft(): runs the DUT FFT on random BFP input for the Y, X or Error transform of a randomly
+//chosen filter state and compares the first N/2+1 bins with the double-precision aec_forward_fft_fp() reference.
+void test_fft() {
+    unsafe {
+        unsigned num_y_channels = 1;
+        unsigned num_x_channels = 2;
+        unsigned main_filter_phases = 6;
+        unsigned shadow_filter_phases = 2;
+
+        aec_state_t main_state, shadow_state;
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_shared_state_t aec_shared_state;
+
+        aec_init(&main_state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+
+        int32_t [[aligned(8)]] new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+        complex_double_t [[aligned(8)]] ref[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_PROC_FRAME_LENGTH + 2];
+        aec_state_t *state_ptr;
+        unsigned seed = 83472;
+        //Init FFT for reference
+        att_make_sine_table(sine_lut, AEC_PROC_FRAME_LENGTH);
+
+        //3 FFTs in AEC: Y, X, Error
+        int max_diff = 0;
+        for(int iter=0; iter<(1<<10)/F; iter++) {
+            unsigned call_type = att_random_uint32(seed) % 3;
+            unsigned is_shadow = att_random_uint32(seed) % 2;
+            //printf("call_type %d, is_shadow %d\n",call_type, is_shadow);
+            //is_shadow selects the shadow filter state, otherwise the main one
+            state_ptr = is_shadow ? &shadow_state : &main_state;
+            aec_frame_init(&main_state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);
+            bfp_s32_t *fft_in;
+            bfp_complex_s32_t *fft_out;
+            int num_channels;
+
+            for(int ch=0; ch<num_y_channels; ch++) {
+                //state_ptr->error is initialised in the Error->error ifft aec_ifft() call with state_ptr->error as input. Initialising here for
+                //standalone testing.
+                bfp_s32_init(&state_ptr->error[ch], (int32_t*)&state_ptr->Error[ch].data[0], 0, AEC_PROC_FRAME_LENGTH, 0);
+            }
+            //Pick the input and output BFP vectors (and channel count) for the transform under test
+            if(call_type == 0) { //FFT Y
+                fft_in = &state_ptr->shared_state->y[0];
+                fft_out = &state_ptr->shared_state->Y[0];
+                num_channels = num_y_channels;
+            }
+            else if(call_type == 1) { //FFT X
+                fft_in = &state_ptr->shared_state->x[0];
+                fft_out = &state_ptr->shared_state->X[0];
+                num_channels = num_x_channels;
+            }
+            else { //FFT Error
+                fft_in = &state_ptr->error[0];
+                fft_out = &state_ptr->Error[0];
+                num_channels = num_y_channels;
+            }
+
+            //generate input
+            /* Generate inputs for the N-pt real FFT. Reference does an in-place N-pt complex FFT to generate
+             * N complex frequency domain values so store reference input in a N point complex input array with
+             * imaginary fields set to 0.
+             * */
+            for(int ch=0; ch<num_channels; ch++) {
+                fft_in[ch].exp = - (int) (att_random_uint32(seed)%50); //Between 0 and -49
+                fft_in[ch].hr = att_random_uint32(seed)%16; //Up to 15 bits HR
+
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++){
+                    fft_in[ch].data[i] = sext(att_random_int32(seed), 32-fft_in[ch].hr);
+                    ref[ch][i].re = att_int32_to_double(fft_in[ch].data[i], fft_in[ch].exp);
+                    ref[ch][i].im = 0;
+                }
+            }
+            //DUT FFT
+            for(int ch=0; ch<num_channels; ch++) {
+                aec_forward_fft(&fft_out[ch], &fft_in[ch]);
+            }
+            //Ref FFT
+            for(int ch=0; ch<num_channels; ch++) {
+                aec_forward_fft_fp(&ref[ch][0], AEC_PROC_FRAME_LENGTH);
+            }
+            //Compare first (N/2+1) complex values
+            for(int ch=0; ch<num_channels; ch++) {
+                unsigned diff = att_bfp_vector_int32((int32_t*)&fft_out[ch].data[0], fft_out[ch].exp, (double*)&ref[ch][0], 0, (AEC_PROC_FRAME_LENGTH/2+1)*2);
+                max_diff = (diff > max_diff) ? diff : max_diff;
+                //diff is whatever att_bfp_vector_int32() reports for the (N/2+1)*2 values; the test fails when it exceeds 1<<5
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<5, diff, "FFT diff too large.");
+                TEST_ASSERT_EQUAL_UINT32_MESSAGE((AEC_PROC_FRAME_LENGTH/2)+1, fft_out[ch].length, "FFT output length incorrect");
+            }
+        }
+        printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_filter_adapt.xc b/test/lib_aec/aec_unit_tests/src/test_filter_adapt.xc
new file mode 100644
index 000000000..a89752716
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_filter_adapt.xc
@@ -0,0 +1,217 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define TEST_NUM_Y (1)
+#define TEST_NUM_X (2)
+#define TEST_MAIN_PHASES (5)
+#define TEST_SHADOW_PHASES (3)
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+
+static double sine_lut_ifft[AEC_PROC_FRAME_LENGTH / 4 + 1];
+static double sine_lut[AEC_PROC_FRAME_LENGTH / 4 + 1];
+void aec_filter_adapt_fp(
+        complex_double_t *H_hat,
+        complex_double_t *X_fifo,
+        complex_double_t *T,
+        int bypass) {
+    //Floating-point reference for one phase of the filter update: H_hat += T * conj(X_fifo) per bin, then the result
+    //is taken to the time domain, samples from AEC_FRAME_ADVANCE onwards are zeroed, and it is transformed back.
+    if(bypass) return; //bypass: H_hat is left untouched
+    const int N = AEC_PROC_FRAME_LENGTH;
+    const int half = N/2; //index of the Nyquist bin
+    complex_double_t scratch[AEC_PROC_FRAME_LENGTH];
+    int k;
+
+    //Update bins DC..Nyquist and copy each updated bin into the lower half of the scratch spectrum
+    for(k=0; k<=half; k++) {
+        double upd_re = (T[k].re*X_fifo[k].re + T[k].im*X_fifo[k].im);
+        double upd_im = (T[k].im*X_fifo[k].re - T[k].re*X_fifo[k].im);
+        H_hat[k].re = H_hat[k].re + upd_re;
+        H_hat[k].im = H_hat[k].im + upd_im;
+        scratch[k].re = H_hat[k].re;
+        scratch[k].im = H_hat[k].im;
+    }
+    //Upper half of the spectrum is the complex-conjugate mirror of bins 1..N/2-1
+    for(k=1; k<half; k++) {
+        scratch[N-k].re = H_hat[k].re;
+        scratch[N-k].im = -H_hat[k].im;
+    }
+
+    //IFFT
+    att_bit_reverse((dsp_complex_fp *)scratch, N);
+    att_inverse_fft((dsp_complex_fp *)scratch, N, sine_lut_ifft);
+    //Zero the real part of every sample from AEC_FRAME_ADVANCE onwards (imaginary parts are left as computed)
+    for(k=AEC_FRAME_ADVANCE; k<N; k++) {
+        scratch[k].re = 0.0;
+    }
+    //Forward FFT of the truncated time-domain data
+    att_bit_reverse((dsp_complex_fp*)scratch, N);
+    att_forward_fft((dsp_complex_fp*)scratch, N, sine_lut);
+
+    //The first N/2+1 bins become the adapted filter
+    for(k=0; k<=half; k++) {
+        H_hat[k].re = scratch[k].re;
+        H_hat[k].im = scratch[k].im;
+    }
+}
+void test_aec_filter_adapt() { //Checks aec_filter_adapt() and the chunked aec_l2_adapt_plus_fft_gc() path against the aec_filter_adapt_fp() reference
+    unsafe {
+    unsigned num_y_channels = TEST_NUM_Y;
+    unsigned num_x_channels = TEST_NUM_X;
+    unsigned main_filter_phases = TEST_MAIN_PHASES;
+    unsigned shadow_filter_phases = TEST_SHADOW_PHASES;
+
+    aec_state_t state, shadow_state;
+    aec_memory_pool_t aec_memory_pool;
+    aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+    aec_shared_state_t aec_shared_state;
+
+    aec_init(&state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+
+    //Declare floating point arrays (sized for the main filter; the shadow filter uses a prefix of them)
+    complex_double_t H_hat_fp[TEST_NUM_Y][TEST_NUM_X*TEST_MAIN_PHASES][NUM_BINS];
+    complex_double_t X_fifo_fp[TEST_NUM_X][TEST_MAIN_PHASES][NUM_BINS];
+    complex_double_t T_fp[TEST_NUM_X][NUM_BINS];
+
+    //Init FFT for reference
+    att_make_sine_table(sine_lut, AEC_PROC_FRAME_LENGTH);
+    att_make_sine_table(sine_lut_ifft, AEC_PROC_FRAME_LENGTH);
+    unsigned seed=578335;
+    unsigned max_diff = 0.0; //largest att_bfp_vector_int32() result seen over all iterations
+    for(int itt=0; itt<(100)/F; itt++) {
+        int32_t new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+        unsigned is_main = att_random_uint32(seed) % 2; //pick the filter state under test at random
+        aec_state_t *state_ptr;
+        if(is_main) {
+            state_ptr = &state;
+        }
+        else {
+            state_ptr = &shadow_state;
+        }
+        state_ptr->shared_state->config_params.aec_core_conf.bypass = att_random_uint32(seed) % 2; //when set, aec_filter_adapt_fp() returns early and the L2 path below is skipped
+        unsigned test_l2_api = att_random_uint32(seed) % 2; //0: call aec_filter_adapt(), 1: call aec_l2_adapt_plus_fft_gc() phase by phase
+        aec_frame_init(&state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);        
+        //Generate H_hat: random exponent, headroom and mantissas per phase, mirrored into H_hat_fp
+        for(int ch=0; ch<num_y_channels; ch++) {
+            for(int ph=0; ph<num_x_channels*state_ptr->num_phases; ph++) {
+                state_ptr->H_hat[ch][ph].exp = sext(att_random_int32(seed), 6);
+                state_ptr->H_hat[ch][ph].hr = att_random_uint32(seed) % 5;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->H_hat[ch][ph].data[i].re = att_random_int32(seed) >> state_ptr->H_hat[ch][ph].hr;
+                    state_ptr->H_hat[ch][ph].data[i].im = att_random_int32(seed) >> state_ptr->H_hat[ch][ph].hr;
+
+                    H_hat_fp[ch][ph][i].re = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].re, state_ptr->H_hat[ch][ph].exp);
+                    H_hat_fp[ch][ph][i].im = att_int32_to_double(state_ptr->H_hat[ch][ph].data[i].im, state_ptr->H_hat[ch][ph].exp);
+                }
+                //DC and Nyquist bin imaginary=0
+                state_ptr->H_hat[ch][ph].data[0].im = 0;
+                state_ptr->H_hat[ch][ph].data[NUM_BINS-1].im = 0;
+                H_hat_fp[ch][ph][0].im = 0.0;
+                H_hat_fp[ch][ph][NUM_BINS-1].im = 0.0;
+            }
+        }
+        //Generate X_fifo, (always for number of phases in main_state)
+        aec_state_t *main_state_ptr = &state;
+        for(int ch=0; ch<num_x_channels; ch++) {
+            for(int ph=0; ph<main_state_ptr->num_phases; ph++) {
+                state_ptr->shared_state->X_fifo[ch][ph].exp = sext(att_random_int32(seed), 6);
+                state_ptr->shared_state->X_fifo[ch][ph].hr = att_random_uint32(seed) % 5;
+                for(int i=0; i<NUM_BINS; i++) {
+                    state_ptr->shared_state->X_fifo[ch][ph].data[i].re = att_random_int32(seed) >> state_ptr->shared_state->X_fifo[ch][ph].hr;
+                    state_ptr->shared_state->X_fifo[ch][ph].data[i].im = att_random_int32(seed) >> state_ptr->shared_state->X_fifo[ch][ph].hr;
+
+                    X_fifo_fp[ch][ph][i].re = att_int32_to_double(state_ptr->shared_state->X_fifo[ch][ph].data[i].re, state_ptr->shared_state->X_fifo[ch][ph].exp);
+                    X_fifo_fp[ch][ph][i].im = att_int32_to_double(state_ptr->shared_state->X_fifo[ch][ph].data[i].im, state_ptr->shared_state->X_fifo[ch][ph].exp);
+                }
+                state_ptr->shared_state->X_fifo[ch][ph].data[0].im = 0;
+                state_ptr->shared_state->X_fifo[ch][ph].data[NUM_BINS-1].im = 0;
+                X_fifo_fp[ch][ph][0].im = 0.0;
+                X_fifo_fp[ch][ph][NUM_BINS-1].im = 0.0;
+            }
+        }
+        //Generate T (one spectrum per x-channel)
+        for(int ch=0; ch<num_x_channels; ch++) {
+            state_ptr->T[ch].exp = sext(att_random_int32(seed), 6);
+            state_ptr->T[ch].hr = att_random_uint32(seed) % 5;
+            for(int i=0; i<NUM_BINS; i++) {
+                state_ptr->T[ch].data[i].re = sext(att_random_int32(seed), (32 - state_ptr->T[ch].hr));
+                state_ptr->T[ch].data[i].im = sext(att_random_int32(seed), (32 - state_ptr->T[ch].hr));
+
+                T_fp[ch][i].re = att_int32_to_double(state_ptr->T[ch].data[i].re, state_ptr->T[ch].exp);
+                T_fp[ch][i].im = att_int32_to_double(state_ptr->T[ch].data[i].im, state_ptr->T[ch].exp);
+            }
+            state_ptr->T[ch].data[0].im = 0;
+            state_ptr->T[ch].data[NUM_BINS-1].im = 0;
+            T_fp[ch][0].im = 0.0;
+            T_fp[ch][NUM_BINS-1].im = 0.0;
+        }
+        //aec init only initialises the 2d Xfifo. Since we're using the 1d fifo for error computation, call aec_update_X_fifo_1d()
+        //to update the 1d Fifo
+        aec_update_X_fifo_1d(state_ptr);
+
+        //ref: adapt every x-channel/phase of the floating-point model (H_hat_fp phases are laid out as xch*num_phases + p)
+        for(int ych=0; ych<num_y_channels; ych++) {
+            for(int xch=0; xch<num_x_channels; xch++) {
+                for(int p=0; p<state_ptr->num_phases; p++) {
+                    aec_filter_adapt_fp(H_hat_fp[ych][xch*state_ptr->num_phases + p], X_fifo_fp[xch][p], T_fp[xch], state_ptr->shared_state->config_params.aec_core_conf.bypass);
+                }
+            }
+        }
+        //dut
+        if(!test_l2_api) { 
+            for(int ch=0; ch<num_y_channels; ch++) {
+                aec_filter_adapt(state_ptr, ch);
+            }
+        }
+        else {
+            #define NUM_CHUNKS_PER_CH (4) //spread num_phases over 4 chunks for each y-channel
+            if(!state_ptr->shared_state->config_params.aec_core_conf.bypass) {
+                for(int c=0; c<num_y_channels; c++) {
+                    int remaining_phases = num_x_channels * state_ptr->num_phases;
+                    int start_phase=0;
+                    int num_phases;
+                    for(int t=0; t<NUM_CHUNKS_PER_CH; t++) {
+                        int ch=c;
+                        if((t == NUM_CHUNKS_PER_CH-1) || (remaining_phases <= 1)) //final chunk, or at most one phase left: take everything that remains
+                        {
+                            num_phases = remaining_phases;
+                            remaining_phases = 0;
+                        }
+                        else if(remaining_phases > 1) {
+                            num_phases = (uint32_t)att_random_uint32(seed) % remaining_phases; //random chunk size in [0, remaining_phases-1]; may be empty
+                            remaining_phases -= num_phases;
+                        }
+                        for(int ph=start_phase; ph<start_phase+num_phases; ph++) {
+                            aec_l2_adapt_plus_fft_gc(&state_ptr->H_hat[ch][ph], &state_ptr->X_fifo_1d[ph], &state_ptr->T[ph/state_ptr->num_phases]); //ph/num_phases recovers the x-channel index for T
+                        }
+                        start_phase += num_phases;
+                    }
+                }
+            }
+        }
+        //Compare outputs
+        for(int ch=0; ch<num_y_channels; ch++) {
+            for(int p=0; p<num_x_channels*state_ptr->num_phases; p++) {
+                unsigned diff = att_bfp_vector_int32(
+                        (int32_t*)&state_ptr->H_hat[ch][p].data[0],
+                        state_ptr->H_hat[ch][p].exp,
+                        (double*)&H_hat_fp[ch][p][0],
+                        0,
+                        (AEC_PROC_FRAME_LENGTH/2+1)*2);
+                //printf("diff %d\n",diff);
+                max_diff = (diff > max_diff) ? diff : max_diff;
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<7, diff, "H_hat diff too large."); //fails when diff exceeds 1<<7
+            }
+        }
+    }
+    printf("max_diff %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_ifft.xc b/test/lib_aec/aec_unit_tests/src/test_ifft.xc
new file mode 100644
index 000000000..9be3ed97a
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_ifft.xc
@@ -0,0 +1,115 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+double sine_lut_ifft[AEC_PROC_FRAME_LENGTH / 4 + 1]; //Sine table for the reference IFFT; filled by att_make_sine_table() in test_ifft()
+//Floating-point reference for the test: in-place N-pt complex IFFT of the 'length' complex points in input
+void aec_inverse_fft_fp(complex_double_t *input, int length) {
+    att_bit_reverse     ((dsp_complex_fp *)input, length); //reorder first (bit reversal, going by the helper's name), then transform
+    att_inverse_fft     ((dsp_complex_fp *)input, length, sine_lut_ifft);
+}
+
+void test_ifft() { //Checks aec_inverse_fft() (DUT) against the double-precision aec_inverse_fft_fp() reference on random spectra
+    unsafe {
+        unsigned num_y_channels = 2;
+        unsigned num_x_channels = 2;
+        unsigned main_filter_phases = 6;
+        unsigned shadow_filter_phases = 2;
+
+        aec_state_t main_state, shadow_state;
+        aec_memory_pool_t aec_memory_pool;
+        aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+        aec_shared_state_t aec_shared_state;
+
+        aec_init(&main_state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+
+        unsigned seed = 78431; //fixed seed handed to every att_random_* call below
+        int32_t [[aligned(8)]] new_frame[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE]; //only handed to aec_frame_init(); this test never fills it in
+        complex_double_t [[aligned(8)]] ref[AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS][AEC_PROC_FRAME_LENGTH + 2]; //reference spectrum going in, reference IFFT result coming out (in place)
+
+        //Init FFT for reference
+        att_make_sine_table(sine_lut_ifft, AEC_PROC_FRAME_LENGTH);
+        unsigned max_diff = 0; //largest value returned by att_bfp_vector_int32() over the whole run
+        for(unsigned itt=0;itt<(1<<10)/F;itt++) { //F is defined outside this file; presumably a divisor for shortening test runs - confirm
+            aec_frame_init(&main_state, &shadow_state, &new_frame[0], &new_frame[AEC_MAX_Y_CHANNELS]);
+            int call_type = att_random_uint32(seed) % 2; //Error->error or Y_hat->y_hat IFFT
+            int is_shadow = att_random_uint32(seed) % 2; //1 => exercise the shadow filter state, 0 => the main filter state
+            aec_state_t *state_ptr = (is_shadow == 1) ? &shadow_state : &main_state;
+            bfp_complex_s32_t *ifft_in;
+            bfp_s32_t *ifft_out;
+            if(call_type == 0) { //Error->error
+                ifft_in = &state_ptr->Error[0];
+                ifft_out = &state_ptr->error[0];
+            }
+            else { //Y_hat->y_hat
+                ifft_in = &state_ptr->Y_hat[0];
+                ifft_out = &state_ptr->y_hat[0];
+            }
+            for(int ch=0; ch<num_y_channels; ch++) {
+                ifft_in[ch].exp = - (int) (att_random_uint32(seed)%50); //Between 0 and -49
+                ifft_in[ch].hr = att_random_uint32(seed)%16; //Up to 15 bits HR
+
+                /* Generate N/2 complex frequency domain samples for DUT.
+                 * Generate N complex frequency domain samples for Ref. The REF IFFT
+                 * is a complex N-pt IFFT. However the N complex ref freq domain samples are
+                 * symmetric around nyquist in such a way that the result of the REF complex
+                 * N-pt IFFT is real */
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH/2; i++) {
+                    ifft_in[ch].data[i].re = sext(att_random_int32(seed), 32-ifft_in[ch].hr); //presumably sign-extends from the low (32-hr) bits, leaving hr bits of headroom - confirm sext() semantics
+                    ifft_in[ch].data[i].im = sext(att_random_int32(seed), 32-ifft_in[ch].hr);
+
+                    ref[ch][i].re = att_int32_to_double(ifft_in[ch].data[i].re, ifft_in[ch].exp);
+                    ref[ch][i].im = att_int32_to_double(ifft_in[ch].data[i].im, ifft_in[ch].exp);
+                    //Generate (N/2+1) to (N-1) indexed bins based on symmetry around Nyquist
+                    if(i){
+                        ref[ch][AEC_PROC_FRAME_LENGTH - i].re =  ref[ch][i].re;
+                        ref[ch][AEC_PROC_FRAME_LENGTH - i].im = -ref[ch][i].im;
+                    }
+                }
+                //Unpack Nyquist into bin N/2: the value drawn for bin 0's imaginary part becomes the (real) Nyquist bin, for ref and DUT alike
+                ref[ch][AEC_PROC_FRAME_LENGTH/2].re = ref[ch][0].im;
+                ref[ch][AEC_PROC_FRAME_LENGTH/2].im = 0;
+                ref[ch][0].im = 0;
+                ifft_in[ch].data[AEC_PROC_FRAME_LENGTH/2].re = ifft_in[ch].data[0].im; 
+                ifft_in[ch].data[AEC_PROC_FRAME_LENGTH/2].im = 0;
+                ifft_in[ch].data[0].im = 0;
+            }
+            
+            //DUT IFFT
+            for(int ch=0; ch<num_y_channels; ch++) {
+                //printf("addr in: 0x%08x, addr out: 0x%08x\n",&ifft_in[ch].data[0], &ifft_out[ch].data[0]);
+                aec_inverse_fft(&ifft_out[ch], &ifft_in[ch]);
+            }
+            /* N-pt complex freq domain data ->N-pt complex IFFT-> N-pt complex time domain data.
+             * However the input freq domain is made to be symmetric around Nyquist in a way such that
+             * the N-pt complex IFFT time domain output is real. Since IFFT is done in-place, the imaginary
+             * fields of all the N complex time domain samples will be 0
+             * */
+
+            for(int ch=0; ch<num_y_channels; ch++) {
+                //printf("addr in: 0x%08x, addr out: 0x%08x\n",&ifft_in[ch].data[0], &ifft_out[ch].data[0]);
+                aec_inverse_fft_fp(&ref[ch][0], AEC_PROC_FRAME_LENGTH);
+            }
+
+            //Compare results
+            for(int ch=0; ch<num_y_channels; ch++) {
+                double ref_re[AEC_PROC_FRAME_LENGTH]; //real parts of the reference output, packed contiguously for the comparison helper
+                for(int i=0; i<AEC_PROC_FRAME_LENGTH; i++) {
+                    ref_re[i] = ref[ch][i].re;
+                }
+                unsigned diff = att_bfp_vector_int32((int32_t*)&ifft_out[ch].data[0], ifft_out[ch].exp, (double*)&ref_re[0], 0, AEC_PROC_FRAME_LENGTH);
+                max_diff = (diff > max_diff) ? diff : max_diff;
+                TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<5, diff, "IFFT diff too large."); //threshold first, measured value second
+                TEST_ASSERT_EQUAL_UINT32_MESSAGE(AEC_PROC_FRAME_LENGTH, ifft_out[ch].length, "IFFT output length incorrect");
+            }
+        }
+        printf("max_diff %d\n", max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_l2_unify_exponent.xc b/test/lib_aec/aec_unit_tests/src/test_l2_unify_exponent.xc
new file mode 100644
index 000000000..be59a10e9
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_l2_unify_exponent.xc
@@ -0,0 +1,226 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+//TODO MODIFY TO TEST FOR POSITIVE AND NEGATIVE EXPONENTS!!!
+
+#define NUM_CHUNKS (20) //BFP chunks generated per iteration
+#define NUM_SUBGROUPS (3) //number of subgroups a chunk can be assigned to; each has its own backing buffer
+#define LENGTH_PER_SUBGROUP (500) //capacity, in elements, of each subgroup's backing buffer
+void test_bfp_complex_s32_l2_unify_exponent() { //Checks aec_l2_bfp_complex_s32_unify_exponent(): values preserved, reported headroom honest and >= the requested minimum
+    unsafe {
+    complex_s32_t mem[NUM_SUBGROUPS][LENGTH_PER_SUBGROUP]; //backing store: the chunks of a subgroup sit back-to-back in mem[subgroup]
+    bfp_complex_s32_t chunks[NUM_CHUNKS];
+    int chunk_subgroup_mapping[NUM_CHUNKS]; //chunk index -> subgroup index
+    
+    dsp_complex_fp mem_float[NUM_SUBGROUPS][LENGTH_PER_SUBGROUP]; //double copy of the data as generated, laid out like mem
+    
+    int32_t max_diff = 0;
+    unsigned seed = 34;
+    int remaining_length[NUM_SUBGROUPS]; //elements of each subgroup buffer not yet handed to a chunk
+    int min_reqd_headroom[NUM_SUBGROUPS];
+    int null_mapping; //null_mapping = 1 => unify everything without looking at subgroups
+    //null_mapping = 0 => unify according to subgroups
+    for(int iter=0; iter<(1<<12)/F; iter++) {
+        null_mapping = att_random_uint32(seed) % 2;
+        for(int i=0; i<NUM_SUBGROUPS; i++) {
+            remaining_length[i] = LENGTH_PER_SUBGROUP;
+            min_reqd_headroom[i] = att_random_uint32(seed) % 4;
+        }
+        //Setup input
+        for(int c=0; c<NUM_CHUNKS; c++) {
+            int subgroup = att_random_uint32(seed) % NUM_SUBGROUPS; //which subgroup the chunk belongs to
+            chunk_subgroup_mapping[c] = subgroup;
+            chunks[c].exp = sext(att_random_int32(seed), 6);
+            chunks[c].hr = (att_random_uint32(seed) % 4);
+            //generate lengths such that a subgroup's total stays below LENGTH_PER_SUBGROUP (each length is < what remains)
+            if(remaining_length[subgroup]) {
+                chunks[c].length = att_random_uint32(seed) % remaining_length[subgroup];
+                chunks[c].data = &mem[subgroup][LENGTH_PER_SUBGROUP - remaining_length[subgroup]]; //first unused element of this subgroup
+                //generate data
+                for(int ii=0; ii<chunks[c].length; ii++) {
+                    chunks[c].data[ii].re = att_random_int32(seed) >> chunks[c].hr; //arithmetic shift leaves at least hr bits of headroom
+                    chunks[c].data[ii].im = att_random_int32(seed) >> chunks[c].hr;
+                    //keep a copy in float array
+                    mem_float[subgroup][LENGTH_PER_SUBGROUP - remaining_length[subgroup] + ii].re = att_int32_to_double(chunks[c].data[ii].re, chunks[c].exp);
+                    mem_float[subgroup][LENGTH_PER_SUBGROUP - remaining_length[subgroup] + ii].im = att_int32_to_double(chunks[c].data[ii].im, chunks[c].exp);
+                }
+                remaining_length[subgroup] -= chunks[c].length;
+            }
+            else {
+                chunks[c].length = 0; //NOTE(review): chunks[c].data is not refreshed on this path - presumably zero-length chunks are never dereferenced; confirm
+            }
+            //printf("chunk %d, subgroup %d. exp %d, hr %d, length %d\n", c, subgroup, chunks[c].exp, chunks[c].hr, chunks[c].length);
+        }
+        
+        int final_exp, final_hr;
+        bfp_complex_s32_t unified[NUM_SUBGROUPS]; //one view per subgroup spanning every element its chunks occupy
+        if(!null_mapping) {
+            for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+                aec_l2_bfp_complex_s32_unify_exponent(chunks, &final_exp, &final_hr, chunk_subgroup_mapping, NUM_CHUNKS, sb, min_reqd_headroom[sb]);
+                //printf("%d\n",final_exp);
+                if(final_exp == INT_MIN) { //treated as a failure: presumably the value left when no chunk was processed - confirm against the DUT
+                    assert(0);
+                }
+                bfp_complex_s32_init(&unified[sb], &mem[sb][0], final_exp, LENGTH_PER_SUBGROUP-remaining_length[sb], 0);
+                unified[sb].hr = final_hr; //take the DUT's reported headroom rather than recomputing it; verified further down
+                //printf("subgroup %d, min_reqd_headroom %d: final_exp %d, final_hr %d\n",sb, min_reqd_headroom[sb], final_exp, final_hr);
+            }
+        }
+        else
+        {
+            aec_l2_bfp_complex_s32_unify_exponent(chunks, &final_exp, &final_hr, NULL, NUM_CHUNKS, 0, min_reqd_headroom[0]);
+            //printf("%d\n",final_exp);
+            if(final_exp == INT_MIN) {
+                assert(0);
+            }
+            for(int sb=1; sb<NUM_SUBGROUPS; sb++) {
+                min_reqd_headroom[sb] = min_reqd_headroom[0]; //a single requirement was used for everything, so check every subgroup against it
+            }
+            for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+                bfp_complex_s32_init(&unified[sb], &mem[sb][0], final_exp, LENGTH_PER_SUBGROUP-remaining_length[sb], 0);
+                unified[sb].hr = final_hr;
+                //printf("subgroup %d, min_reqd_headroom %d: final_exp %d, final_hr %d\n",sb, min_reqd_headroom[sb], final_exp, final_hr);
+            }
+        }
+
+        //check output: the original doubles, converted at the unified exponent, must match the mantissas now in mem to within 1<<1
+        for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+            for(int i=0; i<unified[sb].length; i++) {
+                int32_t ref_int = att_double_to_int32( mem_float[sb][i].re, unified[sb].exp);
+                int32_t dut_int = unified[sb].data[i].re;
+                int32_t diff = ref_int - dut_int;
+                if(diff < 0) diff = -diff;
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<1, ref_int, dut_int, "unify broke for bfp_complex_s32 re");
+
+                ref_int = att_double_to_int32( mem_float[sb][i].im, unified[sb].exp);
+                dut_int = unified[sb].data[i].im;
+                diff = ref_int - dut_int;
+                if(diff < 0) diff = -diff;
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<1, ref_int, dut_int, "unify broke for bfp_complex_s32 im");
+            }
+        }
+        //check headroom: the reported value may not exceed the real one, nor fall below the requested minimum
+        for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+            int actual_headroom = bfp_complex_s32_headroom(&unified[sb]);
+            //printf("hr: dut %d, actual %d\n", unified[sb].hr, actual_headroom);
+            if(unified[sb].hr > actual_headroom) {
+                printf("iter %d, bfp_complex_s32, actual headroom is less than the reported output headroom\n", iter);
+                assert(0);
+            }
+            if(unified[sb].hr < min_reqd_headroom[sb]) {
+                printf("iter %d, bfp_complex_s32, output headroom is less than the minimum required headroom\n", iter);
+                assert(0);
+            }
+        }
+    }
+    printf("max_diff = %d\n",max_diff);
+    }
+}
+
+void test_bfp_s32_l2_unify_exponent() { //Checks aec_l2_bfp_s32_unify_exponent(): values preserved, reported headroom honest and >= the requested minimum
+    unsafe {
+    int32_t mem[NUM_SUBGROUPS][LENGTH_PER_SUBGROUP]; //backing store: the chunks of a subgroup sit back-to-back in mem[subgroup]
+    bfp_s32_t chunks[NUM_CHUNKS];
+    int chunk_subgroup_mapping[NUM_CHUNKS]; //chunk index -> subgroup index
+    
+    double mem_float[NUM_SUBGROUPS][LENGTH_PER_SUBGROUP]; //double copy of the data as generated, laid out like mem
+    
+    int32_t max_diff = 0;
+    unsigned seed = 34;
+    int remaining_length[NUM_SUBGROUPS]; //elements of each subgroup buffer not yet handed to a chunk
+    int min_reqd_headroom[NUM_SUBGROUPS];
+    int null_mapping; //null_mapping = 1 => unify everything without looking at subgroups
+    //null_mapping = 0 => unify according to subgroups
+    for(int iter=0; iter<(1<<12)/F; iter++) { //iteration count scaled by F, as in the complex variant of this test
+        null_mapping = att_random_uint32(seed) % 2;
+        for(int i=0; i<NUM_SUBGROUPS; i++) {
+            remaining_length[i] = LENGTH_PER_SUBGROUP;
+            min_reqd_headroom[i] = att_random_uint32(seed) % 4;
+        }
+        //Setup input
+        for(int c=0; c<NUM_CHUNKS; c++) {
+            int subgroup = att_random_uint32(seed) % NUM_SUBGROUPS; //which subgroup the chunk belongs to
+            chunk_subgroup_mapping[c] = subgroup;
+            chunks[c].exp = sext(att_random_int32(seed), 6);
+            chunks[c].hr = (att_random_uint32(seed) % 4);
+            //generate lengths such that a subgroup's total stays below LENGTH_PER_SUBGROUP (each length is < what remains)
+            if(remaining_length[subgroup]) {
+                chunks[c].length = att_random_uint32(seed) % remaining_length[subgroup];
+                chunks[c].data = &mem[subgroup][LENGTH_PER_SUBGROUP - remaining_length[subgroup]]; //first unused element of this subgroup
+                //generate data; the arithmetic shift leaves at least hr bits of headroom
+                for(int ii=0; ii<chunks[c].length; ii++) {
+                    chunks[c].data[ii] = att_random_int32(seed) >> chunks[c].hr;
+                    //keep a copy in float array
+                    mem_float[subgroup][LENGTH_PER_SUBGROUP - remaining_length[subgroup] + ii] = att_int32_to_double(chunks[c].data[ii], chunks[c].exp);
+                }
+                remaining_length[subgroup] -= chunks[c].length;
+            }
+            else {
+                chunks[c].length = 0;
+            }
+        }
+        
+        int final_exp, final_hr;
+        bfp_s32_t unified[NUM_SUBGROUPS]; //one view per subgroup spanning every element its chunks occupy
+        if(!null_mapping) {
+            for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+                aec_l2_bfp_s32_unify_exponent(chunks, &final_exp, &final_hr, chunk_subgroup_mapping, NUM_CHUNKS, sb, min_reqd_headroom[sb]);
+                if(final_exp == INT_MIN) {
+                    assert(0);
+                }
+                bfp_s32_init(&unified[sb], &mem[sb][0], final_exp, LENGTH_PER_SUBGROUP-remaining_length[sb], 0);
+                unified[sb].hr = final_hr; //take the DUT's reported headroom rather than recomputing it; verified further down
+            }
+        }
+        else
+        {
+            aec_l2_bfp_s32_unify_exponent(chunks, &final_exp, &final_hr, NULL, NUM_CHUNKS, 0, min_reqd_headroom[0]);
+            if(final_exp == INT_MIN) {
+                assert(0);
+            }
+            for(int sb=1; sb<NUM_SUBGROUPS; sb++) {
+                min_reqd_headroom[sb] = min_reqd_headroom[0]; //a single requirement was used for everything, so check every subgroup against it
+            }
+            for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+                bfp_s32_init(&unified[sb], &mem[sb][0], final_exp, LENGTH_PER_SUBGROUP-remaining_length[sb], 0);
+                unified[sb].hr = final_hr;
+            }
+        }
+
+        //check output: the original doubles, converted at the unified exponent, must match the mantissas now in mem to within 1<<1
+        for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+            for(int i=0; i<unified[sb].length; i++) {
+                int32_t ref_int = att_double_to_int32( mem_float[sb][i], unified[sb].exp);
+                int32_t dut_int = unified[sb].data[i];
+                int32_t diff = ref_int - dut_int;
+                if(diff < 0) diff = -diff;
+                if(diff > max_diff) max_diff = diff;
+                TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<1, ref_int, dut_int, "unify broke for bfp_s32");
+            }
+        }
+        //check headroom: the reported value may not exceed the real one, nor fall below the requested minimum
+        for(int sb=0; sb<NUM_SUBGROUPS; sb++) {
+            int actual_headroom = bfp_s32_headroom(&unified[sb]);
+            if(unified[sb].hr > actual_headroom) {
+                printf("iter %d, bfp_s32, actual headroom is less than the reported output headroom\n", iter);
+                assert(0);
+            }
+            if(unified[sb].hr < min_reqd_headroom[sb]) {
+                printf("iter %d, bfp_s32, output headroom is less than the minimum required headroom\n", iter);
+                assert(0);
+            }
+        }
+    }
+    printf("max_diff = %d\n",max_diff);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_update_sigma_XX.xc b/test/lib_aec/aec_unit_tests/src/test_update_sigma_XX.xc
new file mode 100644
index 000000000..2eedf1a96
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_update_sigma_XX.xc
@@ -0,0 +1,131 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+//Floating-point reference for sigma_XX: per channel, total the bin energies of X_fp and run a 2^-sigma_xx_shift EMA on every bin
+void aec_update_sigma_XX_fp (double (*sigma_XX)[NUM_BINS], double *sum_X_energy, dsp_complex_fp (*X_fp)[NUM_BINS], unsigned num_channels, int sigma_xx_shift) {
+    const double new_weight = ldexp(1.0, -sigma_xx_shift); //share of each bin's estimate taken from the newest frame
+    for(unsigned ch=0; ch<num_channels; ch++) {
+        sum_X_energy[ch] = 0.0;
+        for(unsigned bin=0; bin<NUM_BINS; bin++) {
+            const double bin_energy = (X_fp[ch][bin].re*X_fp[ch][bin].re)+(X_fp[ch][bin].im*X_fp[ch][bin].im);
+            sum_X_energy[ch] += bin_energy;
+            sigma_XX[ch][bin] = sigma_XX[ch][bin]*(1.0 - new_weight); //decay the running estimate...
+            sigma_XX[ch][bin] += (bin_energy*new_weight); //...then blend in the new energy
+        }
+    }
+}
+
+//Rotate mapping[] one place towards higher indices; the last entry wraps round to index 0
+static void update_mapping(int *mapping, int num_phases) {
+    const int wrapped = mapping[num_phases - 1];
+    //walk from the top down so each entry is read before it is overwritten
+    for(int dst=num_phases-1; dst>0; dst--) {
+        mapping[dst] = mapping[dst-1];
+    }
+    mapping[0] = wrapped;
+}
+
+void test_update_sigma_XX() { //Checks aec_update_X_fifo_and_calc_sigmaXX(): sum_X_energy and sigma_XX against a double reference, plus the X_fifo rotation
+    unsafe {
+    unsigned num_y_channels = 1;
+    unsigned num_x_channels = 1;
+    unsigned num_phases = 10;
+    aec_memory_pool_t aec_memory_pool;
+    aec_state_t state;
+    aec_shared_state_t aec_shared_state;
+    aec_init(&state, NULL, &aec_shared_state, (uint8_t*)&aec_memory_pool, NULL, num_y_channels, num_x_channels, num_phases, 0); //main filter only: no shadow state, no shadow pool, 0 shadow phases
+    complex_s32_t X[AEC_MAX_X_CHANNELS][NUM_BINS];
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_complex_s32_init(&state.shared_state->X[ch], X[ch], 0, NUM_BINS, 0); //point the shared X spectrum at local storage
+    }
+    dsp_complex_fp X_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+    double sigma_XX_fp[AEC_MAX_X_CHANNELS][NUM_BINS], sum_X_energy_fp[AEC_MAX_X_CHANNELS];
+    //initialise floating point stuff. sigma_xx_fp
+    for(unsigned x_ch=0; x_ch<num_x_channels; x_ch++) {
+        for(unsigned bin=0; bin<NUM_BINS; bin++) {
+            sigma_XX_fp[x_ch][bin] = 0.0;
+        }
+    }
+    
+    int mapping[AEC_MAIN_FILTER_PHASES]; //expected order of the initial FIFO slots; starts as the identity
+    bfp_complex_s32_t X_fifo_check[AEC_MAX_X_CHANNELS][AEC_MAIN_FILTER_PHASES]; //snapshot of the FIFO descriptors right after aec_init()
+    for(int ch=0; ch<num_x_channels; ch++) {
+        for(int i=0; i<num_phases; i++) {
+            mapping[i] = i;
+            X_fifo_check[ch][i] = state.shared_state->X_fifo[ch][i];
+        }
+    }
+    int max_diff = 0;
+    int max_diff_sum_X_energy = 0;
+    unsigned seed = 2;
+    for(unsigned iter=0; iter<(1<<12)/F; iter++) {
+        for(unsigned ch=0; ch<num_x_channels; ch++) { //new random X frame, mirrored in double as X_fp
+            bfp_complex_s32_t *X_ptr = &state.shared_state->X[ch];
+            X_ptr->exp = sext(att_random_int32(seed), 6);
+            X_ptr->hr = (att_random_uint32(seed) % 3);
+
+            for(unsigned bin=0; bin<NUM_BINS; bin++)
+            {
+                X_ptr->data[bin].re = att_random_int32(seed) >> X_ptr->hr;
+                X_ptr->data[bin].im = att_random_int32(seed) >> X_ptr->hr;
+                X_fp[ch][bin].re = att_int32_to_double(X_ptr->data[bin].re, X_ptr->exp);
+                X_fp[ch][bin].im = att_int32_to_double(X_ptr->data[bin].im, X_ptr->exp);
+            }
+        }
+
+        for(unsigned ch=0; ch<num_x_channels; ch++) {
+            aec_update_X_fifo_and_calc_sigmaXX(&state, ch); //DUT
+        }
+        aec_update_sigma_XX_fp(sigma_XX_fp, sum_X_energy_fp, X_fp, num_x_channels, state.shared_state->config_params.aec_core_conf.sigma_xx_shift); 
+        
+        for(unsigned ch=0; ch < num_x_channels; ch++){
+            //printf("%f, %f\n",sum_X_energy_fp[ch], att_int32_to_double(state.shared_state->sum_X_energy[ch].mant, state.shared_state->sum_X_energy[ch].exp));
+            unsigned diff = att_bfp_vector_int32((int32_t*)&state.shared_state->sum_X_energy[ch].mant, state.shared_state->sum_X_energy[ch].exp, (double*)&sum_X_energy_fp[ch], 0, 1); //scalar compared as a 1-element vector
+            max_diff_sum_X_energy = (diff > max_diff_sum_X_energy) ? diff : max_diff_sum_X_energy;
+            TEST_ASSERT_LESS_OR_EQUAL_UINT32_MESSAGE(1<<3, diff, "sum_X_energy diff too large");
+        }
+        //compare sigma_XX
+        for(unsigned ch=0; ch < num_x_channels; ch++){
+            bfp_s32_t *sigma_ptr = &state.shared_state->sigma_XX[ch];
+            for(unsigned i=0; i<NUM_BINS; i++) {
+                uint32_t expected = att_double_to_int32(sigma_XX_fp[ch][i], sigma_ptr->exp); //NOTE(review): unsigned holder for an int32 conversion; the subtraction below therefore runs in unsigned arithmetic - int32_t looks intended
+                int diff = expected - sigma_ptr->data[i];
+                if(diff < 0) diff = -diff;
+                if(diff > max_diff) max_diff = diff;
+                double dut_float = att_int32_to_double(sigma_ptr->data[i], sigma_ptr->exp); //only used by the diagnostic print below
+                if(diff > (1 << 10)) {
+                    printf("Fail. Iter %d, ch %d, bin %d, ref %f, dut (%d, %d), %f\n",iter, ch, i, sigma_XX_fp[ch][i], sigma_ptr->data[i], sigma_ptr->exp, dut_float);
+                }
+                TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<10, expected, sigma_ptr->data[i], "sigma_xx broke");
+            }
+        }
+        //check X_fifo update: rotate the expected slot order by one frame, then compare with what the DUT holds
+        update_mapping(mapping, num_phases);
+        for(unsigned ch=0; ch < num_x_channels; ch++){
+            bfp_complex_s32_t *X_ptr = &state.shared_state->X[ch];
+            TEST_ASSERT_EQUAL_INT32(state.shared_state->X_fifo[ch][0].exp, X_ptr->exp); //slot 0 must mirror the frame just pushed
+            TEST_ASSERT_EQUAL_INT32(state.shared_state->X_fifo[ch][0].hr, X_ptr->hr);
+            TEST_ASSERT_EQUAL_INT32(state.shared_state->X_fifo[ch][0].length, X_ptr->length);
+            if(memcmp(state.shared_state->X_fifo[ch][0].data, X_ptr->data, X_ptr->length*sizeof(X_ptr->data[0])))
+            {
+                printf("X data mismatch\n");
+                assert(0);
+            }
+            for(unsigned ph=0; ph<num_phases; ph++) {
+                TEST_ASSERT_EQUAL_INT32_MESSAGE(state.shared_state->X_fifo[ch][ph].data, X_fifo_check[ch][mapping[ph]].data, "X_fifo data ptr mismatch"); //buffer pointers only, not contents
+            }
+        }
+    }
+    printf("max_diff = %d\n",max_diff);
+    printf("max_diff_sum_X_energy = %d\n",max_diff_sum_X_energy);
+    }
+}
diff --git a/test/lib_aec/aec_unit_tests/src/test_update_td_ema_energy.xc b/test/lib_aec/aec_unit_tests/src/test_update_td_ema_energy.xc
new file mode 100644
index 000000000..4f4c9e56e
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_update_td_ema_energy.xc
@@ -0,0 +1,89 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+//Floating-point reference for the energy EMA: *ema = alpha*(*ema) + (1-alpha)*sum(input[i]^2, i<length); no-op when length is 0
+void update_td_ema_energy_fp(double *ema, double *input, double length, double alpha) {
+    if(length == 0) return;
+    double frame_energy = 0.0;
+    for(int n=0; n<length; n++) {
+        frame_energy += input[n]*input[n];
+    }
+    *ema = (alpha * *ema) + (frame_energy*(1-alpha));
+}
+
+#define TEST_LEN (AEC_PROC_FRAME_LENGTH + 1)
+//Drives aec_calc_time_domain_ema_energy() (DUT) and update_td_ema_energy_fp() (reference) with the same random
+//window of a random BFP vector on every iteration and compares the two running EMAs, which carry over between
+//iterations. The DUT mantissa must stay within 1<<10 of the reference converted at the DUT's exponent.
+void test_update_td_ema_energy() {
+    unsafe {
+    int32_t dut_mem[TEST_LEN];
+    bfp_s32_t dut;
+    //bfp_s32_init() takes (bfp, data, exp, length, calc_hr); exp and hr are re-randomised on every iteration
+    bfp_s32_init(&dut, dut_mem, 0, TEST_LEN, 0);
+    //Both EMAs start from zero
+    float_s32_t dut_ema;
+    dut_ema.mant = 0;
+    dut_ema.exp = -1024;
+    double ref_ema = 0.0;
+
+    double ref[TEST_LEN];
+    
+    //Fixed seed handed to every att_random_* call below
+    unsigned seed = 5683;
+    int max_diff = 0;
+    for(int iter=0; iter<(1<<14)/F; iter++) {
+        //input: random exponent and headroom, random mantissas, mirrored in double as ref[]
+        dut.exp = sext(att_random_int32(seed), 6);
+        dut.hr = att_random_uint32(seed) % 4;
+        for(int i=0; i<TEST_LEN; i++) {
+            dut.data[i] = att_random_int32(seed) >> dut.hr;
+            ref[i] = att_int32_to_double(dut.data[i], dut.exp);
+        }
+
+        //start offset: in [0, TEST_LEN-1], so at least one sample is always left over
+        unsigned start_offset = att_random_uint32(seed) % TEST_LEN;
+        unsigned leftover = TEST_LEN - start_offset;
+        //length: in [0, leftover-1]. leftover is never 0, so the modulo is safe and needs no special-casing;
+        //a zero length is kept as a legitimate case (the reference returns without touching its EMA).
+        int length = att_random_uint32(seed) % leftover;
+
+        //alpha: a random value shifted right by one bit and read with exponent -30
+        //NOTE(review): that presumably spans [0, 2), so (1-alpha) can go negative - confirm this range is intended
+        fixed_s32_t alpha_q30;
+        alpha_q30 = att_random_uint32(seed) >> 1;
+        //alpha_q30 = 1063004405;
+        double alpha_fp = att_int32_to_double(alpha_q30, -30);
+
+        //printf("iter %d. start_offset %d, leftover %d, length %d alpha %f\n",iter, start_offset, leftover, length, alpha_fp);
+
+
+        update_td_ema_energy_fp(&ref_ema, &ref[start_offset], length, alpha_fp);
+        
+        //dut updates ema inplace
+        aec_config_params_t cfg;
+        cfg.aec_core_conf.ema_alpha_q30 = alpha_q30;
+        aec_calc_time_domain_ema_energy(&dut_ema, &dut, start_offset, length, &cfg); 
+
+        //printf("ref %f, dut %f\n",ref_ema, att_int32_to_double(dut_ema.mant, dut_ema.exp));
+        //Compare on integer mantissas: bring the reference to the DUT's exponent
+        int dut_mant = dut_ema.mant;
+        int ref_mant = att_double_to_int32(ref_ema, dut_ema.exp);
+        //printf("ref 0x%x, dut 0x%x\n", ref_mant, dut_mant);
+        int diff = ref_mant - dut_mant;
+        if(diff < 0) diff = -diff;
+        if(diff > max_diff) max_diff = diff;
+        TEST_ASSERT_INT32_WITHIN_MESSAGE(1<<10, ref_mant, dut_mant, "Output delta is too large");
+    }
+    printf("max_diff = %d\n",max_diff);
+    }
+}
+
diff --git a/test/lib_aec/aec_unit_tests/src/test_update_total_X_energy.xc b/test/lib_aec/aec_unit_tests/src/test_update_total_X_energy.xc
new file mode 100644
index 000000000..02cb4fa96
--- /dev/null
+++ b/test/lib_aec/aec_unit_tests/src/test_update_total_X_energy.xc
@@ -0,0 +1,230 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <xs1.h>
+#include "aec_unit_tests.h"
+#include <stdio.h>
+#include <assert.h>
+extern "C"{
+    #include "aec_defines.h"
+    #include "aec_api.h"
+}
+
+#define NUM_BINS ((AEC_PROC_FRAME_LENGTH/2) + 1)
+
+//mapping[] lists X_fifo indices from most recent to least recent phase; rotate it so the least recent entry moves to the front
+static void update_mapping(int *mapping, int num_phases) {
+    const int oldest = mapping[num_phases - 1];
+    int dst = num_phases - 1;
+    while(dst >= 1) {
+        mapping[dst] = mapping[dst-1];
+        dst--;
+    }
+    mapping[0] = oldest;
+}
+
+//Floating-point reference for the X_fifo energy: incremental update of every bin, full recomputation of recalc_bin, per-channel maximum
+void aec_calc_X_fifo_energy_fp(
+        double (*X_energy)[NUM_BINS],
+        double *max_X_energy,
+        dsp_complex_fp (*X)[NUM_BINS],
+        dsp_complex_fp (*X_fifo)[AEC_MAIN_FILTER_PHASES][NUM_BINS],
+        const int *mapping,
+        unsigned num_channels,
+        unsigned num_phases,
+        int recalc_bin)
+{
+    const int oldest = mapping[num_phases - 1]; //FIFO slot of the phase about to roll out of the window
+    for(unsigned c=0; c<num_channels; c++) {
+        for(int b=0; b<NUM_BINS; b++) {
+            const double outgoing = (X_fifo[c][oldest][b].re * X_fifo[c][oldest][b].re) + (X_fifo[c][oldest][b].im * X_fifo[c][oldest][b].im);
+            const double incoming = (X[c][b].re * X[c][b].re) + (X[c][b].im * X[c][b].im);
+            X_energy[c][b] -= outgoing; //subtract energy of phase rolling out
+            X_energy[c][b] += incoming; //Add latest X data energy
+        }
+        //recalc_bin is rebuilt in full: newest X first, then the num_phases-1 most recent FIFO phases in mapping order
+        double rebuilt = (X[c][recalc_bin].re * X[c][recalc_bin].re) + (X[c][recalc_bin].im * X[c][recalc_bin].im);
+        for(unsigned p=0; p<num_phases-1; p++) {
+            const int slot = mapping[p];
+            rebuilt += (X_fifo[c][slot][recalc_bin].re * X_fifo[c][slot][recalc_bin].re) + (X_fifo[c][slot][recalc_bin].im * X_fifo[c][slot][recalc_bin].im);
+        }
+        X_energy[c][recalc_bin] = rebuilt;
+        double peak = X_energy[c][0]; //largest bin energy seen so far for this channel
+        for(int b=1; b<NUM_BINS; b++) {
+            if(X_energy[c][b] > peak) {peak = X_energy[c][b];}
+        }
+        max_X_energy[c] = peak;
+    }
+}
+
+//Floating-point mirror of the X FIFO update. mapping[] is rotated first, after which mapping[0] names the slot
+//that has just rolled out of the num_phases window; that slot is overwritten with the new X and so becomes the
+//most recent phase.
+void update_X_fifo_fp(
+        dsp_complex_fp (*X_fifo)[AEC_MAIN_FILTER_PHASES][NUM_BINS],
+        int *mapping,
+        dsp_complex_fp (*X)[NUM_BINS],
+        unsigned num_channels,
+        unsigned num_phases
+        )
+{
+    update_mapping(mapping, num_phases);
+    const int newest = mapping[0];
+    //copy the new frame into that slot, bin by bin
+    for(int c=0; c<num_channels; c++) {
+        for(int b=0; b<NUM_BINS; b++) {
+            X_fifo[c][newest][b].re = X[c][b].re;
+            X_fifo[c][newest][b].im = X[c][b].im;
+        }
+    }
+}
+
+void test_update_total_X_energy() {
+    unsafe {
+    unsigned num_y_channels = 1;
+    unsigned num_x_channels = 1;
+    unsigned main_filter_phases = AEC_MAIN_FILTER_PHASES - 1;
+    unsigned shadow_filter_phases = AEC_MAIN_FILTER_PHASES - 5;
+
+    aec_memory_pool_t aec_memory_pool;
+    aec_shadow_filt_memory_pool_t aec_shadow_memory_pool;
+    aec_state_t state, shadow_state;
+    aec_shared_state_t aec_shared_state;
+    aec_init(&state, &shadow_state, &aec_shared_state, (uint8_t*)&aec_memory_pool, (uint8_t*)&aec_shadow_memory_pool, num_y_channels, num_x_channels, main_filter_phases, shadow_filter_phases);
+    
+    unsigned X_energy_recalc_bin = 0;
+    complex_s32_t X[AEC_MAX_X_CHANNELS][NUM_BINS];
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        bfp_complex_s32_init(&state.shared_state->X[ch], X[ch], 0, NUM_BINS, 0);
+    }
+
+    //Initialise floating point stuff. mapping, X_energy_fp and X_fifo_fp
+    int mapping[AEC_MAIN_FILTER_PHASES];
+    dsp_complex_fp X_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+    double X_energy_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+    double X_energy_shadow_fp[AEC_MAX_X_CHANNELS][NUM_BINS];
+    double max_X_energy_fp[AEC_MAX_X_CHANNELS], max_X_energy_shadow_fp[AEC_MAX_X_CHANNELS];
+    dsp_complex_fp X_fifo_fp[AEC_MAX_X_CHANNELS][AEC_MAIN_FILTER_PHASES][NUM_BINS];
+    for(unsigned ch=0; ch<num_x_channels; ch++) {
+        for(unsigned bin=0; bin<NUM_BINS; bin++) {
+            X_energy_fp[ch][bin] = 0.0;
+            X_energy_shadow_fp[ch][bin] = 0.0;
+        }
+        for(int p=0; p<state.num_phases; p++) {
+            for(int bin=0; bin<NUM_BINS; bin++) {
+                X_fifo_fp[ch][p][bin].re = 0.0;
+                X_fifo_fp[ch][p][bin].im = 0.0;
+            }
+            mapping[p] = p;
+        }
+    } 
+    
+    double max_diff_percentage_shadow = 0.0;
+    double max_diff_percentage = 0.0;
+    int max_diff = 0;
+    unsigned seed = 2;
+    for(unsigned iter=0; iter<(1<<12)/F; iter++) {
+        for(unsigned ch=0; ch<num_x_channels; ch++) {
+            bfp_complex_s32_t *X_ptr = &state.shared_state->X[ch];
+            X_ptr->exp = sext(att_random_int32(seed), 3) - 30;
+            X_ptr->hr = (att_random_uint32(seed) % 3);
+            
+            //Generate X
+            for(unsigned bin=0; bin<NUM_BINS; bin++)
+            {
+                X_ptr->data[bin].re = att_random_int32(seed) >> X_ptr->hr;
+                X_ptr->data[bin].im = att_random_int32(seed) >> X_ptr->hr;
+                X_fp[ch][bin].re = att_int32_to_double(X_ptr->data[bin].re, X_ptr->exp);
+                X_fp[ch][bin].im = att_int32_to_double(X_ptr->data[bin].im, X_ptr->exp);
+            }
+        }
+        
+        //Calculate X_energy
+        for(unsigned ch=0; ch<num_x_channels; ch++) {
+            aec_calc_X_fifo_energy(&state, ch, X_energy_recalc_bin);
+            aec_calc_X_fifo_energy(&shadow_state, ch, X_energy_recalc_bin);
+        }
+        aec_calc_X_fifo_energy_fp(X_energy_fp, &max_X_energy_fp[0], X_fp, X_fifo_fp, mapping, num_x_channels, state.num_phases, X_energy_recalc_bin); 
+        aec_calc_X_fifo_energy_fp(X_energy_shadow_fp, &max_X_energy_shadow_fp[0], X_fp, X_fifo_fp, mapping, num_x_channels, shadow_state.num_phases, X_energy_recalc_bin); 
+        //Update X_fifo
+        for(unsigned ch=0; ch<num_x_channels; ch++) {
+            aec_update_X_fifo_and_calc_sigmaXX(&state, ch);
+        }
+        update_X_fifo_fp(X_fifo_fp, mapping, X_fp, num_x_channels, state.num_phases);
+
+        aec_update_X_fifo_1d(&state);
+        aec_update_X_fifo_1d(&shadow_state);
+
+        //Check 1d fifo update
+        int count = 0;
+        for(int i=0; i<num_x_channels; i++) {
+            for(int j=0; j<state.num_phases; j++) {
+                TEST_ASSERT_EQUAL_INT32(state.X_fifo_1d[count].data, state.shared_state->X_fifo[i][j].data);
+                TEST_ASSERT_EQUAL_INT32(state.X_fifo_1d[count].exp, state.shared_state->X_fifo[i][j].exp);
+                TEST_ASSERT_EQUAL_INT32(state.X_fifo_1d[count].hr, state.shared_state->X_fifo[i][j].hr);
+                TEST_ASSERT_EQUAL_INT32(state.X_fifo_1d[count].length, state.shared_state->X_fifo[i][j].length);
+                count++;
+            }
+        }
+        //printf("iter %d. done memcmp\n", iter);
+
+        //compare X_energy
+        //printf("iter %d\n",iter);
+        for(unsigned ch=0; ch < num_x_channels; ch++){
+            bfp_s32_t *X_energy_ptr = &state.X_energy[ch];
+            bfp_s32_t *X_energy_shadow_ptr = &shadow_state.X_energy[ch];
+            for(unsigned i=0; i<NUM_BINS; i++) {
+                double ref_double = X_energy_fp[ch][i];
+                double dut_double = att_int32_to_double(X_energy_ptr->data[i], X_energy_ptr->exp);
+                double diff_double = ref_double - dut_double;
+                if(diff_double < 0.0) diff_double = -diff_double;
+                double diff_percentage = (diff_double/ref_double) * 100;
+                if(diff_percentage > max_diff_percentage) max_diff_percentage = diff_percentage;
+                if(diff_double > 0.0002*(ref_double < 0.0 ? -ref_double : ref_double) + pow(10, -8))
+                {
+                    printf("Main filter: iter %d. ch: %d, bin: %d, diff %f outside pass limits. ref %f, dut %f\n", iter, ch, i, diff_double, ref_double, dut_double);
+                    printf("Main filter: ch %d, bin %d: ref (%f), dut (0x%x, %d)\n",ch, i, ref_double, X_energy_ptr->data[i], X_energy_ptr->exp);                    
+                    assert(0);
+                }
+                ref_double = X_energy_shadow_fp[ch][i];
+                dut_double = att_int32_to_double(X_energy_shadow_ptr->data[i], X_energy_shadow_ptr->exp);
+                diff_double = ref_double - dut_double;
+                if(diff_double < 0.0) diff_double = -diff_double;
+                diff_percentage = (diff_double/ref_double) * 100;
+                if(diff_percentage > max_diff_percentage_shadow) max_diff_percentage_shadow = diff_percentage;
+                if(diff_double > 0.002*(ref_double < 0.0 ? -ref_double : ref_double) + pow(10, -8))
+                {
+                    printf("Shadow filter: iter %d, ch: %d, bin: %d, diff %f outside pass limits. ref %f, dut %f\n", iter, ch, i, diff_double, ref_double, dut_double);
+                    printf("Shadow filter: ch %d, bin %d: ref (%f), dut (0x%x, %d)\n",ch, i, ref_double, X_energy_shadow_ptr->data[i], X_energy_shadow_ptr->exp);                    
+                    assert(0);
+                }
+            }
+            //max_X_energy
+            double ref_double = max_X_energy_fp[ch];
+            double dut_double = att_int32_to_double(state.max_X_energy[ch].mant, state.max_X_energy[ch].exp);
+            double diff_double = ref_double - dut_double;
+            if(diff_double < 0.0) {diff_double = -diff_double;}
+            if(diff_double > 0.0002*(ref_double < 0.0 ? -ref_double : ref_double) + pow(10, -8))
+            {
+                printf("Main filter: max_X_energy, iter %d. ch: %d, diff %f outside pass limits. ref %f, dut %f\n", iter, ch, diff_double, ref_double, dut_double);
+                assert(0);
+            }
+
+            ref_double = max_X_energy_shadow_fp[ch];
+            dut_double = att_int32_to_double(shadow_state.max_X_energy[ch].mant, shadow_state.max_X_energy[ch].exp);
+            diff_double = ref_double - dut_double;
+            if(diff_double < 0.0) {diff_double = -diff_double;}
+            if(diff_double > 0.002*(ref_double < 0.0 ? -ref_double : ref_double) + pow(10, -8))
+            {
+                printf("Shadow filter: max_X_energy, iter %d. ch: %d, diff %f outside pass limits. ref %f, dut %f\n", iter, ch, diff_double, ref_double, dut_double);
+                assert(0);
+            }
+        }
+        X_energy_recalc_bin += 1;
+        if(X_energy_recalc_bin == (AEC_PROC_FRAME_LENGTH/2) + 1) {
+            X_energy_recalc_bin = 0;
+        }        
+    }
+    printf("max_diff_percentage = %f\n",max_diff_percentage);
+    printf("max_diff_percentage_shadow = %f\n",max_diff_percentage_shadow);
+    }
+}
diff --git a/test/lib_aec/shared_src/python/generate_task_distribution_scheme.py b/test/lib_aec/shared_src/python/generate_task_distribution_scheme.py
new file mode 100644
index 000000000..a1de657cb
--- /dev/null
+++ b/test/lib_aec/shared_src/python/generate_task_distribution_scheme.py
@@ -0,0 +1,155 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import argparse
+import numpy as np
+import os.path
+
+def parse_arguments():
+    """Parse the command line and return the argparse namespace holding `config` and `out_dir`."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--config", nargs='?', default='2 2 2 10 5', help="Build configuration in '<threads> <num_y_channels> <num_x_channels> <num_main_filter_phases> <num_shadow_filter_phases>' format. Default '2 2 2 10 5'")
+    cli.add_argument("--out-dir", nargs='?', default='.', help="output directory to generate files in. Default .")
+    return cli.parse_args()
+
+#Schedule num_tasks tasks, each having num_channels channels over num_threads threads
+def gen_task_distribution(num_threads, num_tasks, num_channels):
+    """Build the (num_threads x passes) int32 table of packed job descriptors.
+
+    Jobs are handed out one per thread; once every thread holds a job in the
+    current pass another pass (column) is appended. The task index advances
+    with every job and the channel index each time the task index wraps.
+    num_channels == 0 means tasks only: num_tasks jobs, channel field 0.
+
+    Entry layout: bit 0 is_active, bits 8:1 ch_index, bits 16:9 task_index.
+    Slots left over in the last pass stay 0 (inactive).
+    """
+    if num_channels:
+        total_jobs = num_tasks * num_channels
+    else:
+        total_jobs = num_tasks
+    table = np.zeros((num_threads, 1), dtype=np.int32)
+    slot = 0      #thread (row) that receives the next job
+    column = 0    #pass (column) currently being filled
+
+    #Scheduling logic - T0C0, T1C0, T2C0, T0C1, T1C1, T2C1 and so on
+    for job in range(total_jobs):
+        if slot == num_threads: #current pass is full, add another pass
+            table = np.hstack((table, np.zeros((num_threads, 1), dtype=np.int32)))
+            slot = 0
+            column += 1
+        task_index = job % num_tasks
+        ch_index = (job // num_tasks) % num_channels if num_channels else 0
+        table[slot, column] = 1 | ((ch_index & 0xff) << 1) | ((task_index & 0xff) << 9)
+        slot += 1
+    return table
+
+def print_task_distribution(tdist, num_tasks, num_channels, fp_tdist_h, fp_c, task_distribution_t_str):
+    """Emit one schedule table: its pass-count #define, its C initialiser and its struct member.
+
+    Writes '#define <passes macro>   (<passes>)' to fp_tdist_h and a brace-enclosed
+    initialiser (one row per thread, one entry per pass) to fp_c. Returns
+    task_distribution_t_str with the matching member declaration appended.
+    num_channels == 0 selects the channel-less par_tasks_t flavour.
+    """
+    num_passes = tdist.shape[-1]
+    if num_channels:
+        passes_str = f"AEC_{num_tasks}_TASKS_AND_CHANNELS_PASSES"
+        member = f"par_tasks_and_channels_t par_{num_tasks}_tasks_and_channels[AEC_THREAD_COUNT][{passes_str}];\n"
+    else:
+        passes_str = f"AEC_{num_tasks}_TASKS_PASSES"
+        member = f"par_tasks_t par_{num_tasks}_tasks[AEC_THREAD_COUNT][{passes_str}];\n"
+
+    fp_tdist_h.write(f'#define {passes_str}   ({num_passes})' + '\n')
+    fp_c.write("{\n")
+    for row in tdist:
+        cells = []
+        for packed in row:
+            #unpack: bit 0 is_active, bits 8:1 channel, bits 9 and up task
+            task = packed >> 9
+            ch = (packed >> 1) & 0xff
+            active = packed & 1
+            fields = f"{task}, {ch}, {active}" if num_channels else f"{task}, {active}"
+            cells.append("{" + fields + "},")
+        fp_c.write("  {" + "".join(cells) + "},\n")
+    fp_c.write("},\n")
+    return task_distribution_t_str + member
+
+
+def create_task_distribution():
+    """Generate aec_task_distribution.h, aec_task_distribution.c and aec_config.h in --out-dir.
+
+    --config supplies '<threads> <y channels> <x channels> <main phases> <shadow phases>'.
+    aec_config.h receives the channel and filter phase count defines; aec_task_distribution.h
+    the thread count, the pass count of every schedule and the task_distribution_t type;
+    aec_task_distribution.c the initialised 'tdist' instance of that type.
+    Raises ValueError when --config holds fewer than five values or a non-integer value.
+    """
+    args = parse_arguments()
+    print('config = ',args.config)
+    print('out-dir = ',args.out_dir)
+    #split() without a separator tolerates repeated or surrounding whitespace
+    conf = args.config.split()
+    if len(conf) < 5:
+        raise ValueError(f"--config needs 5 space separated values, got {len(conf)}: '{args.config}'")
+    #convert everything up front so a malformed value fails before any file is created
+    num_threads = int(conf[0])
+    max_y_channels = int(conf[1])
+    max_x_channels = int(conf[2])
+    main_filter_phases = int(conf[3])
+    shadow_filter_phases = int(conf[4])
+    num_channels = max(max_y_channels, max_x_channels)
+    print(f"AEC task distribution generation configured for {num_threads} threads, max {max_y_channels} y channels, max {max_x_channels} x channels, {main_filter_phases} main_filter_phases, {shadow_filter_phases} shadow_filter_phases")
+    autogen_message = '/* Do not edit, autogenerated */ '
+    tdist_h_file_name = os.path.join(args.out_dir, "aec_task_distribution.h")
+    cfg_h_file_name = os.path.join(args.out_dir, "aec_config.h")
+    c_file_name = os.path.join(args.out_dir, "aec_task_distribution.c")
+
+    # Distribute multiple tasks across multiple channels on different cores
+    par_tasks_and_channels_t = "typedef struct {\n" + "    int task;\n" + "    int channel;\n" + "    int is_active;\n" + "}par_tasks_and_channels_t;\n\n"
+    par_tasks_t = "typedef struct {\n" + "    int task;\n" + "    int is_active;\n" + "}par_tasks_t;\n\n"
+    task_distribution_t_str = par_tasks_and_channels_t + par_tasks_t + "typedef struct {\n"
+
+    #(num_tasks, num_channels) of every schedule, in task_distribution_t member order.
+    #num_channels == 0 schedules tasks only, without a channel dimension.
+    schedules = [
+        (3, num_channels),
+        (2, num_channels),
+        (1, num_channels),
+        (2, 0),
+        (3, 0),
+    ]
+
+    #the context managers close all three files even if generation fails part way
+    with open(tdist_h_file_name, 'w') as fp_tdist_h, open(cfg_h_file_name, 'w') as fp_cfg_h, open(c_file_name, 'w') as fp_c:
+        #aec_config.h: build time channel and filter phase counts
+        fp_cfg_h.write( '#ifndef aec_config_h_\n')
+        fp_cfg_h.write( '#define aec_config_h_\n')
+        fp_cfg_h.write(autogen_message + '\n')
+        fp_cfg_h.write(f"#define AEC_MAX_Y_CHANNELS   ({max_y_channels})\n")
+        fp_cfg_h.write(f"#define AEC_MAX_X_CHANNELS   ({max_x_channels})\n")
+        fp_cfg_h.write(f"#define AEC_MAIN_FILTER_PHASES    ({main_filter_phases})\n")
+        fp_cfg_h.write(f"#define AEC_SHADOW_FILTER_PHASES    ({shadow_filter_phases})\n")
+        fp_cfg_h.write( '#endif /* aec_config_h_ */')
+
+        #aec_task_distribution.h: include guard and thread count, pass counts follow per schedule
+        fp_tdist_h.write( '#ifndef aec_task_distribution_h_\n')
+        fp_tdist_h.write( '#define aec_task_distribution_h_\n')
+        fp_tdist_h.write(autogen_message + '\n')
+        fp_tdist_h.write(f"#define AEC_THREAD_COUNT   ({num_threads})" + '\n')
+
+        #aec_task_distribution.c: one initialiser per schedule inside the tdist instance
+        fp_c.write( autogen_message + '\n')
+        fp_c.write( f'#include "{tdist_h_file_name}"\n')
+        fp_c.write( 'task_distribution_t tdist = {\n')
+        for num_tasks, channels in schedules:
+            tdist = gen_task_distribution(num_threads, num_tasks, channels)
+            task_distribution_t_str = print_task_distribution(tdist, num_tasks, channels, fp_tdist_h, fp_c, task_distribution_t_str)
+
+        #close the task_distribution_t typedef, the header guard and the tdist initialiser
+        task_distribution_t_str += '}task_distribution_t;\n'
+        fp_tdist_h.write(task_distribution_t_str+'\n')
+        fp_tdist_h.write( '#endif /* aec_task_distribution_h_ */')
+        fp_c.write( '};\n')
+
+if __name__ == "__main__":
+    create_task_distribution()
diff --git a/test/lib_aec/test_aec_enhancements/CMakeLists.txt b/test/lib_aec/test_aec_enhancements/CMakeLists.txt
new file mode 100644
index 000000000..056bb4e6f
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/CMakeLists.txt
@@ -0,0 +1,101 @@
+## App name
+set( APP_NAME  test_aec_enhancements )
+
+# Auto-generate task distribution scheme and top level config files
+
+if( NOT Python3_FOUND ) # pass the variable name: an unset Python3_FOUND written as ${...} would expand to the invalid if( NOT )
+  message(FATAL_ERROR "Python3 not found; it is needed to run generate_task_distribution_scheme.py for ${APP_NAME}.")
+endif()
+
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen )
+set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+
+message(STATUS "${APP_NAME} aec build config:  ${TEST_AEC_ENHANCEMENTS_BUILD_CONFIG}" )
+set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+
+unset(GEN_SCHEDULE_SCRIPT_ARGS) 
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${TEST_AEC_ENHANCEMENTS_BUILD_CONFIG})
+
+file(MAKE_DIRECTORY ${AUTOGEN_DIR})
+# Generate the task distribution sources; DEPENDS re-runs the generator whenever the script itself changes.
+add_custom_command( OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+DEPENDS ${GEN_SCHEDULE_SCRIPT}
+COMMENT "Generating AEC task distribution and top level config" )
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+
+list( APPEND  DEP_LIBS        
+    lib_xs3_math
+    lib_aec 
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+list( APPEND  DEP_LIBS ${DEP_LIBS_${CMAKE_SYSTEM_NAME}} )
+
+## Sources
+file( GLOB SOURCES_C  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.c )
+file( GLOB SOURCES_XC  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.xc )
+if ( XCORE )
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/*.c )
+else()
+    ## Only 1 thread process_frame() builds for x86
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+endif()
+
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES  ${XSCOPE_FILEIO_PATH}/src/*.c )
+file( GLOB_RECURSE AUDIO_TEST_TOOLS_SOURCES ${DEPS_ROOT}/audio_test_tools/audio_test_tools/src/burners.S )
+file( GLOB SOURCES_FILE_UTILS ${SHARED_SRC_PATH}/file_utils/*.c ) 
+
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_AEC_PROCESS_FRAME} ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} ${SOURCES_FILE_UTILS} ${AUDIO_TEST_TOOLS_SOURCES} ${AUTOGEN_SOURCES} )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${SHARED_SRC_PATH}/file_utils ${AUTOGEN_DIR} ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+
+# set( XSCOPE_CONFIG config.xscope )
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} ${DEP_LIBS})
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+#(because otherwise the set_target_properties command fails)
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+    set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/test/lib_aec/test_aec_enhancements/__init__.py b/test/lib_aec/test_aec_enhancements/__init__.py
new file mode 100644
index 000000000..36fd27125
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/__init__.py
@@ -0,0 +1,2 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
diff --git a/test/lib_aec/test_aec_enhancements/build_xe.py b/test/lib_aec/test_aec_enhancements/build_xe.py
new file mode 100644
index 000000000..beb734151
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/build_xe.py
@@ -0,0 +1,14 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import configparser
+import subprocess
+import glob
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+
+# '<threads> <y channels> <x channels> <main filter phases> <shadow filter phases>', handed to waf as one argument
+build_config = " ".join(parser.get(section, key) for section, key in (('XCBuild', 'threads'), ('Config', 'y_channel_count'), ('Config', 'x_channel_count'), ('Config', 'main_filter_phases'), ('Config', 'shadow_filter_phases')))
+
+cmd = ["waf", "configure", "clean", "build", "--aec-config", build_config]
+subprocess.run(cmd, check=True)
diff --git a/test/lib_aec/test_aec_enhancements/config.xscope b/test/lib_aec/test_aec_enhancements/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/test/lib_aec/test_aec_enhancements/make_dirs.sh b/test/lib_aec/test_aec_enhancements/make_dirs.sh
new file mode 100755
index 000000000..d4870a68b
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/make_dirs.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Include test utils
+. utils.sh
+
+test_dirs="$(read_config in_dir)"
+test_dirs="$test_dirs $(read_config out_dir)"
+test_dirs="$test_dirs $(read_config filter_dir)"
+
+# $test_dirs is left unquoted on purpose: it holds one directory name per whitespace separated word.
+for dir in $test_dirs; do
+    echo "$dir"
+    mkdir -p -- "$dir"
+    # "clean" anywhere in the first argument empties the directory; -f keeps an already empty one from erroring, ${dir:?} refuses to expand to /*.
+    if [[ "$1" == *clean* ]]; then rm -rf -- "${dir:?}"/*; fi
+done
+
diff --git a/test/lib_aec/test_aec_enhancements/parameters.cfg b/test/lib_aec/test_aec_enhancements/parameters.cfg
new file mode 100644
index 000000000..0a02ba4ae
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/parameters.cfg
@@ -0,0 +1,16 @@
+[Binaries]
+xe_path = ../../../build/test/lib_aec/test_aec_enhancements/
+
+[Config]
+x_channel_count = 2
+y_channel_count = 2
+main_filter_phases = 10
+shadow_filter_phases = 5
+
+[XCBuild]
+threads = 2
+
+[Folders]
+in_dir = audio_in
+out_dir = audio_out
+filter_dir = filter_out
diff --git a/test/lib_aec/test_aec_enhancements/run_xc.py b/test/lib_aec/test_aec_enhancements/run_xc.py
new file mode 100644
index 000000000..c6291282f
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/run_xc.py
@@ -0,0 +1,100 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import numpy as np
+import os
+import tempfile
+import shutil
+import subprocess
+import soundfile as sf
+import xscope_fileio
+import xtagctl
+import io
+from contextlib import redirect_stdout
+import re
+import scipy.io.wavfile
+import configparser
+import glob
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+aec_xe =os.path.abspath(glob.glob(f'{parser.get("Binaries", "xe_path")}/bin/*.xe')[0])
+print(os.path.abspath(aec_xe))
+in_dir = parser.get("Folders", "in_dir")
+out_dir = parser.get("Folders", "out_dir")
+
+adapt_mode_dict = {'AEC_ADAPTION_AUTO':0, 'AEC_ADAPTION_FORCE_ON':1, 'AEC_ADAPTION_FORCE_OFF': 2}
+
+dut_H_hat_file = "H_hat.bin"
+runtime_args_file = "args.bin"
+AEC_MAX_Y_CHANNELS = int(parser.get("Config", "y_channel_count"))
+AEC_MAX_X_CHANNELS = int(parser.get("Config", "x_channel_count"))
+
+def run_aec_xc(y_data, x_data, testname, adapt=-1, h_hat_dump=None, adapt_mode=adapt_mode_dict['AEC_ADAPTION_AUTO'], num_y_channels=AEC_MAX_Y_CHANNELS, num_x_channels=AEC_MAX_X_CHANNELS):
+    """Write the input wav and runtime arguments, run the AEC xe on the target and collect its output.
+
+    y_data and x_data are 1-D or (samples, channels) arrays; each is padded to the build-time
+    channel count by repeating its last channel. num_y_channels / num_x_channels, adapt and
+    adapt_mode are passed to the xe through args.bin. If h_hat_dump is given, H_hat.bin is copied there.
+    Returns (input_file, output_file). The scratch directory is always removed and the cwd restored.
+    """
+    input_file = f"{in_dir}/input_{testname}.wav"
+    output_file = f"{out_dir}/output_{testname}.wav"
+    if(y_data.ndim == 1):
+        y_data = np.atleast_2d(y_data).T
+    if(x_data.ndim == 1):
+        x_data = np.atleast_2d(x_data).T
+
+    #All input wav files need to have AEC_MAX_Y_CHANNELS y channels and AEC_MAX_X_CHANNELS x channels since this is the configuration AEC is built with
+    extra_y_chans = AEC_MAX_Y_CHANNELS - y_data.shape[-1]
+    extra_x_chans = AEC_MAX_X_CHANNELS - x_data.shape[-1]
+    #duplicate last column to get required no. of channels
+    if extra_y_chans:
+        y_data = np.hstack((y_data, np.tile(y_data[:,[-1]], extra_y_chans)))
+    if extra_x_chans:
+        x_data = np.hstack((x_data, np.tile(x_data[:,[-1]], extra_x_chans)))
+    input_data = np.hstack((y_data, x_data))
+    scipy.io.wavfile.write(input_file, 16000, input_data)
+
+    #write runtime arguments into args.bin
+    with open(runtime_args_file, "wb") as fargs:
+        fargs.write(f"y_channels {num_y_channels}\n".encode('utf-8'))
+        fargs.write(f"x_channels {num_x_channels}\n".encode('utf-8'))
+        fargs.write(f"stop_adapting {adapt}\n".encode('utf-8'))
+        fargs.write(f"adaption_mode {adapt_mode}\n".encode('utf-8'))
+
+    prev_path = os.getcwd()
+    tmp_folder = tempfile.mkdtemp()
+    try:
+        shutil.copy2(input_file, os.path.join(tmp_folder, "input.wav"))
+        shutil.copy2(runtime_args_file, os.path.join(tmp_folder, runtime_args_file))
+        os.chdir(tmp_folder)  #run from the scratch directory that holds input.wav and args.bin
+        with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+            xscope_fileio.run_on_target(adapter_id, aec_xe)
+        os.chdir(prev_path)  #output_file and h_hat_dump may be relative to the original directory
+        shutil.copy2(os.path.join(tmp_folder, "output.wav"), output_file)
+        if h_hat_dump is not None:
+            shutil.copy2(os.path.join(tmp_folder, dut_H_hat_file), h_hat_dump)
+    finally:
+        os.chdir(prev_path)
+        shutil.rmtree(tmp_folder, ignore_errors=True)
+    return input_file, output_file
+
+
+def get_h_hat(filename, aec):
+    """Load H_hat from a dump file: imported as Python when aec == 'xc', otherwise read with np.load.
+
+    WARNING: for aec == 'xc' the file is copied to temp.py in the current directory and
+    imported, so its contents are executed as Python code. Only use it on trusted dumps.
+    """
+    H_hat = None
+
+    if aec == 'xc':
+        shutil.copy2(filename, "temp.py")
+        from temp import H_hat
+    else:
+        with open(filename, "rb") as f:
+            H_hat = np.load(f)
+    assert H_hat is not None
+    return H_hat
+
+
diff --git a/test/lib_aec/test_aec_enhancements/test_dropped_samples.py b/test/lib_aec/test_aec_enhancements/test_dropped_samples.py
new file mode 100644
index 000000000..2b288dc71
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/test_dropped_samples.py
@@ -0,0 +1,128 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+'''
+The purpose of these tests is to check reconvergence behaviour after samples have been dropped from the reference audio.
+A number of samples are removed from the reference, causing the filter delay to become incorrect and requiring readaptation.
+'''
+
+import os
+from pathlib import Path
+
+hydra_audio_path = os.path.expanduser(os.environ.get('hydra_audio_PATH', '~/hydra_audio'))  # expand '~' here: Path(), np.load() and sf.read() do not
+
+import numpy as np
+import scipy.signal as spsig
+import scipy.io.wavfile
+
+import soundfile as sf
+import audio_wav_utils as awu
+
+from common_utils import json_to_dict
+import wav_test_functions as wtf
+import run_xc
+
+import pytest
+
+@pytest.mark.parametrize("drop_amount", [1, 3, 10])
+@pytest.mark.parametrize("room", ["lab", "board"])
+
+def test_dropped_samples(drop_amount, room):
+    ''' test_dropped_samples - run a mono pink noise convolved with a modelled impulse response,
+    remove drop_amount samples after 10 seconds, and check the length of time taken for 10 dB reconvergence
+        
+    pass/fail: check it takes less than 3 seconds for 10 dB reconvergence after the dropped samples'''
+
+    np.random.seed(42)
+    fs = 16000
+    N = fs * 20
+    testname = f"{(Path(__file__).stem)[5:]}_{drop_amount}_{room}"
+
+    y_channel_count = 1
+    x_channel_count = 1
+
+    phases = 10  # aec_parameters['phases']
+    frame_advance = 240  # aec_parameters['frame_advance']
+    fN = phases * frame_advance
+
+    # load impulse response
+    if room == "lab":
+        filename1 = "000_LAB_XTS_DUTL_fs16kHz"
+    elif room == "board":
+        filename1 = "006_BOARD_XTS_DUTL_fs16kHz"
+    else:
+        assert False
+
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename1 + ".npy")
+    h1 = np.load(filepath)
+    hN = h1.shape[0]
+    h = h1[:,0]
+
+    filename = "003_rick_mono"
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "playback_audio", filename + ".wav")
+    u, fs3 = sf.read(filepath)
+    u = u[:,0]
+    assert fs==fs3
+  
+    d = spsig.convolve(u, h, 'full')[:N]
+    if fN > hN:
+        d = d[hN-1:hN-fN]
+    else:
+        d = d[hN-1:N]
+
+    # ideal results
+    f_ideal = h[:fN]
+    y_ideal = spsig.convolve(f_ideal, u, 'full')[hN-1:N]
+    _, in_leq = wtf.leq_smooth(y_ideal, fs, 0.05)
+
+    # set the dropped samples
+    decim_ratio = 3
+    fs2 = decim_ratio*fs    
+    drop_start = 10*fs2
+    drop_stop = 15*fs2
+    drop_rate = int(5*fs2)
+    
+    # upsample, set dropped to nan, remove and downsample
+    u2 = spsig.resample_poly(u[hN-1:], decim_ratio, 1)
+    for n in range(drop_amount):
+        u2[(drop_start+n):(drop_stop+n):drop_rate] = np.nan
+    u2 = u2[~np.isnan(u2)]
+    u = spsig.resample_poly(u2, 1, decim_ratio)
+
+    # run AEC
+    #XC expects 4ch input
+    in_data = np.stack((d, u[:N-hN+1]), axis=0) 
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+    nFrames = (N-hN-1) // frame_advance -1
+
+    #run XC
+    print("Run AEC XC")
+    dut_input_file, dut_output_file = run_xc.run_aec_xc(in_data_32bit[:,:y_channel_count], in_data_32bit[:,y_channel_count:], testname, adapt_mode=run_xc.adapt_mode_dict['AEC_ADAPTION_AUTO'], num_y_channels=y_channel_count, num_x_channels=x_channel_count)
+    rate, output_wav_file = scipy.io.wavfile.read(dut_output_file, 'r')
+    error = output_wav_file 
+    _, leq_error = wtf.leq_smooth(error[:, 0], fs, 0.05)
+    time = np.arange(len(leq_error))*0.05
+    # find max deconvergence point
+    drop_idx = np.searchsorted(time, drop_start/fs2)
+    drop_idx = np.argmax(leq_error[drop_idx-10:drop_idx+10]) + (drop_idx-10)
+
+    # calculate reconvergence time
+    reconv_time = wtf.calc_attenuation_time(time[drop_idx:], leq_error[drop_idx:], -10) - time[drop_idx]
+    print("XC: reconv_time: %.2f seconds"%reconv_time)
+    assert reconv_time < 3, "XC reconv_time error"
+
+    # plot
+    if __name__ == "__main__":
+        import matplotlib.pyplot as plt
+        plt.figure()
+        plt.title("%s input signal, drop %d samples @ %d kHz at %.2f seconds"%(filename, drop_amount, fs2/1000, drop_start/fs2 ))
+        plt.plot(time, leq_error - leq_error[0])
+        plt.xlabel("Time (s)")
+        plt.ylabel("Attenuation (dB)")
+        plt.ylim([-40, 10])
+        plt.xlim([0, time[-1]])
+        #plt.show()
+
+
+if __name__ == "__main__":
+    test_dropped_samples(10, "lab")
diff --git a/test/lib_aec/test_aec_enhancements/test_impulse_response_change.py b/test/lib_aec/test_aec_enhancements/test_impulse_response_change.py
new file mode 100644
index 000000000..278065577
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/test_impulse_response_change.py
@@ -0,0 +1,146 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+''' impulse response change checker
+this test is intended to check the ability of the
+shadow filter to detect a change in the impulse response 
+and speed up filter adaptation accordingly
+
+some mono white noise convolved with a modelled impulse response is run
+a change in the impulse response happens midway through the signal
+the convergence time and total attenuation is monitored after change
+        
+pass/fail: check if the reconvergence rate after the change is greater than 12 dB/s
+
+'''
+import os
+from pathlib import Path
+
+# Root of the hydra_audio test data, taken from the hydra_audio_PATH environment
+# variable. '~' is expanded explicitly because pathlib and numpy treat it as a
+# literal directory name, which made the default fallback unusable.
+hydra_audio_path = os.path.expanduser(os.environ.get('hydra_audio_PATH', '~/hydra_audio'))
+
+import numpy as np
+import scipy.io.wavfile
+import scipy.signal as spsig
+import soundfile as sf
+import audio_wav_utils as awu
+
+from common_utils import json_to_dict
+import wav_test_functions as wtf
+import run_xc
+
+import pytest
+
+def conv_impulse_array(x, h, fade_len):
+    """Convolve ``x`` with several impulse responses, one per time section.
+
+    The output is split into ``len(h)`` equal-length sections. Section ``n``
+    carries ``x`` convolved with ``h[n]``; neighbouring sections are joined by
+    a linear cross-fade centred on the section boundary.
+
+    Args:
+        x: 1-D input signal.
+        h: sequence of 1-D impulse responses.
+        fade_len: cross-fade length in samples. The fade covers
+            ``fade_len // 2`` samples either side of each boundary, so an odd
+            value is rounded down to the next even number; 0 is a hard switch.
+
+    Returns:
+        ``(y_out, y)``: the combined signal and the list of per-impulse
+        signals after windowing (each truncated to ``len(x)``).
+    """
+    n_impulses = len(h)
+    N = len(x)
+
+    # Convolve the input with every impulse response, truncated to len(x).
+    y = [spsig.convolve(x, h[n], 'full')[:N] for n in range(n_impulses)]
+
+    y_out = np.zeros_like(y[0])
+    sec_l = len(y[0])//n_impulses
+    half = fade_len//2
+    # The ramp needs exactly as many points as the slice it scales (2*half).
+    # arange(fade_len) has one point too many for odd fade_len and failed to
+    # broadcast; for even fade_len the two are identical.
+    ramp = np.arange(2*half)/(2*half)
+    for n in range(n_impulses):
+        if n > 0:
+            # Silence everything before this section, then fade it in.
+            y[n][:n*sec_l - half] = 0.0
+            y[n][n*sec_l - half:n*sec_l + half] *= ramp
+
+        if n < n_impulses - 1:
+            # Silence everything after this section, then fade it out.
+            y[n][(n+1)*sec_l + half:] = 0.0
+            y[n][(n+1)*sec_l - half:(n+1)*sec_l + half] *= np.flip(ramp)
+
+        y_out += y[n]
+
+    return y_out, y
+   
+
+@pytest.mark.parametrize("adapt_config", ['AEC_ADAPTION_FORCE_ON', 'AEC_ADAPTION_AUTO'])
+def test_impulse_response_change(adapt_config):
+    ''' test_impulse_response_change - run mono white noise through an echo path
+    that switches from one modelled impulse response to another halfway through
+    the signal, and check how fast the XC AEC reconverges after the switch
+
+    adapt_config: key into run_xc.adapt_mode_dict selecting the adaption mode
+
+    pass/fail: check wtf.calc_convergence_rate, measured from the point of
+    highest output level onwards, gives more than 12'''
+
+    fs = 16000
+    N = fs * 20
+    np.random.seed(500)  
+    testname = f"{(Path(__file__).stem)[5:]}_{adapt_config}"
+
+    y_channel_count = 1
+    x_channel_count = 1
+
+    phases = 10  # aec_parameters['phases']
+    frame_advance = 240  # aec_parameters['frame_advance']
+    fN = phases * frame_advance
+
+    # load impulse response; column n of the file is used as impulse response n
+    filename1 = "000_LAB_XTS_DUTL_fs16kHz"
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename1 + ".npy")
+    h1 = np.load(filepath)
+
+    n_impulses = 2
+    fade_len = int(0.0*fs)  # 0 samples: hard switch between impulse responses
+    h = [h1[:,n] for n in range(n_impulses)]
+    hN = len(h[0])
+
+    # white noise reference signal
+    u = np.random.randn(N)
+
+    # Normalise to exactly N mono samples. This is a no-op for the noise
+    # generated above, but keeps the code valid if u is swapped for a signal
+    # read from a file that is shorter than N or has several channels.
+    if u.ndim == 1:
+        u = u[:, np.newaxis]    
+    
+    if u.shape[0] < N:
+        u = np.tile(u, (N // u.shape[0] + 1, 1))
+
+    u = u[:N, 0]
+
+    d, _ = conv_impulse_array(u, h, fade_len)
+
+    # Skip the first hN-1 samples of the microphone signal.
+    # NOTE(review): in the fN > hN branch d ends up shorter than u[hN-1:N], which
+    # np.stack below would reject - presumably hN >= fN for this data; confirm.
+    if fN > hN:
+        d = d[hN-1:hN-fN]
+    else:
+        d = d[hN-1:]
+
+    d = d * 0.01 #20dB attenuation
+    u = u * 0.2
+    # run AEC
+    in_data = np.stack((d, u[hN-1:N]), axis=0)
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+    
+    #run XC
+    print("Run AEC XC")
+    dut_input_file, dut_output_file = run_xc.run_aec_xc(in_data_32bit[:,:y_channel_count], in_data_32bit[:,y_channel_count:], testname, adapt_mode=run_xc.adapt_mode_dict[adapt_config], num_y_channels=y_channel_count, num_x_channels=x_channel_count)
+
+    rate, output_wav_file = scipy.io.wavfile.read(dut_output_file, 'r')
+    error_xc = output_wav_file[:,0] 
+    _, leq_error = wtf.leq_smooth(error_xc, fs, 0.05)
+    # The point of highest output level is taken as the impulse response change.
+    # argmax returns the first such index, so this also works when the maximum
+    # occurs more than once (int() on the np.where() result raised in that case).
+    change_index = int(np.argmax(leq_error))
+    leq_e = leq_error[change_index:]
+    t = np.arange(len(leq_e))*0.05
+    reconvergence_rate = wtf.calc_convergence_rate(t, leq_e)
+    print(f"XC reconvergence_rate: {reconvergence_rate}")
+    # test    
+    assert reconvergence_rate > 12,"XC reconvergence_rate error"
+
+    # plot
+    if __name__ == "__main__":
+        import matplotlib.pyplot as plt
+        # Time axis for the whole run; it was previously undefined in this
+        # function, so running the file as a script raised NameError here.
+        time = np.arange(len(leq_error))*0.05
+        plt.figure()
+        plt.title("white noise input signal,impulse response change @ %d seconds"%(N//fs//2))
+        plt.plot(time, leq_error - leq_error[0])
+        plt.xlabel("Time (s)")
+        plt.ylabel("Attenuation (dB)")
+        plt.ylim([-50, 10])
+        plt.xlim([0, time[-1]])
+        #plt.show()
+        
+
+
+if __name__ == "__main__":
+    test_impulse_response_change('AEC_ADAPTION_AUTO')
diff --git a/test/lib_aec/test_aec_enhancements/test_nyquist.py b/test/lib_aec/test_aec_enhancements/test_nyquist.py
new file mode 100644
index 000000000..573111e62
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/test_nyquist.py
@@ -0,0 +1,93 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+'''
+The purpose of this test is to test the Nyquist bin is present. 
+White noise is used as it gives a constant convergence rate.
+The maximum attenuation is tested, as this should be higher with the Nyquist bin present.
+'''
+
+import os
+import sys
+
+import numpy as np
+import scipy.signal as spsig
+import scipy.io.wavfile
+import subprocess
+import shutil
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+from common_utils import json_to_dict
+import wav_test_functions as wtf
+import audio_wav_utils as awu
+import run_xc
+
+import pytest
+
+# Root of the hydra_audio test data, taken from the hydra_audio_PATH environment
+# variable. '~' is expanded explicitly because pathlib and numpy treat it as a
+# literal directory name, which made the default fallback unusable.
+hydra_audio_path = os.path.expanduser(os.environ.get('hydra_audio_PATH', '~/hydra_audio'))
+
+def calc_max_attenuation(output):
+    """Print and return the lowest value of ``output`` relative to its first sample."""
+    max_atten = np.min(output - output[0])
+    print("Max attenuation is %f dB"%(max_atten))
+    return max_atten
+
+def test_nyquist():
+    ''' test_nyquist - run mono white noise convolved with a modelled impulse response
+    through the XC AEC and check the maximum attenuation, which should be higher
+    when the Nyquist bin is present in the AEC
+        
+    pass/fail: check wtf.calc_max_attenuation of the smoothed XC output is below -31'''
+    testname = (Path(__file__).stem)[5:]
+
+    fs = 16000
+    N = fs * 10
+    # fixed seed so the white noise is the same on every run
+    np.random.seed(500)    
+
+    y_channel_count = 1
+    x_channel_count = 1
+
+    phases = 10  # aec_parameters['phases']
+    frame_advance = 240  # aec_parameters['frame_advance']
+    fN = phases * frame_advance
+
+    # load impulse response
+    filename1 = "000_LAB_XTS_DUTL_fs16kHz"
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename1 + ".npy")
+    h1 = np.load(filepath)
+    hN = h1.shape[0]
+    # only the first column of the impulse response file is used
+    h = h1[:,0]
+
+    # NOTE(review): filename is not used in the rest of this function
+    filename = "white"
+    u = np.random.randn(N)
+
+    # microphone signal: reference convolved with the impulse response, skipping
+    # the first hN-1 samples
+    # NOTE(review): in the fN > hN branch d ends up shorter than u[hN-1:N], which
+    # np.stack below would reject - presumably hN >= fN for this data; confirm
+    d = spsig.convolve(u, h, 'full')[:N]
+    if fN > hN:
+        d = d[hN-1:hN-fN]
+    else:
+        d = d[hN-1:]
+
+    d = d * 0.01 #20dB attenuation
+    u = u * 0.2
+    
+    # ideal results
+    # NOTE(review): y_ideal and in_leq are computed but not used by the assertion below
+    f_ideal = h[:fN]
+    y_ideal = spsig.convolve(f_ideal, u, 'full')[hN-1:N]
+    _, in_leq = wtf.leq_smooth(y_ideal, fs, 0.05)
+
+    # run AEC
+    # row 0 is the microphone signal, row 1 the reference; scaled to int32 full
+    # scale and transposed to (samples, channels)
+    in_data = np.stack((d, u[hN-1:N]), axis=0)
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+
+    print("Run AEC XC")
+    dut_input_file, dut_output_file = run_xc.run_aec_xc(in_data_32bit[:,:y_channel_count], in_data_32bit[:,y_channel_count:], testname, adapt_mode=run_xc.adapt_mode_dict['AEC_ADAPTION_FORCE_ON'], num_y_channels=y_channel_count, num_x_channels=x_channel_count)
+    rate, output_wav_file = scipy.io.wavfile.read(dut_output_file, 'r')
+    # only the first output channel is analysed
+    error_xc = output_wav_file[:,0]    
+    _, leq_error_xc = wtf.leq_smooth(error_xc, fs, 0.05)
+    # uses the wav_test_functions helper, not the calc_max_attenuation defined in this file
+    max_atten_xc = wtf.calc_max_attenuation(leq_error_xc)
+    print('max_atten xc =',max_atten_xc)
+    assert max_atten_xc < -31, "test_nyquist fails attenuation test for XC" #Enabling frequency smoothing brings attenuation from -61dB to -52dB. python behaves similarly
+
+
+if __name__ == "__main__":
+    test_nyquist()
diff --git a/test/lib_aec/test_aec_enhancements/test_rick_convergence.py b/test/lib_aec/test_aec_enhancements/test_rick_convergence.py
new file mode 100644
index 000000000..3ac0cdeae
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/test_rick_convergence.py
@@ -0,0 +1,139 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+'''
+The purpose of this test is to test the initial convergence behaviour. 
+Pink noise is used as it gives a constant convergence rate.
+The initial attenuation rate and maximum attenuation are tested.
+The test is carried out with fixed and variable mu, which may indicate the cause of any convergence issues. 
+If the test fails with fixed mu it is an indication that something may be wrong with the normalisation. 
+If the test fails with variable mu, there may be a problem with the mu.
+'''
+
+import os
+from pathlib import Path
+
+# Root of the hydra_audio test data, taken from the hydra_audio_PATH environment
+# variable. '~' is expanded explicitly because pathlib and numpy treat it as a
+# literal directory name, which made the default fallback unusable.
+hydra_audio_path = os.path.expanduser(os.environ.get('hydra_audio_PATH', '~/hydra_audio'))
+
+import numpy as np
+import scipy.io.wavfile
+
+import soundfile as sf
+import audio_wav_utils as awu
+
+from common_utils import json_to_dict
+import wav_test_functions as wtf
+import run_xc
+import configparser
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+filter_dir = parser.get("Folders", "filter_dir")
+
+import pytest
+
+
+@pytest.mark.parametrize("adapt_config", ['AEC_ADAPTION_FORCE_ON', 'AEC_ADAPTION_AUTO'])
+@pytest.mark.parametrize("channel_count", [1, 2])
+def test_pink_convergence(adapt_config, channel_count):
+    ''' test_pink_convergence - run a mono/stereo playback signal convolved with a modelled impulse response
+    through the XC AEC and check that the AEC filter does not have any discontinuities
+    and that the output converges quickly and deeply, in both adaption modes.
+    NOTE(review): despite the test name, the input file loaded below is "003_rick_mono" - confirm this is intended
+        
+    pass/fail: check wtf.disco_check passes on the time domain filter dumped by the XC run
+    pass/fail: check wtf.calc_max_attenuation of the smoothed output is below -40
+    pass/fail: check wtf.calc_convergence_rate of the smoothed output is above 12'''
+
+    fs = 16000
+    N = fs * 10
+    testname = f"{(Path(__file__).stem)[5:]}_{adapt_config}_{channel_count}"
+
+    phases = 10  # aec_parameters['phases']
+    frame_advance = 240  # aec_parameters['frame_advance']
+    # NOTE(review): fN is not used in the rest of this function
+    fN = phases * frame_advance
+
+    filename = "003_rick_mono"
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "playback_audio", filename + ".wav")
+    x, fs2 = sf.read(filepath)
+    assert fs==fs2
+    # make x 2-D (samples, channels) so it can be indexed per channel below
+    if x.ndim == 1:
+        x = x[:, np.newaxis]
+    if x.shape[1] < channel_count:
+        # if we have a mono signal, take different time slices for different channels
+        if x.shape[1] == 1:
+            xo = x[:N]
+            for ch in range(1, channel_count):
+                xo = np.concatenate((xo[:N, :], x[ch*N:(ch+1)*N]), axis=1)
+            x = xo
+        else:
+            # otherwise repeat the available channels until there are enough
+            x = np.tile(x, (1, channel_count // x.shape[1] + 1))
+    x = x[:N, :channel_count]
+    y = np.zeros((N, channel_count)) # microphone signal
+
+    # load impulse response
+    filename1 = "000_LAB_XTS_DUTL_fs16kHz"
+    filename2 = "001_LAB_XTS_DUTR_fs16kHz"
+
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename1 + ".npy")
+    h1 = np.load(filepath)
+    hN = h1.shape[0]
+
+    if channel_count == 1:
+        # mono: one reference through the first column of h1
+        h = h1[:, 0]
+        y = np.atleast_2d(np.convolve(h, x[:,0], 'full')[:N]).T
+    if channel_count == 2:
+        # stereo: microphone 0 uses the columns of h1, microphone 1 the columns of
+        # h2; each microphone sums the contribution of both references
+        filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename2 + ".npy")
+        h2 = np.load(filepath)
+        for n in range(channel_count):
+            y[:,0] += np.convolve(h1[:,n],x[:, n], 'full')[:N]
+            y[:,1] += np.convolve(h2[:,n],x[:, n], 'full')[:N]
+    elif channel_count > 2:
+        assert False
+
+    # run AEC
+    # microphone channels first, then reference channels
+    in_data = np.concatenate((y, x), axis=1).T
+    # passed to run_aec_xc as its adapt argument below
+    nFrames = (N-hN-1) // frame_advance -1
+    # scale to int32 full scale, shaped (samples, channels)
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+    
+    #run XC
+    print("Run AEC XC")
+    filter_td_file = f"{filter_dir}/{testname}_h_td_xc.npy"
+    filter_fd_file = f"{filter_dir}/{testname}_H_fd_xc.npy"
+    dut_input_file, dut_output_file = run_xc.run_aec_xc(in_data_32bit[:,:channel_count], in_data_32bit[:,channel_count:], f"{testname}", adapt=nFrames, h_hat_dump=filter_fd_file, adapt_mode=run_xc.adapt_mode_dict[adapt_config], num_y_channels=channel_count, num_x_channels=channel_count)
+    rate, output_wav_file = scipy.io.wavfile.read(dut_output_file, 'r')
+    error = output_wav_file 
+    # only the first output channel is analysed
+    _, leq_error = wtf.leq_smooth(error[:, 0], fs, 0.05)
+    time = np.arange(len(leq_error))*0.05
+    # frequency domain filter dumped by the XC run; only element [0,0] is checked
+    Hxmos = run_xc.get_h_hat(filter_fd_file, 'xc')[0,0]
+    print('Hxmos.shape = ',Hxmos.shape)
+    h = np.fft.irfft(Hxmos)
+    # build one time domain filter from the first frame_advance samples of each phase
+    hxmos = np.zeros(frame_advance*phases)
+    for p in range(phases):
+        hxmos[p*frame_advance: frame_advance*(p+1)] = h[p, :frame_advance]
+    np.save(filter_td_file, hxmos)
+
+    disco_res = wtf.disco_check(hxmos, phases, frame_advance)
+    convergence_rate = wtf.calc_convergence_rate(time, leq_error)
+    # NOTE(review): time_20dB, time_30dB and time_40dB are computed but not asserted on
+    time_20dB = wtf.calc_attenuation_time(time, leq_error, -20)
+    time_30dB = wtf.calc_attenuation_time(time, leq_error, -30)
+    time_40dB = wtf.calc_attenuation_time(time, leq_error, -40)
+    max_atten = wtf.calc_max_attenuation(leq_error)
+    assert disco_res
+    assert max_atten < -40
+    assert convergence_rate > 12
+
+    # plot
+    # only when this file is run directly as a script
+    if __name__ == "__main__":
+        import matplotlib.pyplot as plt
+        plt.figure()
+        plt.title("%s input signal, %s"%(filename, adapt_config))
+        plt.plot(time, leq_error - leq_error[0])
+        plt.xlabel("Time (s)")
+        plt.ylabel("Attenuation (dB)")
+        plt.ylim([-40, 10])
+        plt.xlim([0, time[-1]])
+        #plt.show()
+
+
+if __name__ == "__main__":
+    test_pink_convergence('AEC_ADAPTION_AUTO', 1)
diff --git a/test/lib_aec/test_aec_enhancements/test_skype.py b/test/lib_aec/test_aec_enhancements/test_skype.py
new file mode 100644
index 000000000..b137d68aa
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/test_skype.py
@@ -0,0 +1,104 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import os
+import sys
+
+import numpy as np
+import scipy.io.wavfile
+import subprocess
+import shutil
+import soundfile as sf
+
+from common_utils import json_to_dict
+import wav_test_functions as wtf
+import audio_wav_utils as awu
+import run_xc
+import pytest
+from pathlib import Path
+
+# Root of the hydra_audio test data, taken from the hydra_audio_PATH environment
+# variable. '~' is expanded explicitly because pathlib and numpy treat it as a
+# literal directory name, which made the default fallback unusable.
+hydra_audio_path = os.path.expanduser(os.environ.get('hydra_audio_PATH', '~/hydra_audio'))
+import configparser
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+filter_dir = parser.get("Folders", "filter_dir")
+
+@pytest.mark.parametrize("channel_count", [1, 2])
+def test_skype(channel_count):
+    ''' test_skype - run a mono/stereo skype signal convolved with a modelled impulse response
+    through the XC AEC and check that the AEC filter does not have any discontinuities
+    and that the output does not deconverge
+        
+    pass/fail: check wtf.disco_check passes on the time domain filter dumped by the XC run
+    pass/fail: check the smoothed output level at the end of the run is below the level at index 20'''
+
+    testname = f"{(Path(__file__).stem)[5:]}_{channel_count}"
+    fs = 16000
+    N = fs * 15
+
+    np.random.seed(100)
+
+    phases = 10  # aec_parameters['phases']
+    frame_advance = 240  # aec_parameters['frame_advance']
+
+    filename = "007_skype"
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "playback_audio", filename + ".wav")
+    x, fs2 = sf.read(filepath)
+    assert fs==fs2
+    # Promote a mono (1-D) signal to 2-D *before* slicing by channel: the 2-D
+    # slice below raises IndexError on a 1-D array, so the check was
+    # unreachable for mono files when it came after the slice.
+    if x.ndim == 1:
+        x = x[:, np.newaxis]
+    x = x[:N, :channel_count]
+    y = np.zeros((N, channel_count)) # microphone signal
+  
+    # load impulse response
+    filename1 = "000_LAB_XTS_DUTL_fs16kHz"
+    filename2 = "001_LAB_XTS_DUTR_fs16kHz"
+
+    filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename1 + ".npy")
+    h1 = np.load(filepath)
+    hN = h1.shape[0]
+
+    if channel_count == 1:
+        # mono: one reference through the first column of h1
+        h = h1[:, 0]
+        y = np.atleast_2d(np.convolve(h, x[:,0], 'full')[:N]).T
+    if channel_count == 2:
+        # stereo: microphone 0 uses the columns of h1, microphone 1 the columns
+        # of h2; each microphone sums the contribution of both references
+        filepath = Path(hydra_audio_path, "acoustic_team_test_audio", "impulse", filename2 + ".npy")
+        h2 = np.load(filepath)
+        for n in range(channel_count):
+            y[:,0] += np.convolve(h1[:,n], x[:,n], 'full')[:N]
+            y[:,1] += np.convolve(h2[:,n], x[:,n], 'full')[:N]
+    elif channel_count > 2:
+        assert False
+
+
+    # run AEC
+    # microphone channels first, then reference channels
+    in_data = np.concatenate((y, x), axis=1).T
+    nFrames = (N-hN-1) // frame_advance -1
+
+    #run XC
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+    print("Run AEC XC. nFrames = ", nFrames)
+    filter_td_file = f"{filter_dir}/{testname}_h_td_xc.npy"
+    filter_fd_file = f"{filter_dir}/{testname}_H_fd_xc.npy"
+    dut_input_file, dut_output_file = run_xc.run_aec_xc(in_data_32bit[:,:channel_count], in_data_32bit[:,channel_count:], testname, adapt=nFrames, h_hat_dump=filter_fd_file, adapt_mode=run_xc.adapt_mode_dict['AEC_ADAPTION_FORCE_ON'], num_y_channels=channel_count, num_x_channels=channel_count)
+
+    rate, output_wav_file = scipy.io.wavfile.read(dut_output_file, 'r')
+    # only the first output channel is analysed
+    error_xc = output_wav_file[:,0]
+    _, leq_error_xc = wtf.leq_smooth(error_xc, fs, 0.05)
+
+    # frequency domain filter dumped by the XC run; only element [0,0] is checked
+    Hxmos = run_xc.get_h_hat(filter_fd_file, 'xc')[0,0]
+    print('Hxmos.shape = ',Hxmos.shape)
+    h = np.fft.irfft(Hxmos)
+    # build one time domain filter from the first frame_advance samples of each phase
+    hxmos_xc = np.zeros(frame_advance*phases)
+    for p in range(phases):
+        hxmos_xc[p*frame_advance: frame_advance*(p+1)] = h[p, :frame_advance]
+    np.save(filter_td_file, hxmos_xc)
+    
+    print('Check XC')
+    disco_res_xc = wtf.disco_check(hxmos_xc, phases, frame_advance)
+    assert(disco_res_xc)
+    # check for deconvergence
+    assert leq_error_xc[-1] < leq_error_xc[20]
+
+
+if __name__ == "__main__":
+    test_skype(2)
diff --git a/test/lib_aec/test_aec_enhancements/utils.sh b/test/lib_aec/test_aec_enhancements/utils.sh
new file mode 100644
index 000000000..fb242a2d9
--- /dev/null
+++ b/test/lib_aec/test_aec_enhancements/utils.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+config_filename="$(pwd)/parameters.cfg"
+
+# read_config KEY
+# Print the text after the last "=" on the line(s) of $config_filename that
+# contain KEY as a whole word.
+read_config() {
+    # Quote the file name so a working directory containing spaces still works;
+    # grep reads the file directly, no cat needed.
+    line=$(grep "\b$1\b" "$config_filename")
+    # $line and the substitution are left unquoted on purpose: this keeps the
+    # whitespace collapsing of the original output.
+    echo $(echo $line | sed "s/.* *= *//g")
+}
+
+# setup_env
+# Source the SetupEnv script from inside ../../../infr_scripts_pl/Build/ and
+# return to the previous directory afterwards.
+setup_env() {
+    # Stop if the directory is missing: otherwise SetupEnv would be looked up in
+    # the current directory and popd would pop an unrelated directory stack entry.
+    pushd ../../../infr_scripts_pl/Build/ || return
+    source SetupEnv
+    popd
+}
diff --git a/test/lib_aec/test_aec_profile/CMakeLists.txt b/test/lib_aec/test_aec_profile/CMakeLists.txt
new file mode 100644
index 000000000..1b2bfa068
--- /dev/null
+++ b/test/lib_aec/test_aec_profile/CMakeLists.txt
@@ -0,0 +1,111 @@
+## App name
+set( APP_NAME  test_aec_profile )
+
+# The task distribution scheme and top level config files are auto-generated
+# by a Python script, so a Python3 interpreter is mandatory.
+# Python3_FOUND is tested by name: expanding it with ${} leaves `if( NOT )`,
+# a syntax error, whenever the variable is not defined.
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found for running generate_task_distribution_scheme.py") 
+endif()
+
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+
+## Build each config
+# One executable named ${APP_NAME}_<config> is built for every entry of
+# TEST_AEC_PROFILE_BUILD_CONFIG. Each gets its own auto-generated task
+# distribution source and config headers.
+foreach( BUILD_CONFIG ${TEST_AEC_PROFILE_BUILD_CONFIG} )
+    # Convert to config str of the form <threads>_<ychannels>_<xchannels>_<mainphases>_<shadowphases>
+    string( REPLACE " " "_" CONFIG_STR "${BUILD_CONFIG}" )
+    message( STATUS "${APP_NAME} ${CONFIG_STR}" )
+    
+    ## Generate schedule for given config
+    set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen_${CONFIG_STR} )
+    set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+    set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+    set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+    unset( GEN_SCHEDULE_SCRIPT_ARGS)
+    list( APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+    list( APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${BUILD_CONFIG})
+    file( MAKE_DIRECTORY ${AUTOGEN_DIR})
+    # DEPENDS on the generator script so the outputs are regenerated when the
+    # script changes instead of going stale.
+    # NOTE(review): VERBATIM is not added on purpose. BUILD_CONFIG appears to be
+    # a space separated string, and VERBATIM would change how it reaches the
+    # script (one quoted argument instead of several) - confirm before adding it.
+    add_custom_command(
+        OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+        COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+        DEPENDS ${GEN_SCHEDULE_SCRIPT}
+        COMMENT "Generating AEC task distribution and top level config" )
+
+    ## Depends on libraries
+    unset( DEP_LIBS_XCORE )
+    list( APPEND  DEP_LIBS_XCORE  ""  )
+    
+    # Platform specific libraries are appended once (they were added twice before).
+    unset( DEP_LIBS )
+    list( APPEND  DEP_LIBS        
+        lib_xs3_math
+        lib_aec 
+        ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+    )
+
+    ## Sources
+    file( GLOB SOURCES_PROF  ${CMAKE_CURRENT_SOURCE_DIR}/src/*.c )
+    file( GLOB SOURCES_C  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.c )
+    file( GLOB SOURCES_XC  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.xc )
+    if ( XCORE )
+        file( GLOB SOURCES_AEC_PROCESS_FRAME  ${SHARED_SRC_PATH}/aec/*.c )
+    else()
+        ## Only 1 thread process_frame() builds for x86
+        file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+    endif()
+    file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES  ${XSCOPE_FILEIO_PATH}/src/*.c )
+    file( GLOB_RECURSE AUDIO_TEST_TOOLS_SOURCES ${DEPS_ROOT}/audio_test_tools/audio_test_tools/src/burners.S )
+    file( GLOB SOURCES_FILE_UTILS ${SHARED_SRC_PATH}/file_utils/*.c ) 
+    
+    unset( SOURCES )
+    list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_AEC_PROCESS_FRAME} ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} ${SOURCES_FILE_UTILS} ${AUDIO_TEST_TOOLS_SOURCES} ${SOURCES_PROF} ${AUTOGEN_SOURCES})
+    # AUTOGEN_DIR is on the include path so the generated headers are found.
+    unset( INCLUDES )
+    list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${SHARED_SRC_PATH}/file_utils ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api ${CMAKE_CURRENT_SOURCE_DIR}/src/ ${AUTOGEN_DIR})
+
+    # Absolute path of the xscope probe configuration handed to the XCORE link.
+    get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+    ## Compile flags
+    unset(COMPILE_FLAGS)
+    unset(COMPILE_FLAGS_XCORE)
+    list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 -DPROFILE_PROCESSING=1)
+
+    ##Linker flags
+    unset(LINKER_FLAGS)
+    list( APPEND  LINKER_FLAGS  "" )
+
+    unset(LINKER_FLAGS_XCORE)
+    list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+    list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+    list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+    # The platform linker flags are also passed at compile time, as before.
+    # NOTE(review): presumably the XCORE compiler driver needs -target when
+    # compiling - confirm.
+    list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+    list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+    #########
+    ## executable output directory
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+    ## Executable for this build config
+    set ( EXEC ${APP_NAME}_${CONFIG_STR} )
+    add_executable( ${EXEC} ${SOURCES} )
+
+    target_include_directories( ${EXEC} PRIVATE ${INCLUDES} )
+
+    target_link_libraries( ${EXEC} PRIVATE ${DEP_LIBS} )
+
+    target_compile_options( ${EXEC} PRIVATE ${COMPILE_FLAGS} )
+
+    # LINK_FLAGS takes one space separated string, not a CMake list
+    # (because otherwise the set_target_properties command fails)
+    string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+    set_target_properties( ${EXEC} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+    if ( XCORE )
+        # Relink when the xscope config changes; XCORE executables use ".xe".
+        set_target_properties( ${EXEC} PROPERTIES
+            SUFFIX ".xe"
+            LINK_DEPENDS  ${XSCOPE_CONFIG}
+        )
+    endif()
+endforeach()
diff --git a/test/lib_aec/test_aec_profile/config.xscope b/test/lib_aec/test_aec_profile/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/test/lib_aec/test_aec_profile/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/test/lib_aec/test_aec_profile/src/profile.c b/test/lib_aec/test_aec_profile/src/profile.c
new file mode 100644
index 000000000..32a316eae
--- /dev/null
+++ b/test/lib_aec/test_aec_profile/src/profile.c
@@ -0,0 +1,31 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <stdio.h>
+#include <string.h>
+#include <xscope.h>
+#include <xcore/assert.h>
+#include <xcore/hwtimer.h>
+#include "profile.h"
+
+static uint64_t profile_checkpoint[N_PROFILE_POINTS] = {0};
+static uint32_t profile_checkpoint_count[N_PROFILE_POINTS] = {0};
+
+/**
+ * Record one profiling checkpoint.
+ *
+ * Adds the current reference time to the accumulator for index n and bumps its
+ * hit count. str is not used by this function.
+ *
+ * @param n    profiling index; calls with n >= N_PROFILE_POINTS are ignored
+ * @param str  checkpoint label (unused here)
+ */
+void prof(unsigned n, char *str)
+{
+    (void)str;
+    // Ignore an out of range index instead of writing past the static tables.
+    if(n >= N_PROFILE_POINTS) {
+        return;
+    }
+    profile_checkpoint[n] += (uint64_t)get_reference_time();
+    profile_checkpoint_count[n] += 1;
+}
+
+/**
+ * Print the checkpoints accumulated since the previous call, then reset them.
+ *
+ * Prints "frame <frame>" followed by one "Profile <index>, <value>" line for
+ * every index in [start, end) whose accumulated value is non-zero. All
+ * accumulators and hit counts are cleared afterwards, whatever the range.
+ *
+ * @param start  first profiling index to print
+ * @param end    one past the last index to print; clamped to N_PROFILE_POINTS
+ * @param frame  frame number to print
+ */
+void print_prof(unsigned start, unsigned end, unsigned frame) {
+    // Never read past the end of the checkpoint table.
+    if(end > N_PROFILE_POINTS) {
+        end = N_PROFILE_POINTS;
+    }
+    printf("frame %u\n",frame);
+    for(unsigned i=start;i<end;i++)
+    {
+        if(profile_checkpoint[i] != 0) {
+            printf("Profile %u, %llu\n", i, (unsigned long long)profile_checkpoint[i]);
+        }
+    }
+    
+    memset(profile_checkpoint, 0, sizeof(profile_checkpoint));
+    memset(profile_checkpoint_count, 0, sizeof(profile_checkpoint_count));
+}
diff --git a/test/lib_aec/test_aec_profile/src/profile.h b/test/lib_aec/test_aec_profile/src/profile.h
new file mode 100644
index 000000000..403233c4f
--- /dev/null
+++ b/test/lib_aec/test_aec_profile/src/profile.h
@@ -0,0 +1,42 @@
+// Copyright 2018-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <stdio.h>
+#include <string.h>
+
+#ifdef __XC__
+extern "C" {
+#endif
+#define N_PROFILE_POINTS    100
+/**
+ * prof() function is called for profiling a piece of code. User needs to call it
+ * in a specific way for it to work properly. 
+ * If a piece of code needs to be profiled, call
+ * prof(profiling_index_1, "start_xyz") and prof(profiling_index_2, "end_xyz") before and after
+ * that piece of code. profiling_index_1 and profiling_index_2 are unique integers less than N_PROFILE_POINTS. 
+ * xyz can be any string, as long as the same string is used prefixed with start_ and end_ for the 2 prof() calls. 
+ * The start_ and end_ prefixes are important and need to be there.
+ * profiling_indexes used in prof() calls need to be unique integers less than N_PROFILE_POINTS. The ordering
+ * of these integers is not important.
+ *
+ * Following special cases are handled:
+ * - Profiling code within a loop: prof() calls can be made for code within a loop. Only the total time taken by
+ *   that code across all iterations will be calculated. Number of iterations is not saved anywhere and the 
+ *   user needs to infer this separately if things like average cycles per iteration are needed.
+ *
+ * Following cases are NOT handled:
+ * - For code within loops, the start_ and end_ prof() calls, both need to be either within the loop 
+ *   or outside the loop. The start_ call within the loop and end_ call outside the loop and vice-versa 
+ *   are not supported.
+ * - When profiling code within par statements, start_ and end_ calls cannot be within the par block.
+ *   They need to be outside the par block.
+ *
+ * @param n    profiling index, a unique integer less than N_PROFILE_POINTS
+ * @param str  label of the form "start_xyz" or "end_xyz"
+ */
+void prof(unsigned n, char *str);
+
+/**
+ * print_prof() is called once at the end of a frame to print the profiling info
+ * collected in the prof() calls. Profiling info for valid profiling indexes between [start, end) is printed.
+*/
+void print_prof(unsigned start, unsigned end, unsigned frame_num);
+#ifdef __XC__
+}
+#endif
diff --git a/test/lib_aec/test_aec_profile/test_profile.py b/test/lib_aec/test_aec_profile/test_profile.py
new file mode 100644
index 000000000..3973a3123
--- /dev/null
+++ b/test/lib_aec/test_aec_profile/test_profile.py
@@ -0,0 +1,287 @@
+# Copyright 2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import numpy as np
+import os
+import tempfile
+import shutil
+import subprocess
+import scipy.io.wavfile
+import scipy.signal as spsig
+import xscope_fileio
+import xtagctl
+import io
+import glob
+from contextlib import redirect_stdout
+import re
+import argparse
+import pytest
+import glob
+
+aec_src_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                              '..', 'test_wav_aec', 'src')
+
+def run_proc_with_output(cmd):
+    """Run cmd and return its stdout as a list of utf-8 decoded lines (line endings are kept)."""
+    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc: #waits for the process and closes the pipe on exit
+        return [str(line, 'utf-8') for line in proc.stdout.readlines()]
+
+def extract_memory_stats(stdout):
+    """Return the 'used' value of the last "Memory available: <n>, used: <m>" line in stdout, or None."""
+    memory_used = None
+    for line in stdout:
+        #raw string: \s and \d are regex classes, not (invalid) string escape sequences
+        rs = re.search(r"Memory\savailable:\s+(\d+),\s+used:\s+(\d+).+", line)
+        memory_used = int(rs.group(2)) if rs else memory_used
+    return memory_used
+
+xc_in_file_name = "input.wav"
+xc_out_file_name = "output.wav"
+runtime_args_file = "args.bin"
+def run_aec_xe(aec_xe, run_config, threads, audio_in, audio_out, profile_dump_file=None):
+    """Run aec_xe on an XCORE-AI-EXPLORER target and parse the profiling info it prints.
+
+    run_config provides the runtime AEC configuration that is written to the runtime args file.
+    threads is only used for logging the number of threads aec was built with.
+    audio_in is copied into a temporary working folder as the device input; audio_out is unused here.
+    profile_dump_file, when not None, receives the [DEVICE] output lines.
+    """
+    with open(runtime_args_file, "wb") as fargs:
+        fargs.write(f"main_filter_phases {run_config.num_main_filt_phases}\n".encode('utf-8'))
+        fargs.write(f"shadow_filter_phases {run_config.num_shadow_filt_phases}\n".encode('utf-8'))
+        fargs.write(f"y_channels {run_config.num_y_channels}\n".encode('utf-8'))
+        fargs.write(f"x_channels {run_config.num_x_channels}\n".encode('utf-8'))
+
+    tmp_folder = tempfile.mkdtemp()
+    prev_path = os.getcwd()
+    try:
+        shutil.copy2(runtime_args_file, os.path.join(tmp_folder, runtime_args_file))
+        shutil.copy2(audio_in, os.path.join(tmp_folder, xc_in_file_name))
+        os.chdir(tmp_folder)
+        with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+            print(f"Running on {adapter_id}")
+            with open("dut.log", "w") as ff:
+                xscope_fileio.run_on_target(adapter_id, aec_xe, stdout=ff)
+        #ignore lines that don't start with [DEVICE]; strip the [DEVICE] tag from the ones that do
+        with open("dut.log", "r") as ff:
+            xcore_stdo = [re.sub(r'\[DEVICE\]\s*', '', line) for line in ff.read().splitlines()
+                          if re.search(r'^\s*\[DEVICE\]', line)]
+    finally:
+        #restore the working directory and delete the temp folder even if the run failed
+        os.chdir(prev_path)
+        shutil.rmtree(tmp_folder, ignore_errors=True)
+
+    if profile_dump_file is not None:
+        with open(profile_dump_file, 'w') as fp:
+            fp.writelines(f"{line}\n" for line in xcore_stdo)
+    parse_profile_log(run_config, threads, xcore_stdo, worst_case_file=f"aec_prof_{run_config.config_str()}_{threads}threads.log")
+
+'''
+output: profile_file contains profiling info for all frames.
+output: worst_case_file contains profiling info for worst case frame
+output: mapping_file contains the profiling index to tag string mapping. This is useful when adding a new prof() call to look-up indexes that are already used
+        in order to avoid duplicating indexes
+'''
+def parse_profile_log(run_config, threads, prof_stdo, profile_file="parsed_profile.log", worst_case_file="worst_case.log", mapping_file="profile_index_to_tag_mapping.log"):
+    """Parse the device's profiling output (prof_stdo, a list of lines) into per-frame cycle counts and write
+    profile_file, worst_case_file and mapping_file. run_config and threads are only logged in worst_case_file."""
+    profile_strings = {}
+    profile_regex = re.compile(r'\s*prof\s*\(\s*(\d+)\s*,\s*"(.*)"\s*\)\s*;')
+    #find all aec source files that might have a prof() function call
+    aec_files = glob.glob(f'{aec_src_folder}/**/*.xc', recursive=True)
+    aec_files = aec_files + glob.glob(f'{aec_src_folder}/**/*.c', recursive=True)
+    for file in aec_files:
+        with open(file, 'r') as fd:
+            lines = fd.readlines()
+        for line in lines:
+            #look for prof(profiling_index, tag_string) type of calls
+            m = profile_regex.match(line)
+            if m:
+                if m.group(1) in profile_strings:
+                    raise AssertionError(f"Profiling index {m.group(1)} used more than once with tags '{profile_strings[m.group(1)]}' and '{m.group(2)}'.")
+                #add to a dict[profile_index] = tag_string structure to create a integer index -> tag string mapping
+                profile_strings[m.group(1)] = m.group(2)
+
+    #log profile_strings in a file so it's easy for a user adding a new prof calls to look up already used indexes
+    with open(mapping_file, 'w') as fp:
+        for index in profile_strings:
+            fp.write(f'{index:<4} {profile_strings[index]}\n')
+
+    #parse stdo output and for every frame, generate a dictionary that stores dict[tag_string] = timer_snapshot
+    all_frames = []
+    tags = {} #dictionary that stores dict[tag_string] = timer_snapshot information
+    profile_regex = re.compile(r'Profile\s*(\d+)\s*,\s*(\d+)')
+    #look for start of frame
+    frame_regex = re.compile(r'frame\s*(\d+)')
+    frame_num = 0
+    for line in prof_stdo:
+        m = frame_regex.match(line)
+        if m:
+            if frame_num:
+                #append previous frames profiling info to all_frames
+                all_frames.append(tags)
+                tags = {} #reset tags
+            frame_num += 1
+        m = profile_regex.match(line)
+        if m:
+            tags[profile_strings[m.group(1)]] = int(m.group(2))
+    if frame_num and tags:
+        #the last frame has no following frame line to flush it, so append it here
+        all_frames.append(tags)
+
+    frame_num = 0
+    worst_case_frame = None
+    with open(profile_file, 'w') as fp:
+        fp.write(f'{"Tag":<44} {"Cycles":<12} {"% of total cycles":<10}\n')
+        for tags in all_frames: #look at framewise profiling information
+            fp.write(f"Frame {frame_num}\n")
+            total_cycles = 0
+            #convert from (start_ tag_string, timer_snapshot), (end_ tag_string, timer_snapshot) type information to (tag_string without start_ or end_ prefix, timer cycles between start_ and end_ tag_string)
+            this_frame_tags = {} #structure to store this frame's dict[tag_string] = cycles_between_start_and_end info so that we can use it later to print cycles as well as % of overall cycles
+            for tag in tags:
+                if tag.startswith('start_'):
+                    end_tag = 'end_' + tag[6:]
+                    cycles = tags[end_tag] - tags[tag]
+                    this_frame_tags[tag[6:]] = cycles
+                    total_cycles += cycles
+            #this_frame is a tuple of (dictionary dict[tag_string] = cycles_between_start_and_end, total cycle count, frame_num)
+            this_frame = (this_frame_tags, total_cycles, frame_num)
+
+            #now write this frame's info in file
+            for key, value in this_frame[0].items():
+                fp.write(f'{key:<44} {value:<12} {round((value/float(this_frame[1]))*100,2):>10}% \n')
+            fp.write(f'{"TOTAL_CYCLES":<32} {this_frame[1]}\n')
+            #the first frame with the largest total cycle count is the worst case
+            if worst_case_frame is None or worst_case_frame[1] < this_frame[1]:
+                worst_case_frame = this_frame
+            frame_num += 1
+
+    if worst_case_frame is None:
+        raise RuntimeError("No profiling frames found in the device output")
+    with open(worst_case_file, 'w') as fp:
+        fp.write(f"Config: Threads ({threads}), Y_channels ({run_config.num_y_channels}), X_channels ({run_config.num_x_channels}), Main filter phases ({run_config.num_main_filt_phases}), Shadow filter phases ({run_config.num_shadow_filt_phases})\n")
+        fp.write(f"Worst case frame = {worst_case_frame[2]}\n")
+        #in the end, print the worst case frame
+        for key, value in worst_case_frame[0].items():
+            fp.write(f'{key:<44} {value:<12} {round((value/float(worst_case_frame[1]))*100,2):>10}% \n')
+        worst_case_timer_cycles = np.float64(worst_case_frame[1])
+        fp.write(f'{"Worst_case_frame_timer(100MHz)_cycles":<32} {worst_case_timer_cycles}\n')
+        worst_case_processor_cycles = (worst_case_timer_cycles/100) * 120
+        fp.write(f'{"Worst_case_frame_processor(120MHz)_cycles":<32} {worst_case_processor_cycles}\n')
+        #0.015 is seconds_per_frame. 1/0.015 is the frames_per_second.
+        #processor_cycles_per_frame * frames_per_sec = processor_cycles_per_sec. processor_cycles_per_sec/1000000 => MCPS
+        mcps = "{:.2f}".format((worst_case_processor_cycles / 0.015) / 1000000)
+        fp.write(f'{"MCPS":<10} {mcps} MIPS\n')
+
+
+def leq_smooth(x, fs, T):
+    """Return (t, leq): the start time and the level in dB of x for consecutive windows of T seconds."""
+    samples_per_win = int(fs * T)
+    num_wins = x.shape[0] // samples_per_win
+    usable = samples_per_win * num_wins
+
+    #column-major reshape places each window in its own column
+    windows = np.reshape(x[:usable], (samples_per_win, num_wins), 'F')
+    mean_sq = np.mean(windows ** 2.0, axis=0)
+
+    window_times = np.arange(num_wins) * T
+    return window_times, 10 * np.log10(mean_sq)
+
+def make_impulse(RT, t=None, fs=None):
+    """Build a random, exponentially decaying impulse response (decay rate derived from RT).
+
+    When t is None the time axis covers 2*RT seconds sampled at fs.
+    """
+    scale = 0.005
+    scale_noise = 0.00005
+    decay = 3.0 * np.log(10.0) / RT
+    if t is None:
+        t = np.arange(2.0*RT*fs) / fs
+    num_taps = t.shape[0]
+    h = np.zeros(num_taps)
+    envelope = np.exp(-decay*t)
+    #sparse reflections at random taps, alternating in sign
+    reflection_index = np.random.randint(num_taps, size=num_taps // 100)
+    for n, idx in enumerate(reflection_index):
+        sign = -1 if n % 2 else 1
+        h[idx] = sign * scale * t[idx] * envelope[idx]
+    return h + scale_noise * np.random.randn(num_taps) * envelope
+
+def create_wav_input():
+    """Write input.wav: a 4 channel, 32 bit, 16kHz wav file.
+
+    Channels 0 and 1 both carry d (seeded white noise convolved with a random impulse response and
+    attenuated); channels 2 and 3 both carry u (the same noise before convolution, scaled by 0.2).
+    """
+    fs = 16000
+    N = fs * 10
+    #fixed seed so that the generated file is the same on every run
+    np.random.seed(500)
+
+    phases = 10
+    fN = phases * 240
+
+    # build impulse response
+    RT = 0.15
+    h = make_impulse(RT, fs=fs)
+    h = h/h.max()
+    hN = len(h)
+
+    u = np.random.randn(N)
+
+    d = spsig.convolve(u, h, 'full')[:N]
+    if fN > hN:
+        d = d[hN-1:hN-fN]
+    else:
+        d = d[hN-1:]
+
+    d = d * 0.01 #40dB attenuation (amplitude scaled by 0.01)
+    u = u * 0.2
+
+    in_data = np.stack((d, d, u[hN-1:N], u[hN-1:N]), axis=0)
+    in_data_32bit = (np.asarray(in_data * np.iinfo(np.int32).max, dtype=np.int32)).T
+    scipy.io.wavfile.write("input.wav", fs, in_data_32bit)
+    
+
+class aec_config:
+    """AEC configuration parsed from a '<y_channels> <x_channels> <main_phases> <shadow_phases>' string."""
+    def __init__(self, config_str):
+        config = config_str.split()
+        assert len(config) == 4, "Incorrect length config specified!"
+        #the fields are stored as the strings they were given as
+        self.num_y_channels, self.num_x_channels = config[0], config[1]
+        self.num_main_filt_phases, self.num_shadow_filt_phases = config[2], config[3]
+    def print_config(self):
+        print("Config = ", self.num_y_channels,  self.num_x_channels, self.num_main_filt_phases, self.num_shadow_filt_phases)
+    def config_str(self):
+        return f"{self.num_y_channels}ych_{self.num_x_channels}xch_{self.num_main_filt_phases}mainph_{self.num_shadow_filt_phases}shadph"
+
+
+xe_files = glob.glob('../../../build/test/lib_aec/test_aec_profile/bin/*.xe')
+#create wav input
+create_wav_input()
+@pytest.fixture(scope="session", params=xe_files)
+def setup(request):
+    """Return (absolute .xe path, aec_config the .xe was built with, threads) for one .xe file."""
+    xe = os.path.abspath(request.param)
+    #the file stem should be of the form test_aec_profile_<threads>_<ychannels>_<xchannels>_<mainphases>_<shadowphases>
+    stem, _ = os.path.splitext(os.path.basename(xe))
+    #the last 5 _ separated fields hold the config: threads first, then "<ych> <xch> <mainph> <shadowph>"
+    threads, *rest_of_config = stem.split('_')[-5:]
+    return xe, aec_config(' '.join(rest_of_config)), threads
+
+#For every build_config, test with all specified run time configs
+@pytest.mark.parametrize("run_config", ['', '1 2 15 5'])
+def test_profile(setup, run_config):
+    """Profile one .xe, either with the config it was built with or with a run time config.
+    run_config is given in '<num_y_channels> <num_x_channels> <num_main_filter_phases> <num_shadow_filter_phases>'
+    format; an empty string selects the configuration that was built.
+    """
+    print(f"config {run_config}")
+    aec_xe, build_config, threads = setup
+    use_build_config = (run_config == '')
+    if use_build_config:
+        print('test build_config')
+    selected_config = build_config if use_build_config else aec_config(run_config)
+    run_aec_xe(aec_xe, selected_config, threads, "input.wav", "output.wav", "profile.log") #threads is passed in only for logging purposes
+    if not use_build_config:
+        print('test run_config')
+
diff --git a/test/lib_aec/test_aec_spec/AudioGen.ipynb b/test/lib_aec/test_aec_spec/AudioGen.ipynb
new file mode 100644
index 000000000..c73af52df
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/AudioGen.ipynb
@@ -0,0 +1,416 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import scipy.io.wavfile\n",
+    "\n",
+    "freq_1 = 440\n",
+    "freq_2 = 900\n",
+    "x = np.linspace(0, 2*np.pi, 10000)\n",
+    "y_1 = np.sin(freq_1*x)\n",
+    "y_2 = np.sin(freq_2*x)\n",
+    "signal = np.sin(x)**2 * y_1 + np.cos(x)**2 * y_2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import os.path\n",
+    "import scipy.signal\n",
+    "\n",
+    "def comb_filter(length_ms, amplitude, delay_ms, sample_rate=16000):\n",
+    "    delay = int(sample_rate * delay_ms / 1000)\n",
+    "    signal = np.zeros((sample_rate * length_ms / 1000, ))\n",
+    "    signal[40] = 1\n",
+    "    for i in range(40, sample_rate * length_ms / 1000, delay):\n",
+    "        delay_i = i + delay\n",
+    "        if delay_i >= sample_rate * length_ms / 1000:\n",
+    "            break\n",
+    "        signal[delay_i] = signal[i] * amplitude\n",
+    "    return signal\n",
+    "\n",
+    "def echo(length_ms, amplitude, delay_ms, sample_rate=16000):\n",
+    "    delay = int(sample_rate * delay_ms / 1000)\n",
+    "    signal = np.zeros((sample_rate * length_ms / 1000, ))\n",
+    "    signal[40] = 1\n",
+    "    signal[delay] = amplitude\n",
+    "    return signal\n",
+    "\n",
+    "def silence(length=None, samples=None, db=-150, sample_rate=16000):\n",
+    "    if length:\n",
+    "        samples = length*sample_rate\n",
+    "    x = np.random.normal(size=(samples,))\n",
+    "    factor = np.power(10, db / 20.0)\n",
+    "    y = x * factor\n",
+    "    return y\n",
+    "\n",
+    "def get_h(h_type, normalise=True):\n",
+    "    # unknown types are rejected before h is used (the raise used to sit after the return)\n",
+    "    if h_type == 'short':\n",
+    "        h = echo(200, 0.7, 40)\n",
+    "    elif h_type == 'long':\n",
+    "        h = echo(200, 0.7, 170)\n",
+    "    elif h_type == 'excessive':\n",
+    "        h = echo(200, 0.7, 190)\n",
+    "    elif h_type == 'decaying':\n",
+    "        h = comb_filter(500, -0.9, 12)\n",
+    "    else:\n",
+    "        raise Exception(\"H type '%s' not valid\" % h_type)\n",
+    "    return h / np.sum(np.abs(h)) if normalise else h\n",
+    "    \n",
+    "def get_near_end(length, frequencies=[440], sample_rate=16000, rshift=4):\n",
+    "    x = np.linspace(0, length * 2 * np.pi, length * sample_rate)\n",
+    "    signal = np.zeros((length * sample_rate,))\n",
+    "    for freq in frequencies:\n",
+    "        signal += np.sin(freq * x)\n",
+    "    return signal / (1<<rshift)\n",
+    "\n",
+    "def get_ref_discrete(length, freq_a=1000, freq_b=2000, period=1, sample_rate=16000, rshift=4):\n",
+    "    x = np.linspace(0, length * 2 * np.pi, length * sample_rate)\n",
+    "    y_1 = np.sin(freq_a * x)\n",
+    "    y_2 = np.sin(freq_b * x)\n",
+    "    signal = np.sin(x / (period*2))**2 * y_1 + np.cos(x / (period*2))**2 * y_2\n",
+    "    return signal / (1<<rshift)\n",
+    "\n",
+    "def get_ref_continuous(length, freq_a=500, freq_b=4000, period=0.2, sample_rate=16000, rshift=4):\n",
+    "    x = np.linspace(0, length * 2 * np.pi, length * sample_rate)\n",
+    "    f = np.sin(x/period)*(freq_b-freq_a)/2 + (freq_a+freq_b)/2\n",
+    "    #f = np.tile(scipy.signal.triang(sample_rate * period), length/period + 1)[:len(x)]*(freq_b-freq_a)/2 + (freq_a+freq_b)/2\n",
+    "    y = np.cumsum(f) / sample_rate * 2 * np.pi\n",
+    "    z = np.sin(y)\n",
+    "    signal = z\n",
+    "    return signal / (1<<rshift)\n",
+    "\n",
+    "def get_ref(*args, **kwargs):\n",
+    "    return get_ref_continuous(*args, **kwargs)\n",
+    "\n",
+    "def write_data(data, filename, sample_rate=16000, dtype=np.int16, rshift=0):\n",
+    "    output = np.asarray(data*np.iinfo(np.int16).max, dtype=dtype) >> rshift\n",
+    "    scipy.io.wavfile.write(filename, sample_rate, output.T)\n",
+    "\n",
+    "def write_audio(test_class, echo, AudioIn, AudioRef, sample_rate=16000, dtype=np.int16):\n",
+    "    audio_dir = 'spec_audio'\n",
+    "    filename = '%s-%s-%s' % (test_class, echo, \"%s\")\n",
+    "    try:\n",
+    "        os.makedirs(audio_dir)\n",
+    "    except os.error:\n",
+    "        pass\n",
+    "    write_data(AudioIn, os.path.join(audio_dir, filename % \"AudioIn.wav\"), sample_rate, dtype)\n",
+    "    write_data(AudioRef, os.path.join(audio_dir, filename % \"AudioRef.wav\"), sample_rate, dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import scipy.signal\n",
+    "\n",
+    "length=10; freq_a=2000; freq_b=3000; period=2; sample_rate=16000; rshift=8\n",
+    "\n",
+    "x = np.linspace(0, length * 2 * np.pi, length * sample_rate)\n",
+    "f = np.sin(x/period)*(freq_b-freq_a)/2 + (freq_a+freq_b)/2\n",
+    "#f = np.tile(scipy.signal.triang(sample_rate * period), length/period + 1)[:len(x)]*(freq_b-freq_a)/2 + (freq_a+freq_b)/2\n",
+    "x = np.cumsum(f) / sample_rate * 2 * np.pi\n",
+    "plt.plot(x)\n",
+    "plt.show()\n",
+    "y = np.sin(x)\n",
+    "signal = y\n",
+    "plt.plot(f)\n",
+    "plt.show()\n",
+    "plt.plot(y)\n",
+    "plt.show()\n",
+    "plt.specgram(signal, Fs=16000)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "#plt.magnitude_spectrum(signal, Fs=10000)\n",
+    "#signal = get_ref(2, 440, 2000, period=0.5)\n",
+    "signal = get_ref(10)\n",
+    "plt.specgram(signal, Fs=16000)\n",
+    "plt.show()\n",
+    "signal = get_near_end(1, [440])\n",
+    "plt.specgram(signal, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "h = get_h('short')\n",
+    "x = np.linspace(0, len(h)/16000.0, len(h))\n",
+    "plt.plot(x, h)\n",
+    "plt.show()\n",
+    "h = get_h('long')\n",
+    "x = np.linspace(0, len(h)/16000.0, len(h))\n",
+    "plt.plot(x, h)\n",
+    "plt.show()\n",
+    "h = get_h('excessive')\n",
+    "x = np.linspace(0, len(h)/16000.0, len(h))\n",
+    "plt.plot(x, h)\n",
+    "plt.show()\n",
+    "h = get_h('decaying')\n",
+    "x = np.linspace(0, len(h)/16000.0, len(h))\n",
+    "plt.plot(x, h)\n",
+    "plt.show()\n",
+    "y = silence(60)\n",
+    "x = np.linspace(0, len(y)/16000.0, len(y))\n",
+    "plt.plot(x, y)\n",
+    "plt.show()\n",
+    "print y\n",
+    "write_data(y, \"silence.wav\")\n",
+    "write_data(signal, \"test.wav\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "import scipy.signal\n",
+    "\n",
+    "ref = get_ref(2, 440, 2000, period=0.5)\n",
+    "AudioIn = scipy.signal.convolve(ref, get_h('decaying'))\n",
+    "# plt.plot(ref)\n",
+    "# plt.show()\n",
+    "# plt.plot(AudioIn)\n",
+    "# plt.show()\n",
+    "\n",
+    "ref = get_ref(2, 1000, 3000, period=0.5)\n",
+    "ref_2f = get_ref_discrete(2, 1000, 3000, period=0.5)\n",
+    "plt.figure(figsize=(8,3))\n",
+    "plt.subplot(121)\n",
+    "plt.specgram(ref, Fs=16000)\n",
+    "plt.title(\"Continuous 2-tone Reference\")\n",
+    "plt.xlabel(\"Time (s)\")\n",
+    "plt.ylabel(\"Frequency (Hz)\")\n",
+    "plt.subplot(122)\n",
+    "plt.specgram(ref_2f, Fs=16000)\n",
+    "plt.title(\"Discrete 2-tone Reference\")\n",
+    "plt.xlabel(\"Time (s)\")\n",
+    "plt.ylabel(\"Frequency (Hz)\")\n",
+    "plt.tight_layout()\n",
+    "plt.savefig(\"reference.png\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Generate Audio\n",
+    "from scipy.signal import convolve\n",
+    "# Simple Tests: Short Echo\n",
+    "ref = get_ref_discrete(20)\n",
+    "ref_room = convolve(ref, get_h('short'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(10)\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('simple', 'short', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "# Simple Tests: Long Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('long'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(60)\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('simple', 'long', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)  \n",
+    "plt.show()\n",
+    "\n",
+    "# Simple Tests: Decaying Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('decaying'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(60)\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('simple', 'decaying', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Multi-tone Tests: Short Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('short'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = convolve(get_near_end(60, frequencies=[500, 1500, 3000]), get_h('short'))\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('multitone', 'short', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "# Multi-tone Tests: Long Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('long'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(60, frequencies=[500, 1500, 3000])\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('multitone', 'long', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "# Multi-tone Tests: Decaying Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('decaying'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(60, frequencies=[500, 1500, 3000])\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('multitone', 'decaying', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_rate = 16000\n",
+    "# Impulse Response Tests: Short Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('short'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = background_noise\n",
+    "ref_room[60*sample_rate:] = 0\n",
+    "near_end[60*sample_rate] = -1\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('impulseresponse', 'short', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "# Impulse Response Tests: Long Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('long'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = background_noise\n",
+    "ref_room[60*sample_rate:] = 0\n",
+    "near_end[60*sample_rate] = -1\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('impulseresponse', 'long', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()\n",
+    "\n",
+    "# Impulse Response Tests: Decaying Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('decaying'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = background_noise\n",
+    "ref_room[60*sample_rate:] = 0\n",
+    "near_end[60*sample_rate] = -1\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('impulseresponse', 'decaying', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Simple Tests: Short Echo\n",
+    "ref = get_ref(120)\n",
+    "ref_room = convolve(ref, get_h('excessive'))\n",
+    "background_noise = silence(samples=len(ref_room))\n",
+    "near_end = get_near_end(60)\n",
+    "near_end = np.concatenate((background_noise[:-len(near_end)], background_noise[-len(near_end):] + near_end))\n",
+    "AudioIn = ref_room + near_end\n",
+    "AudioRef = ref_room\n",
+    "write_audio('excessive', 'excessive', AudioIn, AudioRef)\n",
+    "\n",
+    "plt.specgram(AudioIn, Fs=16000)\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "git": {
+   "suppress_outputs": true
+  },
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/test/lib_aec/test_aec_spec/CMakeLists.txt b/test/lib_aec/test_aec_spec/CMakeLists.txt
new file mode 100644
index 000000000..847500806
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/CMakeLists.txt
@@ -0,0 +1,100 @@
+## App name
+set( APP_NAME  test_aec_spec )
+
+# Auto-generate task distribution scheme and top level config files
+# (Python3_FOUND is tested by name: expanding it while undefined would leave a malformed if() condition)
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found for running generate_task_distribution_scheme.py")
+endif()
+
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen )
+set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+
+message(STATUS "${APP_NAME} aec build config:  ${TEST_AEC_SPEC_BUILD_CONFIG}" )
+set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+
+unset(GEN_SCHEDULE_SCRIPT_ARGS)
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${TEST_AEC_SPEC_BUILD_CONFIG})
+
+file(MAKE_DIRECTORY ${AUTOGEN_DIR})
+add_custom_command(
+OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+DEPENDS ${GEN_SCHEDULE_SCRIPT} # re-run the generator when the script itself changes
+COMMENT "Generating AEC task distribution and top level config" )
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+
+# Libraries needed on every platform, followed by the platform specific ones
+# (DEP_LIBS_<system name>, e.g. DEP_LIBS_XCORE). They are appended once only.
+list( APPEND  DEP_LIBS
+    lib_xs3_math
+    lib_aec
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+## Sources (the application sources are shared with test_wav_aec)
+file( GLOB SOURCES_C  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.c )
+file( GLOB SOURCES_XC  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.xc )
+if ( XCORE )
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/*.c )
+else()
+    ## Only the 1 thread process_frame() is built for non-xcore (x86) builds
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+endif()
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES ${XSCOPE_FILEIO_PATH}/src/*.c )
+file( GLOB_RECURSE AUDIO_TEST_TOOLS_SOURCES ${DEPS_ROOT}/audio_test_tools/audio_test_tools/src/burners.S )
+file( GLOB SOURCES_FILE_UTILS ${SHARED_SRC_PATH}/file_utils/*.c )
+# AUTOGEN_SOURCES is the file generated by the add_custom_command earlier in this file
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_AEC_PROCESS_FRAME} ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} ${SOURCES_FILE_UTILS} ${AUDIO_TEST_TOOLS_SOURCES} ${AUTOGEN_SOURCES} )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${SHARED_SRC_PATH}/file_utils ${AUTOGEN_DIR} ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+
+# Absolute path of config.xscope; it is passed with the xcore linker flags and used as a LINK_DEPENDS of the target
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+# Pick the flags for the system being built for; the system's linker flags are also added to the compile flags
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS} )
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# LINK_FLAGS takes one space separated string, so flatten the list (otherwise the set_target_properties command fails)
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+    set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/test/lib_aec/test_aec_spec/aec_test_utils.py b/test/lib_aec/test_aec_spec/aec_test_utils.py
new file mode 100644
index 000000000..5e16a3890
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/aec_test_utils.py
@@ -0,0 +1,187 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import str
+from builtins import range
+import sys
+import os.path
+import configparser
+import numpy as np
+import scipy.io.wavfile
+import scipy.signal.windows
+# export PYTHONPATH=$PYTHONPATH:audio_test_tools/python
+from audio_generation import (get_filenames, get_magnitude,
+                              get_suppressed_magnitude, db)
+
+
+def files_exist(*args):
+    """Return True only when every given path is an existing regular file.
+
+    With no arguments the result is True."""
+    return all(os.path.isfile(path) for path in args)
+
+
+def read_config(testname, filename='test_config.cfg'):
+    """Read section `testname` of `filename` into a dict of typed settings."""
+    parser = configparser.ConfigParser()
+    parser.read(filename)
+    def fields(option):
+        return parser.get(testname, option).split(',')
+    cfg = {key: parser.getint(testname, key)
+           for key in ('settle_time', 'start_fft', 'end_fft')}
+    cfg['ignore_exclusion'] = parser.getboolean(testname, "ignore_exclusion")
+    cfg['headroom'] = [int(x) for x in fields("headroom")]
+    cfg['echo'] = [x.strip() for x in fields("echo")]
+    cfg['reference'] = [x.strip() for x in fields("reference")]
+    try:
+        cfg['frequencies'] = [int(x) for x in fields("frequencies")]
+    except configparser.NoOptionError:
+        pass  # 'frequencies' is optional and simply left out when absent
+    return cfg
+
+
+def read_wav(filename):  # Load a wav file as floats; the sample rate is discarded.
+    rate, data = scipy.io.wavfile.read(filename)
+    return data.astype(float) / np.iinfo(data.dtype).max  # np.iinfo: integer sample dtypes only
+
+
+def get_excluded_tests():
+    """Return the distinct, whitespace-stripped lines of 'excluded_tests.txt'.
+
+    The file is opened relative to the current working directory; the order
+    of the returned list is unspecified because duplicates go through a set."""
+    with open('excluded_tests.txt', 'r') as f:
+        return list({entry.strip() for entry in f})
+
+
+def get_test_instances(testname, in_dir, out_dir):
+    """ Gets all generated tests by checking test_config.cfg and the files
+        present in the in_dir.
+
+        A test is returned only when both its input and its reference wav
+        exist in in_dir. excluded_tests.txt is not read here; excluded tests
+        are absent only if generate_audio.py did not create their audio."""
+    tests = []
+    cfg = read_config(testname)
+    for headroom in cfg['headroom']:
+        for echo_type in cfg['echo']:
+            for ref_type in cfg['reference']:
+                test_id = ",".join([testname, echo_type, ref_type,
+                                    str(headroom)])
+                in_filename, ref_filename, out_filename\
+                    = get_filenames(testname, echo_type, ref_type, headroom)
+                in_filename = os.path.join(in_dir, in_filename + ".wav")
+                ref_filename = os.path.join(in_dir, ref_filename + ".wav")
+                out_filename = os.path.join(out_dir, out_filename + ".wav")
+                test_dict = {'id' : test_id,
+                             'test_type' : testname,
+                             'settle_time' : cfg['settle_time'],
+                             'headroom' : headroom,
+                             'echo' : echo_type,
+                             'reference' : ref_type,
+                             'in_filename' : in_filename,
+                             'ref_filename' : ref_filename,
+                             'out_filename' : out_filename}
+                if files_exist(in_filename, ref_filename):
+                    tests.append(test_dict)
+    return tests
+
+
+def get_section(testid, sections):
+    """Pick the section whose comma-separated fields best match testid.
+
+    Each field must equal the testid field at that position or be '*'; the
+    match with the fewest '*' wins (first one on ties), else 'DEFAULT'.
+    Sections longer than testid never match (this used to raise IndexError)."""
+    best_section, fewest = 'DEFAULT', None
+    id_fields = testid.split(',')
+    for section in sections:
+        fields = section.split(',')
+        fits = len(fields) <= len(id_fields) and all(
+            field in ('*', wanted) for field, wanted in zip(fields, id_fields))
+        wildcards = fields.count('*')
+        if fits and (fewest is None or wildcards < fewest):
+            best_section, fewest = section, wildcards
+    return best_section
+
+
+def get_criteria(testid, filename="criteria.cfg"):
+    """Return the pass criteria for testid as an option -> string dict.
+
+    The values come from the section of `filename` chosen by get_section."""
+    parser = configparser.ConfigParser()
+    parser.read(filename)
+    section = get_section(testid, parser.sections())
+    return dict(parser.items(section))
+
+
+def get_h_hat_impulse_response(h_hat, y_channel, x_channel, frame_advance=240):
+    """Gets the impulse response of h_hat.
+
+    h_hat is an array internal to the aec with a shape as follows:
+    (y_channel_count, x_channel_count, max_phase_count, f_bin_count)
+
+    Args:
+        h_hat: h_hat array
+        y_channel: y_channel to plot
+        x_channel: x_channel to plot
+        frame_advance: samples kept from each phase's inverse FFT
+            (default 240, the value previously hard-coded here)
+
+    Returns:
+        Impulse response of h_hat for channel pair (y_channel, x_channel):
+        a 1-D array of max_phase_count * frame_advance samples
+    """
+
+    max_phase_count = h_hat.shape[2]
+    h_hat_ir = np.zeros((max_phase_count * frame_advance,))
+
+    for phase in range(max_phase_count):
+        # Each phase is a one-sided spectrum; keep only the first
+        # frame_advance samples of its time-domain response.
+        phase_ir = np.fft.irfft(h_hat[y_channel][x_channel][phase])
+        start   = frame_advance *  phase
+        h_hat_ir[start:start + frame_advance] = phase_ir[:frame_advance]
+
+    return h_hat_ir
+
+
+def check_aec_output(audio_in, audio_ref, audio_out, start_s, end_s, criteria,
+                     frequencies, Fs=16000, debug=True):  # -> True if all checks pass
+    success = True  # cleared by any failed check; failures are printed, not raised
+    start = Fs * start_s  # analysed span [start_s, end_s) converted to sample indices
+    end = Fs * end_s
+    window = scipy.signal.windows.hann(end - start, sym=True)
+    In = np.abs(np.fft.rfft(audio_in[start:end] * window))  # magnitude spectra of the windowed span
+    Ref = np.abs(np.fft.rfft(audio_ref[start:end] * window))  # NOTE(review): Ref is not used below
+    Out = np.abs(np.fft.rfft(audio_out[start:end] * window))
+    # Check for near-end frequencies
+    for f in frequencies:
+        in_mag = get_magnitude(f, In, Fs, 10, normalise=True)
+        out_mag = get_magnitude(f, Out, Fs, 10, normalise=True)
+        db_out = db(out_mag, in_mag)  # presumably the out/in level in dB - confirm in audio_generation
+        suppression_max = int(criteria['near_end_max_suppression'])
+        if db_out < suppression_max:  # fail: near-end tone is below the allowed level
+            print("Check failed! Near-end Frequency: %d, db: %f (< %ddB)"\
+                  % (f, db_out, suppression_max))
+            success = False
+        if debug:
+            print("freq: %d, in: %f, out: %f, db: %f"\
+                  % (f, in_mag, out_mag, db_out))
+    # Check magnitude of suppressed frequencies
+    band_min = int(criteria['suppression_band_min'])
+    band_max = int(criteria['suppression_band_max'])
+    max_suppressed_magnitude, freq = get_suppressed_magnitude(frequencies, Out,
+                                                              Fs, 10,
+                                                              band_min=band_min,
+                                                              band_max=band_max)
+    suppression_min = int(criteria['far_end_min_suppression'])
+    db_suppressed = db(max_suppressed_magnitude, np.max(In))  # relative to the input's largest bin
+    if db_suppressed > suppression_min:  # fail: residual is above the allowed level
+        print("Check failed! Suppression at freq %d is %fdB (> %ddB)"\
+                % (freq, db_suppressed, suppression_min))
+        success = False
+    else:
+        print("Suppression check passed.")
+    if success:
+        print("Check passed!")
+    return success
diff --git a/test/lib_aec/test_aec_spec/all_tests.txt b/test/lib_aec/test_aec_spec/all_tests.txt
new file mode 100644
index 000000000..3fdbccfb5
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/all_tests.txt
@@ -0,0 +1,82 @@
+bandlimited,random,bandlimited,4
+excessive,excessive,noise,8
+impulseresponse,decaying,noise,2
+impulseresponse,excessive,noise,2
+impulseresponse,long,noise,2
+impulseresponse,short,noise,2
+multitone,decaying,continuous,2
+multitone,decaying,continuous,4
+multitone,decaying,continuous,8
+multitone,decaying,discrete,2
+multitone,decaying,discrete,4
+multitone,decaying,discrete,8
+multitone,decaying,noise,2
+multitone,decaying,noise,4
+multitone,decaying,noise,8
+multitone,decaying,single,2
+multitone,decaying,single,4
+multitone,decaying,single,8
+multitone,long,continuous,2
+multitone,long,continuous,4
+multitone,long,continuous,8
+multitone,long,discrete,2
+multitone,long,discrete,4
+multitone,long,discrete,8
+multitone,long,noise,2
+multitone,long,noise,4
+multitone,long,noise,8
+multitone,long,single,2
+multitone,long,single,4
+multitone,long,single,8
+multitone,short,continuous,2
+multitone,short,continuous,4
+multitone,short,continuous,8
+multitone,short,discrete,2
+multitone,short,discrete,4
+multitone,short,discrete,8
+multitone,short,noise,2
+multitone,short,noise,4
+multitone,short,noise,8
+multitone,short,single,2
+multitone,short,single,4
+multitone,short,single,8
+simple,decaying,continuous,2
+simple,decaying,continuous,4
+simple,decaying,continuous,8
+simple,decaying,discrete,2
+simple,decaying,discrete,4
+simple,decaying,discrete,8
+simple,decaying,noise,2
+simple,decaying,noise,4
+simple,decaying,noise,8
+simple,decaying,single,2
+simple,decaying,single,4
+simple,decaying,single,8
+simple,long,continuous,2
+simple,long,continuous,4
+simple,long,continuous,8
+simple,long,discrete,2
+simple,long,discrete,4
+simple,long,discrete,8
+simple,long,noise,2
+simple,long,noise,4
+simple,long,noise,8
+simple,long,single,2
+simple,long,single,4
+simple,long,single,8
+simple,short,continuous,2
+simple,short,continuous,4
+simple,short,continuous,8
+simple,short,discrete,2
+simple,short,discrete,4
+simple,short,discrete,8
+simple,short,noise,2
+simple,short,noise,4
+simple,short,noise,8
+simple,short,single,2
+simple,short,single,4
+simple,short,single,8
+smallimpulseresponse,decaying,noise,2
+smallimpulseresponse,excessive,noise,2
+smallimpulseresponse,long,noise,2
+smallimpulseresponse,short,noise,2
diff --git a/test/lib_aec/test_aec_spec/build.sh b/test/lib_aec/test_aec_spec/build.sh
new file mode 100755
index 000000000..085995722
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/build.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Include test utils
+. utils.sh
+
+build() {  # Build the AEC test app with waf; returns waf's exit status.
+    pushd "$(read_config aec_xc_dir)" || return 1  # never run waf in the wrong directory
+    threads=$(read_config threads)
+    x_channel_count=$(read_config x_channel_count)
+    y_channel_count=$(read_config y_channel_count)
+    phase_count=$(read_config phases)
+    sf_phase_count=$(read_config shadow_filter_phases)
+    echo "$y_channel_count"  # log part of the configuration being built
+    echo "$phase_count"
+    echo "$sf_phase_count"
+    waf configure clean build --aec-config="$threads $x_channel_count $y_channel_count $phase_count $sf_phase_count"
+    build_error=$?  # captured before popd overwrites $?
+    popd
+    return "$build_error"
+}
+
+#setup_env
+build
+build_error=$?
+exit $build_error
diff --git a/test/lib_aec/test_aec_spec/config.xscope b/test/lib_aec/test_aec_spec/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/test/lib_aec/test_aec_spec/criteria.cfg b/test/lib_aec/test_aec_spec/criteria.cfg
new file mode 100644
index 000000000..31369576b
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/criteria.cfg
@@ -0,0 +1,18 @@
+[DEFAULT]
+near_end_max_suppression = -5
+far_end_min_suppression = -20
+suppression_band_min = 0
+suppression_band_max = 8000
+
+[*,*,noise,*]
+suppression_band_min = 0
+suppression_band_max = 7500
+
+[*,*,continuous,*]
+near_end_max_suppression = -6
+
+# Example entry:
+#[simple,short,noise,2]
+#near_end_max_suppression = -2
+
+
diff --git a/test/lib_aec/test_aec_spec/excluded_tests.txt b/test/lib_aec/test_aec_spec/excluded_tests.txt
new file mode 100644
index 000000000..9f4d30b17
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/excluded_tests.txt
@@ -0,0 +1,52 @@
+excessive,excessive,noise,8
+multitone,decaying,discrete,2
+multitone,decaying,discrete,4
+multitone,decaying,discrete,8
+multitone,decaying,single,2
+multitone,decaying,single,4
+multitone,decaying,single,8
+multitone,long,discrete,2
+multitone,long,discrete,4
+multitone,long,discrete,8
+multitone,long,single,2
+multitone,long,single,4
+multitone,short,discrete,2
+multitone,short,discrete,4
+multitone,short,discrete,8
+multitone,short,single,2
+multitone,short,single,4
+simple,decaying,discrete,2
+simple,decaying,discrete,4
+simple,decaying,discrete,8
+simple,decaying,single,2
+simple,decaying,single,4
+simple,decaying,single,8
+simple,long,discrete,2
+simple,long,discrete,4
+simple,long,discrete,8
+simple,long,single,2
+simple,long,single,4
+simple,short,discrete,2
+simple,short,discrete,4
+simple,short,discrete,8
+simple,short,noise,4
+simple,short,single,2
+simple,short,single,4
+multitone,decaying,noise,4
+multitone,decaying,noise,8
+multitone,long,noise,8
+multitone,short,noise,8
+multitone,short,noise,2
+multitone,long,noise,2
+multitone,long,noise,4
+multitone,long,noise,8
+multitone,short,noise,4
+multitone,short,noise,8
+simple,short,noise,8
+simple,decaying,noise,8
+simple,long,noise,8
+simple,long,noise,2
+simple,short,noise,2
+simple,short,noise,8
+simple,long,noise,8
+simple,long,noise,4
diff --git a/test/lib_aec/test_aec_spec/excluded_tests_quick.txt b/test/lib_aec/test_aec_spec/excluded_tests_quick.txt
new file mode 100644
index 000000000..e8ece1e51
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/excluded_tests_quick.txt
@@ -0,0 +1,73 @@
+excessive,excessive,noise,8
+multitone,decaying,continuous,2
+multitone,decaying,continuous,4
+multitone,decaying,continuous,8
+multitone,decaying,discrete,2
+multitone,decaying,discrete,4
+multitone,decaying,discrete,8
+multitone,decaying,noise,2
+multitone,decaying,noise,8
+multitone,decaying,single,2
+multitone,decaying,single,4
+multitone,decaying,single,8
+multitone,long,continuous,2
+multitone,long,continuous,4
+multitone,long,continuous,8
+multitone,long,discrete,2
+multitone,long,discrete,4
+multitone,long,discrete,8
+multitone,long,noise,2
+multitone,long,noise,4
+multitone,long,noise,8
+multitone,long,single,2
+multitone,long,single,4
+multitone,long,single,8
+multitone,short,continuous,2
+multitone,short,continuous,4
+multitone,short,continuous,8
+multitone,short,discrete,2
+multitone,short,discrete,4
+multitone,short,discrete,8
+multitone,short,noise,2
+multitone,short,noise,4
+multitone,short,noise,8
+multitone,short,single,2
+multitone,short,single,4
+multitone,short,single,8
+simple,decaying,continuous,2
+simple,decaying,continuous,4
+simple,decaying,continuous,8
+simple,decaying,discrete,2
+simple,decaying,discrete,4
+simple,decaying,discrete,8
+simple,decaying,noise,2
+simple,decaying,noise,4
+simple,decaying,noise,8
+simple,decaying,single,2
+simple,decaying,single,4
+simple,decaying,single,8
+simple,long,continuous,2
+simple,long,continuous,4
+simple,long,continuous,8
+simple,long,discrete,2
+simple,long,discrete,4
+simple,long,discrete,8
+simple,long,noise,2
+simple,long,noise,4
+simple,long,noise,8
+simple,long,single,2
+simple,long,single,4
+simple,long,single,8
+simple,short,continuous,2
+simple,short,continuous,4
+simple,short,continuous,8
+simple,short,discrete,2
+simple,short,discrete,4
+simple,short,discrete,8
+simple,short,noise,2
+simple,short,noise,4
+simple,short,noise,8
+simple,short,single,2
+simple,short,single,4
+simple,short,single,8
+multitone,decaying,noise,4
diff --git a/test/lib_aec/test_aec_spec/generate_audio.py b/test/lib_aec/test_aec_spec/generate_audio.py
new file mode 100644
index 000000000..8e6b67a2d
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/generate_audio.py
@@ -0,0 +1,258 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import str
+import configparser
+import argparse
+import numpy as np
+from scipy.signal import convolve
+
+from aec_test_utils import read_config, get_excluded_tests
+from audio_generation import get_ref, get_noise, get_near_end,\
+                             get_h, DEFAULT_SAMPLE_RATE, get_headroom_divisor
+from audio_generation import write_audio as write_audio_ag
+from timeit import default_timer as timer
+
+excluded_tests = get_excluded_tests()
+
+
+def is_excluded(testname, headroom, echo_type, ref_type):  # Is this variant in excluded_tests?
+    test_id = ','.join([testname, echo_type, ref_type, str(headroom)])  # headroom goes last in the id
+    return test_id in excluded_tests
+
+
+def write_audio(*args, **kwargs):  # Timed pass-through to audio_generation.write_audio
+    start_time = timer()
+    write_audio_ag(*args, **kwargs)  # arguments are forwarded unchanged
+    end_time = timer()
+    print("Time to write audio: %f" % (end_time - start_time))
+
+
+def generate_simple_tests(audio_dir='spec_audio', settings=None):
+    """Generate the wav pairs of the 'simple' test family into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for 'simple' is produced."""
+    cfg = read_config('simple')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded('simple', headroom,
+                                                     echo_type, ref_type):
+                    continue
+                started = timer()
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-150)
+                tones = get_near_end(settle_time, frequencies=cfg['frequencies'])
+                # The near-end tones are mixed into the tail of the noise
+                # floor only; the head of the signal carries noise alone.
+                near_end = np.concatenate((noise_floor[:-len(tones)],
+                                           noise_floor[-len(tones):] + tones))
+                # Echo: reference filtered by get_h(echo_type), cut to length.
+                ref_room = convolve(ref, get_h(echo_type))[:len(ref)]
+                write_audio('simple', echo_type, ref_type, headroom,
+                            ref_room + near_end, ref, audio_dir=audio_dir)
+                print("Total time to generate: %f" % (timer() - started))
+
+
+
+def generate_multitone_tests(audio_dir='spec_audio', settings=None):
+    """Generate the wav pairs of the 'multitone' test family into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for 'multitone' is produced."""
+    cfg = read_config('multitone')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded('multitone', headroom,
+                                                     echo_type, ref_type):
+                    continue
+                started = timer()
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-150)
+                tones = get_near_end(settle_time, frequencies=cfg['frequencies'])
+                # The near-end tones are mixed into the tail of the noise
+                # floor only; the head of the signal carries noise alone.
+                near_end = np.concatenate((noise_floor[:-len(tones)],
+                                           noise_floor[-len(tones):] + tones))
+                # Echo: reference filtered by get_h(echo_type), cut to length.
+                ref_room = convolve(ref, get_h(echo_type))[:len(ref)]
+                write_audio('multitone', echo_type, ref_type, headroom,
+                            ref_room + near_end, ref, audio_dir=audio_dir)
+                print("Total time to generate: %f" % (timer() - started))
+
+
+def generate_impulseresponse_tests(audio_dir='spec_audio', settings=None):
+    """Generate the 'impulseresponse' family wav pairs into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for the family is produced."""
+    cfg = read_config('impulseresponse')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    settle_samples = settle_time * DEFAULT_SAMPLE_RATE
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded(
+                        'impulseresponse', headroom, echo_type, ref_type):
+                    continue
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-90)
+                # After the settle period the reference is replaced by noise.
+                ref[settle_samples:] = noise_floor[:-settle_samples]
+                transfer_function = get_h(echo_type, normalise=False)
+                ref_room = convolve(ref, transfer_function)[:len(ref)]
+                AudioIn = ref_room + noise_floor
+                divisor = get_headroom_divisor(AudioIn, headroom)
+                AudioRef = ref / divisor
+                # One impulse, DEFAULT_SAMPLE_RATE samples after the settle period.
+                AudioRef[settle_samples + DEFAULT_SAMPLE_RATE] = -0.99
+                write_audio('impulseresponse', echo_type, ref_type, headroom,
+                            AudioIn / divisor, AudioRef, audio_dir=audio_dir,
+                            adjust_headroom=False)
+
+
+def generate_smallimpulseresponse_tests(audio_dir='spec_audio', settings=None):
+    """Generate the 'smallimpulseresponse' family wav pairs into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for the family is produced."""
+    cfg = read_config('smallimpulseresponse')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    settle_samples = settle_time * DEFAULT_SAMPLE_RATE
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded(
+                        'smallimpulseresponse', headroom, echo_type, ref_type):
+                    continue
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-90)
+                # After the settle period the reference is replaced by noise.
+                ref[settle_samples:] = noise_floor[:-settle_samples]
+                transfer_function = get_h(echo_type, normalise=False)
+                ref_room = convolve(ref, transfer_function)[:len(ref)]
+                AudioIn = ref_room + noise_floor
+                divisor = get_headroom_divisor(AudioIn, headroom)
+                AudioRef = ref / divisor
+                # One impulse of -0.99 / 2**10, placed as in 'impulseresponse'.
+                AudioRef[settle_samples + DEFAULT_SAMPLE_RATE] = -0.99 / (1<<10)
+                write_audio('smallimpulseresponse', echo_type, ref_type, headroom,
+                            AudioIn / divisor, AudioRef, audio_dir=audio_dir,
+                            adjust_headroom=False)
+
+
+def generate_excessive_tests(audio_dir='spec_audio', settings=None):
+    """Generate the wav pairs of the 'excessive' test family into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for 'excessive' is produced."""
+    cfg = read_config('excessive')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded('excessive', headroom,
+                                                     echo_type, ref_type):
+                    continue
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-150)
+                tones = get_near_end(settle_time, frequencies=cfg['frequencies'])
+                # Near-end tones are added to the tail of the noise floor only.
+                near_end = np.concatenate((noise_floor[:-len(tones)],
+                                           noise_floor[-len(tones):] + tones))
+                ref_room = convolve(ref, get_h(echo_type))[:len(ref)]
+                write_audio('excessive', echo_type, ref_type, headroom,
+                            ref_room + near_end, ref, audio_dir=audio_dir)
+
+
+def generate_bandlimited_tests(audio_dir='spec_audio', settings=None):
+    """Generate the wav pairs of the 'bandlimited' test family into audio_dir.
+
+    settings, if given, is [echo, reference, headroom] and pins one variant;
+    otherwise every combination configured for 'bandlimited' is produced."""
+    cfg = read_config('bandlimited')
+    settle_time = cfg['settle_time']
+    headrooms, echos, references = cfg['headroom'], cfg['echo'], cfg['reference']
+    if settings:
+        echos, references = [settings[0]], [settings[1]]
+        headrooms = [int(settings[2])]
+    total_time = 2*settle_time
+    honour_exclusions = not cfg['ignore_exclusion']
+    for headroom in headrooms:
+        for echo_type in echos:
+            for ref_type in references:
+                if honour_exclusions and is_excluded('bandlimited', headroom,
+                                                     echo_type, ref_type):
+                    continue
+                ref = get_ref(total_time, ref_type)
+                noise_floor = get_noise(samples=len(ref), db=-150)
+                tones = get_near_end(settle_time, frequencies=cfg['frequencies'])
+                # Near-end tones are added to the tail of the noise floor only.
+                near_end = np.concatenate((noise_floor[:-len(tones)],
+                                           noise_floor[-len(tones):] + tones))
+                ref_room = convolve(ref, get_h(echo_type))[:len(ref)]
+                write_audio('bandlimited', echo_type, ref_type, headroom,
+                            ref_room + near_end, ref, audio_dir=audio_dir)
+
+
+def main():
+    """Generate test audio for all families, or one --sub-test variant."""
+    global excluded_tests
+    config_parser = configparser.ConfigParser()
+    config_parser.read("parameters.cfg")
+    in_dir = config_parser.get("Folders", "in_dir")
+    parser = argparse.ArgumentParser(description='Generate AEC test audio files.')
+    parser.add_argument('--audio-dir', type=str, help='Directory for wav outputs',
+                        default=in_dir)
+    parser.add_argument('--sub-test', type=str, default=None,
+                        help="""Specify a specific test to generate e.g.
+                                --sub-test simple,short,discrete,2""")
+    args = parser.parse_args()
+    if args.sub_test:
+        excluded_tests = []  # an explicitly requested test is never skipped
+        fields = args.sub_test.split(',')
+        gen_function = globals().get("generate_%s_tests" % fields[0])
+        if len(fields) != 4 or gen_function is None:
+            parser.error("--sub-test must be <test>,<echo>,<ref>,<headroom>"
+                         " with a known test name, got %r" % args.sub_test)
+        gen_function(audio_dir=args.audio_dir, settings=fields[1:])
+        return
+    for family in ('simple', 'multitone', 'impulseresponse',
+                   'smallimpulseresponse', 'excessive', 'bandlimited'):
+        globals()["generate_%s_tests" % family](audio_dir=args.audio_dir)
+
+if __name__ == "__main__":
+    main()
diff --git a/test/lib_aec/test_aec_spec/make_dirs.sh b/test/lib_aec/test_aec_spec/make_dirs.sh
new file mode 100755
index 000000000..b13899044
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/make_dirs.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Include test utils
+. utils.sh
+
+in_dir="$(read_config in_dir)"
+out_dir="$(read_config out_dir)"
+results_dir="$(read_config results_dir)"
+# ${var:?} aborts on an empty setting, so "clean" can never expand to /...
+test_dirs=("${in_dir:?}" "${out_dir:?}" "${results_dir:?}"
+           "$results_dir/plots/fail" "$results_dir/logs/fail")
+for dir in "${test_dirs[@]}"; do
+    echo "$dir"
+    mkdir -p -- "$dir"
+    if grep -q "clean" <<< "$1"; then
+        rm -rf -- "${dir:?}"/*
+    fi
+done
+
diff --git a/test/lib_aec/test_aec_spec/parameters.cfg b/test/lib_aec/test_aec_spec/parameters.cfg
new file mode 100644
index 000000000..57eea1019
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/parameters.cfg
@@ -0,0 +1,16 @@
+[Binaries]
+aec_xc_dir = ../../../build/test/lib_aec/test_aec_spec
+
+[Config]
+x_channel_count = 1
+y_channel_count = 1
+phases = 20
+shadow_filter_phases = 10
+
+[XCBuild]
+threads = 2
+
+[Folders]
+in_dir = audio_in
+out_dir = audio_out
+results_dir = results
diff --git a/test/lib_aec/test_aec_spec/parse_results.py b/test/lib_aec/test_aec_spec/parse_results.py
new file mode 100644
index 000000000..269c0921c
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/parse_results.py
@@ -0,0 +1,139 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import configparser
+import subprocess
+import os.path
+import numpy as np
+from aec_test_utils import get_h_hat_impulse_response
+from plot_test import plot_test, plot_impulseresponse_test
+import shutil
+
+import xml.etree.ElementTree as ET
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")  # relative path: resolved against the working directory
+results_dir = parser.get("Folders", "results_dir")
+plot_dir_pass = os.path.join(results_dir, "plots")  # plots of passing tests
+plot_dir_fail = os.path.join(results_dir, "plots/fail")  # plots of failing tests
+log_dir_pass = os.path.join(results_dir, "logs")  # logs of passing tests
+log_dir_fail = os.path.join(results_dir, "logs/fail")  # logs of failing tests
+# H_hat dumps are looked up in out_dir (see parse_impulseresponse_tests)
+out_dir = parser.get("Folders", "out_dir")
+
+def get_tests(filename):
+    """Group the executed test cases of a JUnit XML report by test type.
+
+    Skipped cases are ignored. Each returned test is a dict of the case's
+    <property> name/value pairs plus 'failed' (bool) and 'log' (the
+    <system-out> text, "" when missing or empty so it can always be written).
+
+    Returns a dict mapping each 'test_type' property value to its tests.
+    """
+    tree = ET.parse(filename)
+    tests = {}
+    for result in tree.findall(".//testcase"):
+        if result.find('skipped') is not None:
+            # Test not run in test_check_output
+            continue
+        test = {prop.attrib['name']: prop.attrib['value']
+                for prop in result.find("properties").findall('property')}
+        test['failed'] = result.find('failure') is not None
+        # findtext() yields "" rather than None for an empty <system-out>.
+        test['log'] = result.findtext('system-out', default="")
+        tests.setdefault(test['test_type'], []).append(test)
+    return tests
+
+
+def parse_simple_tests(tests):
+    """Plot each test and save its log; return the ids of the failed ones.
+
+    Outputs of failed tests go to the 'fail' plot/log directories."""
+    failed_tests = []
+    for test in tests:
+        test_id = test['id']
+        if test['failed']:
+            failed_tests.append(test_id)
+            plot_dir, log_dir = plot_dir_fail, log_dir_fail
+        else:
+            plot_dir, log_dir = plot_dir_pass, log_dir_pass
+
+        plot_test(os.path.join(plot_dir, "%s.png" % (test_id)), test_id,
+                  test['in_filename'], test['ref_filename'],
+                  test['out_filename'], int(test['settle_time']))
+        log_filename = os.path.join(log_dir, "%s.log" % (test_id))
+        print("Log Filename: %s" % log_filename)
+        with open(log_filename, 'w') as log_file:
+            log_file.write(test['log'])
+    return failed_tests
+
+
+def get_h_hat(filename):
+    """Gets H_hat from XC H_hat dump by executing it as a Python script.
+
+    WARNING: the dump is executed as Python code; only use trusted files.
+    It is run afresh on every call, so each file yields its own H_hat (the
+    old `from temp import H_hat` was cached after the first import)."""
+    import runpy  # local import keeps this fix self-contained
+    H_hat = runpy.run_path(filename).get("H_hat")
+    assert H_hat is not None
+    return H_hat
+
+
+def parse_impulseresponse_tests(tests):
+    """Plot the estimated impulse response of each test and save its log.
+
+    For every test the H_hat dump '<id>-h_hat.py' in out_dir is loaded and
+    converted to an impulse response for channel pair (0, 0). Outputs of
+    failed tests go to the 'fail' directories. Returns the failed test ids."""
+    failed_tests = []
+    for test in tests:
+        test_id = test['id']
+        if test['failed']:
+            failed_tests.append(test_id)
+            plot_dir, log_dir = plot_dir_fail, log_dir_fail
+        else:
+            plot_dir, log_dir = plot_dir_pass, log_dir_pass
+
+        # H_hat dump for this test: '<id>-h_hat.py' inside out_dir.
+        h_hat_filename = os.path.join(out_dir, test_id + "-h_hat.py")
+        h_hat_ir = get_h_hat_impulse_response(get_h_hat(h_hat_filename), 0, 0)
+        plot_impulseresponse_test(
+            os.path.join(plot_dir, "%s.png" % (test_id)), test_id,
+            test['echo'], h_hat_ir, test['headroom'], test['out_filename'],
+            int(test['settle_time']))
+
+        log_filename = os.path.join(log_dir, "%s.log" % (test_id))
+        print("Log Filename: %s" % log_filename)
+        with open(log_filename, 'w') as log_file:
+            log_file.write(test['log'])
+    return failed_tests
+
+
+if __name__ == "__main__":
+    # Sort every result into the pass/fail plot and log folders, then write
+    # the lists of failed and of all executed test ids into results_dir.
+    tests = get_tests("results_check.xml")
+    failed_tests = []
+    total_tests = []
+    test_types = ['simple', 'multitone', 'impulseresponse',
+                  'smallimpulseresponse', 'excessive',
+                  'bandlimited']
+    for unknown in sorted(set(tests) - set(test_types)):
+        print("Ignoring results of unknown test type: %s" % unknown)
+    # Visit only the known test types that actually have results. The former
+    # `test_types and list(tests.keys())` evaluated to just the keys list.
+    for test_type in [t for t in test_types if t in tests]:
+        total_tests += [test['id'] for test in tests[test_type]]
+        if test_type in ['impulseresponse', 'smallimpulseresponse']:
+            failures = parse_impulseresponse_tests(tests[test_type])
+        else:
+            failures = parse_simple_tests(tests[test_type])
+        failed_tests += failures
+
+    failed_tests_filename = os.path.join(results_dir, "failed_tests.txt")
+    with open(failed_tests_filename, 'w') as f:
+        f.writelines(line + '\n' for line in sorted(failed_tests))
+
+    total_tests_filename = os.path.join(results_dir, "all_tests.txt")
+    with open(total_tests_filename, 'w') as f:
+        f.writelines(line + '\n' for line in sorted(set(total_tests)))
diff --git a/test/lib_aec/test_aec_spec/pipeline.sh b/test/lib_aec/test_aec_spec/pipeline.sh
new file mode 100755
index 000000000..17223c651
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/pipeline.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Make directories
+echo "Creating directories..."
+./make_dirs.sh clean
+# Build
+echo "Building..."
+./build.sh
+# Generate Audio
+echo "Generating Audio..."
+if [ -z "$1" ]; then  # quoted so an empty or spaced argument is tested safely
+    python generate_audio.py
+else
+    python generate_audio.py --sub-test "$1"
+fi
+# Process Audio
+echo "Processing Audio..."
+pytest -d -n=auto --junitxml=results_aec.xml test_process_audio.py
+# Check Audio
+echo "Checking AEC Output..."
+pytest -d -n=auto --junitxml=results_check.xml test_check_output.py
+# Parse Results
+echo "Parsing Results..."
+python parse_results.py
+# Evaluating Results
+echo "Evaluating Results..."
+pytest -d -n=auto --junitxml=results_final.xml test_evaluate_results.py
+echo "Done"
diff --git a/test/lib_aec/test_aec_spec/plot_test.py b/test/lib_aec/test_aec_spec/plot_test.py
new file mode 100644
index 000000000..c77b38767
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/plot_test.py
@@ -0,0 +1,122 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from aec_test_utils import read_wav, get_h_hat_impulse_response
+from audio_generation import get_filenames, get_h
+
+
+def plot(test_id, audio_in, audio_ref, audio_out, start_time, end_time,
+         output_filename, sample_rate=16000):
+    start = start_time * sample_rate
+    end = end_time * sample_rate
+    plt.figure(figsize=(12,10))
+
+    plt.subplot(221)
+    plt.title("Spectrogram of AudioOut")
+    plt.specgram(audio_out, Fs=sample_rate, scale='dB')
+    plt.ylabel("Frequency (Hz)")
+    plt.xlabel("Time (s)")
+
+    plt.subplot(222)
+    plt.title("FFT of AudioOut[%ds:%ds]" % (start_time, end_time))
+    plt.magnitude_spectrum(audio_out[start:end], Fs=sample_rate,
+                           scale='dB')
+
+    plt.subplot(223)
+    plt.title("FFT of Reference[%ds:%ds]" % (start_time, end_time))
+    plt.magnitude_spectrum(audio_ref[start:end], Fs=sample_rate,
+                           scale='dB')
+
+    plt.subplot(224)
+    plt.title("FFT of AudioIn[%ds:%ds]" % (start_time, end_time))
+    plt.magnitude_spectrum(audio_in[start:end], Fs=sample_rate,
+                           scale='dB')
+
+    plt.suptitle("%s (Test, Echo, Reference, Headroom Bits)"\
+                 % test_id)
+    plt.tight_layout()
+    plt.subplots_adjust(top=0.9)
+    plt.savefig(output_filename)
+    plt.close()
+
+
+def plot_impulseresponse(test_id, audio_out, echo_type, h_hat_ir,
+                         headroom, start_time, output_filename,
+                         sample_rate=16000):
+    """Save a three panel figure: AEC output, h_hat_ir and get_h(echo_type).
+
+    A 0.3 s window starting at start_time (seconds) is drawn on a ms axis.
+    headroom is accepted but not used.
+    """
+    # Cast both bounds; a float start_time would otherwise break the slices
+    start = int(start_time * sample_rate)
+    end = int((start_time + 0.3) * sample_rate)
+    N = end-start
+    x = np.linspace(0, 1000 * N / sample_rate, N)
+    plt.figure(figsize=(12,8))
+    plt.suptitle("%s (Test, Echo, Reference, Headroom Bits)" % test_id)
+
+    def panel(position, title, samples):
+        plt.subplot(position)
+        plt.title(title)
+        plt.ylabel("Amplitude")
+        plt.xlabel("ms")
+        plt.plot(x, samples)
+
+    def fit(signal):
+        # Zero-pad or truncate signal to exactly N samples
+        return np.pad(signal, (0, max(0, N-len(signal))), 'constant')[:N]
+
+    panel(131, "AEC Output", audio_out[start:end])
+    ylim = np.max(np.abs(audio_out[start+50:end])) * 1.1
+    plt.ylim(-ylim, ylim)
+    panel(132, "h_hat internal", fit(h_hat_ir))
+    panel(133, "h_hat external", fit(get_h(echo_type, normalise=False)))
+    plt.tight_layout()
+    plt.subplots_adjust(top=0.9)
+    plt.savefig(output_filename)
+    plt.close()
+
+
+def plot_test(plot_filename, test_id, in_filename, ref_filename, out_filename,
+              settle_time):
+    """Read one test's wav files and plot them from settle_time to one later."""
+    print("Plot Filename: %s" % plot_filename)
+    audio_in = read_wav(in_filename)
+    audio_ref = read_wav(ref_filename)
+    # Only the first output channel is plotted
+    first_out_channel = read_wav(out_filename)[:,0]
+    # TODO: Read start/end times from config
+    plot(test_id, audio_in, audio_ref, first_out_channel, settle_time,
+         settle_time + 1, plot_filename)
+
+
+def plot_impulseresponse_test(plot_filename, test_id, echo_type, h_hat_ir,
+                              headroom, out_filename, settle_time):
+    """Plot the impulse response figure starting at settle_time + 1."""
+    print("Plot Filename: %s" % plot_filename)
+    first_channel = read_wav(out_filename)[:,0]
+    # TODO: Read start/end times from config
+    plot_impulseresponse(test_id, first_channel, echo_type, h_hat_ir,
+                         int(headroom), settle_time + 1, plot_filename)
+
+
+def plot_h_hat(h_hat, y_channel, x_channel):
+    """Plots the impulse response of h_hat.
+
+    h_hat is an array internal to the aec with a shape as follows:
+    (y_channel_count, x_channel_count, max_phase_count, f_bin_count)
+
+    Args:
+        h_hat: h_hat array
+        y_channel: y_channel to plot
+        x_channel: x_channel to plot
+
+    Returns:
+        None
+    """
+    h_hat_ir = get_h_hat_impulse_response(h_hat, y_channel, x_channel)
+    plt.plot(h_hat_ir)
+    plt.show()
diff --git a/test/lib_aec/test_aec_spec/test_audio.sh b/test/lib_aec/test_aec_spec/test_audio.sh
new file mode 100755
index 000000000..cb74a1e6f
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/test_audio.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+tests="simple multitone"
+headroom="2 4 8"
+echos="short long decaying"
+ref_type="discrete continuous single noise"
+
+audio_dir="spec_audio"
+out_dir="aec_out"
+xc_dir="../test_wav_aec"
+
+setup_env() {
+    pushd ../../../infr_scripts_pl/Build/ || return 1  # never source from the wrong dir
+    source SetupEnv
+    popd
+}
+
+build() {
+    pushd "$xc_dir" || return 1  # never run waf outside the app directory
+    waf configure
+    waf build --aec-config='3 1 1 16'
+    popd
+}
+
+make_dirs() {
+    mkdir -p $audio_dir
+    mkdir -p $out_dir
+}
+
+generate_audio() {
+    python testgen.py $audio_dir
+}
+
+test_audio() {
+    # Locals stop the path below clobbering the caller's loop variable "ref"
+    local in="$audio_dir/$1-AudioIn.wav"
+    local ref="$audio_dir/$1-AudioRef.wav"
+    local out="$out_dir/$1-Error.wav"
+    echo "AudioIn: $in"
+    echo "AudioRef: $ref"
+    echo "AudioOut: $out"
+    axe --args "$xc_dir/bin/test_wav_aec.xe" "$ref" "$in" "$out"
+    echo "Saved: $out"
+}
+
+setup_env
+build
+make_dirs
+echo "Generating audio..."
+generate_audio
+
+echo "Running tests..."
+for t in $tests; do
+    for h in $headroom; do
+        for e in $echos; do
+            for ref in $ref_type; do
+                echo $t hr$h $e $ref
+                test_audio "$t-$e-$ref-hr$h"
+            done
+        done
+    done
+done
+echo "Done. Output audio in $out_dir"
diff --git a/test/lib_aec/test_aec_spec/test_check_output.py b/test/lib_aec/test_aec_spec/test_check_output.py
new file mode 100644
index 000000000..359b1e948
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/test_check_output.py
@@ -0,0 +1,119 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import configparser
+import pytest
+from aec_test_utils import read_wav, check_aec_output, get_test_instances,\
+                           get_criteria, read_config
+
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+
+in_dir = parser.get("Folders", "in_dir")
+out_dir = parser.get("Folders", "out_dir")
+
+
+@pytest.fixture
+def test_type(request):
+    """Test type taken from the node name, e.g. test_simple[...] -> simple."""
+    node_name = request.node.name
+    return node_name[len("test_"):node_name.index('[')]
+
+
+@pytest.fixture
+def test_config(test_type):
+    return read_config(test_type)
+
+
+@pytest.mark.parametrize("test", get_test_instances('simple', in_dir, out_dir))
+def test_simple(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert check_aec_output(audio_in, audio_ref, audio_out,
+                            test_config['start_fft'], test_config['end_fft'],
+                            criteria,
+                            frequencies=test_config['frequencies'])
+
+
+@pytest.mark.parametrize("test", get_test_instances('multitone', in_dir, out_dir))
+def test_multitone(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert check_aec_output(audio_in, audio_ref, audio_out,
+                            test_config['start_fft'], test_config['end_fft'],
+                            criteria,
+                            frequencies=test_config['frequencies'])
+
+
+@pytest.mark.parametrize("test", get_test_instances('excessive', in_dir, out_dir))
+def test_excessive(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert not check_aec_output(audio_in, audio_ref, audio_out,
+                              test_config['start_fft'], test_config['end_fft'],
+                              criteria,
+                              frequencies=test_config['frequencies'])
+
+
+@pytest.mark.parametrize("test", get_test_instances('impulseresponse', in_dir, out_dir))
+def test_impulseresponse(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert True # TODO
+    #assert not check_aec_output(audio_in, audio_ref, audio_out,
+    #                          test_config['start_fft'], test_config['end_fft'],
+    #                          criteria,
+    #                          frequencies=test_config['frequencies'])
+
+
+@pytest.mark.parametrize("test", get_test_instances('bandlimited', in_dir, out_dir))
+def test_bandlimited(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert check_aec_output(audio_in, audio_ref, audio_out,
+                            test_config['start_fft'], test_config['end_fft'],
+                            criteria,
+                            frequencies=test_config['frequencies'])
+
+
+@pytest.mark.parametrize("test", get_test_instances('smallimpulseresponse', in_dir, out_dir))
+def test_smallimpulseresponse(test, test_config, record_property):
+    audio_in = read_wav(test['in_filename'])
+    audio_ref = read_wav(test['ref_filename'])
+    audio_out = read_wav(test['out_filename'])[:, 0]
+    criteria = get_criteria(test['id'])
+
+    for key in list(test.keys()):
+        record_property(key, test[key])
+
+    assert True # TODO
diff --git a/test/lib_aec/test_aec_spec/test_config.cfg b/test/lib_aec/test_aec_spec/test_config.cfg
new file mode 100644
index 000000000..501766fc0
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/test_config.cfg
@@ -0,0 +1,40 @@
+[DEFAULT]
+ignore_exclusion = False
+settle_time = 15
+start_fft = 15
+end_fft = 16
+
+[simple]
+headroom = 2, 4, 8
+echo = short, long, decaying
+reference = discrete, continuous, single, noise
+frequencies = 1500
+
+[multitone]
+headroom = 2, 4, 8
+echo = short, long, decaying
+reference = discrete, continuous, single, noise
+frequencies = 750, 1500, 3000
+
+[impulseresponse]
+headroom = 2
+echo = short, long, decaying, excessive
+reference = noise
+
+[smallimpulseresponse]
+headroom = 2
+echo = short, long, decaying, excessive
+reference = noise
+
+[excessive]
+headroom = 8
+echo = excessive
+reference = noise
+frequencies = 3500
+
+[bandlimited]
+headroom = 4
+echo = random
+reference = bandlimited
+frequencies = 500, 1500, 3000
+
diff --git a/test/lib_aec/test_aec_spec/test_evaluate_results.py b/test/lib_aec/test_aec_spec/test_evaluate_results.py
new file mode 100644
index 000000000..fb76e2345
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/test_evaluate_results.py
@@ -0,0 +1,21 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import configparser
+import os.path
+import pytest
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+results_dir = parser.get("Folders", "results_dir")
+plot_dir = os.path.join(results_dir, "plots")
+log_dir = os.path.join(results_dir, "logs")
+
+with open(os.path.join(results_dir, "all_tests.txt"), 'r') as f:
+    all_tests = f.read().splitlines()
+with open(os.path.join(results_dir, "failed_tests.txt"), 'r') as f:
+    failed_tests = f.read().splitlines()
+
+# One case per test id (newlines stripped); passes unless listed as failed
+@pytest.mark.parametrize("test", all_tests)
+def test_evaluate_results(test):
+    assert test not in failed_tests
diff --git a/test/lib_aec/test_aec_spec/test_process_audio.py b/test/lib_aec/test_aec_spec/test_process_audio.py
new file mode 100644
index 000000000..481c0d06a
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/test_process_audio.py
@@ -0,0 +1,131 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import configparser
+import subprocess
+import os.path
+import pytest
+from aec_test_utils import get_test_instances, files_exist, read_config
+
+import os
+import tempfile
+import scipy.io.wavfile
+import numpy as np
+import shutil
+import tempfile
+import xscope_fileio
+import xtagctl
+import io
+from contextlib import redirect_stdout
+import re
+import glob
+
+parser = configparser.ConfigParser()
+parser.read("parameters.cfg")
+
+in_dir   = parser.get("Folders", "in_dir")
+out_dir = parser.get("Folders", "out_dir")
+
+y_channel_count = parser.get("Config", "y_channel_count")
+x_channel_count = parser.get("Config", "x_channel_count")
+phases = parser.get("Config", "phases")
+
+aec_xe = os.path.abspath(glob.glob(f"{parser.get('Binaries', 'aec_xc_dir')}/bin/*.xe")[0])
+
+
+dut_in_wav = "input.wav"
+dut_out_wav = "output.wav"
+runtime_args_file = "args.bin"
+dut_H_hat_file = "H_hat.bin"
+def run_aec_xc(audio_in, audio_ref, audio_out, adapt=-1, h_hat_dump=None):
+    """Run the AEC binary on one mic/ref wav pair and write a 2ch audio_out."""
+    rate, y_data = scipy.io.wavfile.read(audio_in)
+    rate, x_data = scipy.io.wavfile.read(audio_ref)
+    # Reshape each mono signal on its own so hstack always sees 2-D arrays
+    if y_data.ndim == 1:
+        y_data = np.atleast_2d(y_data).T
+    if x_data.ndim == 1:
+        x_data = np.atleast_2d(x_data).T
+    data = np.hstack((y_data, x_data)) #mic+ref
+    # Every file the run touches lives in a private scratch folder
+    tmp_folder = tempfile.mkdtemp()
+    prev_path = os.getcwd()
+    try:
+        scipy.io.wavfile.write(os.path.join(tmp_folder, dut_in_wav), rate, data)
+        os.chdir(tmp_folder)
+        with open(runtime_args_file, "wb") as ref_file:
+            ref_file.write(f"stop_adapting {adapt}".encode('utf-8'))
+        with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+            xscope_fileio.run_on_target(adapter_id, aec_xe)
+        os.chdir(prev_path)
+        #test_check_output expects a 2 channel output despite building AEC for 1 y channel, so convert dut output to 2ch
+        rate, data = scipy.io.wavfile.read(os.path.join(tmp_folder, dut_out_wav))
+        if data.ndim == 1:
+            data = np.atleast_2d(data).T
+        scipy.io.wavfile.write(audio_out, rate, np.hstack((data, data)))
+        if h_hat_dump is not None:
+            shutil.copy2(os.path.join(tmp_folder, dut_H_hat_file), h_hat_dump)
+    finally:  # restore the cwd and clean up even if the run failed
+        os.chdir(prev_path)
+        shutil.rmtree(tmp_folder, ignore_errors=True)
+
+
+@pytest.fixture
+def test_type(request):
+    """Test type from the node name, e.g. test_process_simple[...] -> simple."""
+    node_name = request.node.name
+    return node_name[len("test_process_"):node_name.index('[')]
+
+
+@pytest.fixture
+def test_config(test_type):
+    return read_config(test_type)
+
+
+@pytest.mark.parametrize('test', get_test_instances('simple', in_dir, out_dir))
+def test_process_simple(test):
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'])
+
+
+@pytest.mark.parametrize('test', get_test_instances('multitone', in_dir,
+                                                    out_dir))
+def test_process_multitone(test):
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'])
+
+
+@pytest.mark.parametrize('test', get_test_instances('excessive', in_dir,
+                                                    out_dir))
+def test_process_excessive(test):
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'])
+
+
+@pytest.mark.parametrize('test', get_test_instances('impulseresponse', in_dir,
+                                                    out_dir))
+def test_process_impulseresponse(test, test_config):
+    stop_adapt_frame = (test_config['settle_time'] * 16000) // 240
+    h_hat_xc = os.path.join(out_dir, test['id'] + "-h_hat.py")
+
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'], stop_adapt_frame,
+               h_hat_xc)
+
+
+@pytest.mark.parametrize('test', get_test_instances('smallimpulseresponse',
+                                                    in_dir, out_dir))
+def test_process_smallimpulseresponse(test, test_config):
+    stop_adapt_frame = (test_config['settle_time'] * 16000) // 240
+    h_hat_xc = os.path.join(out_dir, test['id'] + "-h_hat.py")
+
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'], stop_adapt_frame,
+               h_hat_xc)
+
+
+@pytest.mark.parametrize('test', get_test_instances('bandlimited', in_dir,
+                                                    out_dir))
+def test_process_bandlimited(test, test_config):
+    stop_adapt_frame = (test_config['settle_time'] * 16000) // 240
+    run_aec_xc(test['in_filename'], test['ref_filename'],
+               test['out_filename'], stop_adapt_frame)
diff --git a/test/lib_aec/test_aec_spec/utils.sh b/test/lib_aec/test_aec_spec/utils.sh
new file mode 100644
index 000000000..fb242a2d9
--- /dev/null
+++ b/test/lib_aec/test_aec_spec/utils.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+config_filename="$(pwd)/parameters.cfg"
+
+read_config() {  # print what follows the last "=" on the config line(s) containing word $1
+    line=$(grep "\b$1\b" "$config_filename")
+    echo $(echo $line | sed "s/.* *= *//g")
+}
+
+setup_env() {
+    pushd ../../../infr_scripts_pl/Build/ || return 1  # never source from the wrong dir
+    source SetupEnv
+    popd
+}
diff --git a/test/lib_aec/test_delay_estimator/CMakeLists.txt b/test/lib_aec/test_delay_estimator/CMakeLists.txt
new file mode 100644
index 000000000..d9b006d47
--- /dev/null
+++ b/test/lib_aec/test_delay_estimator/CMakeLists.txt
@@ -0,0 +1,99 @@
+## App name
+set( APP_NAME  test_delay_estimator )
+
+# Auto-generate task distribution scheme and top level config files
+
+if( NOT Python3_FOUND )  # bare name: an unset variable must not leave "if( NOT )"
+  message(FATAL_ERROR "Python3 not found for running generate_task_distribution_scheme.py")
+endif()
+
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen )
+set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+
+message(STATUS "${APP_NAME} aec build config:  ${TEST_DELAY_ESTIMATOR_BUILD_CONFIG}" )
+set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+
+unset(GEN_SCHEDULE_SCRIPT_ARGS) 
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${TEST_DELAY_ESTIMATOR_BUILD_CONFIG})
+
+file(MAKE_DIRECTORY ${AUTOGEN_DIR})
+
+add_custom_command( OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+DEPENDS ${GEN_SCHEDULE_SCRIPT}  # regenerate when the generator script changes
+COMMENT "Generating AEC task distribution and top level config" )
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+
+list( APPEND  DEP_LIBS        
+    lib_xs3_math
+    lib_aec 
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+list( APPEND  DEP_LIBS ${DEP_LIBS_${CMAKE_SYSTEM_NAME}} )
+
+## Sources
+file( GLOB SOURCES_C  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.c )
+file( GLOB SOURCES_XC  ${CMAKE_CURRENT_SOURCE_DIR}/../test_wav_aec/src/*.xc )
+if ( XCORE )
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/*.c )
+else()
+    ## Only 1 thread process_frame() builds for x86
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+endif()
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES ${XSCOPE_FILEIO_PATH}/src/*.c )
+file( GLOB_RECURSE AUDIO_TEST_TOOLS_SOURCES ${DEPS_ROOT}/audio_test_tools/audio_test_tools/src/burners.S )
+file( GLOB SOURCES_FILE_UTILS ${SHARED_SRC_PATH}/file_utils/*.c )
+
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_AEC_PROCESS_FRAME} ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} ${SOURCES_FILE_UTILS} ${AUDIO_TEST_TOOLS_SOURCES} ${AUTOGEN_SOURCES} )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${SHARED_SRC_PATH}/file_utils ${AUTOGEN_DIR} ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+
+# set( XSCOPE_CONFIG config.xscope )
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+add_executable( ${APP_NAME} ${SOURCES} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS})  # explicit scope keyword
+
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+# Join the flag list into one space-separated string (otherwise the set_target_properties command fails)
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+    set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/test/lib_aec/test_delay_estimator/config.xscope b/test/lib_aec/test_delay_estimator/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/test/lib_aec/test_delay_estimator/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/test/lib_aec/test_delay_estimator/filters.py b/test/lib_aec/test_delay_estimator/filters.py
new file mode 100644
index 000000000..bdc08a99c
--- /dev/null
+++ b/test/lib_aec/test_delay_estimator/filters.py
@@ -0,0 +1,102 @@
+# Copyright 2019-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import range
+from builtins import object
+import numpy as np
+import scipy.signal
+import audio_generation
+
+sample_rate = 16000
+
+frame_advance = 240
+
+
+def convolve(input_x, input_y, filter_x, filter_y):
+    """Filter input_x with filter_x and input_y with filter_y.
+
+    Both channels are processed over the length of the shorter input, and
+    each filter may change from frame to frame.
+    """
+
+    common_len = min(len(input_x), len(input_y))
+    return (convolve_1ch(input_x, filter_x, common_len),
+            convolve_1ch(input_y, filter_y, common_len))
+
+
+def convolve_1ch(input_audio, filt, input_len):
+    """Convolve input_audio with filt, re-reading the filter every frame."""
+    # Use the shared frame size rather than a second hard-coded 240
+    num_frames = int(input_len // frame_advance)
+    output_audio = np.array([])
+    cur_filter = filt.get_filter(0)
+    for i in range(1, num_frames+1):
+        next_filter = filt.get_filter(i)
+        # Emit the samples produced by cur_filter once it changes or at the end
+        if not np.array_equal(cur_filter, next_filter) or i == num_frames:
+            convolution = scipy.signal.convolve(input_audio, cur_filter)
+            chunk = convolution[len(output_audio):i*frame_advance]
+            output_audio = np.concatenate((output_audio, chunk))
+            cur_filter = next_filter
+
+    return output_audio
+
+
+class Filter(object):
+    def __init__(self):
+        raise NotImplementedError
+
+    def get_filter(self, frame_num):
+        return self._filter
+
+
+class Identity(Filter):
+    def __init__(self):
+        self._filter = np.ones(1)
+
+
+class OneImpulse(Filter):
+    def __init__(self, index):
+        self._filter = np.zeros((max(50, index+1),))
+        self._filter[index] = 1
+
+
+class Diffuse(Filter):
+    def __init__(self, seed=0, rt60=0.3):
+        a = 3.0 * np.log(10.0) / rt60
+        t = np.arange(2.0 * rt60 * sample_rate) / sample_rate
+        np.random.seed(seed)
+        self._filter = 0.01 * np.random.randn(t.shape[0]) * np.exp(-a*t)
+
+
+class ShortEcho(Filter):
+    def __init__(self):
+        self._filter = audio_generation.get_h('short')
+
+
+class ZeroAt(Filter):
+    """One-tap filter that is 1 until zero_time seconds have passed, then 0."""
+    def __init__(self, zero_time=5):
+        self._zero_time = zero_time
+
+    def get_filter(self, frame_num):
+        elapsed_samples = frame_num * frame_advance
+        silenced = elapsed_samples > self._zero_time * sample_rate
+        return np.zeros(1) if silenced else np.ones(1)
+
+
+class MovingSource(Filter):
+    """Single-tap filter whose tap index moves as the frame number grows."""
+    def __init__(self, move_frequency=1, max_samples_moved=10):
+        self._filter = np.zeros(max_samples_moved)
+        self._filter[0] = 1
+        self._max_samples_moved = max_samples_moved
+        self._move_frequency = move_frequency
+
+    def get_filter(self, frame_num):
+        i = int(frame_num * frame_advance / (sample_rate * self._move_frequency))
+        move = i % self._max_samples_moved
+        if i % 2:  # mirror odd steps; modulo keeps the index inside the filter
+            move = (self._max_samples_moved - move) % self._max_samples_moved
+        self._filter = np.zeros(self._max_samples_moved)
+        self._filter[move] = 1
+        return self._filter
diff --git a/test/lib_aec/test_delay_estimator/input_wavs_fixed/jazz_4ch_record_10s.wav b/test/lib_aec/test_delay_estimator/input_wavs_fixed/jazz_4ch_record_10s.wav
new file mode 100644
index 000000000..32f33ec57
Binary files /dev/null and b/test/lib_aec/test_delay_estimator/input_wavs_fixed/jazz_4ch_record_10s.wav differ
diff --git a/test/lib_aec/test_delay_estimator/print_stats.py b/test/lib_aec/test_delay_estimator/print_stats.py
new file mode 100644
index 000000000..e369ef395
--- /dev/null
+++ b/test/lib_aec/test_delay_estimator/print_stats.py
@@ -0,0 +1,31 @@
+# Copyright 2019-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import sys
+import numpy as np
+import xml.etree.ElementTree as ET
+
+def main(show_arrays):
+    """Print every testcase's recorded properties from pytest_result.xml.
+
+    Properties whose value starts with "array([" are skipped unless
+    show_arrays is true; a blank line is printed after each testcase.
+    """
+    tree = ET.parse('pytest_result.xml')
+    properties = tree.findall(".//testcase/properties")
+    for result in properties:
+        for p in result.findall("property"):
+            value = p.get('value')
+            # str() keeps a property without a value attribute from raising
+            if not show_arrays and str(value).startswith("array(["):
+                continue
+            print("{}: {}".format(p.get('name'), value))
+        print('')
+
+if __name__ == "__main__":
+    # Arrays are printed only when the first command line argument is --all
+    cli_args = sys.argv[1:]
+    if cli_args and cli_args[0] == "--all":
+        show_arrays = True
+    else:
+        show_arrays = False
+    main(show_arrays)
diff --git a/test/lib_aec/test_delay_estimator/test_delay_estimator.py b/test/lib_aec/test_delay_estimator/test_delay_estimator.py
new file mode 100644
index 000000000..d2d8b0d6c
--- /dev/null
+++ b/test/lib_aec/test_delay_estimator/test_delay_estimator.py
@@ -0,0 +1,381 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import range
+from builtins import object
+import tempfile
+import sys
+import os
+import warnings
+
+from scipy.signal import convolve
+import scipy.io.wavfile
+import audio_generation
+import audio_wav_utils as awu
+import pytest
+import subprocess
+import numpy as np
+
+import filters
+import xscope_fileio
+import xtagctl
+import io
+from contextlib import redirect_stdout
+import re
+import glob
+
+input_folder = os.path.abspath("input_wavs")
+output_folder = os.path.abspath("output_files")
+
+delay_calc_output_file_name = "delay.bin"
+
+# sample_rate is handed to scipy.io.wavfile.write; frame_advance is the per-frame hop in samples.
+sample_rate = 16000
+proc_frame_length = 2**9 # = 512
+frame_advance = 240
+# First .xe matched in the build tree; the [0] raises IndexError at import time when none is built.
+xe_path = os.path.abspath(glob.glob('../../../build/test/lib_aec/test_delay_estimator/bin/*.xe')[0])
+
+class TestCase(object):
+    """One delay-estimator test vector: two filters plus the audio they are applied to.
+
+    dont_check / invert_check are queried via do_check_<name> / invert_check_<name>.
+    """
+    def __init__(self, name, h_x, h_y, aud_x=None, aud_y=None, dont_check=None, invert_check=None):
+        self.name = name
+        self.h_x = h_x
+        self.h_y = h_y
+        # None defaults avoid sharing one mutable list between all instances.
+        self._dont_check = [] if dont_check is None else dont_check
+        self._invert_check = [] if invert_check is None else invert_check
+        # Fall back to generated noise when no audio is supplied.
+        self.aud_x = audio_generation.get_noise(duration=10, db=-20) if aud_x is None else aud_x
+        self.aud_y = audio_generation.get_noise(duration=10, db=-20) if aud_y is None else aud_y
+
+        # Memoization arrays, one slot per frame: "already computed" flag and value.
+        self._delay_calculated = np.zeros(self._get_num_frames())
+        self._delay = np.zeros(self._get_num_frames())
+
+    def get_delay(self, frame_num=0):
+        """ Get the delay between the mic and reference channels
+
+        Delay will be positive if mics arrive after reference
+        Delay will be negative if mics arrive before reference
+        Raises ValueError when frame_num is not below the number of frames.
+        """
+        if frame_num >= self._get_num_frames():
+            raise ValueError("frame_num {} out of range".format(frame_num))
+
+        if self._delay_calculated[frame_num]:
+            return self._delay[frame_num]
+
+        h_x = self.h_x.get_filter(frame_num)
+        h_y = self.h_y.get_filter(frame_num)
+        h_x_prev = self.h_x.get_filter(frame_num - 1)
+        h_y_prev = self.h_y.get_filter(frame_num - 1)
+
+        # Unchanged filters give an unchanged delay, so reuse the previous frame's result.
+        if frame_num > 0 and self._delay_calculated[frame_num - 1]:
+            if np.array_equal(h_x, h_x_prev) and np.array_equal(h_y, h_y_prev):
+                self._delay_calculated[frame_num] = 1
+                self._delay[frame_num] = self._delay[frame_num - 1]
+                return self._delay[frame_num]
+
+        # Cross-correlate the zero-padded filters; the peak offset from centre is the delay.
+        length = max(len(h_x), len(h_y)) * 2
+        h_x_pad = np.pad(h_x, (0, length - len(h_x)), 'constant')
+        h_y_pad = np.pad(h_y, (0, length - len(h_y)), 'constant')
+        corr = scipy.signal.correlate(h_y_pad, h_x_pad, mode='same')
+        delay = np.argmax(corr) - (length // 2)
+        self._delay_calculated[frame_num] = 1
+        self._delay[frame_num] = delay
+        return delay
+
+    def _get_num_frames(self):
+        """Number of whole frame_advance-sized frames in the shorter of the two inputs."""
+        input_len = min(len(self.aud_x), len(self.aud_y))
+        return int(input_len // frame_advance)
+
+    def get_test_name(self):
+        """Lower-case name with spaces replaced by dashes."""
+        return self.name.lower().replace(' ', '-')
+
+    def __getattr__(self, name):
+        """Resolve do_check_<check> and invert_check_<check> against the configured lists."""
+        if "do_check" == name[:len("do_check")]:
+            check = name[len("do_check_"):]
+            return not check in self._dont_check
+        if "invert_check" == name[:len("invert_check")]:
+            check = name[len("invert_check_"):]
+            return check in self._invert_check
+        raise AttributeError(name)
+
+
+class DelaySpec(object):
+    # Maximum time in seconds the estimate may take to reach the correct delay
+    convergence_time = 2.0
+
+
+data, jazz = scipy.io.wavfile.read('input_wavs_fixed/jazz_4ch_record_10s.wav')
+jazz = jazz.T.astype(float) / np.iinfo(np.int32).max
+# NOTE: wavfile.read returns (rate, samples), so `data` above holds the sample rate.
+# After the transpose each row of `jazz` is one channel, scaled by the int32 maximum.
+# Row 0 is used as the y input and row 2 as the x input of the jazz test vectors.
+jazz_y = jazz[0, :]
+jazz_x = jazz[2, :]
+
+print(jazz_x.shape)
+print(jazz_y.shape)
+
+test_vectors = [
+    TestCase('Identical Mics', filters.Identity(), filters.Identity()),
+    TestCase('Impulse at minus 20 samples', filters.OneImpulse(20), filters.Identity(),
+             dont_check=['convergence', 'stability', 'correct']),
+    TestCase('Jazz 1000 sample delay', filters.Identity(), filters.OneImpulse(1000),
+             aud_x=jazz_x, aud_y=jazz_y, dont_check=['stability']),
+    TestCase('Impulse at 500 samples', filters.Identity(), filters.OneImpulse(500)),
+    TestCase('Impulse at 1000 samples', filters.Identity(), filters.OneImpulse(1000)),
+    TestCase('Impulse at 7000 samples', filters.Identity(), filters.OneImpulse(7000)),
+    TestCase('Jazz', filters.Identity(), filters.Identity(),
+             aud_x=jazz_x, aud_y=jazz_y, dont_check=['stability']),
+    #TestCase('Impulse at 9000 samples', filters.Identity(), filters.OneImpulse(9000)),
+]
+
+
+def write_input(test_name, input_data):
+    """Save input_data, converted to 32-bit, as <input_folder>/<test_name>-input.wav."""
+    wav_name = test_name + "-input.wav"
+    wav_path = os.path.abspath(os.path.join(input_folder, wav_name))
+    scipy.io.wavfile.write(wav_path, sample_rate, awu.convert_to_32_bit(input_data).T)
+
+
+def write_output(test_name, output, xc_or_py):
+    """Save output with np.savetxt as <output_folder>/<test_name>-output-<xc_or_py>.txt."""
+    txt_name = test_name + "-output-{}.txt".format(xc_or_py)
+    np.savetxt(os.path.abspath(os.path.join(output_folder, txt_name)), output)
+
+
+def process_audio(input_data, test_name):
+    """Run the delay-estimator .xe on input_data and return its per-frame delay output.
+
+    The temporary working directory is removed and the cwd restored even on failure.
+    """
+    import shutil  # local import: only needed for the temp-dir cleanup below
+    tmp_folder = tempfile.mkdtemp(suffix=os.path.basename(test_name))
+    prev_path = os.getcwd()
+    os.chdir(tmp_folder)
+    try:
+        #write runtime arguments into args.bin
+        with open("args.bin", "wb") as fargs:
+            fargs.write(b"y_channels 1\nx_channels 1\nmain_filter_phases 30\n")
+            fargs.write(b"shadow_filter_phases 0\nadaption_mode 1\n")
+        input_32bit = awu.convert_to_32_bit(input_data)
+        scipy.io.wavfile.write('input.wav', sample_rate, input_32bit.T)
+        with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+            xscope_fileio.run_on_target(adapter_id, xe_path)
+        with open(delay_calc_output_file_name, 'r') as f:
+            output = np.array([int(l) for l in f], dtype=float)
+    finally:
+        os.chdir(prev_path)
+        shutil.rmtree(tmp_folder, ignore_errors=True)
+    write_output(test_name, output, 'xc')
+    return output.T
+
+
+@pytest.fixture
+def test_input(request):
+    """Build, save and return the (y, y, x, x) input audio for the parametrized test case."""
+    test_case = request.param
+    test_name = test_case.get_test_name()
+    # Generate Audio
+    audio_x, audio_y = filters.convolve(test_case.aud_x, test_case.aud_y,
+                                        test_case.h_x, test_case.h_y)
+    combined_data = np.vstack((audio_y, audio_y, audio_x, audio_x))
+    if np.max(np.abs(audio_x)) > 1:
+        warnings.warn("{}: max(abs(Mic 1)) == {}".format(test_name, np.max(np.abs(audio_x))))
+    if np.max(np.abs(audio_y)) > 1:
+        warnings.warn("{}: max(abs(Mic 0)) == {}".format(test_name, np.max(np.abs(audio_y))))
+    # write_input() performs the 32-bit conversion itself, so pass the unconverted data
+    # (converting here as well would apply convert_to_32_bit twice).
+    write_input(test_name, combined_data)
+    return (test_case, combined_data)
+
+
+def get_delay_arr(test_case, num_frames):
+    """Return the true input delay for each of the first num_frames frames.
+
+    The values come from test_case.get_delay(), queried frame by frame, and
+    are returned as a float numpy array of length num_frames.
+    """
+    per_frame = [test_case.get_delay(frame) for frame in range(num_frames)]
+    delays = np.zeros(num_frames)
+    delays[:] = per_frame
+    return delays
+
+
+def get_contiguous_regions(data):
+    """Map each run's start index to its length; {} for empty data (was IndexError)."""
+    regions = {}
+    run_start = 0
+    for i in range(1, len(data)):
+        if data[i] != data[run_start]:  # value changed: close the current run
+            regions[run_start] = i - run_start
+            run_start = i
+    if len(data) > 0:
+        regions[run_start] = len(data) - run_start
+    return regions
+
+
+def check_convergence(record_property, test_case, delay_arr, output):
+    """ Checks the convergence time is less than the spec
+
+    Convergence time == max time the output takes to converge when the delay is
+    constant. The verdict is also recorded through record_property.
+    """
+
+    # The true delay is rounded down to a multiple of frame_advance before it is
+    # compared with the estimator output. For every run of frames with a constant
+    # rounded delay, frames_taken is the offset within that run of the first
+    # output frame equal to the rounded delay.
+    #
+    # NOTE(review): a run that never converges yields i - num_frames, which is
+    # never positive, so it cannot exceed worst_convergence (initially -1) and
+    # does not by itself fail the check - confirm this is intended.
+
+    worst_convergence = -1
+    num_frames = len(output)
+    cur_delay = delay_arr[0] - (delay_arr[0] % frame_advance)
+    cur_index = 0
+    for i in range(1, num_frames):
+        next_delay = delay_arr[i] - (delay_arr[i] % frame_advance)
+        if cur_delay != next_delay or i == num_frames-1:
+            try:
+                frames_taken = np.min(np.argwhere(output[cur_index:i] == cur_delay))
+            except ValueError:
+                # Did not converge
+                frames_taken = i - num_frames
+            if frames_taken > worst_convergence:
+                worst_convergence = frames_taken
+
+            cur_delay = next_delay
+            cur_index = i
+
+    convergence_spec_frames = int((DelaySpec.convergence_time * sample_rate)\
+                                   // frame_advance)
+    check = (worst_convergence <= convergence_spec_frames)\
+            and worst_convergence >= 0
+
+    record_property("Worst convergence (frames)", str(worst_convergence))
+    worst_convergence_secs = worst_convergence * frame_advance / float(sample_rate)
+    record_property("Worst convergence (seconds)", str(worst_convergence_secs))
+    record_property("Converged", str(check))
+
+    # Invert the check if the test vector shouldn't converge
+    if not test_case.do_check_convergence:
+        return True
+    if test_case.invert_check_convergence:
+        check = not check
+    return check
+
+
+def check_stability(record_property, test_case, delay_arr, output):
+    """ Checks that the estimated delay stays constant when the delay isn't
+    changing; records "Max. estimate changes" and "Stable", returns the verdict.
+    """
+    # NOTE(review): delay_arr_rounded is not used below; the regions come from the unrounded delay_arr.
+    delay_arr_rounded = delay_arr - (delay_arr % frame_advance)
+    output_regions = get_contiguous_regions(output)
+    delay_regions = get_contiguous_regions(delay_arr)
+
+    num_frames = len(output)
+    # Region start indices in ascending order, with num_frames appended as the end marker.
+    delay_keys = list(delay_regions.keys())
+    delay_keys.sort()
+    delay_keys.append(num_frames)
+    delay_keys = np.array(delay_keys)
+
+    output_keys = list(output_regions.keys())
+    output_keys.sort()
+    output_keys.append(num_frames)
+    output_keys = np.array(output_keys)
+
+    check = True
+    max_changes = 0
+    for i, key in enumerate(delay_keys):
+        if i == len(delay_keys) - 1:
+            break
+        next_key = delay_keys[i+1]
+
+        # Maximum of 2 output regions in each delay change region
+        num_regions = len(np.where((output_keys > key) & (output_keys < next_key))[0])
+        if num_regions > 2:
+            check = False
+        if num_regions > max_changes:
+            max_changes = num_regions
+
+    record_property("Max. estimate changes", max_changes)
+    record_property("Stable", check)
+
+    # Invert the check if the test vector shouldn't check stability
+    if not test_case.do_check_stability:
+        return True
+    if test_case.invert_check_stability:
+        check = not check
+    return check
+
+
+def check_correct(record_property, test_case, delay_arr, output):
+    """ Checks that the 3 largest correct contiguous regions take up >90%
+    of the (len(output) - time till first correct region)
+    """
+
+    delay_arr_rounded = delay_arr - (delay_arr % frame_advance)
+    regions = get_contiguous_regions(delay_arr_rounded == output)
+    # Keep only the runs in which the output matches the rounded delay.
+    correct_regions = {}
+    for key in regions:
+        if delay_arr_rounded[key] == output[key]:
+            correct_regions[key] = regions[key]
+
+    check = False
+    num_correct_frames = 0
+    if len(correct_regions) != 0:
+        first_correct_frame = min(correct_regions)
+        # Get size of 3 largest contiguous regions (descending sort so [:3] are the largest)
+        region_sizes = sorted(correct_regions.values(), reverse=True)
+        num_correct_frames = sum(region_sizes[:3])
+        # The 90% applies to the frames remaining from the first correct one onwards
+        if num_correct_frames > 0.9 * (len(output) - first_correct_frame):
+            check = True
+
+    record_property('Num correct frames', num_correct_frames)
+    record_property('Correct', check)
+
+    # Invert the check if the test vector shouldn't check correctness
+    if not test_case.do_check_correct:
+        return True
+    if test_case.invert_check_correct:
+        check = not check
+    return check
+
+
+@pytest.mark.parametrize('test_input', test_vectors, indirect=True)
+def test_all(test_input, record_property):
+    """Run one test vector on the target and assert all three delay checks pass."""
+    test_case, input_audio = test_input
+    test_name = test_case.get_test_name()
+
+    output = process_audio(input_audio, test_name)
+    delay_arr = get_delay_arr(test_case, len(output))
+
+    record_property('Test name', test_name)
+    record_property('delay_arr', np.array_repr(delay_arr))
+    record_property('output', np.array_repr(output))
+
+    # Evaluate every check before asserting so that all properties get recorded
+    check_args = (record_property, test_case, delay_arr, output)
+    criteria = [check(*check_args)
+                for check in (check_convergence, check_stability, check_correct)]
+
+    print("{}".format(test_case.name))
+    assert np.all(criteria), " and ".join(str(passed) for passed in criteria)
+
diff --git a/test/lib_aec/test_wav_aec/CMakeLists.txt b/test/lib_aec/test_wav_aec/CMakeLists.txt
new file mode 100644
index 000000000..58400f0a0
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/CMakeLists.txt
@@ -0,0 +1,115 @@
+## App name
+set( APP_NAME  test_wav_aec )
+
+# Auto-generate task distribution scheme and top level config files
+# (test the variable by name: an unset Python3_FOUND would expand to the invalid `if( NOT )`)
+if( NOT Python3_FOUND )
+  message(FATAL_ERROR "Python3 not found for running generate_task_distribution_scheme.py")
+endif()
+
+set( GEN_SCHEDULE_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../shared_src/python/generate_task_distribution_scheme.py )
+set( AUTOGEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.autogen )
+set( AUTOGEN_SOURCES ${AUTOGEN_DIR}/aec_task_distribution.c )
+set( AUTOGEN_INCLUDES ${AUTOGEN_DIR}/aec_task_distribution.h ${AUTOGEN_DIR}/aec_config.h)
+
+message(STATUS "${APP_NAME} build config:  ${TEST_WAV_AEC_BUILD_CONFIG}" )
+set( GEN_SCHEDULE_SCRIPT_BYPRODUCTS ${AUTOGEN_SOURCES} ${AUTOGEN_INCLUDES} )
+
+unset(GEN_SCHEDULE_SCRIPT_ARGS) 
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --out-dir ${AUTOGEN_DIR})
+list(APPEND GEN_SCHEDULE_SCRIPT_ARGS --config ${TEST_WAV_AEC_BUILD_CONFIG})
+file(MAKE_DIRECTORY ${AUTOGEN_DIR})
+# DEPENDS re-runs the generator whenever the generator script itself changes.
+add_custom_command(
+  OUTPUT ${GEN_SCHEDULE_SCRIPT_BYPRODUCTS}
+  COMMAND ${Python3_EXECUTABLE} ${GEN_SCHEDULE_SCRIPT} ${GEN_SCHEDULE_SCRIPT_ARGS}
+  DEPENDS ${GEN_SCHEDULE_SCRIPT}
+  COMMENT "Generating AEC task distribution and top level config" )
+
+## Depends on libraries
+list( APPEND  DEP_LIBS_XCORE  ""  )
+list( APPEND  DEP_LIBS_Linux  m   )
+
+list( APPEND  DEP_LIBS        
+    lib_aec 
+    ${DEP_LIBS_${CMAKE_SYSTEM_NAME}}
+)
+
+list( APPEND TEST_AUTOGEN_SOURCES ${AUTOGEN_SOURCES} )
+list( APPEND TEST_AUTOGEN_INCLUDES ${AUTOGEN_INCLUDES} )
+
+## Sources
+file( GLOB SOURCES_C  src/*.c )
+file( GLOB SOURCES_XC src/*.xc )
+if ( XCORE )
+    file( GLOB SOURCES_AEC_PROCESS_FRAME  ${SHARED_SRC_PATH}/aec/*.c )
+else()
+    ## Only 1 thread process_frame() builds for x86
+    file( GLOB SOURCES_AEC_PROCESS_FRAME ${SHARED_SRC_PATH}/aec/aec_process_frame_1thread.c )
+endif()
+file( GLOB SOURCES_FILE_UTILS ${SHARED_SRC_PATH}/file_utils/*.c ) 
+file( GLOB_RECURSE XSCOPE_FILEIO_SOURCES ${XSCOPE_FILEIO_PATH}/src/*.c )
+
+unset( SOURCES )
+list( APPEND  SOURCES ${SOURCES_C} ${SOURCES_AEC_PROCESS_FRAME} ${SOURCES_FILE_UTILS} ${AUTOGEN_SOURCES} )
+unset( SOURCES_XCORE )
+list( APPEND SOURCES_XCORE ${SOURCES_XC} ${XSCOPE_FILEIO_SOURCES} )
+list( APPEND SOURCES ${SOURCES_${CMAKE_SYSTEM_NAME}} )
+
+## Includes
+unset( INCLUDES )
+list( APPEND  INCLUDES src ${SHARED_SRC_PATH}/aec ${SHARED_SRC_PATH}/file_utils ${AUTOGEN_DIR} )
+unset( INCLUDES_XCORE )
+list( APPEND INCLUDES_XCORE ${XSCOPE_FILEIO_PATH} ${XSCOPE_FILEIO_PATH}/api )
+list( APPEND INCLUDES ${INCLUDES_${CMAKE_SYSTEM_NAME}} )
+
+# set( XSCOPE_CONFIG config.xscope )
+get_filename_component(XSCOPE_CONFIG config.xscope ABSOLUTE)
+
+## Compile flags
+unset(COMPILE_FLAGS)
+unset(COMPILE_FLAGS_XCORE)
+
+list(APPEND   COMPILE_FLAGS_XCORE  -DTEST_WAV_XSCOPE=1 )
+
+unset(COMPILE_FLAGS_Linux)
+list( APPEND  COMPILE_FLAGS_Linux   ""  )
+
+
+##Linker flags
+unset(LINKER_FLAGS)
+list( APPEND  LINKER_FLAGS  "" )
+
+unset(LINKER_FLAGS_XCORE)
+list( APPEND  LINKER_FLAGS_XCORE  "-target=${XCORE_TARGET}"     )
+list( APPEND  LINKER_FLAGS_XCORE  "-report"                     )
+list( APPEND  LINKER_FLAGS_XCORE  "${XSCOPE_CONFIG}"            )
+
+unset(LINKER_FLAGS_Linux)
+list( APPEND  LINKER_FLAGS_Linux  "" )
+
+list( APPEND  LINKER_FLAGS ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+list( APPEND  COMPILE_FLAGS ${COMPILE_FLAGS_${CMAKE_SYSTEM_NAME}} ${LINKER_FLAGS_${CMAKE_SYSTEM_NAME}} )
+
+#########
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+add_executable( ${APP_NAME} ${SOURCES} ${SOURCES_C_APP} )
+
+target_include_directories( ${APP_NAME} PRIVATE ${INCLUDES} )
+
+# Keyword signature: an executable's link dependencies need not propagate further.
+target_link_libraries( ${APP_NAME} PRIVATE ${DEP_LIBS})
+target_compile_options( ${APP_NAME} PRIVATE ${COMPILE_FLAGS} )
+
+#(because otherwise the set_target_properties command fails)
+string(REPLACE ";" " " LINKER_FLAGS_STR "${LINKER_FLAGS}")
+set_target_properties( ${APP_NAME} PROPERTIES LINK_FLAGS "${LINKER_FLAGS_STR}" )
+
+if ( XCORE )
+  set_target_properties( ${APP_NAME} PROPERTIES
+      SUFFIX ".xe"
+      LINK_DEPENDS  ${XSCOPE_CONFIG}
+      )
+endif()
diff --git a/test/lib_aec/test_wav_aec/config.xscope b/test/lib_aec/test_wav_aec/config.xscope
new file mode 100644
index 000000000..0d3b65e4c
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/config.xscope
@@ -0,0 +1,10 @@
+<xSCOPEconfig ioMode="basic" enabled="true">
+  <Probe name="open_file" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="read_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_setup" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="write_bytes" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="seek" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="tell" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+  <Probe name="host_quit" type="CONTINUOUS" datatype="UINT" units="Value" enabled="true"/>
+</xSCOPEconfig>
+
diff --git a/test/lib_aec/test_wav_aec/run_xcoreai.py b/test/lib_aec/test_wav_aec/run_xcoreai.py
new file mode 100644
index 000000000..b64f678d3
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/run_xcoreai.py
@@ -0,0 +1,46 @@
+# Copyright 2018-2021 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+from builtins import range
+import sys
+import os
+import numpy as np
+import scipy.io.wavfile
+# import matplotlib
+# matplotlib.use('TkAgg')
+# import matplotlib.pyplot as plt
+import subprocess as sub
+import xtagctl
+import xscope_fileio
+import argparse
+
+package_dir = os.path.dirname(os.path.abspath(__file__))
+
+def parse_arguments():
+    """Parse the command line; the optional positional `xe` is None when omitted."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("xe", nargs='?', help=".xe file to run")
+    parsed = cli.parse_args()
+    return parsed
+
+
+args = parse_arguments()
+assert args.xe is not None, "Specify valid .xe file"
+aec_exe = os.path.join(package_dir, 'bin/test_wav_aec_C_app_xcoreai.xe')  # NOTE(review): not used below; args.xe is what gets run
+#example code to set runtime config in args.bin
+with open("args.bin", "wb") as fp:
+    fp.write("stop_adapting -1\n".encode('utf-8'))
+    fp.write("adaption_mode 0\n".encode('utf-8'))
+    fp.write("main_filter_phases 10\n".encode('utf-8'))
+    fp.write("shadow_filter_phases 5\n".encode('utf-8'))
+    fp.write("y_channels 2\n".encode('utf-8'))
+    fp.write("x_channels 2\n".encode('utf-8'))
+#Create an empty args.bin file. xscope_open_file() doesn't handle file not present. Ideally, would like
+#to use posix open with O_CREAT flag 
+#fp = open("args.bin", "wb")
+#fp.close()
+
+with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+    xscope_fileio.run_on_target(adapter_id, args.xe)
+
+
+
diff --git a/test/lib_aec/test_wav_aec/src/dump_H_hat.c b/test/lib_aec/test_wav_aec/src/dump_H_hat.c
new file mode 100644
index 000000000..6ee032e56
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/src/dump_H_hat.c
@@ -0,0 +1,45 @@
+// Copyright 2017-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+
+#include "aec_state.h"
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "fileio.h"
+
+void aec_dump_H_hat(aec_state_t *state, file_t *file_handle){
+    char strbuf[1024];
+    sprintf(strbuf, "import numpy as np\n");
+    file_write(file_handle, (uint8_t*)strbuf, strlen(strbuf));
+    sprintf(strbuf, "frame_advance = %u\n", AEC_FRAME_ADVANCE);
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+    sprintf(strbuf, "y_channel_count = %u\n", state->shared_state->num_y_channels);
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+    sprintf(strbuf, "x_channel_count = %u\n", state->shared_state->num_x_channels);
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+    sprintf(strbuf, "max_phase_count = %u\n", state->num_phases);
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+    sprintf(strbuf, "f_bin_count = %u\n", state->H_hat[0][0].length);
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+    sprintf(strbuf, "H_hat = np.zeros((y_channel_count, x_channel_count, max_phase_count, f_bin_count), dtype=np.complex128)\n");
+    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+
+    for(int ych=0; ych<state->shared_state->num_y_channels; ych++) {        
+        for(int xch=0; xch<state->shared_state->num_x_channels; xch++) {
+            for(int ph=0; ph<state->num_phases; ph++) {
+                sprintf(strbuf, "H_hat[%u][%u][%u] = ", ych, xch, ph);
+                file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+                sprintf(strbuf, "np.asarray([");
+                file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+                for(int i=0; i<state->H_hat[ych][xch*state->num_phases + ph].length; i++) {
+                    sprintf(strbuf, "%.12f + %.12fj, ", ldexp( state->H_hat[ych][xch*state->num_phases + ph].data[i].re, state->H_hat[ych][xch*state->num_phases + ph].exp),
+                    ldexp( state->H_hat[ych][xch*state->num_phases + ph].data[i].im, state->H_hat[ych][xch*state->num_phases + ph].exp));
+                    file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+                }
+                sprintf(strbuf, "])\n");
+                file_write(file_handle, (uint8_t*)strbuf,  strlen(strbuf));
+            }
+        }
+    }
+}
+
diff --git a/test/lib_aec/test_wav_aec/src/dump_H_hat.h b/test/lib_aec/test_wav_aec/src/dump_H_hat.h
new file mode 100644
index 000000000..825cbe34e
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/src/dump_H_hat.h
@@ -0,0 +1,12 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef __DUMP_H_HAT_H__
+#define __DUMP_H_HAT_H__
+
+/*
+ * This is designed for non-real time dumping of the H_hat filter
+ * for reconstruction in python.
+ */
+void aec_dump_H_hat(aec_state_t *state, file_t *file_handle);
+
+#endif
diff --git a/test/lib_aec/test_wav_aec/src/test_wav_aec.c b/test/lib_aec/test_wav_aec/src/test_wav_aec.c
new file mode 100644
index 000000000..b5e3c0a3f
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/src/test_wav_aec.c
@@ -0,0 +1,284 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#if !X86_BUILD
+#ifdef __XC__
+    #define chanend_t chanend
+#else
+    #include <xcore/chanend.h>
+#endif
+#include <platform.h>
+#include <print.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "aec_config.h"
+#include "aec_task_distribution.h"
+#include "aec_defines.h"
+#include "aec_api.h"
+#include "aec_memory_pool.h"
+#include "fileio.h"
+#include "wav_utils.h"
+#include "dump_H_hat.h"
+
+#if PROFILE_PROCESSING
+#include "profile.h"
+#endif
+
+extern void aec_process_frame_1thread(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+
+extern void aec_process_frame_2threads(
+        aec_state_t *main_state,
+        aec_state_t *shadow_state,
+        int32_t (*output_main)[AEC_FRAME_ADVANCE],
+        int32_t (*output_shadow)[AEC_FRAME_ADVANCE],
+        const int32_t (*y_data)[AEC_FRAME_ADVANCE],
+        const int32_t (*x_data)[AEC_FRAME_ADVANCE]);
+
+#define ARG_NOT_SPECIFIED (-1)
+typedef enum {
+    Y_CHANNELS,
+    X_CHANNELS,
+    MAIN_FILTER_PHASES,
+    SHADOW_FILTER_PHASES,
+    ADAPTION_MODE,
+    FORCE_ADAPTION_MU,
+    STOP_ADAPTING,
+    NUM_RUNTIME_ARGS
+}runtime_args_indexes_t;
+
+int runtime_args[NUM_RUNTIME_ARGS];
+
+//valid_tokens_str entries and runtime_args_indexes_t need to maintain the same order so that when a runtime argument token string matches index 'i' string in valid_tokens_str, the corresponding
+//value can be updated in runtime_args[i]
+const char *valid_tokens_str[] = {"y_channels", "x_channels", "main_filter_phases", "shadow_filter_phases", "adaption_mode", "force_adaption_mu", "stop_adapting"}; //TODO autogenerate from runtime_args_indexes_t
+
+#define MAX_ARGS_BUF_SIZE (1024)
+void parse_runtime_args(int *runtime_args_arr) {
+    // Reads "<name> <int>" lines from args.bin and stores every recognised value at
+    // the matching index of runtime_args_arr; other entries are left untouched.
+    file_t args_file;
+    if(file_open(&args_file, "args.bin", "rb") != 0) {
+        return; // no args file: keep the caller's defaults
+    }
+    char readbuf[MAX_ARGS_BUF_SIZE];
+    int args_file_size = get_file_size(&args_file);
+    printf("args_file_size = %d\n",args_file_size);
+    // Leave room for the terminator: a larger file would overflow readbuf
+    if(args_file_size <= 0 || args_file_size >= MAX_ARGS_BUF_SIZE) {
+        file_close(&args_file);
+        return;
+    }
+    file_read(&args_file, (uint8_t*)readbuf, args_file_size);
+    file_close(&args_file); // previously leaked on this path
+    readbuf[args_file_size] = '\0';
+
+    const int num_tokens = (int)(sizeof(valid_tokens_str)/sizeof(valid_tokens_str[0]));
+    for(char *c = strtok(readbuf, "\n"); c != NULL; c = strtok(NULL, "\n")) {
+        char token_str[100];
+        int token_val;
+        // Width-limit %s to token_str and skip lines that lack a name or a value
+        if(sscanf(c, "%99s %d", token_str, &token_val) != 2) {
+            continue;
+        }
+        for(int i=0; i<num_tokens; i++) {
+            if(strcmp(valid_tokens_str[i], token_str) == 0) {
+                runtime_args_arr[i] = token_val;
+            }
+        }
+    }
+}
+
+#define Q1_30(f) ((int32_t)((double)(INT_MAX>>1) * f)) //TODO use lib_xs3_math use_exponent instead
+// Runs the AEC over input_file_name block by block: writes the processed mic channels
+// to output_file_name and one delay estimate per block to delay.bin.
+void aec_task(const char *input_file_name, const char *output_file_name) {
+    //check validity of compile time configuration
+    assert(AEC_MAX_Y_CHANNELS <= AEC_LIB_MAX_Y_CHANNELS);
+    assert(AEC_MAX_X_CHANNELS <= AEC_LIB_MAX_X_CHANNELS);
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_MAIN_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+    assert((AEC_MAX_Y_CHANNELS * AEC_MAX_X_CHANNELS * AEC_SHADOW_FILTER_PHASES) <= (AEC_LIB_MAX_PHASES));
+    //Initialise default values of runtime arguments
+    runtime_args[Y_CHANNELS] = AEC_MAX_Y_CHANNELS;
+    runtime_args[X_CHANNELS] = AEC_MAX_X_CHANNELS;
+    runtime_args[MAIN_FILTER_PHASES] = AEC_MAIN_FILTER_PHASES;
+    runtime_args[SHADOW_FILTER_PHASES] = AEC_SHADOW_FILTER_PHASES;
+    runtime_args[ADAPTION_MODE] = AEC_ADAPTION_AUTO; //TODO Hardcoded!
+    runtime_args[FORCE_ADAPTION_MU] = Q1_30(1.0); //TODO Hardcoded
+    runtime_args[STOP_ADAPTING] = -1;
+    parse_runtime_args(runtime_args);
+    printf("runtime args = ");
+    for(int i=0; i<NUM_RUNTIME_ARGS; i++) {
+        printf("%d ",runtime_args[i]);
+    }
+    printf("\n");
+
+    //Check validity of runtime configuration (negative values would wrap in the unsigned loop bounds below)
+    assert(runtime_args[Y_CHANNELS] >= 0 && runtime_args[Y_CHANNELS] <= AEC_MAX_Y_CHANNELS);
+    assert(runtime_args[X_CHANNELS] >= 0 && runtime_args[X_CHANNELS] <= AEC_MAX_X_CHANNELS);
+    assert(runtime_args[MAIN_FILTER_PHASES] >= 0 && runtime_args[SHADOW_FILTER_PHASES] >= 0);
+    assert((runtime_args[Y_CHANNELS] * runtime_args[X_CHANNELS] * runtime_args[MAIN_FILTER_PHASES]) <= (AEC_LIB_MAX_PHASES));
+    assert((runtime_args[Y_CHANNELS] * runtime_args[X_CHANNELS] * runtime_args[SHADOW_FILTER_PHASES]) <= (AEC_LIB_MAX_PHASES));
+    //open files
+    file_t input_file, output_file, H_hat_file, delay_file;
+    int ret = file_open(&input_file, input_file_name, "rb");
+    assert((!ret) && "Failed to open file");
+    ret = file_open(&output_file, output_file_name, "wb");
+    assert((!ret) && "Failed to open file");
+    ret = file_open(&H_hat_file, "H_hat.bin", "wb");
+    assert((!ret) && "Failed to open file");
+    ret = file_open(&delay_file, "delay.bin", "wb");
+    assert((!ret) && "Failed to open file");
+
+    wav_header input_header_struct, output_header_struct;
+    unsigned input_header_size;
+    if(get_wav_header_details(&input_file, &input_header_struct, &input_header_size) != 0){
+        printf("error in att_get_wav_header_details()\n");
+        _Exit(1);
+    }
+    file_seek(&input_file, input_header_size, SEEK_SET);
+    if(input_header_struct.bit_depth != 32)
+     {
+         printf("Error: unsupported wav bit depth (%d) for %s file. Only 32 supported\n", input_header_struct.bit_depth, input_file_name);
+         _Exit(1);
+     }
+
+    if(input_header_struct.num_channels != (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)){
+        printf("Error: wav num channels(%d) does not match aec(%u)\n", input_header_struct.num_channels, (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS));
+        _Exit(1);
+    }
+
+    unsigned frame_count = wav_get_num_frames(&input_header_struct);
+    unsigned block_count = frame_count / AEC_FRAME_ADVANCE;
+    wav_form_header(&output_header_struct,
+            input_header_struct.audio_format,
+            AEC_MAX_Y_CHANNELS,
+            input_header_struct.sample_rate,
+            input_header_struct.bit_depth,
+            block_count*AEC_FRAME_ADVANCE);
+
+    file_write(&output_file, (uint8_t*)(&output_header_struct),  WAV_HEADER_BYTES);
+
+    int32_t input_read_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS + AEC_MAX_X_CHANNELS)] = {0};
+    // Zeroed: with fewer runtime y channels than AEC_MAX_Y_CHANNELS the unused columns are still written out
+    int32_t output_write_buffer[AEC_FRAME_ADVANCE * (AEC_MAX_Y_CHANNELS)] = {0};
+
+    int32_t DWORD_ALIGNED frame_y[AEC_MAX_Y_CHANNELS][AEC_FRAME_ADVANCE];
+    int32_t DWORD_ALIGNED frame_x[AEC_MAX_X_CHANNELS][AEC_FRAME_ADVANCE];
+    unsigned bytes_per_frame = wav_get_num_bytes_per_frame(&input_header_struct);
+
+    //Start AEC
+    prof(0, "start_aec_init");
+    uint8_t DWORD_ALIGNED aec_memory_pool[sizeof(aec_memory_pool_t)];
+    uint8_t DWORD_ALIGNED aec_shadow_filt_memory_pool[sizeof(aec_shadow_filt_memory_pool_t)];
+    aec_state_t DWORD_ALIGNED main_state;
+    aec_state_t DWORD_ALIGNED shadow_state;
+    aec_shared_state_t DWORD_ALIGNED aec_shared_state;
+
+    aec_init(&main_state, &shadow_state, &aec_shared_state,
+            &aec_memory_pool[0], &aec_shadow_filt_memory_pool[0],
+            runtime_args[Y_CHANNELS], runtime_args[X_CHANNELS],
+            runtime_args[MAIN_FILTER_PHASES], runtime_args[SHADOW_FILTER_PHASES]);
+    prof(1, "end_aec_init");
+
+    main_state.shared_state->config_params.coh_mu_conf.adaption_config = runtime_args[ADAPTION_MODE];
+    main_state.shared_state->config_params.coh_mu_conf.force_adaption_mu_q30 = runtime_args[FORCE_ADAPTION_MU];
+
+    for(unsigned b=0;b<block_count;b++){
+        // Read one block of interleaved samples and de-interleave it into frame_y / frame_x
+        long input_location =  wav_get_frame_start(&input_header_struct, b * AEC_FRAME_ADVANCE, input_header_size);
+        file_seek (&input_file, input_location, SEEK_SET);
+        file_read (&input_file, (uint8_t*)&input_read_buffer[0], bytes_per_frame* AEC_FRAME_ADVANCE);
+        for(unsigned f=0; f<AEC_FRAME_ADVANCE; f++){
+            for(unsigned ch=0;ch<runtime_args[Y_CHANNELS];ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + ch;
+                frame_y[ch][f] = input_read_buffer[i];
+            }
+            for(unsigned ch=0;ch<runtime_args[X_CHANNELS];ch++){
+                unsigned i =(f * (AEC_MAX_Y_CHANNELS+AEC_MAX_X_CHANNELS)) + AEC_MAX_Y_CHANNELS + ch;
+                frame_x[ch][f] = input_read_buffer[i];
+            }
+        }
+        if (runtime_args[STOP_ADAPTING] > 0) {
+            runtime_args[STOP_ADAPTING]--;
+            if (runtime_args[STOP_ADAPTING] == 0) {
+                aec_dump_H_hat(&main_state, &H_hat_file);
+                //turn off adaption
+                main_state.shared_state->config_params.coh_mu_conf.adaption_config = AEC_ADAPTION_FORCE_OFF;
+            }
+        }
+        prof(2, "start_aec_process_frame");
+        // Call AEC functions to process AEC_FRAME_ADVANCE new samples of data
+        /* Reuse mic data memory for main filter output
+         * Reuse ref data memory for shadow filter output
+         */
+#if (AEC_THREAD_COUNT == 1)
+        aec_process_frame_1thread(&main_state, &shadow_state, frame_y, frame_x, frame_y, frame_x);
+#elif (AEC_THREAD_COUNT == 2)
+        aec_process_frame_2threads(&main_state, &shadow_state, frame_y, frame_x, frame_y, frame_x);
+#else
+        #error "C app only supported for AEC_THREAD_COUNT range [1, 2]"
+#endif
+        prof(3, "end_aec_process_frame");
+
+        prof(4, "start_aec_estimate_delay");
+        int delay = aec_estimate_delay(&main_state.shared_state->delay_estimator_params, main_state.H_hat[0], main_state.num_phases); //Delay is estimated using 1 x-y pair
+        prof(5, "end_aec_estimate_delay");
+
+        char strbuf[100];
+        sprintf(strbuf, "%d\n", delay);
+        file_write(&delay_file, (uint8_t*)strbuf,  strlen(strbuf));
+
+        for (unsigned ch=0;ch<runtime_args[Y_CHANNELS];ch++){
+            for(unsigned i=0;i<AEC_FRAME_ADVANCE;i++){
+                output_write_buffer[i*(AEC_MAX_Y_CHANNELS) + ch] = frame_y[ch][i];
+            }
+        }
+
+        file_write(&output_file, (uint8_t*)(output_write_buffer), output_header_struct.bit_depth/8 * AEC_FRAME_ADVANCE * AEC_MAX_Y_CHANNELS);
+
+        print_prof(0,6,b+1);
+    }
+    file_close(&input_file);
+    file_close(&output_file);
+    file_close(&H_hat_file);
+    file_close(&delay_file);
+    shutdown_session();
+}
+
+
+#if !X86_BUILD
+void main_tile1(chanend_t c_cross_tile)
+{
+    //Do nothing
+}
+
+#define IN_WAV_FILE_NAME    "input.wav"
+#define OUT_WAV_FILE_NAME   "output.wav"
+void main_tile0(chanend_t c_cross_tile, chanend_t xscope_chan)
+{
+#if TEST_WAV_XSCOPE
+    xscope_io_init(xscope_chan);
+#endif 
+    aec_task(IN_WAV_FILE_NAME, OUT_WAV_FILE_NAME);
+}
+#else //Linux build
+int main(int argc, char **argv) {
+    if(argc < 3) {
+        printf("Arguments missing. Expected: <input file name> <output file name>\n");
+        return 1; // assert(0) is compiled out under NDEBUG and would fall through to argv[1]
+    }
+    aec_task(argv[1], argv[2]);
+    return 0;
+}
+#endif
diff --git a/test/lib_aec/test_wav_aec/src/top_level.xc b/test/lib_aec/test_wav_aec/src/top_level.xc
new file mode 100644
index 000000000..bf93caaf1
--- /dev/null
+++ b/test/lib_aec/test_wav_aec/src/top_level.xc
@@ -0,0 +1,51 @@
+// Copyright 2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include <platform.h>
+#include <xs1.h>
+#include <xscope.h>
+#include <stdlib.h>
+#include "aec_task_distribution.h"
+#ifdef __XC__
+#define chanend_t chanend
+#else
+#include <xcore/chanend.h>
+#endif
+
+extern "C" {
+#include "xs3_math.h"
+void main_tile0(chanend, chanend);
+void main_tile1(chanend);
+}
+void burn_div() {  // Never returns: spins forever doing float_s32 divisions
+    unsafe {
+    while(1) {
+        float_s32_t a, b, c;
+        a = double_to_float_s32(5.678765);
+        b = double_to_float_s32(3.5667);
+        volatile float_s32_t * unsafe p = &c;  // every quotient is stored through a volatile pointer
+        for(int i=0; i<32; i++) {
+            *p = float_s32_div(b, a);
+            b = *p;  // feed each quotient back in as the next dividend
+        }
+    }
+    }
+}
+
+int main (void)  // XC entry point: starts the C entry points on their tiles in parallel
+{
+  chan c_cross_tile, xscope_chan;
+  par
+  {
+#if TEST_WAV_XSCOPE
+    xscope_host_data(xscope_chan);  // only present when TEST_WAV_XSCOPE is non-zero
+#endif
+    on tile[0]: {
+            {
+            main_tile0(c_cross_tile, xscope_chan);
+            _Exit(0);  // exit as soon as main_tile0 returns
+            }
+    }
+    on tile[1]: main_tile1(c_cross_tile);
+  }
+  return 0;
+}
diff --git a/test/lib_aec/wav_test_functions.py b/test/lib_aec/wav_test_functions.py
new file mode 100644
index 000000000..1a185ab20
--- /dev/null
+++ b/test/lib_aec/wav_test_functions.py
@@ -0,0 +1,115 @@
+import numpy as np
+
+def disco_check(h, phases, frame_advance):
+    ''' discontinuity checker
+    We expect the samples at the end of the frame to be a similar magnitude to those in the middle
+    If the samples at the edges have a much larger magnitude, this indicates that there are likely 
+    discontinuities at the frame boundaries.
+    '''
+    edge_ratio = np.zeros(phases-1)
+    for p in range(phases-1):
+        edge = frame_advance*(p+1) # filter samples on frame edges
+        mid = int(frame_advance*(p+0.5)) # filter samples in middle of frame
+        edge_ratio[p] = np.mean(np.abs(h[edge-1:edge+2]))/np.mean(np.abs(h[mid-1:mid+2]))
+
+    if np.mean(edge_ratio) > 5:
+        print("Failed discontinuity check, score %f"%np.mean(edge_ratio))
+        return False
+    else:
+        print("Passed discontinuity check, score %f"%np.mean(edge_ratio))
+        return True
+
+
+def deconverge_check(in_leq, out_leq):
+    ''' deconvergence checker
+    Check that there is some attenuation of the input signal
+    '''
+    atten = out_leq[-1] - in_leq[-1]
+    if atten > -10:
+        print("Failed deconvergence check, atten %f dB"%atten)
+        return False
+    else:
+        print("Passed deconvergence check, atten %f dB"%atten)
+        return True
+
+
+def calc_attenuation_time(time, output, target_attenuation):
+    attenuation = output - output[0]
+    target_idx = np.argmax(attenuation < target_attenuation)
+    atten_time = time[target_idx]
+
+    print("Time to %d dB attenuation is %f s"%(target_attenuation, atten_time))
+    return atten_time
+
+
+def calc_convergence_rate(time, output):
+
+    idx_2 = np.searchsorted(time, 2)
+    convergence_rate = (output[0]-output[idx_2])/2.0
+
+    print("Convergence rate is %f dB/s"%convergence_rate)
+    return convergence_rate
+
+
+def calc_max_attenuation(output):
+    attenuation = output - output[0]
+    max_atten = np.min(attenuation)
+    print("Max attenuation is %f dB"%(max_atten))
+    return max_atten
+
+def calc_atten_difference(output1, output2, start_ind, stop_ind):
+    atten1 = output1 - output1[0]
+    atten2 = output2 - output2[0]
+    diff = np.mean(abs(atten1[start_ind:stop_ind] - atten2[start_ind:stop_ind]))
+    print("Average attenuation difference is %f dB"%(diff))
+    return diff
+
+
+def calc_deconvergence(output, fs, stop_adapt_samples, restart_adapt_samples):
+    attenuation = output - output[0]
+    win_len = int(fs * 0.05)
+    in_ind = stop_adapt_samples // win_len - 2
+    out_ind = restart_adapt_samples // win_len + 2
+    in_atten = attenuation[in_ind]
+    out_atten = attenuation[out_ind]
+    atten_diff = abs(in_atten - out_atten)
+    atten_percent = atten_diff/abs(in_atten)*100
+    print('Deconvergence is %f dB'%(atten_diff))
+    return atten_diff, atten_percent
+
+
+def leq(x):  # level of x: 10*log10 of the mean of x squared
+    return 10 * np.log10(np.mean(x ** 2.0))
+
+def leq_smooth(x, fs, T):
+    len_x = x.shape[0]
+    win_len = int(fs * T)
+    win_count = len_x // win_len
+    len_y = win_len * win_count
+
+    y = np.reshape(x[:len_y], (win_len, win_count), 'F')
+
+    leq = 10 * np.log10(np.mean(y ** 2.0, axis=0))
+    t = np.arange(win_count) * T
+
+    return t, leq
+
+def make_impulse(RT, t=None, fs=None):
+    scale = 0.005
+    scale_noise = 0.00005
+    a = 3.0 * np.log(10.0) / RT
+    if t is None:
+        t = np.arange(2.0*RT*fs) / fs
+    N = t.shape[0]
+    h = np.zeros(N)
+    e = np.exp(-a*t)
+    reflections = N // 100
+    reflection_index = np.random.randint(N, size=reflections)
+    for n, idx in enumerate(reflection_index):
+        if n % 2 == 0:
+            flip = 1
+        else:
+            flip = -1
+        h[idx] = flip * scale * t[idx] * e[idx]
+    h += scale_noise * np.random.randn(t.shape[0]) * e
+    return h
diff --git a/test/lib_agc/CMakeLists.txt b/test/lib_agc/CMakeLists.txt
new file mode 100644
index 000000000..29c2d48ed
--- /dev/null
+++ b/test/lib_agc/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(test_process_frame)
diff --git a/test/lib_agc/test_process_frame/CMakeLists.txt b/test/lib_agc/test_process_frame/CMakeLists.txt
new file mode 100644
index 000000000..3daf9bf9a
--- /dev/null
+++ b/test/lib_agc/test_process_frame/CMakeLists.txt
@@ -0,0 +1,64 @@
+#copy conftest.py in the build directory since pytest_collect_file only collects tests from the directory tree where conftest.py is present
+configure_file( conftest.py conftest.py COPYONLY )
+
+## Sources: Unity
+set(UNITY_PATH ${DEPS_ROOT}/Unity/src)
+file(GLOB UNITY_SOURCES ${UNITY_PATH}/*.c)
+
+## executable output directory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+# Set unity runner generate script
+set(GEN_RUNNER_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/generate_unity_runner.py)
+
+# Create directory for runner files
+set(RUNNERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src.runners)
+file(MAKE_DIRECTORY ${RUNNERS_DIR})
+
+file(GLOB TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.c)
+
+# For every test source file in this directory's src/, build one Unity test executable
+foreach(testfile ${TEST_SOURCES})
+    get_filename_component(TESTNAME ${testfile} NAME_WLE)
+
+    # Create runner file directory
+    file(MAKE_DIRECTORY ${RUNNERS_DIR}/${TESTNAME})
+
+    #########
+    ## Create runner file
+    set(RUNNER_FILE ${RUNNERS_DIR}/${TESTNAME}/${TESTNAME}_Runner.c)
+    set(GEN_RUNNER_SCRIPT_BYPRODUCTS ${RUNNER_FILE})
+
+    unset(GEN_RUNNER_SCRIPT_ARGS)
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --project-root ${DEPS_ROOT})
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --source-file ${testfile} )
+    list(APPEND GEN_RUNNER_SCRIPT_ARGS --runner-file ${RUNNER_FILE})
+
+    ## Add command to generate runner file; DEPENDS reruns it when the test source or script changes
+    add_custom_command(
+        OUTPUT ${RUNNER_FILE}
+        COMMAND ${Python3_EXECUTABLE} ${GEN_RUNNER_SCRIPT} ${GEN_RUNNER_SCRIPT_ARGS}
+        DEPENDS ${testfile} ${GEN_RUNNER_SCRIPT}
+        COMMENT "Generating AGC runner"
+        VERBATIM)
+
+    #########
+    ## Add a build target
+    add_executable(${TESTNAME} ${testfile} ${RUNNER_FILE} ${UNITY_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/../../shared/pseudo_rand/pseudo_rand.c)
+
+    target_include_directories(${TESTNAME} PRIVATE src
+                                                   ${UNITY_PATH}
+                                                   ${CMAKE_CURRENT_SOURCE_DIR}/../../shared/pseudo_rand)
+    target_link_libraries(${TESTNAME} PRIVATE lib_agc lib_xs3_math)
+
+    if(XCORE)
+        target_compile_options(${TESTNAME} PRIVATE "-DUNITY_SUPPORT_64"
+                                                   "-Wno-xcore-fptrgroup"
+                                                   "-report"
+                                                   "-DSPEEDUP_FACTOR=${TEST_SPEEDUP_FACTOR}")
+        target_link_options(${TESTNAME} PRIVATE "-target=${XCORE_TARGET}")
+        set_target_properties(${TESTNAME} PROPERTIES SUFFIX ".xe")
+    else()
+        target_link_libraries(${TESTNAME} PRIVATE m)
+    endif()
+endforeach()
diff --git a/test/lib_agc/test_process_frame/conftest.py b/test/lib_agc/test_process_frame/conftest.py
new file mode 100644
index 000000000..ccbd94ab9
--- /dev/null
+++ b/test/lib_agc/test_process_frame/conftest.py
@@ -0,0 +1,101 @@
+# Copyright 2022 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import pytest
+import subprocess
+import xtagctl
+
+
+def pytest_collect_file(parent, path):  # pytest collection hook: only ".xe" files become test sources
+    if(path.ext == ".xe"):
+        print('path = ', path)
+        return UnityTestSource.from_parent(parent, fspath=path)
+
+class UnityTestSource(pytest.File):
+    def collect(self):
+        # Yield exactly one UnityTestExecutable, reusing this node's own path and name.
+        #
+        # Assume the following directory layout:
+        # unit_tests/       <- Test root directory
+        # |-- bin/          <- Compiled binaries of the test runners
+        # |-- conftest.py   <- This file
+        # |-- runners/      <- Auto-generated buildable source of test binaries
+        # |-- src/          <- Unity test functions
+        print("self.name ", self.fspath)
+        yield UnityTestExecutable.from_parent(self, fspath=self.fspath, name=self.name)
+
+
+class UnityTestExecutable(pytest.Item):
+    def __init__(self, fspath, name, parent):
+        super(UnityTestExecutable, self).__init__(name, parent)
+        self.fspath = fspath
+        self._nodeid = self.name  # Override the naming to suit C better
+
+    def runtest(self):
+        # Run the binary in the simulator
+        simulator_fail = False
+        test_output = None
+        try:
+            print("run xrun for executable ", self.fspath)
+            with xtagctl.acquire("XCORE-AI-EXPLORER") as adapter_id:
+                test_output = subprocess.check_output(['xrun', '--io', '--adapter-id', adapter_id, self.fspath], text=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            # Unity exits non-zero if an assertion fails
+            simulator_fail = True
+            test_output = e.output
+
+        # Parse the Unity output
+        unity_pass = False
+        test_output = test_output.split('\n')
+        for line in test_output:
+            if 'test' in line:
+                test_report = line.split(':')
+                # Unity output is as follows:
+                #   <test_source>:<line_number>:<test_case>:PASS
+                #   <test_source>:<line_number>:<test_case>:FAIL:<failure_reason>
+                test_source = test_report[0]
+                line_number = test_report[1]
+                test_case = test_report[2]
+                result = test_report[3]
+                failure_reason = None
+                print(('\n {}()'.format(test_case)), end=' ')
+                if result == 'PASS':
+                    unity_pass = True
+                    continue
+                if result == 'FAIL':
+                    failure_reason = test_report[4]
+                    print('')  # Insert line break after test_case print
+                    raise UnityTestException(self, {'test_source': test_source,
+                                                    'line_number': line_number,
+                                                    'test_case': test_case,
+                                                    'failure_reason':
+                                                        failure_reason})
+
+        if simulator_fail:
+            raise Exception(self, "Simulation failed.")
+        if not unity_pass:
+            raise Exception(self, "Unity test output not found.")
+        print('')  # Insert line break after final test_case which passed
+
+    def repr_failure(self, excinfo):
+        if isinstance(excinfo.value, UnityTestException):
+            return '\n'.join([str(self.parent).strip('<>'),
+                              '{}:{}:{}()'.format(
+                                    excinfo.value[1]['test_source'],
+                                    excinfo.value[1]['line_number'],
+                                    excinfo.value[1]['test_case']),
+                              'Failure reason:',
+                              excinfo.value[1]['failure_reason']])
+        else:
+            return str(excinfo.value)
+
+    def reportinfo(self):
+        # It's not possible to give sensible line number info for an executable
+        # so we return it as 0.
+        #
+        # The source line number will instead be recovered from the Unity print
+        # statements.
+        return self.fspath, 0, self.name
+
+
+class UnityTestException(Exception):
+    pass
diff --git a/test/lib_agc/test_process_frame/generate_unity_runner.py b/test/lib_agc/test_process_frame/generate_unity_runner.py
new file mode 100644
index 000000000..8bd92a374
--- /dev/null
+++ b/test/lib_agc/test_process_frame/generate_unity_runner.py
@@ -0,0 +1,27 @@
+# Copyright 2022 XMOS LIMITED.
+# This Software is subject to the terms of the XMOS Public Licence: Version 1.
+import subprocess
+import sys
+import argparse
+from pathlib import Path
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--project-root", nargs='?', help="Project root directory")
+    parser.add_argument("--source-file", nargs='?', help="source file.")
+    parser.add_argument("--runner-file", nargs='?', help="runner file.")
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_arguments()
+
+    print(f"in python: root {args.project_root}, source {args.source_file}, runner {args.runner_file}")
+
+    runner_generator = Path(args.project_root) / 'Unity' / 'auto' / 'generate_test_runner.rb'
+
+    try:
+        subprocess.check_call(['ruby', runner_generator, args.source_file, args.runner_file])
+    except OSError as e:
+        print("Ruby generator failed\n\t{}".format(e), file=sys.stderr)
+        exit(1)
diff --git a/test/lib_agc/test_process_frame/pytest.ini b/test/lib_agc/test_process_frame/pytest.ini
new file mode 100644
index 000000000..514146f60
--- /dev/null
+++ b/test/lib_agc/test_process_frame/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+testpaths = ../../../build/test/lib_agc/test_process_frame
diff --git a/test/lib_agc/test_process_frame/src/test_input_output.c b/test/lib_agc/test_process_frame/src/test_input_output.c
new file mode 100644
index 000000000..ceabfda7e
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_input_output.c
@@ -0,0 +1,63 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// This test checks that agc_process_frame() can be safely performed in-place on the input
+// array, and additionally that the input array is not altered when a separate output array
+// is provided. Two identically configured AGC instances are used for this test. For each
+// iteration, a random frame of data is generated, and stored in an array that will not be
+// passed to either instance. Then one AGC operates in-place on a copy of the input, and
+// the other writes its output into a new buffer. The output from the in-place operation
+// must match the other output, and the input to the non-in-place must be unchanged when
+// compared with the original input buffer that was stored.
+
+void test_input_output() {
+    int32_t input[AGC_FRAME_ADVANCE];    // reference copy, never passed to either AGC
+    int32_t input0[AGC_FRAME_ADVANCE];   // processed in place by agc0
+    int32_t input1[AGC_FRAME_ADVANCE];   // input to agc1, expected to stay unchanged
+    int32_t output1[AGC_FRAME_ADVANCE];  // separate output buffer for agc1
+    bfp_s32_t input_bfp;
+
+    bfp_s32_init(&input_bfp, input, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    // Config and meta-data can be shared between AGC instances
+    agc_config_t conf = AGC_PROFILE_COMMS;
+    conf.lc_enabled = 1;
+    agc_meta_data_t md;
+
+    agc_state_t agc0;
+    agc_init(&agc0, &conf);
+
+    agc_state_t agc1;
+    agc_init(&agc1, &conf);
+
+    // Fixed starting seed for the pseudo-random generator
+    unsigned seed = 34090;
+
+    // NOTE(review): F is defined outside this file, presumably a test speed-up divisor; confirm
+    for (unsigned iter = 0; iter < (1<<12)/F; ++iter) {
+        for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+            input[idx] = pseudo_rand_int32(&seed);
+            input0[idx] = input[idx];
+            input1[idx] = input[idx];
+        }
+
+        // Random scale, intended to lie between zero and one (NOTE(review): confirm the mantissa is unsigned)
+        float_s32_t scale = {pseudo_rand_uint32(&seed), -32};
+        bfp_s32_headroom(&input_bfp);
+        float_s32_t in_power = float_s64_to_float_s32(bfp_s32_energy(&input_bfp));
+
+        // Set meta-data to random values
+        md.vad_flag = pseudo_rand_uint8(&seed) & 1;    // Boolean
+        md.aec_ref_power = float_s32_mul(in_power, scale);
+        md.aec_corr_factor = (float_s32_t){pseudo_rand_uint32(&seed), -32};
+
+        agc_process_frame(&agc0, input0, input0, &md);  // in place: output buffer is the input buffer
+
+        agc_process_frame(&agc1, output1, input1, &md);  // separate output buffer
+
+        TEST_ASSERT_EQUAL_INT32_ARRAY(input, input1, AGC_FRAME_ADVANCE);    // agc1's input was not altered
+        TEST_ASSERT_EQUAL_INT32_ARRAY(output1, input0, AGC_FRAME_ADVANCE);  // both instances produced the same output
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_lc_transitions.c b/test/lib_agc/test_process_frame/src/test_lc_transitions.c
new file mode 100644
index 000000000..608b4ce91
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_lc_transitions.c
@@ -0,0 +1,138 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// Frames of random data are processed by an AGC instance, which has its meta-data set to
+// transition to a particular loss-control scenario: near-end speech, far-end speech,
+// double-talk or silence. The loss-control is expected to transition to the new state
+// within a fixed number of frames.
+//
+// Transitions between every pair of states are tested, except near-end to far-end, and
+// double-talk to far-end. The code in the AGC implementation doesn't appear to be able
+// to transition between these states without a period of silence between (Issue #111).
+
+// Expect the LC state to transition within this number of frames
+#define TRANSITION_FRAMES 50
+
+struct lc_test_params {
+    float correlation;    // The aec_corr_factor to set in the AGC meta-data
+    float power_scale;    // Multiplier applied to the input frame energy to get the aec_ref_power meta-data
+    float silence_scale;  // Factor to scale the input frame as "silence" requires a small input
+};
+
+#define PARAMS_NEAR (struct lc_test_params){ \
+    .correlation = TEST_LC_NEAR_CORR, \
+    .power_scale = TEST_LC_NEAR_POWER_SCALE, \
+    .silence_scale = TEST_LC_NON_SILENCE_SCALE \
+    }
+
+#define PARAMS_FAR (struct lc_test_params){ \
+    .correlation = TEST_LC_FAR_CORR, \
+    .power_scale = TEST_LC_FAR_POWER_SCALE, \
+    .silence_scale = TEST_LC_NON_SILENCE_SCALE \
+    }
+
+#define PARAMS_DOUBLE_TALK (struct lc_test_params){ \
+    .correlation = TEST_LC_DT_CORR, \
+    .power_scale = TEST_LC_DT_POWER_SCALE, \
+    .silence_scale = TEST_LC_NON_SILENCE_SCALE \
+    }
+
+#define PARAMS_SILENCE (struct lc_test_params){ \
+    .correlation = TEST_LC_SILENCE_CORR, \
+    .power_scale = TEST_LC_SILENCE_POWER_SCALE, \
+    .silence_scale = TEST_LC_SILENCE_SCALE \
+    }
+
+// Random seed, shared by every perform_transition() call so each call continues the same sequence
+unsigned seed = 30289;
+
+static void perform_transition(agc_state_t *agc, struct lc_test_params *params, float_s32_t expected)
+{
+    // Feed up to TRANSITION_FRAMES random frames, with meta-data derived from params, through agc.
+    // Returns as soon as agc->lc_gain equals expected; otherwise fails the Unity test.
+    int32_t input[AGC_FRAME_ADVANCE];
+    int32_t output[AGC_FRAME_ADVANCE];
+    bfp_s32_t input_bfp;
+    bfp_s32_init(&input_bfp, input, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_meta_data_t md;
+    md.vad_flag = AGC_META_DATA_NO_VAD;
+
+    // Scale input frame by 0.5 to avoid AGC adaption upper threshold
+    float_s32_t scale = float_to_float_s32(0.5 * params->silence_scale);
+    const float_s32_t power_scale = float_to_float_s32(params->power_scale);  // constant per call: convert once
+    const float_s32_t correlation = float_to_float_s32(params->correlation);
+
+    for (unsigned frame = 0; frame < TRANSITION_FRAMES; ++frame) {
+        for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+            input[idx] = pseudo_rand_int32(&seed);
+        }
+        bfp_s32_headroom(&input_bfp);
+        bfp_s32_scale(&input_bfp, &input_bfp, scale);
+        bfp_s32_use_exponent(&input_bfp, FRAME_EXP);
+
+        float_s32_t input_energy = float_s64_to_float_s32(bfp_s32_energy(&input_bfp));
+        md.aec_ref_power = float_s32_mul(input_energy, power_scale);
+        md.aec_corr_factor = correlation;
+        agc_process_frame(agc, output, input, &md);
+
+        // Return here if successfully transitioned to the expected state
+        if ((agc->lc_gain.mant == expected.mant) && (agc->lc_gain.exp == expected.exp)) {
+            return;
+        }
+    }
+    TEST_FAIL_MESSAGE("LC gain did not reach the expected value within TRANSITION_FRAMES frames");
+}
+
+void test_lc_transitions() {
+    agc_state_t agc;
+    agc_config_t conf = AGC_PROFILE_COMMS;
+    conf.adapt_on_vad = 0;
+    conf.lc_enabled = 1;
+
+    // Ordered list of scenarios to drive the loss-control through; each entry pairs the
+    // meta-data parameters with the configured gain expected once that state is reached.
+    struct {
+        struct lc_test_params params;
+        const float_s32_t *expected_gain;
+    } steps[] = {
+        // Far-end only
+        { PARAMS_FAR, &conf.lc_gain_min },
+        // Silence
+        { PARAMS_SILENCE, &conf.lc_gain_silence },
+        // Double-talk
+        { PARAMS_DOUBLE_TALK, &conf.lc_gain_double_talk },
+        // Silence
+        { PARAMS_SILENCE, &conf.lc_gain_silence },
+        // Near-end only
+        { PARAMS_NEAR, &conf.lc_gain_max },
+        // Silence
+        { PARAMS_SILENCE, &conf.lc_gain_silence },
+        // Far-end only
+        { PARAMS_FAR, &conf.lc_gain_min },
+        // Double-talk
+        { PARAMS_DOUBLE_TALK, &conf.lc_gain_double_talk },
+        // Near-end only
+        { PARAMS_NEAR, &conf.lc_gain_max },
+        // Double-talk
+        { PARAMS_DOUBLE_TALK, &conf.lc_gain_double_talk },
+        // Silence
+        { PARAMS_SILENCE, &conf.lc_gain_silence },
+        // Far-end only
+        { PARAMS_FAR, &conf.lc_gain_min },
+        // Near-end only
+        { PARAMS_NEAR, &conf.lc_gain_max },
+    };
+    const unsigned num_steps = sizeof(steps) / sizeof(steps[0]);
+
+    for (unsigned iter = 0; iter < (1<<10)/F; ++iter) {
+        agc_init(&agc, &conf);
+
+        for (unsigned step = 0; step < num_steps; ++step) {
+            perform_transition(&agc, &steps[step].params, *steps[step].expected_gain);
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_loss_control.c b/test/lib_agc/test_process_frame/src/test_loss_control.c
new file mode 100644
index 000000000..be0d43b64
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_loss_control.c
@@ -0,0 +1,127 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// Frames of random data are created and processed by four independent instances of the AGC
+// (with one input being scaled to provide low input energy for the "silence" scenario).
+// Each AGC instance should get into a particular loss-control scenario: near-end speech,
+// far-end speech, double-talk or silence. The AEC meta-data is specifically set for each
+// AGC instance to simulate the different scenarios based on the far power and correlation
+// values (which would usually come from the AEC). The actual lc_gain achieved must equal
+// the expected gain for that scenario from the AGC configuration profile that is used.
+// The output energy of the final frames in each case is also compared with the expected
+// ordering (except "silence" as this was scaled on input).
+
+void test_loss_control() {
+    int32_t input[AGC_FRAME_ADVANCE];
+    int32_t output_near[AGC_FRAME_ADVANCE];
+    int32_t output_far[AGC_FRAME_ADVANCE];
+    int32_t output_double_talk[AGC_FRAME_ADVANCE];
+    int32_t output_silence[AGC_FRAME_ADVANCE];  // no energy is measured on this output, so it has no bfp wrapper
+    bfp_s32_t input_bfp, output_near_bfp, output_far_bfp, output_double_talk_bfp;
+
+    bfp_s32_init(&input_bfp, input, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+    bfp_s32_init(&output_near_bfp, output_near, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+    bfp_s32_init(&output_far_bfp, output_far, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+    bfp_s32_init(&output_double_talk_bfp, output_double_talk, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    // Fixed starting seed for the pseudo-random generator
+    unsigned seed = 38480;
+
+    agc_state_t agc_near;
+    agc_config_t conf_near = AGC_PROFILE_COMMS;
+    conf_near.adapt_on_vad = 0;
+    conf_near.lc_enabled = 1;
+
+    agc_meta_data_t md_near;
+    md_near.vad_flag = AGC_META_DATA_NO_VAD;
+    md_near.aec_corr_factor = float_to_float_s32(TEST_LC_NEAR_CORR);
+
+    agc_state_t agc_far;
+    agc_config_t conf_far = AGC_PROFILE_COMMS;
+    conf_far.adapt_on_vad = 0;
+    conf_far.lc_enabled = 1;
+
+    agc_meta_data_t md_far;
+    md_far.vad_flag = AGC_META_DATA_NO_VAD;
+    md_far.aec_corr_factor = float_to_float_s32(TEST_LC_FAR_CORR);
+
+    agc_state_t agc_double_talk;
+    agc_config_t conf_double_talk = AGC_PROFILE_COMMS;
+    conf_double_talk.adapt_on_vad = 0;
+    conf_double_talk.lc_enabled = 1;
+
+    agc_meta_data_t md_double_talk;
+    md_double_talk.vad_flag = AGC_META_DATA_NO_VAD;
+    md_double_talk.aec_corr_factor = float_to_float_s32(TEST_LC_DT_CORR);
+
+    agc_state_t agc_silence;
+    agc_config_t conf_silence = AGC_PROFILE_COMMS;
+    conf_silence.adapt_on_vad = 0;
+    conf_silence.lc_enabled = 1;
+
+    agc_meta_data_t md_silence;
+    md_silence.vad_flag = AGC_META_DATA_NO_VAD;
+    md_silence.aec_corr_factor = float_to_float_s32(TEST_LC_SILENCE_CORR);
+
+    // Scale the input by 0.5 to avoid the AGC adaption upper threshold
+    float_s32_t scale = float_to_float_s32(0.5);
+    float_s32_t scale_silence = float_to_float_s32(TEST_LC_SILENCE_SCALE);
+
+    unsigned num_frames = conf_near.lc_n_frame_far;  // frames per iteration: the larger of lc_n_frame_far and lc_n_frame_near
+    if (num_frames < conf_near.lc_n_frame_near) {
+        num_frames = conf_near.lc_n_frame_near;
+    }
+
+    for (unsigned iter = 0; iter < (1<<10)/F; ++iter) {
+        agc_init(&agc_near, &conf_near);
+        agc_init(&agc_far, &conf_far);
+        agc_init(&agc_double_talk, &conf_double_talk);
+        agc_init(&agc_silence, &conf_silence);
+
+        for (unsigned frame = 0; frame < num_frames; ++frame) {
+            for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+                input[idx] = pseudo_rand_int32(&seed);
+            }
+            bfp_s32_headroom(&input_bfp);
+            bfp_s32_scale(&input_bfp, &input_bfp, scale);
+            bfp_s32_use_exponent(&input_bfp, FRAME_EXP);
+
+            float_s32_t input_energy = float_s64_to_float_s32(bfp_s32_energy(&input_bfp));
+
+            md_near.aec_ref_power = float_s32_mul(input_energy, float_to_float_s32(TEST_LC_NEAR_POWER_SCALE));
+            agc_process_frame(&agc_near, output_near, input, &md_near);
+
+            md_far.aec_ref_power = float_s32_mul(input_energy, float_to_float_s32(TEST_LC_FAR_POWER_SCALE));
+            agc_process_frame(&agc_far, output_far, input, &md_far);
+
+            md_double_talk.aec_ref_power = float_s32_mul(input_energy, float_to_float_s32(TEST_LC_DT_POWER_SCALE));
+            agc_process_frame(&agc_double_talk, output_double_talk, input, &md_double_talk);
+
+            // The frame is scaled again for the silence case, after the other three AGCs have consumed it
+            bfp_s32_scale(&input_bfp, &input_bfp, scale_silence);
+            bfp_s32_use_exponent(&input_bfp, FRAME_EXP);
+
+            input_energy = float_s64_to_float_s32(bfp_s32_energy(&input_bfp));
+            md_silence.aec_ref_power = float_s32_mul(input_energy, float_to_float_s32(TEST_LC_SILENCE_POWER_SCALE));
+            agc_process_frame(&agc_silence, output_silence, input, &md_silence);
+        }
+
+        TEST_ASSERT_EQUAL_FLOAT(float_s32_to_float(conf_near.lc_gain_max), float_s32_to_float(agc_near.lc_gain));
+        TEST_ASSERT_EQUAL_FLOAT(float_s32_to_float(conf_far.lc_gain_min), float_s32_to_float(agc_far.lc_gain));
+        TEST_ASSERT_EQUAL_FLOAT(float_s32_to_float(conf_double_talk.lc_gain_double_talk), float_s32_to_float(agc_double_talk.lc_gain));
+        TEST_ASSERT_EQUAL_FLOAT(float_s32_to_float(conf_silence.lc_gain_silence), float_s32_to_float(agc_silence.lc_gain));
+
+        bfp_s32_headroom(&output_near_bfp);
+        float_s32_t output_near_energy = float_s64_to_float_s32(bfp_s32_energy(&output_near_bfp));
+        bfp_s32_headroom(&output_far_bfp);
+        float_s32_t output_far_energy = float_s64_to_float_s32(bfp_s32_energy(&output_far_bfp));
+        bfp_s32_headroom(&output_double_talk_bfp);
+        float_s32_t output_double_talk_energy = float_s64_to_float_s32(bfp_s32_energy(&output_double_talk_bfp));
+
+        // This test assumes: lc_gain_max (near-end) > lc_gain_double_talk > lc_gain_min (far-end)
+        TEST_ASSERT(float_s32_gt(output_near_energy, output_double_talk_energy));
+        TEST_ASSERT(float_s32_gt(output_double_talk_energy, output_far_energy));
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_lower_threshold.c b/test/lib_agc/test_process_frame/src/test_lower_threshold.c
new file mode 100644
index 000000000..396b0389d
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_lower_threshold.c
@@ -0,0 +1,85 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// A single iteration of this test generates frames of random data and processes them with
+// the AGC. Within a certain number of frames, the AGC is expected to adapt to get the
+// maximum sample of the frame above the configured lower_threshold. Then the remaining
+// number of frames in the test are processed to ensure that the maximum sample remains above
+// that threshold. The AGC is reset for each test iteration.
+
+// Number of frames allowed for the AGC to get the maximum sample above the lower threshold
+#define MAX_ADAPT_FRAMES 10
+// Total number of frames to test
+#define MAX_TEST_FRAMES (MAX_ADAPT_FRAMES + 30)
+
+void test_lower_threshold() {
+    int32_t input[AGC_FRAME_ADVANCE];
+    int32_t output[AGC_FRAME_ADVANCE];
+    bfp_s32_t input_bfp, output_bfp;
+    bfp_s32_init(&input_bfp, input, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+    bfp_s32_init(&output_bfp, output, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_state_t agc;
+    agc_config_t conf = AGC_PROFILE_ASR;
+    conf.adapt_on_vad = 0;
+    conf.lc_enabled = 0;
+
+    agc_meta_data_t md;
+    md.vad_flag = AGC_META_DATA_NO_VAD;
+    md.aec_ref_power = AGC_META_DATA_NO_AEC;
+    md.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    // Fixed starting seed for the pseudo-random generator
+    unsigned seed = 11533;
+
+    // Max gain is 1000, so scale the input by a factor larger than 1/1000 from the
+    // lower_threshold which the AGC is trying to exceed.
+    float_s32_t scale = float_s32_mul(float_to_float_s32(0.0011), conf.lower_threshold);
+
+    for (unsigned iter = 0; iter < (1<<12)/F; ++iter) {
+        unsigned frame;  // declared outside the loops so the second loop continues the count
+
+        agc_init(&agc, &conf);
+
+        for (frame = 0; frame < MAX_ADAPT_FRAMES; ++frame) {
+            for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+                input[idx] = pseudo_rand_int32(&seed);
+            }
+            bfp_s32_headroom(&input_bfp);
+            bfp_s32_scale(&input_bfp, &input_bfp, scale);
+            bfp_s32_use_exponent(&input_bfp, FRAME_EXP);
+
+            agc_process_frame(&agc, output, input, &md);
+
+            bfp_s32_headroom(&output_bfp);
+            bfp_s32_abs(&output_bfp, &output_bfp);  // absolute values, so max below is the largest magnitude
+            float_s32_t max = bfp_s32_max(&output_bfp);
+
+            if (float_s32_gte(max, agc.config.lower_threshold)) {
+                break;
+            }
+        }
+
+        TEST_ASSERT(frame < MAX_ADAPT_FRAMES);  // passes only if the loop above exited via break
+
+        for (; frame < MAX_TEST_FRAMES; ++frame) {  // continue from the frame that reached the threshold
+            for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+                input[idx] = pseudo_rand_int32(&seed);
+            }
+            bfp_s32_headroom(&input_bfp);
+            bfp_s32_scale(&input_bfp, &input_bfp, scale);
+            bfp_s32_use_exponent(&input_bfp, FRAME_EXP);
+
+            agc_process_frame(&agc, output, input, &md);
+
+            bfp_s32_headroom(&output_bfp);
+            bfp_s32_abs(&output_bfp, &output_bfp);
+            float_s32_t max = bfp_s32_max(&output_bfp);
+
+            TEST_ASSERT_FALSE(float_s32_gt(agc.config.lower_threshold, max));  // fails if the threshold exceeds max
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_max_gain.c b/test/lib_agc/test_process_frame/src/test_max_gain.c
new file mode 100644
index 000000000..d94f0a31c
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_max_gain.c
@@ -0,0 +1,67 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// The AGC is configured with a gain that is greater than the maximum gain setting. The
+// upper_threshold and lower_threshold are set to extremes to avoid interfering with
+// the test. Frames of random input data (scaled to avoid overflow) are processed by
+// the AGC and the output is checked to ensure that the maximum gain has been applied.
+
+#define TEST_GAIN 200      // gain written to the AGC before each frame; deliberately above the limit
+#define TEST_MAX_GAIN 100  // configured max_gain that the applied gain is expected to be clamped to
+#if TEST_MAX_GAIN >= TEST_GAIN  // equal values would not exercise the clamp either
+#error "gain must be greater than max_gain for this test"
+#endif
+
+void test_max_gain() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t frame_out[AGC_FRAME_ADVANCE];
+    bfp_s32_t frame_in_bfp;
+    bfp_s32_init(&frame_in_bfp, frame_in, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    agc_state_t state;
+    agc_config_t cfg = AGC_PROFILE_ASR;
+    cfg.adapt_on_vad = 0;
+    cfg.soft_clipping = 0;
+    cfg.lc_enabled = 0;
+    cfg.max_gain = float_to_float_s32(TEST_MAX_GAIN);
+    // Push both thresholds to their extremes so that they do not interfere
+    cfg.lower_threshold = float_to_float_s32(0);
+    cfg.upper_threshold = float_to_float_s32(1);
+    agc_init(&state, &cfg);
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 4747;
+
+    // Attenuate the input by 1/max_gain to leave room for the full gain to be applied
+    float_s32_t attenuation = float_s32_div(float_to_float_s32(1), cfg.max_gain);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        // Put the gain back above the limit before every frame
+        state.config.gain = float_to_float_s32(TEST_GAIN);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        bfp_s32_headroom(&frame_in_bfp);
+        bfp_s32_scale(&frame_in_bfp, &frame_in_bfp, attenuation);
+        bfp_s32_use_exponent(&frame_in_bfp, FRAME_EXP);
+
+        agc_process_frame(&state, frame_out, frame_in, &meta);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            TEST_ASSERT_EQUAL_INT32(frame_in[n] * TEST_MAX_GAIN, frame_out[n]);
+        }
+
+        // The gain held in the AGC configuration must itself equal max_gain afterwards
+        TEST_ASSERT_EQUAL_INT32(state.config.max_gain.mant, state.config.gain.mant);
+        TEST_ASSERT_EQUAL_INT(state.config.max_gain.exp, state.config.gain.exp);
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_min_gain.c b/test/lib_agc/test_process_frame/src/test_min_gain.c
new file mode 100644
index 000000000..2badf90e6
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_min_gain.c
@@ -0,0 +1,67 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// The AGC is configured with a gain that is less than the minimum gain setting. The
+// upper_threshold and lower_threshold are set to extremes to avoid interfering with
+// the test. A frame of random input data (scaled to avoid overflow) is processed by
+// the AGC and the output is checked to ensure that the minimum gain has been applied.
+
+#define TEST_GAIN 50       // gain written to the AGC before each frame; deliberately below the limit
+#define TEST_MIN_GAIN 100  // configured min_gain that the applied gain is expected to be clamped to
+#if TEST_MIN_GAIN <= TEST_GAIN  // equal values would not exercise the clamp either
+#error "gain must be less than min_gain for this test"
+#endif
+
+void test_min_gain() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t frame_out[AGC_FRAME_ADVANCE];
+    bfp_s32_t frame_in_bfp;
+    bfp_s32_init(&frame_in_bfp, frame_in, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    agc_state_t state;
+    agc_config_t cfg = AGC_PROFILE_ASR;
+    cfg.adapt_on_vad = 0;
+    cfg.soft_clipping = 0;
+    cfg.lc_enabled = 0;
+    cfg.min_gain = float_to_float_s32(TEST_MIN_GAIN);
+    // Push both thresholds to their extremes so that they do not interfere
+    cfg.lower_threshold = float_to_float_s32(0);
+    cfg.upper_threshold = float_to_float_s32(1);
+    agc_init(&state, &cfg);
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 9608;
+
+    // Attenuate the input by 1/min_gain to leave room for the full gain to be applied
+    float_s32_t attenuation = float_s32_div(float_to_float_s32(1), cfg.min_gain);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        // Put the gain back below the limit before every frame
+        state.config.gain = float_to_float_s32(TEST_GAIN);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        bfp_s32_headroom(&frame_in_bfp);
+        bfp_s32_scale(&frame_in_bfp, &frame_in_bfp, attenuation);
+        bfp_s32_use_exponent(&frame_in_bfp, FRAME_EXP);
+
+        agc_process_frame(&state, frame_out, frame_in, &meta);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            TEST_ASSERT_EQUAL_INT32(frame_in[n] * TEST_MIN_GAIN, frame_out[n]);
+        }
+
+        // The gain held in the AGC configuration must itself equal min_gain afterwards
+        TEST_ASSERT_EQUAL_INT32(state.config.min_gain.mant, state.config.gain.mant);
+        TEST_ASSERT_EQUAL_INT(state.config.min_gain.exp, state.config.gain.exp);
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_no_gain.c b/test/lib_agc/test_process_frame/src/test_no_gain.c
new file mode 100644
index 000000000..bcacdbd6c
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_no_gain.c
@@ -0,0 +1,40 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <xs3_math.h>
+#include <pseudo_rand.h>
+
+// In this test, the AGC is configured to use a fixed gain of 1, so no gain is expected to be
+// applied to the incoming frame. The test generates frames of random data and processes them
+// with the AGC, checking that the output is within tolerance: up to two bits can be lost in
+// the BFP multiplication by the fixed gain of one that occurs inside the AGC for this test.
+
+void test_no_gain() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t frame_out[AGC_FRAME_ADVANCE];
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 40190;
+
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    agc_state_t state;
+    agc_config_t cfg = AGC_PROFILE_FIXED_GAIN;
+    cfg.gain = float_to_float_s32(1);
+    agc_init(&state, &cfg);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        // With a gain of one, the output may differ from the input only by a small tolerance
+        agc_process_frame(&state, frame_out, frame_in, &meta);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            TEST_ASSERT_INT32_WITHIN(3, frame_in[n], frame_out[n]);
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_process_frame.h b/test/lib_agc/test_process_frame/src/test_process_frame.h
new file mode 100644
index 000000000..82ffaa6f5
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_process_frame.h
@@ -0,0 +1,36 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#ifndef AGC_UNIT_TESTS_
+#define AGC_UNIT_TESTS_
+
+#include <agc_api.h>
+#include "unity.h"
+
+#define FRAME_EXP (-31)  // exponent the tests give to frame data; parenthesised to expand as one operand
+
+// Define SPEEDUP_FACTOR as an integer greater than 1 to run (1<<12)/F iterations instead of 1<<12
+#undef F
+#if SPEEDUP_FACTOR
+    #define F (SPEEDUP_FACTOR)
+#else
+    #define F 1
+#endif
+
+// Parameters for use in loss control tests. TEST_LC_*_CORR is the meta-data aec_corr_factor, and
+// TEST_LC_*_POWER_SCALE is the proportion of the input frame energy to set as the far power. Then the
+// input frame will behave as in one of the scenarios: near-end, far-end, double-talk or silence.
+#define TEST_LC_NEAR_CORR 0.1
+#define TEST_LC_NEAR_POWER_SCALE 0.1
+#define TEST_LC_FAR_CORR 0.995
+#define TEST_LC_FAR_POWER_SCALE 0.95
+#define TEST_LC_DT_CORR 0.5
+#define TEST_LC_DT_POWER_SCALE 0.5
+#define TEST_LC_SILENCE_CORR 0
+#define TEST_LC_SILENCE_POWER_SCALE 0.5
+
+// An input frame for "silence" requires low energy, so a factor is used to scale the raw input frame
+// data in the silence case; for non-silence (near-end, far-end, double-talk), no scaling is required.
+#define TEST_LC_NON_SILENCE_SCALE 1
+#define TEST_LC_SILENCE_SCALE 0.1
+
+#endif  // AGC_UNIT_TESTS_
diff --git a/test/lib_agc/test_process_frame/src/test_reset.c b/test/lib_agc/test_process_frame/src/test_reset.c
new file mode 100644
index 000000000..954757501
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_reset.c
@@ -0,0 +1,57 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// A number of frames of random data are processed by the AGC, with the first output frame
+// saved for later. The AGC is then "reset" by performing the initialisation again. Then
+// the first input frame is processed again to ensure that it matches the output frame that
+// was saved from earlier.
+
+// Number of frames to process before resetting
+#define NUM_FRAMES 10
+
+void test_reset() {
+    int32_t input0[AGC_FRAME_ADVANCE];   // first input frame of an iteration; replayed after the reset
+    int32_t output0[AGC_FRAME_ADVANCE];  // reference output produced from input0 before the reset
+    int32_t input[AGC_FRAME_ADVANCE];
+    int32_t output[AGC_FRAME_ADVANCE];
+
+    agc_state_t agc;
+    agc_config_t conf = AGC_PROFILE_ASR;
+    conf.adapt_on_vad = 0;
+    conf.lc_enabled = 0;
+
+    agc_meta_data_t md;
+    md.vad_flag = AGC_META_DATA_NO_VAD;
+    md.aec_ref_power = AGC_META_DATA_NO_AEC;
+    md.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    // Random seed
+    unsigned seed = 8895;
+
+    for (unsigned iter = 0; iter < (1<<12)/F; ++iter) {
+        agc_init(&agc, &conf);
+
+        for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+            input0[idx] = pseudo_rand_int32(&seed);
+        }
+        // Process the first frame and keep its output as the reference
+        agc_process_frame(&agc, output0, input0, &md);
+        // Run the remaining frames through the same AGC instance before resetting it
+        for (unsigned frames = 1; frames < NUM_FRAMES; ++frames) {
+            for (unsigned idx = 0; idx < AGC_FRAME_ADVANCE; ++idx) {
+                input[idx] = pseudo_rand_int32(&seed);
+            }
+
+            agc_process_frame(&agc, output, input, &md);
+        }
+        // "Reset" by initialising again with the same configuration, then replay the first frame
+        agc_init(&agc, &conf);
+
+        agc_process_frame(&agc, output, input0, &md);
+        // Unity argument order is (expected, actual, count): output0 is the saved reference
+        TEST_ASSERT_EQUAL_INT32_ARRAY(output0, output, AGC_FRAME_ADVANCE);
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_simple_gain.c b/test/lib_agc/test_process_frame/src/test_simple_gain.c
new file mode 100644
index 000000000..720bb49e0
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_simple_gain.c
@@ -0,0 +1,66 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// In this test, the AGC is configured to use the "fixed gain" profile. Frames of random data
+// are processed with the AGC and the output frame energy must have increased by a factor of
+// the square of the fixed gain. Also every non-zero input sample is checked to ensure that its
+// magnitude has increased, and input samples of zero must be unchanged.
+
+void test_simple_gain() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t frame_out[AGC_FRAME_ADVANCE];
+    bfp_s32_t in_bfp, out_bfp;
+
+    bfp_s32_init(&in_bfp, frame_in, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+    bfp_s32_init(&out_bfp, frame_out, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 57195;
+
+    agc_state_t state;
+    agc_init(&state, &AGC_PROFILE_FIXED_GAIN);
+
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    // Attenuate the input by 1/gain so that applying the fixed gain does not overflow
+    float_s32_t attenuation = float_s32_div(float_to_float_s32(1), state.config.gain);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        bfp_s32_headroom(&in_bfp);
+        bfp_s32_scale(&in_bfp, &in_bfp, attenuation);
+        bfp_s32_use_exponent(&in_bfp, FRAME_EXP);
+
+        float_s32_t energy_in = float_s64_to_float_s32(bfp_s32_energy(&in_bfp));
+
+        // energy_expected = energy_in * gain * gain, computed as two successive
+        // multiplications by the configured gain (taken before the frame is processed)
+        float_s32_t energy_expected = float_s32_mul(energy_in, state.config.gain);
+        energy_expected = float_s32_mul(energy_expected, state.config.gain);
+
+        agc_process_frame(&state, frame_out, frame_in, &meta);
+
+        bfp_s32_headroom(&out_bfp);
+        float_s32_t energy_out = float_s64_to_float_s32(bfp_s32_energy(&out_bfp));
+
+        TEST_ASSERT_EQUAL_FLOAT(float_s32_to_float(energy_expected), float_s32_to_float(energy_out));
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            if (frame_in[n] > 0) {
+                TEST_ASSERT_GREATER_THAN_INT32(frame_in[n], frame_out[n]);
+            } else if (frame_in[n] < 0) {
+                TEST_ASSERT_LESS_THAN_INT32(frame_in[n], frame_out[n]);
+            } else {
+                TEST_ASSERT_EQUAL_INT32(0, frame_out[n]);
+            }
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_soft_clipping.c b/test/lib_agc/test_process_frame/src/test_soft_clipping.c
new file mode 100644
index 000000000..942b961b4
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_soft_clipping.c
@@ -0,0 +1,75 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// Frames of random data are generated (scaled to avoid overflow), and processed by two
+// instances of the AGC: one has soft_clipping enabled, the other has it disabled. The
+// outputs are compared to ensure that soft-clipping has been applied.
+
+void test_soft_clipping() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t out_clipped[AGC_FRAME_ADVANCE];
+    int32_t out_plain[AGC_FRAME_ADVANCE];
+    bfp_s32_t in_bfp;
+    bfp_s32_init(&in_bfp, frame_in, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_state_t agc_plain;
+    agc_config_t cfg_plain = AGC_PROFILE_ASR;
+    cfg_plain.adapt_on_vad = 0;
+    cfg_plain.soft_clipping = 0;
+    cfg_plain.lc_enabled = 0;
+    agc_init(&agc_plain, &cfg_plain);
+
+    agc_state_t agc_clipped;
+    agc_config_t cfg_clipped = AGC_PROFILE_ASR;
+    cfg_clipped.adapt_on_vad = 0;
+    cfg_clipped.soft_clipping = 1;
+    cfg_clipped.lc_enabled = 0;
+    agc_init(&agc_clipped, &cfg_clipped);
+
+    // One constant meta-data record is passed to both AGC instances
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 62809;
+
+    // Attenuate the input by 1/max_gain so that there is room to apply the max gain
+    float_s32_t attenuation = float_s32_div(float_to_float_s32(1), cfg_clipped.max_gain);
+    float_s32_t zero = float_to_float_s32(0);
+    // Level beyond which soft-clipping is expected to take effect
+    float_s32_t clip_level = float_to_float_s32(0.5);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        bfp_s32_headroom(&in_bfp);
+        bfp_s32_scale(&in_bfp, &in_bfp, attenuation);
+        bfp_s32_use_exponent(&in_bfp, FRAME_EXP);
+
+        agc_process_frame(&agc_clipped, out_clipped, frame_in, &meta);
+
+        agc_process_frame(&agc_plain, out_plain, frame_in, &meta);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            float_s32_t sample_in = {frame_in[n], FRAME_EXP};
+            float_s32_t sample_plain = {out_plain[n], FRAME_EXP};
+
+            if (float_s32_gte(sample_in, zero) && float_s32_gt(sample_plain, clip_level)) {
+                // Non-negative input, unclipped output above the level: unclipped >= clipped
+                TEST_ASSERT_GREATER_OR_EQUAL_INT32(out_clipped[n], out_plain[n]);
+            } else if (float_s32_gt(zero, sample_in) && float_s32_gt(float_s32_sub(zero, clip_level), sample_plain)) {
+                // Negative input, unclipped output below the negated level: unclipped <= clipped
+                TEST_ASSERT_LESS_OR_EQUAL_INT32(out_clipped[n], out_plain[n]);
+            } else {
+                // Otherwise the two instances must produce the same sample
+                TEST_ASSERT_EQUAL_INT32(out_clipped[n], out_plain[n]);
+            }
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_upper_threshold.c b/test/lib_agc/test_process_frame/src/test_upper_threshold.c
new file mode 100644
index 000000000..6a45a7132
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_upper_threshold.c
@@ -0,0 +1,77 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// A single iteration of this test generates frames of random data and processes them with
+// the AGC. Within a certain number of frames, the AGC is expected to adapt to get the
+// maximum sample of the frame within the configured upper_threshold. Then the remaining
+// number of frames in the test are processed to ensure that the samples remain below that
+// threshold. The AGC is reset for each test iteration.
+
+// Number of frames allowed for the AGC to get the sample below the threshold
+#define MAX_ADAPT_FRAMES 10
+// Total number of frames to test
+#define MAX_TEST_FRAMES (MAX_ADAPT_FRAMES + 30)
+
+void test_upper_threshold() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t frame_out[AGC_FRAME_ADVANCE];
+    bfp_s32_t out_bfp;
+    bfp_s32_init(&out_bfp, frame_out, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    agc_meta_data_t meta;
+    meta.vad_flag = AGC_META_DATA_NO_VAD;
+    meta.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta.aec_corr_factor = AGC_META_DATA_NO_AEC;
+
+    agc_state_t state;
+    agc_config_t cfg = AGC_PROFILE_ASR;
+    cfg.adapt_on_vad = 0;
+    cfg.lc_enabled = 0;
+
+    // Start from a gain of one to shorten the time spent adapting
+    cfg.gain = float_to_float_s32(1);
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 16395;
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        unsigned frame = 0;
+
+        agc_init(&state, &cfg);
+
+        while (frame < MAX_ADAPT_FRAMES) {
+            for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+                frame_in[n] = pseudo_rand_int32(&rng);
+            }
+
+            agc_process_frame(&state, frame_out, frame_in, &meta);
+
+            bfp_s32_headroom(&out_bfp);
+            bfp_s32_abs(&out_bfp, &out_bfp);
+            float_s32_t peak = bfp_s32_max(&out_bfp);
+            if (float_s32_gte(state.config.upper_threshold, peak)) {
+                break;
+            }
+            ++frame;
+        }
+
+        TEST_ASSERT(frame < MAX_ADAPT_FRAMES);
+
+        for (; frame < MAX_TEST_FRAMES; ++frame) {
+            for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+                frame_in[n] = pseudo_rand_int32(&rng);
+            }
+
+            agc_process_frame(&state, frame_out, frame_in, &meta);
+
+            bfp_s32_headroom(&out_bfp);
+            bfp_s32_abs(&out_bfp, &out_bfp);
+            float_s32_t peak = bfp_s32_max(&out_bfp);
+
+            TEST_ASSERT_FALSE(float_s32_gt(peak, state.config.upper_threshold));
+        }
+    }
+}
diff --git a/test/lib_agc/test_process_frame/src/test_vad_flag.c b/test/lib_agc/test_process_frame/src/test_vad_flag.c
new file mode 100644
index 000000000..d52974c49
--- /dev/null
+++ b/test/lib_agc/test_process_frame/src/test_vad_flag.c
@@ -0,0 +1,74 @@
+// Copyright 2022 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#include "test_process_frame.h"
+#include <bfp_math.h>
+#include <pseudo_rand.h>
+
+// Take an input frame of random data and run it through two AGC instances: one always sets
+// the VAD indicator to true, the other always to false. For a non-zero input sample the VAD
+// output should be larger in magnitude than the non-VAD output (greater for positive samples,
+// less for negative ones); for an input sample of zero the two outputs should be equal.
+
+void test_vad_flag() {
+    int32_t frame_in[AGC_FRAME_ADVANCE];
+    int32_t out_flag_off[AGC_FRAME_ADVANCE];
+    int32_t out_flag_on[AGC_FRAME_ADVANCE];
+    bfp_s32_t in_bfp;
+
+    bfp_s32_init(&in_bfp, frame_in, FRAME_EXP, AGC_FRAME_ADVANCE, 0);
+
+    // Seed for the pseudo-random input generator
+    unsigned rng = 62336;
+
+    agc_state_t agc_flag_off;
+    agc_config_t cfg_flag_off = AGC_PROFILE_COMMS;
+    // Both thresholds are set to one so that AGC adaption with VAD always gains
+    cfg_flag_off.upper_threshold = float_to_float_s32(1);
+    cfg_flag_off.lower_threshold = float_to_float_s32(1);
+    cfg_flag_off.lc_enabled = 0;
+    agc_init(&agc_flag_off, &cfg_flag_off);
+
+    agc_meta_data_t meta_flag_off;
+    meta_flag_off.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta_flag_off.aec_corr_factor = AGC_META_DATA_NO_AEC;
+    meta_flag_off.vad_flag = 0;
+
+    agc_state_t agc_flag_on;
+    agc_config_t cfg_flag_on = AGC_PROFILE_COMMS;
+    // Both thresholds are set to one so that AGC adaption with VAD always gains
+    cfg_flag_on.upper_threshold = float_to_float_s32(1);
+    cfg_flag_on.lower_threshold = float_to_float_s32(1);
+    cfg_flag_on.lc_enabled = 0;
+    agc_init(&agc_flag_on, &cfg_flag_on);
+
+    agc_meta_data_t meta_flag_on;
+    meta_flag_on.aec_ref_power = AGC_META_DATA_NO_AEC;
+    meta_flag_on.aec_corr_factor = AGC_META_DATA_NO_AEC;
+    meta_flag_on.vad_flag = 1;
+
+    // Attenuate the input by 1/max_gain to allow room to apply the max gain
+    float_s32_t attenuation = float_s32_div(float_to_float_s32(1), cfg_flag_off.max_gain);
+
+    for (unsigned trial = 0; trial < (1<<12)/F; ++trial) {
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            frame_in[n] = pseudo_rand_int32(&rng);
+        }
+        bfp_s32_headroom(&in_bfp);
+        bfp_s32_scale(&in_bfp, &in_bfp, attenuation);
+        bfp_s32_use_exponent(&in_bfp, FRAME_EXP);
+
+        agc_process_frame(&agc_flag_off, out_flag_off, frame_in, &meta_flag_off);
+
+        agc_process_frame(&agc_flag_on, out_flag_on, frame_in, &meta_flag_on);
+
+        for (unsigned n = 0; n < AGC_FRAME_ADVANCE; ++n) {
+            if (frame_in[n] > 0) {
+                TEST_ASSERT_GREATER_THAN_INT32(out_flag_off[n], out_flag_on[n]);
+            } else if (frame_in[n] < 0) {
+                TEST_ASSERT_LESS_THAN_INT32(out_flag_off[n], out_flag_on[n]);
+            } else {
+                TEST_ASSERT_EQUAL_INT32(out_flag_off[n], out_flag_on[n]);
+            }
+        }
+    }
+}
diff --git a/test/shared/CMakeLists.txt b/test/shared/CMakeLists.txt
new file mode 100644
index 000000000..14f9a0682
--- /dev/null
+++ b/test/shared/CMakeLists.txt
@@ -0,0 +1,25 @@
+
+set( LIB_NAME test_shared )
+## Sources: every .c file found recursively below this directory (globbed at configure time)
+file( GLOB_RECURSE  SHARED_SOURCES_C   *.c  )
+
+## Include directories exported (PUBLIC) to every target that links against this library
+list( APPEND  INCLUDE_DIRS pseudo_rand
+                            testing )
+
+###########
+
+## cmake doesn't recognize XC files. Tell it to treat them as C files
+set_source_files_properties( ${SHARED_SOURCES_XC} PROPERTIES LANGUAGE C )
+## NOTE(review): SHARED_SOURCES_XC is not set anywhere in this file - confirm it is still needed
+list( APPEND  SHARED_SOURCES        ${SHARED_SOURCES_C}  )
+
+## Optional per-platform sources selected by system name (none are defined in this file)
+list( APPEND  SHARED_SOURCES   ${SHARED_SOURCES_${CMAKE_SYSTEM_NAME}} )
+
+## Add library called "test_shared"
+add_library( ${LIB_NAME} ${SHARED_SOURCES} )
+
+target_include_directories( ${LIB_NAME} PUBLIC ${INCLUDE_DIRS} )
+## PUBLIC keeps the transitive linking of the keyword-less form; testing.h includes xs3_* headers
+target_link_libraries( ${LIB_NAME} PUBLIC lib_xs3_math )
diff --git a/test/shared/pseudo_rand/pseudo_rand.c b/test/shared/pseudo_rand/pseudo_rand.c
new file mode 100644
index 000000000..5dafacb13
--- /dev/null
+++ b/test/shared/pseudo_rand/pseudo_rand.c
@@ -0,0 +1,111 @@
+// Copyright 2020-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#include "pseudo_rand.h"
+
+#include <assert.h>
+
+int pseudo_rand(int* state)
+{
+  // Linear congruential step: next = multiplier * current + increment, converted back to int
+  const long long multiplier = 1664525;
+  const long long increment = 1013904223;
+  const int next = (int)(multiplier * (*state) + increment);
+  return (*state = next);
+}
+
+
+int8_t  pseudo_rand_int8(unsigned *r){
+    // Advance the generator once and return the new state converted to int8_t
+    return (int8_t)pseudo_rand((int*)r);
+}
+
+uint8_t pseudo_rand_uint8(unsigned *r){
+    // Advance the generator once and return the new state converted to uint8_t
+    return (uint8_t)pseudo_rand((int*)r);
+}
+
+int16_t  pseudo_rand_int16(unsigned *r){
+    // Advance the generator once and return the new state converted to int16_t
+    return (int16_t)pseudo_rand((int*)r);
+}
+
+uint16_t pseudo_rand_uint16(unsigned *r){
+    // Advance the generator once and return the new state converted to uint16_t
+    return (uint16_t)pseudo_rand((int*)r);
+}
+
+int32_t  pseudo_rand_int32(unsigned *r){
+    // Advance the generator once and return the new state converted to int32_t
+    return (int32_t)pseudo_rand((int*)r);
+}
+
+uint32_t pseudo_rand_uint32(unsigned *r){
+    // Advance the generator once and return the new state converted to uint32_t
+    return (uint32_t)pseudo_rand((int*)r);
+}
+
+int64_t  pseudo_rand_int64(unsigned *r){
+    pseudo_rand((int*)r);
+    uint64_t lo = (uint64_t)*r;       // first draw: lower part of the result
+    pseudo_rand((int*)r);
+    uint64_t hi = (uint64_t)*r;       // second draw: shifted up by 32 bits
+    return (int64_t)(lo + (hi<<32));  // composed unsigned: shifting a signed value here could overflow
+}
+
+uint64_t pseudo_rand_uint64(unsigned *r){
+    pseudo_rand((int*)r);
+    uint64_t lo = (uint64_t)*r;  // first draw: lower part of the result
+    pseudo_rand((int*)r);
+    uint64_t hi = (uint64_t)*r;  // second draw: shifted up by 32 bits
+    return lo + (hi<<32);        // composed unsigned: shifting a signed value here could overflow
+}
+
+
+// Returns min + (draw % (max - min)); when max == min there is nothing to pick and min is returned.
+int32_t pseudo_rand_int(unsigned *r, int32_t min, int32_t max)
+{
+    // Range arithmetic is done in uint32_t so that max - min cannot overflow a signed type
+    uint32_t delta = (uint32_t)max - (uint32_t)min;
+    uint32_t raw = pseudo_rand_uint32(r);           // the generator is always advanced once
+    uint32_t d = (delta != 0) ? (raw % delta) : 0;  // guards the modulo against delta == 0
+    return (int32_t)((uint32_t)min + d);
+}
+
+// Returns min + (draw % (max - min)); when max == min there is nothing to pick and min is returned.
+uint32_t pseudo_rand_uint(unsigned *r, uint32_t min, uint32_t max)
+{
+    uint32_t delta = max - min;
+    uint32_t raw = pseudo_rand_uint32(r);  // the generator is always advanced once
+    // A zero-width range would otherwise divide by zero in the modulo below
+    uint32_t d = (delta != 0) ? (raw % delta) : 0;
+    return min + d;
+}
+
+
+void pseudo_rand_bytes(unsigned *r, char* buffer, unsigned size){
+#ifdef __xcore__
+    assert((((unsigned)buffer) & 0x3) == 0);  // xcore builds require a 4-byte aligned buffer
+#endif
+
+    char* out = buffer;
+    unsigned remaining = size;
+
+    // Whole words: copy the raw bytes of each freshly generated state
+    for(; remaining >= sizeof(unsigned); remaining -= sizeof(unsigned)){
+        pseudo_rand((int*)r);
+        const char* state_bytes = (const char*) r;
+
+        for(unsigned k = 0; k < sizeof(unsigned); k++)
+            *out++ = state_bytes[k];
+    }
+
+    // Tail: one further draw is always made (even when no bytes remain);
+    // its value is emitted lowest byte first until the buffer is full
+    pseudo_rand((int*)r);
+    unsigned tail = *r;
+    for(; remaining != 0; remaining--){
+        *out++ = (char) (tail & 0xFF);
+        tail >>= 8;
+    }
+}
diff --git a/test/shared/pseudo_rand/pseudo_rand.h b/test/shared/pseudo_rand/pseudo_rand.h
new file mode 100644
index 000000000..4b804744a
--- /dev/null
+++ b/test/shared/pseudo_rand/pseudo_rand.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <stdint.h>
+
+int pseudo_rand(int* state);  // advances *state by one generator step and returns the new value
+// Fixed-width draws: each call advances *r and returns the new state converted to the named type;
+// the 64-bit variants combine two consecutive draws.
+int8_t   pseudo_rand_int8(unsigned *r);
+uint8_t  pseudo_rand_uint8(unsigned *r);
+int16_t  pseudo_rand_int16(unsigned *r);
+uint16_t pseudo_rand_uint16(unsigned *r);
+int32_t  pseudo_rand_int32(unsigned *r);
+uint32_t pseudo_rand_uint32(unsigned *r);
+int64_t  pseudo_rand_int64(unsigned *r);
+uint64_t pseudo_rand_uint64(unsigned *r);
+// Ranged draws: return min + (draw % (max - min))
+int32_t  pseudo_rand_int(unsigned *r, int32_t min, int32_t max);
+uint32_t pseudo_rand_uint(unsigned *r, uint32_t min, uint32_t max);
+// Writes size generated bytes into buffer, advancing *r as it goes
+void pseudo_rand_bytes(unsigned *r, char* buffer, unsigned size);
diff --git a/test/shared/testing/testing.h b/test/shared/testing/testing.h
new file mode 100644
index 000000000..8be584845
--- /dev/null
+++ b/test/shared/testing/testing.h
@@ -0,0 +1,18 @@
+// Copyright 2020-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+#pragma once
+
+#include "xs3_math_conf.h"
+#include "xs3_api.h"
+#include "xs3_math_types.h"
+
+
+/**
+ * xCore: Get 100MHz reference clock timestamp
+ * Any other target (e.g. x86): Return 0
+ */
+C_API 
+unsigned getTimestamp();
+// Seed derived from the enclosing function's name; sizeof(__func__) also counts the terminating NUL
+#define SEED_FROM_FUNC_NAME()    get_seed(__func__, sizeof(__func__))
+C_API unsigned get_seed(const char* str, const unsigned len);
diff --git a/test/shared/testing/testing_misc.c b/test/shared/testing/testing_misc.c
new file mode 100644
index 000000000..b3e59f3c6
--- /dev/null
+++ b/test/shared/testing/testing_misc.c
@@ -0,0 +1,39 @@
+// Copyright 2020-2021 XMOS LIMITED.
+// This Software is subject to the terms of the XMOS Public Licence: Version 1.
+// XMOS Public License: Version 1
+
+#include "testing.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#ifdef __xcore__
+ #include "xcore/hwtimer.h"
+#endif
+
+
+// Folds the len bytes at str into a seed by XOR-ing successive 4-byte words (lowest byte first).
+// A trailing partial word is zero-padded instead of being read past the end of the buffer.
+unsigned get_seed(const char* str, const unsigned len)
+{
+  unsigned seed = 0;
+
+  for(unsigned pos = 0; pos < len; pos += 4){
+    unsigned v = 0;
+    // Assemble the word bytewise: no unaligned or type-punned load, and never beyond len bytes
+    for(unsigned i = 0; (i < 4) && (pos + i < len); i++)
+      v |= ((unsigned)(unsigned char)str[pos + i]) << (8 * i);
+    seed ^= v;
+  }
+
+  return seed;
+}
+
+unsigned getTimestamp()
+{
+#ifdef __xcore__  // same presence test as the one guarding the hwtimer include in this file
+  return get_reference_time();
+#else
+  return 0;  // targets without __xcore__ defined always report 0
+#endif
+}
diff --git a/test/README.rst b/tools/checks/README.rst
similarity index 55%
rename from test/README.rst
rename to tools/checks/README.rst
index 6baf1fa86..482508652 100644
--- a/test/README.rst
+++ b/tools/checks/README.rst
@@ -1,14 +1,14 @@
-====================
-Avona Hardware Tests
-====================
+=====================
+Avona Hardware Checks
+=====================
 
-This document describes the hardware tests for the Avona Voice Reference Design.  
+This document describes the hardware checks for the Avona Voice Reference Design.  
 
 *************
 Prerequisites
 *************
 
-All tests require Linux or MacOS.  Most tests run on either the Explorer Board or the Avona reference design evaluation kit.
+All checks require Linux or MacOS.  Most checks run on either the Explorer Board or the Avona reference design evaluation kit.
 
 The following software applications are required.  If necessary, download and follow the installation instructions for each application.
 
@@ -33,31 +33,31 @@ Then use pip to install the required modules.
 
     $ pip install pytest
 
-The tests require several firmware configurations.  To build and install all those configurations, run the following command in the root of the Avona repository:
+The checks require several firmware configurations.  To build and install all those configurations, run the following command in the root of the Avona repository:
 
 .. code-block:: console
 
-    $ ./test/build_test_configs.sh
+    $ ./tools/checks/build_check_configs.sh
 
-*****
-Tests
-*****
+******
+Checks
+******
 
 Wakeword Detection
 ==================
 
-The wakeword detection test verifies the ability of the Avona reference design to correctly detect wakewords.  It is currently only supported on the Explorer board and this test assumes you have set the WW_PATH environment variable to point to the the Amazon WakeWord library.
+The wakeword detection check verifies the ability of the Avona reference design to correctly detect wakewords.  It is currently only supported on the Explorer board and this check assumes you have set the WW_PATH environment variable to point to the Amazon WakeWord library.
 
-Run the following command to execute the test:
+Run the following command to execute the check:
 
 .. code-block:: console
 
-    $ cd test
-    $ ./test_wakeword_detection.sh -c 1 ../applications/dist/sw_avona_TEST_USB_MICS.xe ${WW_PATH}/sample-wakeword/alexas.list | tee test_wakeword_detection.log
+    $ cd tools/checks
+    $ ./check_wakeword_detection.sh -c 1 ../../dist/sw_avona_CHECK_USB_MICS.xe ${WW_PATH}/sample-wakeword/alexas.list | tee check_wakeword_detection.log
 
-This generates the `test_wakeword_detection.log` log file.  
+This generates the `check_wakeword_detection.log` log file.  
 
-To verify the test results, run:
+To verify the results, run:
 
 .. code-block:: console
 
diff --git a/test/build_test_configs.sh b/tools/checks/build_check_configs.sh
similarity index 100%
rename from test/build_test_configs.sh
rename to tools/checks/build_check_configs.sh
diff --git a/test/test_wakeword_detection.sh b/tools/checks/check_wakeword_detection.sh
similarity index 79%
rename from test/test_wakeword_detection.sh
rename to tools/checks/check_wakeword_detection.sh
index ef60f549d..4a8205db6 100755
--- a/test/test_wakeword_detection.sh
+++ b/tools/checks/check_wakeword_detection.sh
@@ -5,17 +5,17 @@ source helpers.sh
 
 help()
 {
-   echo "Voice reference design wakeword detection test"
+   echo "Voice reference design wakeword detection check"
    echo
    echo "Syntax:"
-   echo "test_wakeword_detection.sh [-c|h] <firmware.xe> <list_file>"
+   echo "check_wakeword_detection.sh [-c|h] <firmware.xe> <list_file>"
    echo 
    echo "Options:"
    echo "h     Print this Help."
    echo "c     Number of channels in input wavs"
    echo
    echo "Example that saves output to a log file:"
-   echo "   $ test_wakeword_detection.sh -c 1 <firmware.xe> <list_file> | tee test_wakeword_detection.log"
+   echo "   $ check_wakeword_detection.sh -c 1 <firmware.xe> <list_file> | tee check_wakeword_detection.log"
    echo 
    
 }
@@ -53,7 +53,7 @@ sleep $(get_firmware_startup_duration)
 
 # play the input wav files
 for WAV_FILE in $WAV_LIST; do
-    echo "Wakeword Test: $WAV_FILE"
+    echo "Wakeword Check: $WAV_FILE"
     sox $WAV_DIR/$WAV_FILE $SOX_PLAY_OPTS -t wav - $REMIX_PATTERN | sox -t wav - -t $DEVICE_DRIVER "$DEVICE_NAME"
     sleep 2
 done
diff --git a/test/helpers.sh b/tools/checks/helpers.sh
similarity index 100%
rename from test/helpers.sh
rename to tools/checks/helpers.sh
diff --git a/test/test_wakeword_detection.py b/tools/checks/test_wakeword_detection.py
similarity index 76%
rename from test/test_wakeword_detection.py
rename to tools/checks/test_wakeword_detection.py
index aca439c1b..2a84b8797 100644
--- a/test/test_wakeword_detection.py
+++ b/tools/checks/test_wakeword_detection.py
@@ -2,16 +2,16 @@
 # of successful wakeword recognition.
 # This file may be ran with pytest. For verbose success output, add the -rP
 # argument.
-# Running this with python will parse the log to find the success ratio. 
+# Running this with python will parse the log to find the success ratio.
 
 import re
 
 # Examine this log file
-default_log_file = "test_wakeword_detection.log"
+default_log_file = "check_wakeword_detection.log"
 
 # These are log-specific criteria for use in tallying
-regex_test = 'Wakeword Test: '
-regex_detected = 'Detected:'
+regex_test = "Wakeword Check: "
+regex_detected = "Detected:"
 
 # Initialize variables
 test_count = 0
@@ -25,13 +25,14 @@
             test_count += 1
         for match in re.finditer(regex_detected, line, re.S):
             success_count += 1
-    print(f"{success_count} out of {test_count} passed.")			
+    print(f"{success_count} out of {test_count} passed.")
 
 # This is called automatically by running pytest in the same directory.
 def func_report():
     return success_count
 
+
 def test_success():
     # define the pass threshold (60%)
-    assert func_report() >= test_count * .6
-    print(f"{success_count} out of {test_count} passed.")			
+    assert func_report() >= test_count * 0.6
+    print(f"{success_count} out of {test_count} passed.")