diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt
index 3cd880622c..6c71909c47 100644
--- a/backends/cadence/CMakeLists.txt
+++ b/backends/cadence/CMakeLists.txt
@@ -23,7 +23,6 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
-set(TARGET_DIR reference)
 
 if(EXECUTORCH_CADENCE_CPU_RUNNER)
   include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
@@ -61,6 +60,9 @@ if(EXECUTORCH_CADENCE_CPU_RUNNER)
                                       ${_common_include_directories}
   )
 
+  set(TARGET_DIR reference)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
+
   target_link_libraries(
   cadence_runner
   executorch
diff --git a/backends/cadence/build_cadence_fusionG3.sh b/backends/cadence/build_cadence_fusionG3.sh
new file mode 100644
index 0000000000..7a4dd68fb3
--- /dev/null
+++ b/backends/cadence/build_cadence_fusionG3.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euo pipefail
+
+unset CMAKE_PREFIX_PATH
+unset XTENSA_CORE
+export XTENSA_CORE=FCV_FG3GP
+git submodule sync
+git submodule update --init
+./install_requirements.sh
+
+rm -rf cmake-out
+
+STEPWISE_BUILD=false
+
+if $STEPWISE_BUILD; then
+    echo "Building ExecuTorch"
+    cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake  \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
+        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
+        -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+        -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+        -DEXECUTORCH_BUILD_CPUINFO=OFF \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
+        -DEXECUTORCH_USE_DL=OFF \
+        -DEXECUTORCH_BUILD_CADENCE=OFF \
+        -DFLATC_EXECUTABLE="$(which flatc)" \
+        -DHAVE_FNMATCH_H=OFF \
+        -Bcmake-out .
+
+    echo "Building any Cadence-specific binaries on top"
+    cmake -DBUCK2="$BUCK" \
+        -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
+        -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
+        -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+        -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+        -DEXECUTORCH_BUILD_CADENCE=ON \
+        -DFLATC_EXECUTABLE="$(which flatc)" \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
+        -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
+        -DEXECUTORCH_USE_DL=OFF \
+        -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
+        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+        -DPYTHON_EXECUTABLE=python3 \
+        -DEXECUTORCH_FUSION_G3_OPT=ON \
+        -DEXECUTORCH_BUILD_GFLAGS=ON \
+        -DHAVE_FNMATCH_H=OFF \
+        -Bcmake-out/backends/cadence \
+        backends/cadence
+    cmake --build cmake-out/backends/cadence  -j8
+else
+    echo "Building Cadence toolchain with ExecuTorch packages"
+    cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
+    cmake -DBUCK2="$BUCK" \
+        -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
+        -DHAVE_SYS_STAT_H=ON \
+        -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
+        -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
+        -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
+        -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
+        -DEXECUTORCH_BUILD_CPUINFO=OFF \
+        -DEXECUTORCH_BUILD_FLATC=OFF \
+        -DEXECUTORCH_BUILD_CADENCE=ON \
+        -DFLATC_EXECUTABLE="$(which flatc)" \
+        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
+        -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
+        -DEXECUTORCH_USE_DL=OFF \
+        -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
+        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
+        -DPYTHON_EXECUTABLE=python3 \
+        -DEXECUTORCH_FUSION_G3_OPT=ON \
+        -DHAVE_FNMATCH_H=OFF \
+        -Bcmake-out
+    cmake --build cmake-out --target install --config Release -j8
+fi
+
+echo "Run simple model to verify cmake build"
+python3 -m examples.portable.scripts.export --model_name="add"
+xt-run --turbo cmake-out/executor_runner  --model_path=add.pte
diff --git a/backends/cadence/build_cadence_xtensa.sh b/backends/cadence/build_cadence_hifi4.sh
similarity index 96%
rename from backends/cadence/build_cadence_xtensa.sh
rename to backends/cadence/build_cadence_hifi4.sh
index eebd0707d1..28a3812752 100644
--- a/backends/cadence/build_cadence_xtensa.sh
+++ b/backends/cadence/build_cadence_hifi4.sh
@@ -8,6 +8,8 @@
 set -euo pipefail
 
 unset CMAKE_PREFIX_PATH
+unset XTENSA_CORE
+export XTENSA_CORE=nxp_rt600_RI23_11_newlib
 git submodule sync
 git submodule update --init
 ./install_requirements.sh
@@ -53,7 +55,7 @@ if $STEPWISE_BUILD; then
         -DHAVE_FNMATCH_H=OFF \
         -Bcmake-out/backends/cadence \
         backends/cadence
-    cmake --build cmake-out/backends/cadence  -j16
+    cmake --build cmake-out/backends/cadence  -j8
 else
     echo "Building Cadence toolchain with ExecuTorch packages"
     cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
@@ -79,7 +81,7 @@ else
         -DEXECUTORCH_NNLIB_OPT=ON \
         -DHAVE_FNMATCH_H=OFF \
         -Bcmake-out
-    cmake --build cmake-out --target install --config Release -j16
+    cmake --build cmake-out --target install --config Release -j8
 fi
 
 echo "Run simple model to verify cmake build"
diff --git a/backends/cadence/hifi/operators/op_mean.cpp b/backends/cadence/hifi/operators/op_mean.cpp
index ed5ed3359e..65a1f6b7d4 100644
--- a/backends/cadence/hifi/operators/op_mean.cpp
+++ b/backends/cadence/hifi/operators/op_mean.cpp
@@ -145,8 +145,7 @@ Tensor& mean_dim_out(
   ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
     ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-      const size_t num =
-          torch::executor::exeget_reduced_dim_product(in, dim_list);
+      const size_t num = torch::executor::get_reduced_dim_product(in, dim_list);
       for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
         CTYPE_OUT sum = 0;
         if (in.numel() > 0) {