Update on "[ET-VK] Using a single GPU buffer for all tensor uniforms."
This diff changes the Tensor class to store all uniforms in a single uniform buffer. The entities previously kept in separate uniforms, i.e. sizes, strides, numel, and logical limits, now live in one buffer, and their byte offsets are stored as unsigned ints in the Tensor class. Other changes include:

* A new ctor for the ParamsBuffer class that allows allocating by size alone, without a data ptr.
* An offset input to the Buffer::data function.
* An offset parameter on the BufferBindInfo ctor, so an additional offset can be supplied when binding a buffer.

Differential Revision: [D65841750](https://our.internmc.facebook.com/intern/diff/D65841750/)

[ghstack-poisoned]
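The commit message describes the layout change in prose; the following minimal C++ sketch illustrates the idea of packing all per-tensor uniforms into one buffer and binding each field by byte offset. The names and signatures here (ParamsBuffer, BufferBindInfo, Tensor, the fixed 64-byte field stride) are simplified stand-ins for illustration only, not the actual ExecuTorch Vulkan API, which wraps real VkBuffer allocations rather than a std::vector.

```cpp
// Illustrative sketch only: simplified stand-ins, not the real ET-VK classes.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// One backing allocation shared by all uniforms of a tensor; data() takes a
// byte offset so several logical uniforms can live in the same buffer.
class ParamsBuffer {
 public:
  explicit ParamsBuffer(std::size_t nbytes) : storage_(nbytes, 0) {}  // size-only ctor, no data ptr

  void write(const void* src, std::size_t nbytes, std::uint32_t offset) {
    std::memcpy(storage_.data() + offset, src, nbytes);
  }
  const std::uint8_t* data(std::uint32_t offset = 0) const {  // offset-aware accessor
    return storage_.data() + offset;
  }

 private:
  std::vector<std::uint8_t> storage_;
};

// Binding info that carries the extra offset into the shared buffer.
struct BufferBindInfo {
  const ParamsBuffer* buffer;
  std::uint32_t offset;
  explicit BufferBindInfo(const ParamsBuffer* b, std::uint32_t off = 0)
      : buffer(b), offset(off) {}
};

// The tensor owns a single uniform buffer plus per-field byte offsets,
// instead of one GPU buffer per uniform (sizes, strides, numel, limits).
class Tensor {
 public:
  Tensor(const std::vector<std::int32_t>& sizes,
         const std::vector<std::int32_t>& strides,
         std::int32_t numel,
         const std::vector<std::int32_t>& logical_limits)
      : uniforms_(kUniformBufferBytes) {
    std::uint32_t cursor = 0;
    sizes_offset_ = push(sizes.data(), sizes.size() * sizeof(std::int32_t), cursor);
    strides_offset_ = push(strides.data(), strides.size() * sizeof(std::int32_t), cursor);
    numel_offset_ = push(&numel, sizeof(numel), cursor);
    limits_offset_ = push(logical_limits.data(),
                          logical_limits.size() * sizeof(std::int32_t), cursor);
  }

  // Shaders bind the same buffer at different offsets.
  BufferBindInfo sizes_ubo() const { return BufferBindInfo(&uniforms_, sizes_offset_); }
  BufferBindInfo strides_ubo() const { return BufferBindInfo(&uniforms_, strides_offset_); }
  BufferBindInfo numel_ubo() const { return BufferBindInfo(&uniforms_, numel_offset_); }
  BufferBindInfo logical_limits_ubo() const { return BufferBindInfo(&uniforms_, limits_offset_); }

 private:
  static constexpr std::uint32_t kFieldStride = 64;       // illustrative alignment; assumes
  static constexpr std::size_t kUniformBufferBytes = 256; // each field fits in 64 bytes

  std::uint32_t push(const void* src, std::size_t nbytes, std::uint32_t& cursor) {
    const std::uint32_t offset = cursor;
    uniforms_.write(src, nbytes, offset);
    cursor += kFieldStride;
    return offset;
  }

  ParamsBuffer uniforms_;
  std::uint32_t sizes_offset_ = 0;
  std::uint32_t strides_offset_ = 0;
  std::uint32_t numel_offset_ = 0;
  std::uint32_t limits_offset_ = 0;
};
```

The practical effect suggested by the diff summary is that each tensor holds one GPU allocation for its metadata rather than one buffer per uniform, and a shader that only needs, say, the sizes field is bound to the shared buffer at that field's offset.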
Showing 51 changed files with 535 additions and 357 deletions.
@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# A helper CMake file to trigger C++ unit tests.
#

if(BUILD_TESTING)
  # This contains the list of tests which are always built
  add_subdirectory(extension/evalue_util/test)
  add_subdirectory(extension/kernel_util/test)
  add_subdirectory(extension/memory_allocator/test)
  add_subdirectory(extension/parallel/test)
  add_subdirectory(extension/pytree/test)
  add_subdirectory(kernels/portable/cpu/util/test)
  add_subdirectory(kernels/prim_ops/test)
  add_subdirectory(kernels/test)
  add_subdirectory(runtime/core/exec_aten/testing_util/test)
  add_subdirectory(runtime/core/exec_aten/util/test)
  add_subdirectory(runtime/core/portable_type/test)
  add_subdirectory(runtime/core/test)
  add_subdirectory(runtime/executor/test)
  add_subdirectory(runtime/kernel/test)
  add_subdirectory(runtime/platform/test)
  add_subdirectory(test/utils)
endif()
@@ -0,0 +1,91 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -euo pipefail

unset CMAKE_PREFIX_PATH
unset XTENSA_CORE
export XTENSA_CORE=FCV_FG3GP
git submodule sync
git submodule update --init
./install_requirements.sh

rm -rf cmake-out

STEPWISE_BUILD=false

if $STEPWISE_BUILD; then
  echo "Building ExecuTorch"
  cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
    -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
    -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
    -DEXECUTORCH_BUILD_CPUINFO=OFF \
    -DEXECUTORCH_ENABLE_LOGGING=ON \
    -DEXECUTORCH_USE_DL=OFF \
    -DEXECUTORCH_BUILD_CADENCE=OFF \
    -DFLATC_EXECUTABLE="$(which flatc)" \
    -DHAVE_FNMATCH_H=OFF \
    -Bcmake-out .

  echo "Building any Cadence-specific binaries on top"
  cmake -DBUCK2="$BUCK" \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
    -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
    -DEXECUTORCH_BUILD_CADENCE=ON \
    -DFLATC_EXECUTABLE="$(which flatc)" \
    -DEXECUTORCH_ENABLE_LOGGING=ON \
    -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
    -DEXECUTORCH_USE_DL=OFF \
    -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
    -DPYTHON_EXECUTABLE=python3 \
    -DEXECUTORCH_FUSION_G3_OPT=ON \
    -DEXECUTORCH_BUILD_GFLAGS=ON \
    -DHAVE_FNMATCH_H=OFF \
    -Bcmake-out/backends/cadence \
    backends/cadence
  cmake --build cmake-out/backends/cadence -j8
else
  echo "Building Cadence toolchain with ExecuTorch packages"
  cmake_prefix_path="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
  cmake -DBUCK2="$BUCK" \
    -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
    -DHAVE_SYS_STAT_H=ON \
    -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
    -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
    -DEXECUTORCH_BUILD_CPUINFO=OFF \
    -DEXECUTORCH_BUILD_FLATC=OFF \
    -DEXECUTORCH_BUILD_CADENCE=ON \
    -DFLATC_EXECUTABLE="$(which flatc)" \
    -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
    -DEXECUTORCH_ENABLE_LOGGING=ON \
    -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
    -DEXECUTORCH_USE_DL=OFF \
    -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
    -DPYTHON_EXECUTABLE=python3 \
    -DEXECUTORCH_FUSION_G3_OPT=ON \
    -DHAVE_FNMATCH_H=OFF \
    -Bcmake-out
  cmake --build cmake-out --target install --config Release -j8
fi

echo "Run simple model to verify cmake build"
python3 -m examples.portable.scripts.export --model_name="add"
xt-run --turbo cmake-out/executor_runner --model_path=add.pte
@@ -0,0 +1,56 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWiseCos, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Cos(NodeVisitor):
    target = ["aten.cos.default"]

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)
        input_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=True,
        )

        output_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            output_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=False,
        )

        cos_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWiseCos.op_name,
        )
        cos_op.AddInputTensors([input_tensor_wrapper])
        cos_op.AddOutputTensors([output_tensor_wrapper])

        return cos_op
@@ -0,0 +1,56 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWiseSin, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Sin(NodeVisitor):
    target = ["aten.sin.default"]

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)
        input_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=True,
        )

        output_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            output_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
            is_input_tensor=False,
        )

        sin_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWiseSin.op_name,
        )
        sin_op.AddInputTensors([input_tensor_wrapper])
        sin_op.AddOutputTensors([output_tensor_wrapper])

        return sin_op