diff --git a/.github/actions/stress-build-and-run/action.yml b/.github/actions/stress-build-and-run/action.yml deleted file mode 100644 index e5f5907251b..00000000000 --- a/.github/actions/stress-build-and-run/action.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: "Build and run stress tests" -description: "Action for stress testing" - -inputs: - arch: - description: 'grayskull or wormhole_b0' - required: true - dispatch: - description: 'fast or slow' - required: true - machine: - description: 'Virtual machine or Bare metal' - required: true - -runs: - using: "composite" - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - name: Build tt-metal and libs - run: make build - - name: Build tt-metal CPP tests - run: make tests - - name: Run pre/post regression tests in a loop - run: | - source build/python_env/bin/activate - ./tests/scripts/run_tests.sh --tt-arch ${{ arch }} --pipeline-type stress_post_commit --dispatch-mode ${{ dispatch }} - - name: Upload watcher log - if: always() - uses: actions/upload-artifact@v4 - with: - name: watcher-log-${{ arch }}-${{ machine }} - path: generated/watcher/watcher.log diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5455162209a..1f622729b7d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,6 @@ Table of Contents - [Setting up Git](#setting-up-git) - [Setting logger level](#setting-logger-level) - [Building and viewing the documentation locally](#building-and-viewing-the-documentation-locally) - - [Cleaning the dev environment with `make nuke`](#cleaning-the-dev-environment-with-make-nuke) - [Tests in tt-metal](#tests-in-tt-metal) - [Running post-commit regressions](#running-post-commit-regressions) - [Adding post-commit tests](#adding-post-commit-tests) @@ -174,14 +173,6 @@ $ cd ${TT_METAL_HOME} && ./docs/spellcheck.sh update Commit your changes and the personal dictionary, at docs/aspell-dictionary.pws, that is changed. -### Cleaning the dev environment with `make nuke` - -Normally, `make clean` only clears out build artifacts. It does **not** delete -the built Python dev environment stored at `build/python_env/`. - -To delete absolutely everything including the Python environment, use `make -nuke`. - ## Tests in tt-metal Ensure you're in a developer Python environment with necessary environment variables @@ -201,8 +192,8 @@ You must run post-commit regressions before you commit something. These regressions will also run after every pushed commit to the GitHub repo. ``` -make build -make tests +cmake --build build --target install +cmake --build build --target tests ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type post_commit ``` @@ -252,7 +243,7 @@ a specific one you'd like to run. 1. Build the API integration tests using the make command, ``` -make tests +cmake --build build --target tests ``` 2. Run the test binaries from the path **${TT_METAL_HOME}/build/test/tt_metal** @@ -268,7 +259,7 @@ fast dispatch, you can 1. Build the unit tests: ``` - make tests + cmake --build build --target tests ``` 2. Run the test: ``` diff --git a/INSTALLING.md b/INSTALLING.md index 8bcd8135958..33779cb8379 100644 --- a/INSTALLING.md +++ b/INSTALLING.md @@ -97,7 +97,8 @@ export TT_METAL_ENV=dev 4. Build & activate. -NEW!! CMake Support +We use CMake for our build flows. + ```sh ./build_metal.sh @@ -110,13 +111,6 @@ Note about Python environments: You do not have to use `create_venv.sh`. If you are less familiar with Python and its various environment tools, just use `create_venv.sh` as shown above. -Old Makefile Flow -```sh -make build - -source build/python_env/bin/activate -``` - 5. Start coding You are all set! Visit the [TT-NN Basic examples page](https://tenstorrent.github.io/tt-metal/latest/ttnn/ttnn/usage.html#basic-examples) or get started with [simple kernels on TT-Metalium](https://github.com/tenstorrent/tt-metal/blob/main/README.md) diff --git a/Makefile b/Makefile deleted file mode 100644 index c475ba304f6..00000000000 --- a/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -.SUFFIXES: - -MAKEFLAGS := --jobs=$(shell nproc) - -# Setup CONFIG, DEVICE_RUNNER, and out/build dirs first -TT_METAL_HOME ?= $(shell git rev-parse --show-toplevel) -ARCH_NAME ?= grayskull - -include ./module.mk diff --git a/docs/source/tt-metalium/tt_metal/examples/dram_loopback.rst b/docs/source/tt-metalium/tt_metal/examples/dram_loopback.rst index fc6d9e12bad..e1c60621103 100644 --- a/docs/source/tt-metalium/tt_metal/examples/dram_loopback.rst +++ b/docs/source/tt-metalium/tt_metal/examples/dram_loopback.rst @@ -20,8 +20,7 @@ depending on the most up-to-date installation methods. export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/loopback + ./build_metal.sh ./build/programming_examples/loopback Silicon accelerator setup diff --git a/docs/source/tt-metalium/tt_metal/examples/eltwise_binary.rst b/docs/source/tt-metalium/tt_metal/examples/eltwise_binary.rst index c9daff81213..1b5dbf6ed39 100644 --- a/docs/source/tt-metalium/tt_metal/examples/eltwise_binary.rst +++ b/docs/source/tt-metalium/tt_metal/examples/eltwise_binary.rst @@ -19,8 +19,7 @@ depending on the most up-to-date installation methods. export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/eltwise_binary + ./build_metal.sh ./build/programming_examples/eltwise_binary New buffers diff --git a/docs/source/tt-metalium/tt_metal/examples/eltwise_sfpu.rst b/docs/source/tt-metalium/tt_metal/examples/eltwise_sfpu.rst index 180ecd828cf..91f1af4869f 100644 --- a/docs/source/tt-metalium/tt_metal/examples/eltwise_sfpu.rst +++ b/docs/source/tt-metalium/tt_metal/examples/eltwise_sfpu.rst @@ -19,8 +19,7 @@ depending on the most up-to-date installation methods. export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/eltwise_sfpu + ./build_metal.sh ./build/programming_examples/eltwise_sfpu Circular buffers for data movement to/from compute engine diff --git a/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core.rst b/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core.rst index 4833b768f72..1bd4cc42818 100644 --- a/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core.rst +++ b/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core.rst @@ -27,8 +27,7 @@ depending on the most up-to-date installation methods. export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/matmul_multi_core + ./build_metal.sh ./build/programming_examples/matmul_multi_core Accessing all the cores diff --git a/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core_optimized.rst b/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core_optimized.rst index 5a5fc75f740..e8ac2bf7551 100644 --- a/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core_optimized.rst +++ b/docs/source/tt-metalium/tt_metal/examples/matmul_multi_core_optimized.rst @@ -9,8 +9,7 @@ The Tensix core architecture's secret weapon is its full user control over memor export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/matmul_multi_core + ./build_metal.sh ./build/programming_examples/matmul_multi_core_reuse ./build/programming_examples/matmul_multi_core_reuse_mcast diff --git a/docs/source/tt-metalium/tt_metal/examples/matmul_single_core.rst b/docs/source/tt-metalium/tt_metal/examples/matmul_single_core.rst index 67327c4d3f3..a99345e5de5 100644 --- a/docs/source/tt-metalium/tt_metal/examples/matmul_single_core.rst +++ b/docs/source/tt-metalium/tt_metal/examples/matmul_single_core.rst @@ -18,8 +18,7 @@ depending on the most up-to-date installation methods. export ARCH_NAME= export TT_METAL_HOME= - make build - make programming_examples/matmul_single_core + ./build_metal.sh ./build/programming_examples/matmul_single_core Host Code diff --git a/infra/git_hooks/module.mk b/infra/git_hooks/module.mk deleted file mode 100644 index 431a9154b53..00000000000 --- a/infra/git_hooks/module.mk +++ /dev/null @@ -1,9 +0,0 @@ -GIT_HOOKS = $(OUT)/git_hooks - -git_hooks: $(GIT_HOOKS)/.installed - -$(GIT_HOOKS)/.installed: python_env/dev - mkdir -p $(GIT_HOOKS) - bash -c "source $(PYTHON_ENV)/bin/activate && pre-commit install" - bash -c "source $(PYTHON_ENV)/bin/activate && pre-commit install --hook-type commit-msg" - touch $@ diff --git a/models/utility_functions.py b/models/utility_functions.py index 6d139b5ddf5..b2bd6d6cc0f 100644 --- a/models/utility_functions.py +++ b/models/utility_functions.py @@ -117,6 +117,9 @@ def enable_persistent_kernel_cache(): """ Enables persistent compiled kernel caching - disables recompiling the kernels for the duration of running process if built_kernels/.../hash directory with kernel binaries is present. """ + logger.warning( + "Persistent kernel cache is enabled. Cache invalidation may fail after a rebase and may require deleting the build directory." + ) tt_lib.device.EnablePersistentKernelCache() diff --git a/module.mk b/module.mk deleted file mode 100644 index 95125643bed..00000000000 --- a/module.mk +++ /dev/null @@ -1,168 +0,0 @@ -CONFIG ?= assert -ENABLE_PROFILER ?= 0 -ENABLE_TRACY ?= 0 -ENABLE_CODE_TIMERS ?= 0 -# TODO: enable OUT to be per config (this impacts all scripts that run tests) -# OUT ?= build_$(DEVICE_RUNNER)_$(CONFIG) -OUT ?= $(TT_METAL_HOME)/build -PREFIX ?= $(OUT) - -# Disable by default, use negative instead for consistency with BBE -TT_METAL_VERSIM_DISABLED ?= 1 - -CONFIG_CFLAGS = -CONFIG_LDFLAGS = - -# For production builds so the final pybinded so has all binaries + symbols -TT_METAL_CREATE_STATIC_LIB ?= 0 - -ifeq ($(CONFIG), release) -CONFIG_CFLAGS += -O3 -else ifeq ($(CONFIG), ci) # significantly smaller artifacts -CONFIG_CFLAGS += -O3 -DDEBUG=DEBUG -CONFIG_LDFLAGS += -Wl,--verbose -else ifeq ($(CONFIG), assert) -CONFIG_CFLAGS += -O3 -g -DDEBUG=DEBUG -else ifeq ($(CONFIG), asan) -CONFIG_CFLAGS += -O3 -g -DDEBUG=DEBUG -fsanitize=address -CONFIG_LDFLAGS += -fsanitize=address -else ifeq ($(CONFIG), ubsan) -CONFIG_CFLAGS += -O3 -g -DDEBUG=DEBUG -fsanitize=undefined -CONFIG_LDFLAGS += -fsanitize=undefined -else ifeq ($(CONFIG), debug) -CONFIG_CFLAGS += -O0 -g -DDEBUG=DEBUG -else -$(error Unknown value for CONFIG "$(CONFIG)") -endif - -ifeq ($(TT_METAL_VERSIM_DISABLED),0) - UMD_VERSIM_STUB = 0 -else - # Need to always define this versim disabled flag for cpp - CONFIG_CFLAGS += -DTT_METAL_VERSIM_DISABLED - UMD_VERSIM_STUB = 1 -endif -ifeq ($(ENABLE_CODE_TIMERS), 1) -CONFIG_CFLAGS += -DTT_ENABLE_CODE_TIMERS -endif - -# Gate certain dev env requirements behind this -ifeq ("$(TT_METAL_ENV)", "dev") -TT_METAL_ENV_IS_DEV = 1 -endif - -OBJDIR = $(OUT)/obj -LIBDIR = $(OUT)/lib -BINDIR = $(OUT)/bin -INCDIR = $(OUT)/include -TESTDIR = $(OUT)/test -DOCSDIR = $(OUT)/docs -TOOLS = $(OUT)/tools - -UMD_HOME = $(TT_METAL_HOME)/tt_metal/third_party/umd -UMD_VERSIM_HEADERS = $(TT_METAL_VERSIM_ROOT)/versim/ -UMD_USER_ROOT = $(TT_METAL_HOME) -# Top level flags, compiler, defines etc. - -ifeq ("$(ARCH_NAME)", "wormhole_b0") - BASE_INCLUDES=-Itt_metal/hw/inc -Itt_metal/hw/inc/wormhole -Itt_metal/hw/inc/wormhole/wormhole_b0_defines -I$(UMD_HOME)/src/firmware/riscv/wormhole -else - BASE_INCLUDES=-Itt_metal/hw/inc -Itt_metal/hw/inc/$(ARCH_NAME) -I$(UMD_HOME)/src/firmware/riscv/$(ARCH_NAME) -endif - -# TODO: rk reduce this to one later -BASE_INCLUDES+=-I./ -I./tt_metal/ -I$(UMD_HOME)/ - -#WARNINGS ?= -Wall -Wextra -WARNINGS ?= -Wdelete-non-virtual-dtor -Wreturn-type -Wswitch -Wuninitialized -Wno-unused-parameter -CC ?= gcc -CXX ?= g++ -CFLAGS ?= -MMD $(WARNINGS) -I. $(CONFIG_CFLAGS) -mavx2 -DBUILD_DIR=\"$(OUT)\" -CXXFLAGS ?= --std=c++17 -fvisibility-inlines-hidden -Werror - -LDFLAGS ?= $(CONFIG_LDFLAGS) -L$(LIBDIR) \ - -ldl \ - -lz \ - -lboost_thread \ - -lboost_filesystem \ - -lboost_system \ - -lboost_regex \ - -lpthread \ - -latomic \ - -lhwloc -ifdef TT_METAL_ENV_IS_DEV -LDFLAGS += \ - -Wl,-rpath,$(PREFIX)/lib -endif -SHARED_LIB_FLAGS = -shared -fPIC -STATIC_LIB_FLAGS = -fPIC -ifeq ($(findstring clang,$(CC)),clang) -WARNINGS += -Wno-c++11-narrowing -Wno-c++2a-extensions -LDFLAGS += -lstdc++ -else -WARNINGS += -Wmaybe-uninitialized -LDFLAGS += -lstdc++ -endif - -# For GDDR5 bug in WH -ifneq (,$(filter "$(ARCH_NAME)","wormhole" "wormhole_b0")) - ISSUE_3487_FIX = 1 -endif - -set_up_kernels: - $(MAKE) -f $(TT_METAL_HOME)/tt_metal/hw/Makefile-runtime -C $(TT_METAL_HOME)/tt_metal/hw prepare - -set_up_kernels/clean: - $(MAKE) -f $(TT_METAL_HOME)/tt_metal/hw/Makefile-runtime -C $(TT_METAL_HOME)/tt_metal/hw clean - -ifeq ($(ENABLE_PROFILER), 1) -CFLAGS += -DPROFILER -endif - -ifeq ($(ENABLE_TRACY), 1) -CFLAGS += -DTRACY_ENABLE -fno-omit-frame-pointer -fPIC -LDFLAGS += -ltracy -rdynamic -TOOLS_TO_BUILD += \ - tracy_tools -endif - -LIBS_TO_BUILD = -ifdef TT_METAL_ENV_IS_DEV -LIBS_TO_BUILD += \ - python_env/dev \ - python_env/dev/editable \ - python_env/dev/stubs \ - git_hooks -endif - -LIBS_TO_BUILD += \ - tracy \ - set_up_kernels \ - umd_device \ - tools \ - tt_metal \ - tt_eager \ - ttnn - -# These must be in dependency order (enforces no circular deps) -include $(UMD_HOME)/device/module.mk -include $(TT_METAL_HOME)/tt_metal/common/common.mk -include $(TT_METAL_HOME)/tt_metal/module.mk -include $(TT_METAL_HOME)/tt_eager/module.mk -include $(TT_METAL_HOME)/ttnn/module.mk -include $(TT_METAL_HOME)/tt_metal/python_env/module.mk -include $(TT_METAL_HOME)/tests/module.mk - -# only include these modules if we're in development -ifdef TT_METAL_ENV_IS_DEV -include $(TT_METAL_HOME)/infra/git_hooks/module.mk -endif - -build: $(LIBS_TO_BUILD) $(TOOLS_TO_BUILD) - -clean: set_up_kernels/clean eager_package/clean tracy_tools_clean - test -d build && find build -mindepth 1 -maxdepth 1 ! -path "build/python_env" -exec rm -rf {} + || true - rm -rf dist/ - -nuke: clean python_env/clean - rm -rf $(OUT) diff --git a/tests/module.mk b/tests/module.mk deleted file mode 100644 index 79e7eea02be..00000000000 --- a/tests/module.mk +++ /dev/null @@ -1,14 +0,0 @@ -TEST_INCLUDES = -I$(TT_METAL_HOME)/tests/ -I$(TT_METAL_HOME)/tt_eager/. - -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/module.mk -include $(TT_METAL_HOME)/tests/tt_eager/module.mk -include $(TT_METAL_HOME)/tests/ttnn/module.mk - -TESTS_TO_BUILD = \ - tests/tt_metal \ - tests/tt_eager \ - tests/ttnn \ - -tests: tests/all - -tests/all: $(TESTS_TO_BUILD) diff --git a/tests/tt_eager/module.mk b/tests/tt_eager/module.mk deleted file mode 100644 index 00111eae1bd..00000000000 --- a/tests/tt_eager/module.mk +++ /dev/null @@ -1,53 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -TT_EAGER_TESTS += \ - tests/tt_eager/ops/ccl/test_all_gather_utils \ - tests/tt_eager/ops/ccl/test_all_gather_sharded_indexing_helpers \ - tests/tt_eager/ops/ccl/test_ccl_helpers \ - tests/tt_eager/ops/test_average_pool \ - tests/tt_eager/ops/test_eltwise_binary_op \ - tests/tt_eager/ops/test_eltwise_unary_op \ - tests/tt_eager/ops/test_softmax_op \ - tests/tt_eager/ops/test_layernorm_op \ - tests/tt_eager/ops/test_multi_queue_api \ - tests/tt_eager/ops/test_transpose_op \ - tests/tt_eager/ops/test_transpose_wh_single_core \ - tests/tt_eager/ops/test_transpose_wh_multi_core \ - tests/tt_eager/ops/test_reduce_op \ - tests/tt_eager/ops/test_bcast_op \ - tests/tt_eager/ops/test_bmm_op \ - tests/tt_eager/ops/test_pad_op \ - tests/tt_eager/ops/test_tilize_op \ - tests/tt_eager/ops/test_tilize_zero_padding \ - tests/tt_eager/ops/test_tilize_op_channels_last \ - tests/tt_eager/ops/test_tilize_zero_padding_channels_last \ - tests/tt_eager/ops/test_sfpu \ - tests/tt_eager/ops/test_fold_op \ - tests/tt_eager/tensors/test_copy_and_move \ - tests/tt_eager/tensors/test_host_device_loopback \ - tests/tt_eager/tensors/test_raw_host_memory_pointer \ - tests/tt_eager/tensors/test_async_tensor_apis \ - tests/tt_eager/tensors/test_ranks \ - tests/tt_eager/integration_tests/test_bert \ - -TT_EAGER_TESTS_SRCS = $(addprefix tests/tt_eager/, $(addsuffix .cpp, $(TT_EAGER_TESTS:tests/%=%))) - -TT_EAGER_TESTS_INCLUDES = $(TEST_INCLUDES) $(TT_EAGER_INCLUDES) -TT_EAGER_TESTS_LDFLAGS = $(TT_METAL_TESTS_LDFLAGS) $(TT_LIB_LDFLAGS) -lgtest -lgtest_main - -TT_EAGER_TESTS_OBJS = $(addprefix $(OBJDIR)/, $(TT_EAGER_TESTS_SRCS:.cpp=.o)) -TT_EAGER_TESTS_DEPS = $(addprefix $(OBJDIR)/, $(TT_EAGER_TESTS_SRCS:.cpp=.d)) - --include $(TT_EAGER_TESTS_DEPS) - -tests/tt_eager: $(TT_EAGER_TESTS) -tests/tt_eager/%: $(TESTDIR)/tt_eager/%; - -.PRECIOUS: $(TESTDIR)/tt_eager/% -$(TESTDIR)/tt_eager/%: $(OBJDIR)/tt_eager/tests/%.o $(TT_DNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_EAGER_TESTS_INCLUDES) -o $@ $^ $(TT_EAGER_TESTS_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/tt_eager/tests/%.o -$(OBJDIR)/tt_eager/tests/%.o: tests/tt_eager/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_EAGER_TESTS_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/gtest_smoke/module.mk b/tests/tt_metal/tt_metal/gtest_smoke/module.mk deleted file mode 100644 index 367eeae0ae1..00000000000 --- a/tests/tt_metal/tt_metal/gtest_smoke/module.mk +++ /dev/null @@ -1,33 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_TESTS_HOME = tests/tt_metal/tt_metal -TT_METAL_GTEST_SMOKE_SRCS_HOME = tests/tt_metal/tt_metal/gtest_smoke - -TT_METAL_GTEST_SMOKE = ${TT_METAL_GTEST_SMOKE_SRCS_HOME}/tests_main.cpp -TT_METAL_GTEST_SMOKE += $(wildcard ${TT_METAL_GTEST_SMOKE_SRCS_HOME}/*.cpp) -TT_METAL_GTEST_SMOKE += $(wildcard ${TT_METAL_GTEST_SMOKE_SRCS_HOME}/*/*.cpp) -TT_METAL_GTEST_SMOKE += $(wildcard ${TT_METAL_GTEST_SMOKE_SRCS_HOME}/*/*/*.cpp) - -TT_METAL_GTEST_SMOKE_OBJ_HOME = tt_metal/tests/gtest_smoke/ -TT_METAL_GTEST_SMOKE_SRCS = $(patsubst $(TT_METAL_GTEST_SMOKE_SRCS_HOME)%, $(TT_METAL_GTEST_SMOKE_OBJ_HOME)%, $(TT_METAL_GTEST_SMOKE)) - -TT_METAL_GTEST_SMOKE_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -TT_METAL_GTEST_SMOKE_LDFLAGS = $(TT_METAL_UNIT_TESTS_LDFLAGS) -ltt_metal -ldl -lstdc++fs -pthread -lyaml-cpp -lgtest -lgtest_main -lm - -TT_METAL_GTEST_SMOKE_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_GTEST_SMOKE_SRCS:.cpp=.o)) -TT_METAL_GTEST_SMOKE_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_GTEST_SMOKE_SRCS:.cpp=.d)) - --include $(TT_METAL_GTEST_SMOKE_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal/gtest_smoke: $(TESTDIR)/tt_metal/gtest_smoke - -.PRECIOUS: $(TESTDIR)/tt_metal/gtest_smoke -$(TESTDIR)/tt_metal/gtest_smoke: $(TT_METAL_GTEST_SMOKE_OBJS) $(TT_METAL_LIB) $(TT_DNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_GTEST_SMOKE_INCLUDES) -o $@ $^ $(TT_METAL_GTEST_SMOKE_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/$(TT_METAL_GTEST_SMOKE_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_GTEST_SMOKE_OBJ_HOME)/%.o: $(TT_METAL_GTEST_SMOKE_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_GTEST_SMOKE_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/module.mk b/tests/tt_metal/tt_metal/module.mk deleted file mode 100644 index 191eadd6ad1..00000000000 --- a/tests/tt_metal/tt_metal/module.mk +++ /dev/null @@ -1,119 +0,0 @@ -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests_common/module.mk -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests/module.mk -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/module.mk -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue/module.mk -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests_frequent/module.mk -include $(TT_METAL_HOME)/tests/tt_metal/tt_metal/gtest_smoke/module.mk - -# Programming examples for external users -include $(TT_METAL_HOME)/tt_metal/programming_examples/module.mk - -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -TT_METAL_TESTS += \ - tests/tt_metal/test_bmm \ - tests/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch \ - tests/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency \ - tests/tt_metal/perf_microbenchmark/dispatch/test_dispatcher \ - tests/tt_metal/perf_microbenchmark/dispatch/test_prefetcher \ - tests/tt_metal/perf_microbenchmark/ethernet/test_ethernet_read_and_send_data \ - tests/tt_metal/perf_microbenchmark/ethernet/test_workers_and_erisc_datamover_unidirectional \ - tests/tt_metal/perf_microbenchmark/ethernet/test_ethernet_bidirectional_bandwidth_no_edm \ - tests/tt_metal/perf_microbenchmark/ethernet/test_ethernet_hop_latencies_no_edm \ - tests/tt_metal/perf_microbenchmark/routing/test_tx_rx \ - tests/tt_metal/perf_microbenchmark/routing/test_mux_demux \ - tests/tt_metal/perf_microbenchmark/routing/test_mux_demux_2level \ - tests/tt_metal/perf_microbenchmark/routing/test_tunnel_1cq \ - tests/tt_metal/perf_microbenchmark/routing/test_tunnel_2cq \ - tests/tt_metal/perf_microbenchmark/routing/test_uni_tunnel \ - tests/tt_metal/perf_microbenchmark/routing/test_uni_tunnel_single_chip \ - tests/tt_metal/perf_microbenchmark/routing/test_bi_tunnel \ - tests/tt_metal/perf_microbenchmark/noc/test_noc_unicast_vs_multicast_to_single_core_latency \ - tests/tt_metal/perf_microbenchmark/old/matmul/matmul_global_l1 \ - tests/tt_metal/perf_microbenchmark/old/matmul/matmul_local_l1 \ - tests/tt_metal/perf_microbenchmark/old/noc/test_noc_read_global_l1 \ - tests/tt_metal/perf_microbenchmark/old/noc/test_noc_read_local_l1 \ - tests/tt_metal/perf_microbenchmark/old/pcie/test_enqueue_rw_buffer \ - tests/tt_metal/perf_microbenchmark/old/pcie/test_rw_buffer \ - tests/tt_metal/perf_microbenchmark/old/pcie/test_rw_device_dram \ - tests/tt_metal/perf_microbenchmark/old/pcie/test_rw_device_l1 \ - tests/tt_metal/perf_microbenchmark/1_compute_mm/test_compute_mm \ - tests/tt_metal/perf_microbenchmark/2_noc_adjacent/test_noc_adjacent \ - tests/tt_metal/perf_microbenchmark/2_noc_rtor/test_noc_rtor \ - tests/tt_metal/perf_microbenchmark/3_pcie_transfer/test_rw_buffer \ - tests/tt_metal/perf_microbenchmark/3_pcie_transfer/test_pull_from_pcie \ - tests/tt_metal/perf_microbenchmark/6_dram_offchip/test_dram_offchip \ - tests/tt_metal/perf_microbenchmark/7_kernel_launch/test_kernel_launch \ - tests/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read \ - tests/tt_metal/perf_microbenchmark/noc/test_noc_unicast_vs_multicast_to_single_core_latency \ - tests/tt_metal/test_add_two_ints \ - tests/tt_metal/test_compile_args \ - tests/tt_metal/test_eltwise_binary \ - tests/tt_metal/test_matmul_single_tile_bfp8b \ - tests/tt_metal/test_matmul_single_tile_output_in_l1 \ - tests/tt_metal/test_dram_loopback_single_core \ - tests/tt_metal/test_datacopy_bfp8b \ - tests/tt_metal/test_datacopy \ - tests/tt_metal/test_datacopy_output_in_l1 \ - tests/tt_metal/test_dataflow_cb \ - tests/tt_metal/test_transpose_hc \ - tests/tt_metal/test_transpose_wh \ - tests/tt_metal/test_multiple_programs \ - tests/tt_metal/test_multi_core_kernel \ - tests/tt_metal/test_unpack_tilize \ - tests/tt_metal/test_unpack_untilize \ - tests/tt_metal/test_interleaved_layouts \ - tests/tt_metal/test_interleaved_l1_buffer \ - tests/tt_metal/test_bcast \ - tests/tt_metal/test_generic_binary_reader_matmul_large_block \ - tests/tt_metal/test_l1_to_l1_multi_core \ - tests/tt_metal/test_dram_copy_sticks_multi_core \ - tests/tt_metal/test_reduce_h \ - tests/tt_metal/test_reduce_w \ - tests/tt_metal/test_reduce_hw \ - tests/tt_metal/test_untilize_eltwise_binary \ - tests/tt_metal/test_bfp8_conversion \ - tests/tt_metal/test_bfp4_conversion \ - tests/tt_metal/tt_dispatch/test_enqueue_program \ - tests/tt_metal/test_core_range_set \ - tests/tt_metal/test_compile_sets_kernel_binaries \ - tests/tt_metal/test_compile_program \ - # test/tt_metal/test_datacopy_multi_core_multi_dram \ # this does not compile - # tests/tt_metal/test_dram_to_l1_multicast \ # these tests have all been converted to gtest - # tests/tt_metal/test_dram_to_l1_multicast_loopback_src \ - # tests/tt_metal/test_dram_loopback_single_core_db \ - # tests/tt_metal/test_matmul_multi_tile \ - # tests/tt_metal/test_matmul_large_block \ - # tests/tt_metal/test_matmul_single_core \ - # tests/tt_metal/test_matmul_single_core_small \ - # tests/tt_metal/test_matmul_multi_core_single_dram \ - # tests/tt_metal/test_matmul_multi_core_multi_dram \ - # tests/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast \ - # tests/tt_metal/test_matmul_multi_core_multi_dram_in1_mcast \ - # tests/tt_metal/test_matmul_multi_core_multi_dram_in0_mcast_in1_mcast \ - # tests/tt_metal/test_matmul_single_tile \ - # tests/tt_metal/test_flatten \ - -TT_METAL_TESTS_SRCS = $(addprefix tests/tt_metal/, $(addsuffix .cpp, $(TT_METAL_TESTS:tests/%=%))) - -TT_METAL_TESTS_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -TT_METAL_TESTS_LDFLAGS = $(LDFLAGS) -ltt_metal -ldl -lstdc++fs -pthread -lyaml-cpp -lgtest -lm - -TT_METAL_TESTS_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_TESTS_SRCS:.cpp=.o)) -TT_METAL_TESTS_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_TESTS_SRCS:.cpp=.d)) - --include $(TT_METAL_TESTS_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal: $(TT_METAL_TESTS) programming_examples tests/tt_metal/gtest_smoke tests/tt_metal/unit_tests tests/tt_metal/unit_tests_fast_dispatch tests/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue tests/tt_metal/unit_tests_frequent -tests/tt_metal/all: $(TT_METAL_TESTS) -tests/tt_metal/%: $(TESTDIR)/tt_metal/% ; - -.PRECIOUS: $(TESTDIR)/tt_metal/% -$(TESTDIR)/tt_metal/%: $(OBJDIR)/tt_metal/tests/%.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_TESTS_INCLUDES) -o $@ $^ $(TT_METAL_TESTS_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/tt_metal/tests/%.o -$(OBJDIR)/tt_metal/tests/%.o: tests/tt_metal/tt_metal/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_TESTS_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/common/work_split.hpp b/tests/tt_metal/tt_metal/perf_microbenchmark/common/work_split.hpp index 1a642387b00..8b4884c3e4b 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/common/work_split.hpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/common/work_split.hpp @@ -19,59 +19,6 @@ namespace tt { namespace tt_metal { -// splits the tiles evenly between num_cores, -// with option of padding where necessary -struct TilesSplit { - int num_cores_; - int total_tiles_; - int tpc_; // unclipped tiles per core - - inline TilesSplit(int num_cores, int total_tiles) : num_cores_(num_cores), total_tiles_(total_tiles) { - tpc_ = div_up(total_tiles_, num_cores_); - } - - // number of tiles per core for div_up split - inline uint32_t get_tpc() const { return tpc_; } - - // number of tiles per core for close to even split with multiples of 8 going - // to each core - inline uint32_t get_clipped_tpc(int icore) const { - auto result = (tpc_ * (icore + 1) > total_tiles_) ? (total_tiles_ - tpc_ * (icore + 1)) : tpc_; - return result; - } -}; - -struct CoreGridDesc { - uint32_t x_, y_; - CoreGridDesc(Device *dev) { - auto gs = dev->compute_with_storage_grid_size(); - x_ = gs.x; - y_ = gs.y; - TT_ASSERT(x_ > 0 && y_ > 0); - } - uint32_t total_cores() const { return x_ * y_; } - CoreCoord wrap_core(int icore) const { - TT_ASSERT(icore < total_cores()); - CoreCoord core = {(std::size_t)icore % x_, (std::size_t)icore / x_}; - return core; - } - - int numcores_dividing_numtiles(int num_tiles, int block_size = 1) { - // since we will be splitting num_tiles into num_cores we need to find - // num_cores such that num_tiles % num_cores = 0, so that it's evenly - // divided since we don't support leftovers at the moment - // TODO(AP): optimize if needed, O(max_cores) atm - uint32_t max_cores = total_cores(); - TT_ASSERT(max_cores % block_size == 0 || max_cores == 1); - if (max_cores > num_tiles) - max_cores = num_tiles; - for (int j = max_cores; j >= 1; j--) - if (num_tiles % j == 0) - return j; - return 1; - } -}; - // Given a number of tiles and number of cores available // Set the largest number of cores less than the number of tiles // Returns the number of cores as well as the number of tiles per core diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp index 15352ce79ac..905f2e1f01e 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp @@ -30,8 +30,7 @@ constexpr uint32_t DRAM_EXEC_BUF_DEFAULT_PAGE_SIZE = 1 << DRAM_EXEC_BUF_DEFAULT_ constexpr uint32_t DEFAULT_HUGEPAGE_ISSUE_BUFFER_SIZE = 256 * 1024 * 1024; constexpr uint32_t DEFAULT_HUGEPAGE_COMPLETION_BUFFER_SIZE = 256 * 1024 * 1024; constexpr uint32_t DEFAULT_PREFETCH_Q_ENTRIES = 1024; -constexpr uint32_t DEFAULT_MAX_PREFETCH_COMMAND_SIZE = 64 * 1024; -constexpr uint32_t DEFAULT_CMDDAT_Q_SIZE = 128 * 1024; +constexpr uint32_t DEFAULT_CMDDAT_Q_SIZE = 128 * 1024 + 2 * sizeof(CQPrefetchCmd) + 2 * sizeof(CQDispatchCmd); constexpr uint32_t DEFAULT_SCRATCH_DB_SIZE = 128 * 1024; constexpr uint32_t DEFAULT_ITERATIONS = 10000; @@ -128,7 +127,6 @@ void init(int argc, char **argv) { log_info(LogTest, " -cs: cmddat q size (default {})", DEFAULT_CMDDAT_Q_SIZE); log_info(LogTest, "-pdcs: prefetch_d cmddat cb size (default {})", dispatch_constants::get(CoreType::WORKER).prefetch_d_buffer_size()); log_info(LogTest, " -ss: scratch cb size (default {})", DEFAULT_SCRATCH_DB_SIZE); - log_info(LogTest, " -mc: max command size (default {})", DEFAULT_MAX_PREFETCH_COMMAND_SIZE); log_info(LogTest, " -pcies: size of data to transfer in pcie bw test type (default: {})", PCIE_TRANSFER_SIZE_DEFAULT); log_info(LogTest, " -dpgs: dram page size in dram bw test type (default: {})", DRAM_PAGE_SIZE_DEFAULT); log_info(LogTest, " -dpgr: dram pages to read in dram bw test type (default: {})", DRAM_PAGES_TO_READ_DEFAULT); @@ -148,8 +146,8 @@ void init(int argc, char **argv) { hugepage_issue_buffer_size_g = test_args::get_command_option_uint32(input_args, "-hp", DEFAULT_HUGEPAGE_ISSUE_BUFFER_SIZE); prefetch_q_entries_g = test_args::get_command_option_uint32(input_args, "-hq", DEFAULT_PREFETCH_Q_ENTRIES); cmddat_q_size_g = test_args::get_command_option_uint32(input_args, "-cs", DEFAULT_CMDDAT_Q_SIZE); + max_prefetch_command_size_g = cmddat_q_size_g; // note: half this for best perf scratch_db_size_g = test_args::get_command_option_uint32(input_args, "-ss", DEFAULT_SCRATCH_DB_SIZE); - max_prefetch_command_size_g = test_args::get_command_option_uint32(input_args, "-mc", DEFAULT_MAX_PREFETCH_COMMAND_SIZE); use_coherent_data_g = test_args::has_command_option(input_args, "-c"); readback_every_iteration_g = !test_args::has_command_option(input_args, "-rb"); pcie_transfer_size_g = test_args::get_command_option_uint32(input_args, "-pcies", PCIE_TRANSFER_SIZE_DEFAULT); @@ -309,8 +307,8 @@ void add_prefetcher_cmd_to_hostq(vector& cmds, cmds.push_back(0); } uint32_t new_size = (cmds.size() - prior_end) * sizeof(uint32_t); - TT_ASSERT(new_size <= max_prefetch_command_size_g, "Generated prefetcher command exceeds max command size"); - TT_ASSERT((new_size >> dispatch_constants::PREFETCH_Q_LOG_MINSIZE) < 0xFFFF, "HostQ command too large to represent"); + TT_FATAL(new_size <= max_prefetch_command_size_g, "Generated prefetcher command {} exceeds max command size {}", new_size, max_prefetch_command_size_g); + TT_FATAL((new_size >> dispatch_constants::PREFETCH_Q_LOG_MINSIZE) < 0xFFFF, "HostQ command too large to represent"); sizes.push_back(new_size >> dispatch_constants::PREFETCH_Q_LOG_MINSIZE); } @@ -2608,11 +2606,11 @@ int main(int argc, char **argv) { if (test_type_g >= 2) { perf_test_g = true; } - if (test_type_g == 2) { + if (test_type_g == 3) { perf_test_g = true; log_info(LogTest, "PCIE transfer size {}", std::to_string(pcie_transfer_size_g)); } - if (test_type_g == 3) { + if (test_type_g == 4) { perf_test_g = true; log_info(LogTest, "DRAM page size {}", std::to_string(dram_page_size_g)); log_info(LogTest, "DRAM pages to read {}", std::to_string(dram_pages_to_read_g)); @@ -2675,6 +2673,7 @@ int main(int argc, char **argv) { float bw = (long int)bytes_of_data_g * iterations_g / (elapsed_seconds.count() * 1000.0 * 1000.0 * 1000.0); std::stringstream ss; ss << std::fixed << std::setprecision(3) << bw; + log_info(LogTest, "Sent {} bytes", bytes_of_data_g * iterations_g); log_info(LogTest, "BW: {} GB/s", ss.str()); } } diff --git a/tests/tt_metal/tt_metal/unit_tests/module.mk b/tests/tt_metal/tt_metal/unit_tests/module.mk deleted file mode 100644 index 0d4a383ef78..00000000000 --- a/tests/tt_metal/tt_metal/unit_tests/module.mk +++ /dev/null @@ -1,31 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_UNIT_TESTS_SRCS_HOME = tests/tt_metal/tt_metal/unit_tests - -TT_METAL_UNIT_TESTS = ${TT_METAL_UNIT_TESTS_SRCS_HOME}/tests_main.cpp -TT_METAL_UNIT_TESTS += $(wildcard ${TT_METAL_UNIT_TESTS_SRCS_HOME}/*/*.cpp) -TT_METAL_UNIT_TESTS += $(wildcard ${TT_METAL_UNIT_TESTS_SRCS_HOME}/*/*/*.cpp) - -TT_METAL_UNIT_TESTS_OBJ_HOME = tt_metal/tests/unit_tests/ -TT_METAL_UNIT_TESTS_SRCS = $(patsubst $(TT_METAL_UNIT_TESTS_SRCS_HOME)%, $(TT_METAL_UNIT_TESTS_OBJ_HOME)%, $(TT_METAL_UNIT_TESTS)) - -TT_METAL_UNIT_TESTS_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -I$(TT_METAL_HOME)/tests/tt_metal/tt_metal/unit_tests/common -TT_METAL_UNIT_TESTS_LDFLAGS = $(TT_METAL_TESTS_LDFLAGS) -ltt_metal -ldl -lstdc++fs -pthread -lyaml-cpp -lgtest -lgtest_main -lm - -TT_METAL_UNIT_TESTS_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_SRCS:.cpp=.o)) -TT_METAL_UNIT_TESTS_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_SRCS:.cpp=.d)) - --include $(TT_METAL_UNIT_TESTS_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal/unit_tests: $(TESTDIR)/tt_metal/unit_tests - -.PRECIOUS: $(TESTDIR)/tt_metal/unit_tests -$(TESTDIR)/tt_metal/unit_tests: $(TT_METAL_UNIT_TESTS_OBJS) $(TT_METAL_UNIT_TESTS_COMMON_OBJS) $(TT_METAL_LIB) $(TT_DNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_INCLUDES) -o $@ $^ $(TT_METAL_UNIT_TESTS_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/$(TT_METAL_UNIT_TESTS_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_UNIT_TESTS_OBJ_HOME)/%.o: $(TT_METAL_UNIT_TESTS_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/unit_tests_common/module.mk b/tests/tt_metal/tt_metal/unit_tests_common/module.mk deleted file mode 100644 index 71a0855a8f1..00000000000 --- a/tests/tt_metal/tt_metal/unit_tests_common/module.mk +++ /dev/null @@ -1,22 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME = tests/tt_metal/tt_metal/unit_tests_common - -TT_METAL_UNIT_TESTS_COMMON = $(wildcard ${TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME}/*/*.cpp) -TT_METAL_UNIT_TESTS_COMMON += $(wildcard ${TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME}/*/*/*.cpp) - -TT_METAL_UNIT_TESTS_COMMON_OBJ_HOME = tt_metal/tests/unit_tests_common/ -TT_METAL_UNIT_TESTS_COMMON_SRCS = $(patsubst $(TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME)%, $(TT_METAL_UNIT_TESTS_COMMON_OBJ_HOME)%, $(TT_METAL_UNIT_TESTS_COMMON)) - -TT_METAL_UNIT_TESTS_COMMON_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -I$(TT_METAL_HOME)/$(TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME)/common - -TT_METAL_UNIT_TESTS_COMMON_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_COMMON_SRCS:.cpp=.o)) -TT_METAL_UNIT_TESTS_COMMON_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_COMMON_SRCS:.cpp=.d)) - --include $(TT_METAL_UNIT_TESTS_COMMON_DEPS) - -# This module doesn't build as its own executable, it's just included in unit_tests and unit_tests_fast_dispatch -.PRECIOUS: $(OBJDIR)/$(TT_METAL_UNIT_TESTS_COMMON_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_UNIT_TESTS_COMMON_OBJ_HOME)/%.o: $(TT_METAL_UNIT_TESTS_COMMON_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_COMMON_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/module.mk b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/module.mk deleted file mode 100644 index 52bf539cd03..00000000000 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/module.mk +++ /dev/null @@ -1,31 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME = tests/tt_metal/tt_metal/unit_tests_fast_dispatch - -TT_METAL_UNIT_TESTS_FAST_DISPATCH = ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME}/tests_main.cpp -TT_METAL_UNIT_TESTS_FAST_DISPATCH += $(wildcard ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME}/*/*.cpp) -TT_METAL_UNIT_TESTS_FAST_DISPATCH += $(wildcard ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME}/*/*/*.cpp) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJ_HOME = tt_metal/tests/unit_tests_fast_dispatch/ -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS = $(patsubst $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME)%, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJ_HOME)%, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH)) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -I$(TT_METAL_HOME)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME)/common -TT_METAL_UNIT_TESTS_FAST_DISPATCH_LDFLAGS = $(TT_METAL_UNIT_TESTS_LDFLAGS) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS:.cpp=.o)) -TT_METAL_UNIT_TESTS_FAST_DISPATCH_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS:.cpp=.d)) - --include $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal/unit_tests_fast_dispatch: $(TESTDIR)/tt_metal/unit_tests_fast_dispatch - -.PRECIOUS: $(TESTDIR)/tt_metal/unit_tests_fast_dispatch -$(TESTDIR)/tt_metal/unit_tests_fast_dispatch: $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJS) $(TT_METAL_UNIT_TESTS_COMMON_OBJS) $(TT_METAL_LIB) $(TT_DNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_INCLUDES) -o $@ $^ $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_OBJ_HOME)/%.o: $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue/module.mk b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue/module.mk deleted file mode 100644 index d276857f7d8..00000000000 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue/module.mk +++ /dev/null @@ -1,31 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME = tests/tt_metal/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE = ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME}/tests_main.cpp -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE += $(wildcard ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME}/*/*.cpp) -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE += $(wildcard ${TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME}/*/*/*.cpp) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJ_HOME = tt_metal/tests/unit_tests_fast_dispatch_single_chip_multi_queue/ -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS = $(patsubst $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME)%, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJ_HOME)%, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE)) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -I$(TT_METAL_HOME)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME)/common -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_LDFLAGS = $(TT_METAL_UNIT_TESTS_LDFLAGS) - -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS:.cpp=.o)) -TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS:.cpp=.d)) - --include $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue: $(TESTDIR)/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue - -.PRECIOUS: $(TESTDIR)/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue -$(TESTDIR)/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue: $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJS) $(TT_METAL_UNIT_TESTS_COMMON_OBJS) $(TT_METAL_LIB) $(TT_DNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_INCLUDES) -o $@ $^ $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_OBJ_HOME)/%.o: $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FAST_DISPATCH_SINGLE_CHIP_MULTI_QUEUE_INCLUDES) -c -o $@ $< diff --git a/tests/tt_metal/tt_metal/unit_tests_frequent/module.mk b/tests/tt_metal/tt_metal/unit_tests_frequent/module.mk deleted file mode 100644 index a649bec13c0..00000000000 --- a/tests/tt_metal/tt_metal/unit_tests_frequent/module.mk +++ /dev/null @@ -1,29 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_METAL_UNIT_TESTS_FREQUENT_SRCS_HOME = tests/tt_metal/tt_metal/unit_tests_frequent - -TT_METAL_UNIT_TESTS_FREQUENT = $(wildcard ${TT_METAL_UNIT_TESTS_FREQUENT_SRCS_HOME}/**/*.cpp) - -TT_METAL_UNIT_TESTS_FREQUENT_OBJ_HOME = tt_metal/tests/unit_tests_frequent/ -TT_METAL_UNIT_TESTS_FREQUENT_SRCS = $(patsubst $(TT_METAL_UNIT_TESTS_FREQUENT_SRCS_HOME)%, $(TT_METAL_UNIT_TESTS_FREQUENT_OBJ_HOME)%, $(TT_METAL_UNIT_TESTS_FREQUENT)) - -TT_METAL_UNIT_TESTS_FREQUENT_INCLUDES = $(TEST_INCLUDES) $(TT_METAL_INCLUDES) -I$(TT_METAL_HOME)/$(TT_METAL_UNIT_TESTS_FREQUENT_SRCS_HOME)/common -TT_METAL_UNIT_TESTS_FREQUENT_LDFLAGS = $(TT_METAL_UNIT_TESTS_LDFLAGS) - -TT_METAL_UNIT_TESTS_FREQUENT_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FREQUENT_SRCS:.cpp=.o)) -TT_METAL_UNIT_TESTS_FREQUENT_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_UNIT_TESTS_FREQUENT_SRCS:.cpp=.d)) - --include $(TT_METAL_UNIT_TESTS_FREQUENT_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tests/tt_metal/unit_tests_frequent: $(TESTDIR)/tt_metal/unit_tests_frequent - -.PRECIOUS: $(TESTDIR)/tt_metal/unit_tests_frequent -$(TESTDIR)/tt_metal/unit_tests_frequent: $(TT_METAL_UNIT_TESTS_FREQUENT_OBJS) $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FREQUENT_INCLUDES) -o $@ $^ $(TT_METAL_UNIT_TESTS_FREQUENT_LDFLAGS) - -.PRECIOUS: $(OBJDIR)/$(TT_METAL_UNIT_TESTS_FREQUENT_OBJ_HOME)/%.o -$(OBJDIR)/$(TT_METAL_UNIT_TESTS_FREQUENT_OBJ_HOME)/%.o: $(TT_METAL_UNIT_TESTS_FREQUENT_SRCS_HOME)/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TT_METAL_UNIT_TESTS_FREQUENT_INCLUDES) -c -o $@ $< diff --git a/tests/ttnn/module.mk b/tests/ttnn/module.mk deleted file mode 100644 index 3a356f51a1f..00000000000 --- a/tests/ttnn/module.mk +++ /dev/null @@ -1,5 +0,0 @@ -include $(TT_METAL_HOME)/tests/ttnn/unit_tests/module.mk - -# Only builds the tests, and specifically tests/ttnn/unit_tests in tests/ttnn/unit_tests/module.mk -.PHONY: tests/ttnn -tests/ttnn: tests/ttnn/unit_tests diff --git a/tests/ttnn/unit_tests/module.mk b/tests/ttnn/unit_tests/module.mk deleted file mode 100644 index 1bd79b20c2b..00000000000 --- a/tests/ttnn/unit_tests/module.mk +++ /dev/null @@ -1,37 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TTNN_UNIT_TESTS_HOME_DIR = $(TT_METAL_HOME)/tests/ttnn/unit_tests - -TTNN_UNIT_TESTS_DIRS := $(TTNN_UNIT_TESTS_HOME_DIR) $(TTNN_UNIT_TESTS_HOME_DIR)/gtests - -TTNN_UNIT_TESTS_SRCS := $(foreach dir,$(TTNN_UNIT_TESTS_DIRS),$(wildcard $(dir)/*.cpp)) - -TTNN_UNIT_TESTS_INCLUDES := $(TEST_INCLUDES) $(TTNN_PYBIND11_INCLUDES) - -TTNN_UNIT_TESTS_LDFLAGS := $(TTNN_PYBIND11_LDFLAGS) -lttnn -ldl -lstdc++fs -lgtest -lgtest_main -pthread -lm - -TTNN_UNIT_TESTS_OBJS := $(addprefix $(OBJDIR)/, $(TTNN_UNIT_TESTS_SRCS:$(TTNN_UNIT_TESTS_HOME_DIR)/%.cpp=ttnn/tests/unit_tests/%.o)) -TTNN_UNIT_TESTS_DEPS := $(TTNN_UNIT_TESTS_OBJS:.o=.d) - --include $(TTNN_UNIT_TESTS_DEPS) - -tests/ttnn/unit_tests: $(TESTDIR)/ttnn/unit_tests - -.PRECIOUS: $(OBJDIR)/ttnn/tests/%.o -$(OBJDIR)/ttnn/tests/%.o: $(TT_METAL_HOME)/tests/ttnn/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TTNN_UNIT_TESTS_INCLUDES) -c -o $@ $< - -.PHONY: tests/ttnn/unit_tests_run -tests/ttnn/unit_tests_run: tests/ttnn/unit_tests - @echo "Running all ttnn unit tests" - @if [ -f "$(TT_METAL_HOME)/build/test/ttnn/unit_tests" ]; then \ - $(TT_METAL_HOME)/build/test/ttnn/unit_tests; \ - else \ - echo "Test binary not found!"; \ - fi - -.PRECIOUS: $(TESTDIR)/ttnn/unit_tests -$(TESTDIR)/ttnn/unit_tests: $(TTNN_UNIT_TESTS_OBJS) $(TT_METAL_LIB) $(TT_DNN_LIB) $(TTNN_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) -o $@ $^ $(TTNN_UNIT_TESTS_LDFLAGS) diff --git a/tests/ttnn/unit_tests/operations/test_repeat.py b/tests/ttnn/unit_tests/operations/test_repeat.py index ff8f174a396..da693ae55e4 100644 --- a/tests/ttnn/unit_tests/operations/test_repeat.py +++ b/tests/ttnn/unit_tests/operations/test_repeat.py @@ -13,15 +13,13 @@ def test_repeat(device): torch_input_tensor = torch.randn((1, 2, 4, 4), dtype=torch.bfloat16) - repeat_shape = torch.randn((1, 2, 1, 1), dtype=torch.bfloat16) + repeat_shape = (1, 2, 1, 1) - input_tensor1 = ttnn.from_torch(repeat_shape, layout=ttnn.TILE_LAYOUT) - input_tensor1 = ttnn.to_device(input_tensor1, device) - torch_result = torch_input_tensor.repeat(repeat_shape.shape) + torch_result = torch_input_tensor.repeat(repeat_shape) input_tensor = ttnn.from_torch(torch_input_tensor, layout=ttnn.TILE_LAYOUT, device=device) - output = ttnn.repeat(input_tensor, input_tensor1.shape) + output = ttnn.repeat(input_tensor, ttnn.Shape(repeat_shape)) output = ttnn.to_torch(output) assert_with_pcc(torch_result, output, 0.9999) diff --git a/tt_eager/module.mk b/tt_eager/module.mk deleted file mode 100644 index c228d86778b..00000000000 --- a/tt_eager/module.mk +++ /dev/null @@ -1,32 +0,0 @@ -# Change for later when eager is split out -TT_LIBS_HOME ?= $(TT_METAL_HOME) -TT_METAL_BASE_INCLUDES = $(BASE_INCLUDES) -EAGER_OUTPUT_DIR = $(OUT)/dist - -TT_EAGER_INCLUDES = $(TT_METAL_BASE_INCLUDES) -Itt_eager/ -I ttnn/cpp/ - -include tt_eager/queue/module.mk -include tt_eager/tensor/module.mk -include tt_eager/tt_dnn/module.mk -include tt_eager/tt_lib/module.mk - -TT_LIBS_TO_BUILD = tt_eager/tensor \ - tt_eager/tt_dnn \ - tt_eager/queue \ - tt_eager/tt_lib \ - - -ifdef TT_METAL_ENV_IS_DEV -TT_LIBS_TO_BUILD += \ - $(TT_LIB_LIB_LOCAL_SO) -endif - -tt_eager: $(TT_LIBS_TO_BUILD) - -eager_package: python_env/dev - source build/python_env/bin/activate - python -m build --outdir $(EAGER_OUTPUT_DIR) - -eager_package/clean: - rm -rf tt_eager/*.egg-info - rm -rf $(EAGER_OUTPUT_DIR) diff --git a/tt_eager/queue/module.mk b/tt_eager/queue/module.mk deleted file mode 100644 index f908396a203..00000000000 --- a/tt_eager/queue/module.mk +++ /dev/null @@ -1,23 +0,0 @@ -QUEUE_SRCS = \ - tt_eager/queue/queue.cpp \ - -QUEUE_LIB = $(LIBDIR)/libqueue.a -QUEUE_DEFINES = -QUEUE_INCLUDES = $(TT_EAGER_INCLUDES) -QUEUE_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -QUEUE_OBJS = $(addprefix $(OBJDIR)/, $(QUEUE_SRCS:.cpp=.o)) -QUEUE_DEPS = $(addprefix $(OBJDIR)/, $(QUEUE_SRCS:.cpp=.d)) - --include $(QUEUE_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_eager/queue: $(QUEUE_LIB) - -$(QUEUE_LIB): $(COMMON_LIB) $(TT_METAL_LIB) $(QUEUE_OBJS) - @mkdir -p $(LIBDIR) - ar rcs -o $@ $(QUEUE_OBJS) - -$(OBJDIR)/tt_eager/queue/%.o: tt_eager/queue/%.cpp - @mkdir -p $(@D) - $(CXX) $(QUEUE_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(QUEUE_INCLUDES) $(QUEUE_DEFINES) -c -o $@ $< diff --git a/tt_eager/tensor/module.mk b/tt_eager/tensor/module.mk deleted file mode 100644 index f986138f91e..00000000000 --- a/tt_eager/tensor/module.mk +++ /dev/null @@ -1,27 +0,0 @@ -TENSOR_SRCS = \ - tt_eager/tensor/tensor_impl.cpp \ - tt_eager/tensor/tensor.cpp \ - tt_eager/tensor/types.cpp \ - tt_eager/tensor/tensor_utils.cpp \ - tt_eager/tensor/serialization.cpp \ - -TENSOR_LIB = $(LIBDIR)/libtensor.a -TENSOR_DEFINES = -TENSOR_INCLUDES = $(TT_EAGER_INCLUDES) -TENSOR_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -TENSOR_OBJS = $(addprefix $(OBJDIR)/, $(TENSOR_SRCS:.cpp=.o)) -TENSOR_DEPS = $(addprefix $(OBJDIR)/, $(TENSOR_SRCS:.cpp=.d)) - --include $(TENSOR_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_eager/tensor: $(TENSOR_LIB) - -$(TENSOR_LIB): $(COMMON_LIB) $(TT_METAL_LIB) $(TENSOR_OBJS) $(QUEUE_LIB) - @mkdir -p $(LIBDIR) - ar rcs -o $@ $(TENSOR_OBJS) - -$(OBJDIR)/tt_eager/tensor/%.o: tt_eager/tensor/%.cpp - @mkdir -p $(@D) - $(CXX) $(TENSOR_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TENSOR_INCLUDES) $(TENSOR_DEFINES) -c -o $@ $< diff --git a/tt_eager/tt_dnn/module.mk b/tt_eager/tt_dnn/module.mk deleted file mode 100644 index ce43f49e5e6..00000000000 --- a/tt_eager/tt_dnn/module.mk +++ /dev/null @@ -1,228 +0,0 @@ -TT_DNN_SRCS = \ - tt_eager/tt_dnn/op_library/auto_format.cpp \ - tt_eager/tt_dnn/op_library/data_transfer/data_transfer_op.cpp \ - tt_eager/tt_dnn/op_library/layout_conversion/layout_conversion_op.cpp \ - tt_eager/tt_dnn/op_library/all_gather/all_gather_op.cpp \ - tt_eager/tt_dnn/op_library/all_gather/multi_core/all_gather_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/ccl/reduce_scatter/reduce_scatter_op.cpp \ - tt_eager/tt_dnn/op_library/ccl/reduce_scatter/host/reduce_scatter_full_worker_grid.cpp \ - tt_eager/tt_dnn/op_library/ccl/ccl_common.cpp \ - tt_eager/tt_dnn/op_library/sharded/sharded_op.cpp \ - tt_eager/tt_dnn/op_library/sharded/multi_core/sharded_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/sharded_partial/sharded_op_partial.cpp \ - tt_eager/tt_dnn/op_library/sharded_partial/multi_core/sharded_op_partial_multi_core.cpp \ - tt_eager/tt_dnn/op_library/copy/copy_op.cpp \ - tt_eager/tt_dnn/op_library/copy/multi_core/copy_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/move/move_op.cpp \ - tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_overlap.cpp \ - tt_eager/tt_dnn/op_library/move/multi_core/move_op_multi_core_sharded.cpp \ - tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.cpp \ - tt_eager/tt_dnn/op_library/eltwise_binary/multi_core/eltwise_binary_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp \ - tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/eltwise_unary/multi_core/eltwise_unary_op_sharded.cpp \ - tt_eager/tt_dnn/op_library/pad/pad_op.cpp \ - tt_eager/tt_dnn/op_library/pad/pad_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/unpad/multi_core/unpad_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/unpad/unpad_op.cpp \ - tt_eager/tt_dnn/op_library/indexed_fill/multi_core/indexed_fill_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/indexed_fill/indexed_fill_op.cpp \ - tt_eager/tt_dnn/op_library/non_zero_indices/single_core/non_zero_indices_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/non_zero_indices/non_zero_indices_op.cpp \ - tt_eager/tt_dnn/op_library/fill_rm/fill_rm_op.cpp \ - tt_eager/tt_dnn/op_library/fully_connected/fully_connected_op.cpp \ - tt_eager/tt_dnn/op_library/pool/average_pool.cpp \ - tt_eager/tt_dnn/op_library/pool/max_pool.cpp \ - tt_eager/tt_dnn/op_library/pool/max_pool_single_core.cpp \ - tt_eager/tt_dnn/op_library/pool/max_pool_multi_core.cpp \ - tt_eager/tt_dnn/op_library/transpose/transpose_op.cpp \ - tt_eager/tt_dnn/op_library/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/transpose/cn_multi_core/transpose_cn_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/reduce/reduce_op.cpp \ - tt_eager/tt_dnn/op_library/reduce/single_core_hw/reduce_op_single_core_hw.cpp \ - tt_eager/tt_dnn/op_library/reduce/multi_core_h/reduce_op_multi_core_h.cpp \ - tt_eager/tt_dnn/op_library/reduce/multi_core_w/reduce_op_multi_core_w.cpp \ - tt_eager/tt_dnn/op_library/bcast/bcast_op.cpp \ - tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_multi_core_h.cpp \ - tt_eager/tt_dnn/op_library/bcast/multi_core_h/bcast_op_sharded_h.cpp \ - tt_eager/tt_dnn/op_library/bcast/multi_core_w/bcast_op_multi_core_w.cpp \ - tt_eager/tt_dnn/op_library/bcast/multi_core_hw/bcast_op_multi_core_hw.cpp \ - tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp \ - tt_eager/tt_dnn/op_library/bmm/single_core/bmm_op_single_core_tilize_untilize.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core/bmm_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core_reuse/bmm_op_multi_core_reuse.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_padding/bmm_op_multi_core_reuse_padding.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_1d_optimized/bmm_op_multi_core_reuse_mcast_1d_optimized.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_mcast_2d_optimized/bmm_op_multi_core_reuse_mcast_2d_optimized.cpp \ - tt_eager/tt_dnn/op_library/bmm/multi_core_reuse_optimized/bmm_op_multi_core_reuse_optimized.cpp \ - tt_eager/tt_dnn/op_library/downsample/downsample_op.cpp \ - tt_eager/tt_dnn/op_library/conv/conv_op.cpp \ - tt_eager/tt_dnn/op_library/sliding_window_op_infra/sliding_window.cpp \ - tt_eager/tt_dnn/op_library/sliding_window_op_infra/halo_op.cpp \ - tt_eager/tt_dnn/op_library/conv/optimized_conv_op.cpp \ - tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv/optimized_conv_op.cpp \ - tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded.cpp \ - tt_eager/tt_dnn/op_library/conv/multi_core_optimized_conv_sharded/optimized_conv_op_sharded_v2.cpp \ - tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/tilize_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/tilize/tilize_single_core/tilize_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/tilize/tilize_op.cpp \ - tt_eager/tt_dnn/op_library/untilize/multi_core/untilize_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/untilize/single_core/untilize_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/untilize/untilize_op.cpp \ - tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op.cpp \ - tt_eager/tt_dnn/op_library/untilize/untilize_with_halo_op_v2.cpp \ - tt_eager/tt_dnn/op_library/softmax/multi_core/softmax_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/softmax/softmax_op.cpp \ - tt_eager/tt_dnn/op_library/sdpa/multi_core/sdpa_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/sdpa/sdpa_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_helper_functions.cpp \ - tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp \ - tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp \ - tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_arange/moreh_arange_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp \ - tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp \ - tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp \ - tt_eager/tt_dnn/op_library/moreh_nll_loss/moreh_nll_loss_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_nll_loss/moreh_nll_loss_step1/moreh_nll_loss_step1.cpp \ - tt_eager/tt_dnn/op_library/moreh_nll_loss/moreh_nll_loss_step2/moreh_nll_loss_step2.cpp \ - tt_eager/tt_dnn/op_library/moreh_nll_loss_backward/moreh_nll_loss_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_nll_loss_backward/moreh_nll_loss_backward/moreh_nll_loss_backward.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/moreh_softmax_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/softmax_w_small/softmax_w_small.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/softmax_h_small/softmax_h_small.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/softmax_w_large/softmax_w_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/softmax_h_large/softmax_h_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax/softmax_c_large/softmax_c_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/moreh_softmax_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/softmax_backward_w_small/softmax_backward_w_small.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/softmax_backward_h_small/softmax_backward_h_small.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/softmax_backward_w_large/softmax_backward_w_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/softmax_backward_h_large/softmax_backward_h_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_softmax_backward/softmax_backward_c_large/softmax_backward_c_large.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_h_impl/moreh_sum_h_impl.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_w_impl/moreh_sum_w_impl.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_nc_impl/moreh_sum_nc_impl.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp \ - tt_eager/tt_dnn/op_library/prod/prod_nc/prod_nc.cpp \ - tt_eager/tt_dnn/op_library/prod/prod_nc_op.cpp \ - tt_eager/tt_dnn/op_library/prod/prod_op_all.cpp \ - tt_eager/tt_dnn/op_library/prod/single_core/prod_op_all_single_core.cpp \ - tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_h/moreh_mean_h.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_w/moreh_mean_w.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_nc/moreh_mean_nc.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward.cpp \ - tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward_op.cpp \ - tt_eager/tt_dnn/op_library/layernorm/multi_core/layernorm_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/layernorm/layernorm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_bmm/moreh_bmm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_bmm_backward/moreh_bmm_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_linear/moreh_linear_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_linear_backward/moreh_linear_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_h/moreh_bias_backward_multi_core_h.cpp \ - tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_hw/moreh_bias_backward_single_core_hw.cpp \ - tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/moreh_matmul/moreh_matmul_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_matmul_backward/moreh_matmul_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_h/moreh_norm_h.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_w/moreh_norm_w.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_other/moreh_norm_other.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward.cpp \ - tt_eager/tt_dnn/op_library/moreh_dot/single_core/moreh_dot_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/moreh_dot/moreh_dot_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_dot_backward/single_core/moreh_dot_backward_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/moreh_dot_backward/moreh_dot_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_layernorm/moreh_layernorm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_layernorm_backward/moreh_layernorm_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_layernorm_backward/input_grad/moreh_layernorm_backward_input_grad.cpp \ - tt_eager/tt_dnn/op_library/moreh_layernorm_backward/gamma_beta_grad/moreh_layernorm_backward_gamma_beta_grad.cpp \ - tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm.cpp \ - tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/moreh_groupnorm_backward_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/input_grad/moreh_groupnorm_backward_input_grad.cpp \ - tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/gamma_beta_grad/moreh_groupnorm_backward_gamma_beta_grad.cpp \ - tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_nc/moreh_cumsum_nc.cpp \ - tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_sgd/moreh_sgd_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_sgd/moreh_sgd.cpp \ - tt_eager/tt_dnn/op_library/groupnorm/groupnorm_op.cpp \ - tt_eager/tt_dnn/op_library/reshape/reshape_op.cpp \ - tt_eager/tt_dnn/op_library/permute/permute_op.cpp \ - tt_eager/tt_dnn/op_library/composite/composite_ops.cpp\ - tt_eager/tt_dnn/op_library/backward/backward_ops.cpp\ - tt_eager/tt_dnn/op_library/optimizer/optimizer_ops.cpp\ - tt_eager/tt_dnn/op_library/complex/complex_ops.cpp\ - tt_eager/tt_dnn/op_library/loss/loss_op.cpp\ - tt_eager/tt_dnn/op_library/transformer_tms/transformer_tms.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_split_query_key_value_and_split_heads/multi_core_split_query_key_value_and_split_heads.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_concatenate_heads/multi_core_concatenate_heads.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_attn_matmul/multi_core_attn_matmul.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_group_attn_matmul/multi_core_group_attn_matmul.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_ssm_eltwise_mul/multi_core_ssm_eltwise_mul.cpp \ - tt_eager/tt_dnn/op_library/transformer_tms/multi_core_ssm_1d_sum_reduce/multi_core_ssm_1d_sum_reduce.cpp \ - tt_eager/tt_dnn/op_library/run_operation.cpp \ - tt_eager/tt_dnn/op_library/split/split_tiled.cpp \ - tt_eager/tt_dnn/op_library/split/split_last_dim_two_chunks_tiled.cpp \ - tt_eager/tt_dnn/op_library/operation_history.cpp \ - tt_eager/tt_dnn/op_library/concat/multi_core/concat_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/concat/concat_op.cpp \ - tt_eager/tt_dnn/op_library/repeat/multi_core/repeat_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/repeat/repeat_op.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_tms.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads_falcon7b.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads_decode.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_create_qkv_heads.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_concat_heads_decode.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/nlp_kv_cache_load_slice.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/multi_core_create_qkv_heads/multi_core_create_qkv_heads.cpp \ - tt_eager/tt_dnn/op_library/nlp_tms/multi_core_create_q_and_kv_heads_separate/multi_core_create_q_and_kv_heads.cpp \ - tt_eager/tt_dnn/op_library/rotate_half/single_core/rotate_half_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/rotate_half/rotate_half_op.cpp \ - tt_eager/tt_dnn/op_library/rotary_embedding/multi_core/rotary_embedding_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/rotary_embedding/rotary_embedding_op.cpp \ - tt_eager/tt_dnn/op_library/embeddings/embeddings_op.cpp \ - tt_eager/tt_dnn/op_library/update_cache/multi_core/update_cache_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/update_cache/update_cache_op.cpp \ - tt_eager/tt_dnn/op_library/upsample/multi_core/upsample_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/upsample/single_core/upsample_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/upsample/upsample_op.cpp \ - tt_eager/tt_dnn/op_library/fold/fold_op.cpp \ - tt_eager/tt_dnn/op_library/fold/single_core/fold_op_single_core.cpp \ - tt_eager/tt_dnn/op_library/fold/multi_core/fold_op_multi_core.cpp \ - tt_eager/tt_dnn/op_library/moreh_getitem/moreh_getitem_op.cpp \ - tt_eager/tt_dnn/op_library/moreh_getitem/moreh_getitem_rm/moreh_getitem_rm.cpp \ - tt_eager/tt_dnn/op_library/moreh_getitem/moreh_getitem_tilized/moreh_getitem_tilized.cpp \ - tt_eager/tt_dnn/op_library/scan/scan_op.cpp \ - tt_eager/tt_dnn/op_library/topk/topk_op.cpp \ - tt_eager/tt_dnn/op_library/topk/single_core/single_core_topk.cpp \ - -TT_DNN_LIB = $(LIBDIR)/libtt_dnn.a -TT_DNN_DEFINES = -TT_DNN_INCLUDES = $(TT_EAGER_INCLUDES) -TT_DNN_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -TT_DNN_OBJS = $(addprefix $(OBJDIR)/, $(TT_DNN_SRCS:.cpp=.o)) -TT_DNN_DEPS = $(addprefix $(OBJDIR)/, $(TT_DNN_SRCS:.cpp=.d)) - --include $(TT_DNN_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_eager/tt_dnn: $(TT_DNN_LIB) - -$(TT_DNN_LIB): $(COMMON_LIB) $(TT_METAL_LIB) $(TENSOR_LIB) $(TT_DNN_OBJS) - @mkdir -p $(LIBDIR) - ar rcs -o $@ $(TT_DNN_OBJS) - -$(OBJDIR)/tt_eager/tt_dnn/%.o: tt_eager/tt_dnn/%.cpp - @mkdir -p $(@D) - $(CXX) $(TT_DNN_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_DNN_INCLUDES) $(TT_DNN_DEFINES) -c -o $@ $< diff --git a/tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp b/tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp index 2feeb064f42..a0065d2437b 100644 --- a/tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp @@ -37,15 +37,13 @@ operation::ProgramWithCallbacks moreh_adam_( // Device Setup //////////////////////////////////////////////////////////////////////////// tt_metal::Device *device = param.device(); - - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; // auto compute_with_storage_grid_size = device->compute_with_storage_grid_size(); // uint32_t num_cores_x = compute_with_storage_grid_size.x; // uint32_t num_cores_y = compute_with_storage_grid_size.y; - auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles); + auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp b/tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp index c17cc662a59..3f0d618224c 100644 --- a/tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp @@ -37,15 +37,13 @@ operation::ProgramWithCallbacks moreh_adamw_( // Device Setup //////////////////////////////////////////////////////////////////////////// tt_metal::Device *device = param.device(); - - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; // auto compute_with_storage_grid_size = device->compute_with_storage_grid_size(); // uint32_t num_cores_x = compute_with_storage_grid_size.x; // uint32_t num_cores_y = compute_with_storage_grid_size.y; - auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles); + auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp b/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp index 6b2a5608c7d..50c5c6f4182 100644 --- a/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp @@ -52,16 +52,15 @@ operation::ProgramWithCallbacks moreh_clip_grad_norm_step1_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, all_cores, core_group_1, core_group_2, num_inputs_per_core_group_1, - num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_inputs); + num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs); TT_ASSERT(core_group_2.ranges().empty()); TT_ASSERT(num_inputs_per_core_group_1 == 1); TT_ASSERT(num_inputs_per_core_group_2 == 0); diff --git a/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp b/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp index e2298da64a5..1660991cb9e 100644 --- a/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp @@ -38,16 +38,16 @@ operation::ProgramWithCallbacks moreh_clip_grad_norm_step3_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; + const auto [num_cores_to_be_used, all_cores, core_group_1, core_group_2, num_inputs_per_core_group_1, - num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_inputs); + num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs); TT_ASSERT(core_group_2.ranges().empty()); TT_ASSERT(num_inputs_per_core_group_1 == 1); TT_ASSERT(num_inputs_per_core_group_2 == 0); diff --git a/tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_nc/moreh_cumsum_nc.cpp b/tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_nc/moreh_cumsum_nc.cpp index 111e53bd6dc..0b7f99c5958 100644 --- a/tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_nc/moreh_cumsum_nc.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_cumsum/moreh_cumsum_nc/moreh_cumsum_nc.cpp @@ -57,9 +57,8 @@ operation::ProgramWithCallbacks moreh_cumsum_nc( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -71,7 +70,7 @@ operation::ProgramWithCallbacks moreh_cumsum_nc( core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles_per_chip); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles_per_chip); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm.cpp b/tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm.cpp index be86f98b6dc..1fd817e5e33 100644 --- a/tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_groupnorm/moreh_groupnorm.cpp @@ -101,9 +101,8 @@ operation::ProgramWithCallbacks moreh_groupnorm_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -111,7 +110,7 @@ operation::ProgramWithCallbacks moreh_groupnorm_impl( core_group_1, core_group_2, num_rows_per_core_group_1, - num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_rows); + num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_rows); log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str()); log_debug(LogTest, fmt::format("num_rows_per_core_group_1: {}", num_rows_per_core_group_1).c_str()); diff --git a/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/gamma_beta_grad/moreh_groupnorm_backward_gamma_beta_grad.cpp b/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/gamma_beta_grad/moreh_groupnorm_backward_gamma_beta_grad.cpp index 44cf94e8d90..29b635e1e17 100644 --- a/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/gamma_beta_grad/moreh_groupnorm_backward_gamma_beta_grad.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/gamma_beta_grad/moreh_groupnorm_backward_gamma_beta_grad.cpp @@ -75,9 +75,8 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_gamma_beta_grad_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -85,7 +84,7 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_gamma_beta_grad_impl( core_group_1, core_group_2, num_channels_per_core_group_1, - num_channels_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_channels); + num_channels_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_channels); log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str()); log_debug(LogTest, fmt::format("num_channels_per_core_group_1: {}", num_channels_per_core_group_1).c_str()); diff --git a/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/input_grad/moreh_groupnorm_backward_input_grad.cpp b/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/input_grad/moreh_groupnorm_backward_input_grad.cpp index b990eedff0e..cb3bcb684b9 100644 --- a/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/input_grad/moreh_groupnorm_backward_input_grad.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_groupnorm_backward/input_grad/moreh_groupnorm_backward_input_grad.cpp @@ -71,9 +71,8 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_input_grad_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -81,7 +80,7 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_input_grad_impl( core_group_1, core_group_2, num_rows_per_core_group_1, - num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_rows); + num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_rows); log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str()); log_debug(LogTest, fmt::format("num_rows_per_core_group_1: {}", num_rows_per_core_group_1).c_str()); diff --git a/tt_eager/tt_dnn/op_library/moreh_layernorm/moreh_layernorm_op.cpp b/tt_eager/tt_dnn/op_library/moreh_layernorm/moreh_layernorm_op.cpp index 879db0daac2..ee715c83d32 100644 --- a/tt_eager/tt_dnn/op_library/moreh_layernorm/moreh_layernorm_op.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_layernorm/moreh_layernorm_op.cpp @@ -127,9 +127,8 @@ operation::ProgramWithCallbacks moreh_layernorm_impl( // Core Setup //////////////////////////////////////////////////////////////////////////// const auto NCHt = N * C * Ht; - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; // core_group_2 works more. // If number of working cores is 108 and NCHt is 110, @@ -140,7 +139,7 @@ operation::ProgramWithCallbacks moreh_layernorm_impl( core_group_1, core_group_2, num_rows_per_core_group_1, - num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, NCHt); + num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, NCHt); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/gamma_beta_grad/moreh_layernorm_backward_gamma_beta_grad.cpp b/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/gamma_beta_grad/moreh_layernorm_backward_gamma_beta_grad.cpp index d25ec17a1ad..f0dd80b6cfc 100644 --- a/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/gamma_beta_grad/moreh_layernorm_backward_gamma_beta_grad.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/gamma_beta_grad/moreh_layernorm_backward_gamma_beta_grad.cpp @@ -95,9 +95,8 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_gamma_beta_grad_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -105,7 +104,7 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_gamma_beta_grad_impl( core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, Wt); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, Wt); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/input_grad/moreh_layernorm_backward_input_grad.cpp b/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/input_grad/moreh_layernorm_backward_input_grad.cpp index ea191cb71b4..76431a3f537 100644 --- a/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/input_grad/moreh_layernorm_backward_input_grad.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_layernorm_backward/input_grad/moreh_layernorm_backward_input_grad.cpp @@ -106,9 +106,8 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_input_grad_impl( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -116,7 +115,7 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_input_grad_impl( core_group_1, core_group_2, num_rows_per_core_group_1, - num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, NCHt); + num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, NCHt); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_h/moreh_bias_backward_multi_core_h.cpp b/tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_h/moreh_bias_backward_multi_core_h.cpp index f865b702d0f..99d29233657 100644 --- a/tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_h/moreh_bias_backward_multi_core_h.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_linear_backward/bias_backward_h/moreh_bias_backward_multi_core_h.cpp @@ -41,9 +41,8 @@ operation::ProgramWithCallbacks moreh_bias_backward_multi_core_h(const Tensor &o //////////////////////////////////////////////////////////////////////////// // This should allocate a DRAM buffer on the device Device *device = output_grad.device(); - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -51,7 +50,7 @@ operation::ProgramWithCallbacks moreh_bias_backward_multi_core_h(const Tensor &o core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, Wt); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, Wt); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp index 46890e860cb..c8e1f0c12fb 100644 --- a/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_matmul/multi_core/moreh_matmul_op_multi_core.cpp @@ -174,16 +174,16 @@ operation::ProgramWithCallbacks moreh_matmul_multi_core( //////////////////////////////////////////////////////////////////////////// // Core Grid Configuration For Workload //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y {core_grid.y_}; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; + const auto [num_cores, all_cores, core_group_1, core_group_2, num_output_tiles_per_core_group_1, - num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles); + num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles); log_debug(LogOp, "{}:{} num_output_tiles: {}", __func__, __LINE__, num_output_tiles); log_debug(LogOp, "{}:{} num_output_tiles_per_core_group1: {}, 2: {} ", __func__, __LINE__, num_output_tiles_per_core_group_1, num_output_tiles_per_core_group_2); diff --git a/tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_nc/moreh_mean_nc.cpp b/tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_nc/moreh_mean_nc.cpp index 9114b8a58ff..37882ee9255 100644 --- a/tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_nc/moreh_mean_nc.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_mean/moreh_mean_nc/moreh_mean_nc.cpp @@ -56,9 +56,8 @@ operation::ProgramWithCallbacks moreh_mean_nc(const Tensor &input, const Tensor //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -71,7 +70,7 @@ operation::ProgramWithCallbacks moreh_mean_nc(const Tensor &input, const Tensor core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward.cpp b/tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward.cpp index bb2e2b6a45d..fd230522221 100644 --- a/tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_mean_backward/moreh_mean_backward.cpp @@ -63,9 +63,8 @@ operation::ProgramWithCallbacks moreh_mean_backward_program(const Tensor &output //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -78,7 +77,7 @@ operation::ProgramWithCallbacks moreh_mean_backward_program(const Tensor &output core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_input_grad_tiles); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_input_grad_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_h/moreh_norm_h.cpp b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_h/moreh_norm_h.cpp index 51a17f0cef1..5dd7bc29576 100644 --- a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_h/moreh_norm_h.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_h/moreh_norm_h.cpp @@ -53,9 +53,8 @@ operation::ProgramWithCallbacks moreh_norm_h_impl(const Tensor &input, float p, //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -63,7 +62,7 @@ operation::ProgramWithCallbacks moreh_norm_h_impl(const Tensor &input, float p, core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, N * C * Wt); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, N * C * Wt); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_other/moreh_norm_other.cpp b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_other/moreh_norm_other.cpp index 5d9b734fa80..9890250cc2d 100644 --- a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_other/moreh_norm_other.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_other/moreh_norm_other.cpp @@ -66,9 +66,8 @@ operation::ProgramWithCallbacks moreh_norm_other_impl(const Tensor &input, float //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -76,7 +75,7 @@ operation::ProgramWithCallbacks moreh_norm_other_impl(const Tensor &input, float core_group_1, core_group_2, num_output_tiles_per_core_group_1, - num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles); + num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_w/moreh_norm_w.cpp b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_w/moreh_norm_w.cpp index c6c3e17ba98..869c05e2bb3 100644 --- a/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_w/moreh_norm_w.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_norm/moreh_norm_w/moreh_norm_w.cpp @@ -53,9 +53,8 @@ operation::ProgramWithCallbacks moreh_norm_w_impl(const Tensor &input, float p, //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -63,7 +62,7 @@ operation::ProgramWithCallbacks moreh_norm_w_impl(const Tensor &input, float p, core_group_1, core_group_2, num_rows_per_core_group_1, - num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, N * C * Ht); + num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, N * C * Ht); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward.cpp b/tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward.cpp index 33b9ebd164f..06316987348 100644 --- a/tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_norm_backward/moreh_norm_backward.cpp @@ -95,9 +95,8 @@ operation::ProgramWithCallbacks moreh_norm_backward_( //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - tt_metal::CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord(core_grid.x_, num_cores_y); + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const auto [num_cores_to_be_used, @@ -105,7 +104,7 @@ operation::ProgramWithCallbacks moreh_norm_backward_( core_group_1, core_group_2, num_input_tiles_per_core_group_1, - num_input_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_input_tiles); + num_input_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_input_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_nc_impl/moreh_sum_nc_impl.cpp b/tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_nc_impl/moreh_sum_nc_impl.cpp index 9d93b52388a..47e2eab19ef 100644 --- a/tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_nc_impl/moreh_sum_nc_impl.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_sum/moreh_sum_nc_impl/moreh_sum_nc_impl.cpp @@ -68,9 +68,8 @@ operation::ProgramWithCallbacks moreh_sum_nc_impl(const Tensor &input, const Ten //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -82,7 +81,7 @@ operation::ProgramWithCallbacks moreh_sum_nc_impl(const Tensor &input, const Ten core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp b/tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp index 352d5ab5972..933f5d15b0a 100644 --- a/tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp +++ b/tt_eager/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp @@ -91,9 +91,8 @@ operation::ProgramWithCallbacks moreh_sum_backward_impl(const Tensor &output_gra //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -104,7 +103,7 @@ operation::ProgramWithCallbacks moreh_sum_backward_impl(const Tensor &output_gra core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_input_grad_tiles); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_input_grad_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/prod/prod_nc/prod_nc.cpp b/tt_eager/tt_dnn/op_library/prod/prod_nc/prod_nc.cpp index 88d499a8460..ba2c73a2590 100644 --- a/tt_eager/tt_dnn/op_library/prod/prod_nc/prod_nc.cpp +++ b/tt_eager/tt_dnn/op_library/prod/prod_nc/prod_nc.cpp @@ -55,9 +55,8 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor //////////////////////////////////////////////////////////////////////////// // Core Setup //////////////////////////////////////////////////////////////////////////// - CoreGridDesc core_grid(device); - const auto num_cores_y = core_grid.y_; - CoreCoord core_grid_coord = {core_grid.x_, num_cores_y}; + auto grid = device->compute_with_storage_grid_size(); + const auto num_cores_y = grid.y; const uint32_t in0_t = 2; // input const uint32_t in1_t = 1; // zero @@ -69,7 +68,7 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor core_group_1, core_group_2, num_cols_per_core_group_1, - num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles); + num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles); //////////////////////////////////////////////////////////////////////////// // CircularBuffer Setup diff --git a/tt_eager/tt_dnn/op_library/work_split.hpp b/tt_eager/tt_dnn/op_library/work_split.hpp index a4e05aefa25..7b2a330c864 100644 --- a/tt_eager/tt_dnn/op_library/work_split.hpp +++ b/tt_eager/tt_dnn/op_library/work_split.hpp @@ -11,59 +11,11 @@ #include "tt_metal/common/assert.hpp" #include "tt_metal/common/core_coord.h" #include "tt_metal/common/math.hpp" - #include "tt_metal/host_api.hpp" - namespace tt { namespace tt_metal { -// splits the tiles evenly between num_cores, -// with option of padding where necessary -struct TilesSplit { - int num_cores_; - int total_tiles_; - int tpc_; // unclipped tiles per core - - inline TilesSplit(int num_cores, int total_tiles) : num_cores_(num_cores), total_tiles_(total_tiles) { - tpc_ = div_up(total_tiles_, num_cores_); - } - - // number of tiles per core for div_up split - inline uint32_t get_tpc() const { return tpc_; } - - // number of tiles per core for close to even split with multiples of 8 going to each core - inline uint32_t get_clipped_tpc(int icore) const { - auto result = ( tpc_*(icore+1) > total_tiles_ ) ? ( total_tiles_ - tpc_*(icore+1) ) : tpc_; - return result; - } -}; - -struct CoreGridDesc { - uint32_t x_, y_; - CoreGridDesc(Device* dev) { auto gs = dev->compute_with_storage_grid_size(); x_ = gs.x; y_ = gs.y; TT_ASSERT(x_ > 0 && y_ > 0); } - uint32_t total_cores() const { return x_*y_; } - CoreCoord wrap_core(int icore) const { - TT_ASSERT(icore < total_cores()); - CoreCoord core = {(std::size_t) icore % x_, (std::size_t) icore / x_}; - return core; - } - - int numcores_dividing_numtiles(int num_tiles, int block_size = 1) const { - // since we will be splitting num_tiles into num_cores we need to find num_cores such that - // num_tiles % num_cores = 0, so that it's evenly divided since we don't support leftovers at the moment - // TODO(AP): optimize if needed, O(max_cores) atm - uint32_t max_cores = total_cores(); - TT_ASSERT(max_cores % block_size == 0 || max_cores == 1); - if (max_cores > num_tiles) - max_cores = num_tiles; - for (int j = max_cores; j >= 1; j--) - if (num_tiles % j == 0) - return j; - return 1; - } -}; - // Given a number of tiles and number of cores available // Set the largest number of cores less than the number of tiles // Returns the number of cores as well as the number of tiles per core @@ -99,7 +51,7 @@ inline int find_max_divisor(uint32_t val, uint32_t start_max_div) { return result; } -inline int find_max_block_size(uint32_t val, uint32_t max_block_size=8) { +inline int find_max_block_size(uint32_t val, uint32_t max_block_size = 8) { int result = 1; for (int find_divisor = max_block_size; find_divisor >= 1; find_divisor--) { if (val % find_divisor == 0) { @@ -110,8 +62,12 @@ inline int find_max_block_size(uint32_t val, uint32_t max_block_size=8) { return result; } -inline std::set num_cores_to_corerange_set(const CoreCoord start_core, const uint32_t target_num_cores, const CoreCoord grid_size, const bool row_wise = false) { - uint32_t num_cores_x = grid_size.x; +inline std::set num_cores_to_corerange_set( + const CoreCoord start_core, + const uint32_t target_num_cores, + const CoreCoord grid_size, + const bool row_wise = false) { + uint32_t num_cores_x = grid_size.x; uint32_t num_cores_y = grid_size.y; uint32_t total_available_cores = 0; TT_FATAL(start_core.x < num_cores_x && start_core.y < num_cores_y, "Start core must be within grid size"); @@ -126,8 +82,12 @@ inline std::set num_cores_to_corerange_set(const CoreCoord start_core // Partial Cols total_available_cores += num_cores_y - start_core.y; } - TT_FATAL(target_num_cores <= total_available_cores, "Target number of cores {} is greater than total number of available cores {}", target_num_cores, total_available_cores); - std::set all_cores_set; + TT_FATAL( + target_num_cores <= total_available_cores, + "Target number of cores {} is greater than total number of available cores {}", + target_num_cores, + total_available_cores); + std::set all_cores_set; uint32_t leftover_size = target_num_cores; CoreCoord s_core = start_core; if (row_wise) { @@ -173,17 +133,19 @@ inline std::set num_cores_to_corerange_set(const CoreCoord start_core all_cores_set.insert(leftover_block); } } - return all_cores_set; + return all_cores_set; } // TODO: Get rid of old function -inline std::set num_cores_to_corerange_set(const uint32_t target_num_cores, const CoreCoord grid_size, const bool row_wise = false) { - return num_cores_to_corerange_set({0, 0}, target_num_cores, grid_size, row_wise); +inline std::set num_cores_to_corerange_set( + const uint32_t target_num_cores, const CoreCoord grid_size, const bool row_wise = false) { + return num_cores_to_corerange_set({0, 0}, target_num_cores, grid_size, row_wise); } // TODO: Switch num_cores_to_corerange_set to always return CoreRangeSet -inline CoreRangeSet num_cores_to_core_range_set(const uint32_t target_num_cores, const CoreCoord grid_size, const bool row_wise = false) { - return CoreRangeSet(num_cores_to_corerange_set({0, 0}, target_num_cores, grid_size, row_wise)); +inline CoreRangeSet num_cores_to_core_range_set( + const uint32_t target_num_cores, const CoreCoord grid_size, const bool row_wise = false) { + return CoreRangeSet(num_cores_to_corerange_set({0, 0}, target_num_cores, grid_size, row_wise)); } // This function takes in the core grid size, as well as the number of units of work to divide between the cores @@ -191,84 +153,76 @@ inline CoreRangeSet num_cores_to_core_range_set(const uint32_t target_num_cores, // the greater amount of work, and the CoreRangeSet that does less work if work cannot be evenly divided // If it can be evenly divided, the second CoreRangeSet is the same as the first, and the last is empty // The last 2 args are the units of work for the two core grids -inline std::tuple split_work_to_cores(const CoreCoord grid_size, const uint32_t units_to_divide, const bool row_wise = false) { +inline std::tuple split_work_to_cores( + const CoreCoord grid_size, const uint32_t units_to_divide, const bool row_wise = false) { ZoneScoped; - uint32_t num_cores_x = grid_size.x, num_cores_y = grid_size.y; - auto target_num_cores = std::min(units_to_divide, num_cores_x * num_cores_y); - CoreRangeSet all_cores(num_cores_to_corerange_set(target_num_cores, grid_size, row_wise)); - - std::set core_group_1_set; - std::set core_group_2_set; - uint32_t units_per_core_group_1 = units_to_divide / target_num_cores; - uint32_t units_per_core_group_2 = 0; + uint32_t num_cores_x = grid_size.x, num_cores_y = grid_size.y; + auto target_num_cores = std::min(units_to_divide, num_cores_x * num_cores_y); + CoreRangeSet all_cores(num_cores_to_corerange_set(target_num_cores, grid_size, row_wise)); + + std::set core_group_1_set; + std::set core_group_2_set; + uint32_t units_per_core_group_1 = units_to_divide / target_num_cores; + uint32_t units_per_core_group_2 = 0; // Evenly divided units to all target cores - if (units_to_divide % target_num_cores == 0) { - core_group_1_set = all_cores.ranges(); - // Uneven division of units across cores - // This case should only be hit when there are more units of work than a full grid of cores - // which is implicitly assumed in the following logic - } else { + if (units_to_divide % target_num_cores == 0) { + core_group_1_set = all_cores.ranges(); + // Uneven division of units across cores + // This case should only be hit when there are more units of work than a full grid of cores + // which is implicitly assumed in the following logic + } else { // Group of cores that do more work core_group_1_set = num_cores_to_corerange_set(units_to_divide % target_num_cores, grid_size, row_wise); auto last_block_group_1 = (*core_group_1_set.rbegin()); auto last_block_all_cores = (*all_cores.ranges().rbegin()); if (row_wise) { // Case where only the last row is divided between core group 1 and 2 - if (last_block_group_1.end.y == last_block_all_cores.end.y && last_block_group_1.end.x != last_block_all_cores.end.x) { + if (last_block_group_1.end.y == last_block_all_cores.end.y && + last_block_group_1.end.x != last_block_all_cores.end.x) { CoreRange leftover_block( - {last_block_group_1.end.x + 1, last_block_group_1.end.y}, - last_block_all_cores.end - ); + {last_block_group_1.end.x + 1, last_block_group_1.end.y}, last_block_all_cores.end); core_group_2_set.insert(leftover_block); } else { // Case where a middle row is divided between core group 1 and 2 if (last_block_group_1.end.x != num_cores_x - 1) { CoreRange leftover_stick( {last_block_group_1.end.x + 1, last_block_group_1.end.y}, - {num_cores_x - 1, last_block_group_1.end.y} - ); + {num_cores_x - 1, last_block_group_1.end.y}); core_group_2_set.insert(leftover_stick); } // Remaining rows of cores that does less work - CoreRange leftover_block( - {0, last_block_group_1.end.y + 1}, - last_block_all_cores.end - ); + CoreRange leftover_block({0, last_block_group_1.end.y + 1}, last_block_all_cores.end); core_group_2_set.insert(leftover_block); } } else { // Case where only the last column is divided between core group 1 and 2 - if (last_block_group_1.end.x == last_block_all_cores.end.x && last_block_group_1.end.y != last_block_all_cores.end.y) { + if (last_block_group_1.end.x == last_block_all_cores.end.x && + last_block_group_1.end.y != last_block_all_cores.end.y) { CoreRange leftover_block( - {last_block_group_1.end.x, last_block_group_1.end.y + 1}, - last_block_all_cores.end - ); + {last_block_group_1.end.x, last_block_group_1.end.y + 1}, last_block_all_cores.end); core_group_2_set.insert(leftover_block); } else { // Case where a middle column is divided between core group 1 and 2 if (last_block_group_1.end.y != num_cores_y - 1) { CoreRange leftover_stick( {last_block_group_1.end.x, last_block_group_1.end.y + 1}, - {last_block_group_1.end.x, num_cores_y - 1} - ); + {last_block_group_1.end.x, num_cores_y - 1}); core_group_2_set.insert(leftover_stick); } // Remaining columns of cores that does less work - CoreRange leftover_block( - {last_block_group_1.end.x + 1, 0}, - last_block_all_cores.end - ); + CoreRange leftover_block({last_block_group_1.end.x + 1, 0}, last_block_all_cores.end); core_group_2_set.insert(leftover_block); } } - units_per_core_group_2 = units_per_core_group_1; + units_per_core_group_2 = units_per_core_group_1; units_per_core_group_1++; - } - CoreRangeSet core_group_1(core_group_1_set); - CoreRangeSet core_group_2(core_group_2_set); + } + CoreRangeSet core_group_1(core_group_1_set); + CoreRangeSet core_group_2(core_group_2_set); - return std::make_tuple(target_num_cores, all_cores, core_group_1, core_group_2, units_per_core_group_1, units_per_core_group_2); + return std::make_tuple( + target_num_cores, all_cores, core_group_1, core_group_2, units_per_core_group_1, units_per_core_group_2); } -} // namespace tt_metal -} // namespace tt +} // namespace tt_metal +} // namespace tt diff --git a/tt_eager/tt_lib/csrc/module.mk b/tt_eager/tt_lib/csrc/module.mk deleted file mode 100644 index 97597c2159d..00000000000 --- a/tt_eager/tt_lib/csrc/module.mk +++ /dev/null @@ -1,39 +0,0 @@ -TT_LIB_LIB = $(LIBDIR)/libtt_lib_csrc.so -TT_LIB_LIB_LOCAL_SO = tt_eager/tt_lib/_C.so -TT_LIB_DEFINES = -TT_LIB_INCLUDES = $(TT_EAGER_INCLUDES) $(shell python3-config --includes) -Itt_metal/third_party/pybind11/include -TT_LIB_LDFLAGS = -ltt_dnn -ltensor -lqueue -ltt_metal -lyaml-cpp $(shell python3-config --ldflags --embed) $(LDFLAGS) -TT_LIB_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast -fno-var-tracking - -TT_LIB_SRCS = \ - tt_eager/tt_lib/csrc/tt_lib_bindings.cpp \ - tt_eager/tt_lib/csrc/type_caster.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_backward_ops.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_pytensor.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_dm_ops.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_custom_bmm_ops.cpp \ - tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp \ - -TT_LIB_OBJS = $(addprefix $(OBJDIR)/, $(TT_LIB_SRCS:.cpp=.o)) -TT_LIB_DEPS = $(addprefix $(OBJDIR)/, $(TT_LIB_SRCS:.cpp=.d)) - --include $(TT_LIB_DEPS) - -tt_lib: $(TT_LIB_LIB) - -# Link obj files into shared lib -$(TT_LIB_LIB): $(TT_LIB_OBJS) $(TT_DNN_LIB) $(TENSOR_LIB) $(TT_METAL_LIB) $(QUEUE_LIB) - @mkdir -p $(LIBDIR) - $(CXX) $(TT_LIB_CFLAGS) $(CXXFLAGS) $(SHARED_LIB_FLAGS) -o $@ $(TT_LIB_OBJS) $(TT_LIB_LDFLAGS) - -$(TT_LIB_LIB_LOCAL_SO): $(TT_LIB_LIB) - cp -fp $^ $@ - -# Compile obj files -$(OBJDIR)/tt_eager/tt_lib/csrc/%.o: tt_eager/tt_lib/csrc/%.cpp - @mkdir -p $(@D) - $(CXX) $(TT_LIB_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_LIB_INCLUDES) -c -o $@ $< - -tt_lib/csrc: $(TT_LIB_LIB) diff --git a/tt_eager/tt_lib/module.mk b/tt_eager/tt_lib/module.mk deleted file mode 100644 index 9c4904ce7b2..00000000000 --- a/tt_eager/tt_lib/module.mk +++ /dev/null @@ -1,3 +0,0 @@ -include tt_eager/tt_lib/csrc/module.mk - -tt_eager/tt_lib: tt_lib/csrc diff --git a/tt_metal/common/common.mk b/tt_metal/common/common.mk deleted file mode 100644 index 0fa15611f4c..00000000000 --- a/tt_metal/common/common.mk +++ /dev/null @@ -1,81 +0,0 @@ -# Include this file in your makefile by copying the following lines -# include ./common/common.mk - -# Make sure we don't print what is executed. If you want the echoing, run make with SILENT=0 -ifneq ($(SILENT),0) -.SILENT: -endif - -# QP - 'quiet piping' - to be used at the end of commands that print lots of stuff -QP ?= > /dev/null - -# Q - 'quiet' - te be used at the beggining of a command, if you want to suppress the printout -# of the command itself. Set Q=@ to make everything quiet. -Q ?= - -# Color related stuff -# source: http://vmrob.com/colorized-makefiles/ -# -# Set colors -RED =\033[0;31m -GREEN =\033[0;32m -YELLOW =\033[0;33m -BLUE =\033[0;34m -NC =\033[0m - -TITLE =\033[7;34m -SUBTITLE =\033[1;34m - -#NOCOLOR=1 - -ifndef NOCOLOR - ERROR_COLOR =\033[7;31m - WARN_COLOR =\033[7;33m - SUCCESS_COLOR=\033[7;32m -endif - -OK_STRING=$(SUCCESS_COLOR)[OK]$(NC) -ERROR_STRING=$(ERROR_COLOR)[ERROR]$(NC) -WARN_STRING=$(WARN_COLOR)[WARNING]$(NC) -SUCCESS_STRING=$(SUCCESS_COLOR)[SUCCESS]$(NC) - -PRETTY_2_COL = awk '{ printf "%-50s %-10s\n",$$1, $$2; }' -PRINT_ERROR = printf "$@ $(ERROR_STRING)\n" | $(PRETTY_2_COL) && printf "$(CMD)\n$$LOG\n" && false -PRINT_WARNING = printf "$@ $(WARN_STRING)\n" | $(PRETTY_2_COL) && printf "$(CMD)\n$$LOG\n" -PRINT_OK = printf "$@ $(OK_STRING)\n" | $(PRETTY_2_COL) -PRINT_SUCCESS = printf "$@ $(SUCCESS_STRING)\n" | $(PRETTY_2_COL) -PRINT_TARGET = printf "${TITLE}$@${NC}\n" - -BUILD_CMD = LOG=$$($(CMD) 2>&1) ; if [ $$? -eq 1 ]; then $(PRINT_ERROR); elif [ "$$LOG" != "" ] ; then $(PRINT_WARNING); else $(PRINT_OK); fi; - - -# Paramters for diff to show columns and ignore // style comments -# -COLUMN_DIFF_ARGS=-W 24 --suppress-common-lines --side-by-side --ignore-all-space --ignore-matching-lines="^\/\/" - -OS := $(shell uname) -ifeq ($(OS),Darwin) -NUM_PROCS ?= `sysctl -n hw.ncpu` -else -NUM_PROCS ?= `nproc` -endif - -space := $(subst ,, ) # https://stackoverflow.com/questions/10571658/gnu-make-convert-spaces-to-colons - -SILENT_ERRORS := 2> /dev/null ||: # Hide stderr and don't report errors to make. - -# Debug -ifeq ($(DEBUG_BUILD),1) - CXX_OPT=-O0 -g -else -# Fast - CXX_OPT=-O3 -endif - -# Some magic to get the directory of the -THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST)) -CALLER_MAKEFILE_LIST := $(filter-out $(THIS_MAKEFILE),$(MAKEFILE_LIST)) -MAKEFILE_PATH := $(abspath $(lastword $(CALLER_MAKEFILE_LIST))) -CURRENT_DIR := $(notdir $(patsubst %/,%,$(dir $(MAKEFILE_PATH)))) - -MKDIR_P ?= mkdir -p diff --git a/tt_metal/common/module.mk b/tt_metal/common/module.mk deleted file mode 100644 index a1c59fd0360..00000000000 --- a/tt_metal/common/module.mk +++ /dev/null @@ -1,42 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -include $(TT_METAL_HOME)/tt_metal/tracy.mk - -COMMON_INCLUDES = $(BASE_INCLUDES) - -ifeq ("$(ARCH_NAME)", "wormhole_b0") - COMMON_INCLUDES+= -Isrc/firmware/riscv/wormhole - COMMON_INCLUDES+= -Isrc/firmware/riscv/wormhole/wormhole_b0_defines - COMMON_INCLUDES+= -I$(UMD_HOME)/device/wormhole/. - COMMON_INCLUDES+= -I$(UMD_HOME)/src/firmware/riscv/wormhole -else - COMMON_INCLUDES+= -Isrc/firmware/riscv/$(ARCH_NAME) - COMMON_INCLUDES+= -I$(UMD_HOME)/device/$(ARCH_NAME)/. - COMMON_INCLUDES+= -I$(UMD_HOME)/src/firmware/riscv/$(ARCH_NAME) -endif - -ifeq ("$(ARCH_NAME)", "wormhole") - COMMON_INCLUDES+= -Isrc/firmware/riscv/wormhole - COMMON_INCLUDES+= -Isrc/firmware/riscv/wormhole/wormhole_a0_defines - COMMON_INCLUDES+= -I$(UMD_HOME)/device/wormhole/. - COMMON_INCLUDES+= -I$(UMD_HOME)/src/firmware/riscv/wormhole -endif - -COMMON_DEFINES = -COMMON_INCLUDES += -I$(TT_METAL_HOME)/tt_metal/common/. -COMMON_CFLAGS = $(CFLAGS) -Werror - -COMMON_SRCS += \ - $(wildcard tt_metal/common/*.cpp) - -COMMON_OBJS = $(addprefix $(OBJDIR)/, $(COMMON_SRCS:.cpp=.o)) -COMMON_DEPS = $(addprefix $(OBJDIR)/, $(COMMON_SRCS:.cpp=.d)) - --include $(COMMON_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -common: $(COMMON_OBJS) umd_device $(TRACY_OBJS) - -$(OBJDIR)/tt_metal/common/%.o: tt_metal/common/%.cpp - @mkdir -p $(@D) - $(CXX) $(COMMON_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(COMMON_INCLUDES) $(COMMON_DEFINES) -c -o $@ $< diff --git a/tt_metal/detail/module.mk b/tt_metal/detail/module.mk deleted file mode 100644 index cbb76690a18..00000000000 --- a/tt_metal/detail/module.mk +++ /dev/null @@ -1,18 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -TT_METAL_DETAIL_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -TT_METAL_DETAIL_SRCS = \ - tt_metal/detail/reports/compilation_reporter.cpp \ - tt_metal/detail/reports/memory_reporter.cpp \ - -TT_METAL_DETAIL_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_DETAIL_SRCS:.cpp=.o)) -TT_METAL_DETAIL_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_DETAIL_SRCS:.cpp=.d)) - --include $(TT_METAL_DETAIL_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_metal/detail: $(TT_METAL_DETAIL_OBJS) - -$(OBJDIR)/tt_metal/detail/%.o: tt_metal/detail/%.cpp - @mkdir -p $(@D) - $(CXX) $(TT_METAL_DETAIL_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_METAL_INCLUDES) $(TT_METAL_DEFINES) -c -o $@ $< diff --git a/tt_metal/impl/module.mk b/tt_metal/impl/module.mk deleted file mode 100644 index 004f9e8b197..00000000000 --- a/tt_metal/impl/module.mk +++ /dev/null @@ -1,34 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -TT_METAL_IMPL_DEFINES = -TT_METAL_IMPL_INCLUDES = $(COMMON_INCLUDES) -I$(TT_METAL_HOME)/tt_metal/impl -I$(TT_METAL_HOME)/. -TT_METAL_IMPL_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -TT_METAL_IMPL_SRCS = \ - tt_metal/impl/device/device.cpp \ - tt_metal/impl/device/multi_device.cpp \ - tt_metal/impl/buffers/buffer.cpp \ - tt_metal/impl/buffers/circular_buffer.cpp \ - tt_metal/impl/buffers/semaphore.cpp \ - tt_metal/impl/kernels/kernel.cpp \ - tt_metal/impl/allocator/algorithms/free_list.cpp \ - tt_metal/impl/allocator/allocator.cpp \ - tt_metal/impl/allocator/basic_allocator.cpp \ - tt_metal/impl/allocator/l1_banking_allocator.cpp \ - tt_metal/impl/program/program.cpp \ - tt_metal/impl/dispatch/debug_tools.cpp \ - tt_metal/impl/dispatch/command_queue.cpp \ - tt_metal/impl/debug/dprint_server.cpp \ - tt_metal/impl/debug/watcher_server.cpp \ - tt_metal/impl/trace/trace.cpp - -TT_METAL_IMPL_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_IMPL_SRCS:.cpp=.o)) -TT_METAL_IMPL_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_IMPL_SRCS:.cpp=.d)) - --include $(TT_METAL_IMPL_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_metal/impl: $(COMMON_OBJS) $(TT_METAL_IMPL_OBJS) - -$(OBJDIR)/tt_metal/impl/%.o: tt_metal/impl/%.cpp - @mkdir -p $(@D) - $(CXX) $(TT_METAL_IMPL_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_METAL_IMPL_INCLUDES) $(TT_METAL_IMPL_DEFINES) -c -o $@ $< diff --git a/tt_metal/jit_build/module.mk b/tt_metal/jit_build/module.mk deleted file mode 100644 index 0ebf16ac6f2..00000000000 --- a/tt_metal/jit_build/module.mk +++ /dev/null @@ -1,25 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -JIT_BUILD_DEFINES = -JIT_BUILD_INCLUDES += -I$(TT_METAL_HOME)/tt_metal/jit_build $(BASE_INCLUDES) $(COMMON_INCLUDES) -JIT_BUILD_CFLAGS = $(CFLAGS) -Werror - -JIT_BUILD_SRCS_RELATIVE = \ - jit_build/build.cpp \ - jit_build/genfiles.cpp \ - jit_build/data_format.cpp \ - jit_build/settings.cpp \ - jit_build/kernel_args.cpp - -JIT_BUILD_SRCS = $(addprefix tt_metal/, $(JIT_BUILD_SRCS_RELATIVE)) - -JIT_BUILD_OBJS = $(addprefix $(OBJDIR)/, $(JIT_BUILD_SRCS:.cpp=.o)) -JIT_BUILD_DEPS = $(addprefix $(OBJDIR)/, $(JIT_BUILD_SRCS:.cpp=.d)) - --include $(JIT_BUILD_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -jit_build: $(COMMON_OBJS) $(JIT_BUILD_LIB) - -$(OBJDIR)/tt_metal/jit_build/%.o: tt_metal/jit_build/%.cpp - @mkdir -p $(@D) - $(CXX) $(JIT_BUILD_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(JIT_BUILD_INCLUDES) $(JIT_BUILD_DEFINES) -c -o $@ $< diff --git a/tt_metal/llrt/module.mk b/tt_metal/llrt/module.mk deleted file mode 100644 index 80063c7d10e..00000000000 --- a/tt_metal/llrt/module.mk +++ /dev/null @@ -1,26 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -LLRT_DEFINES = -LLRT_INCLUDES = $(COMMON_INCLUDES) -I$(TT_METAL_HOME)/tt_metal/llrt -LLRT_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -LLRT_SRCS_RELATIVE = \ - llrt/tt_cluster.cpp \ - llrt/llrt.cpp \ - llrt/rtoptions.cpp \ - llrt/tt_memory.cpp \ - llrt/tt_hexfile.cpp \ - llrt/tlb_config.cpp - -LLRT_SRCS = $(addprefix tt_metal/, $(LLRT_SRCS_RELATIVE)) - -LLRT_OBJS = $(addprefix $(OBJDIR)/, $(LLRT_SRCS:.cpp=.o)) -LLRT_DEPS = $(addprefix $(OBJDIR)/, $(LLRT_SRCS:.cpp=.d)) - --include $(LLRT_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -llrt: $(LLRT_OBJS) $(DEVICE_OBJS) $(COMMON_OBJS) - -$(OBJDIR)/tt_metal/llrt/%.o: tt_metal/llrt/%.cpp - @mkdir -p $(@D) - $(CXX) $(LLRT_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(LLRT_INCLUDES) $(LLRT_DEFINES) -c -o $@ $< diff --git a/tt_metal/module.mk b/tt_metal/module.mk deleted file mode 100644 index 1783394aa25..00000000000 --- a/tt_metal/module.mk +++ /dev/null @@ -1,51 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -CFLAGS += -DFMT_HEADER_ONLY -I$(TT_METAL_HOME)/tt_metal/third_party/fmt - -include $(TT_METAL_HOME)/tt_metal/common/module.mk -include $(TT_METAL_HOME)/tt_metal/jit_build/module.mk -include $(TT_METAL_HOME)/tt_metal/llrt/module.mk -include $(TT_METAL_HOME)/tt_metal/tools/module.mk - -ifeq ($(TT_METAL_CREATE_STATIC_LIB), 1) -TT_METAL_LIB = $(LIBDIR)/libtt_metal.a -else -TT_METAL_LIB = $(LIBDIR)/libtt_metal.so -endif -TT_METAL_LDFLAGS = $(LDFLAGS) -TT_METAL_INCLUDES = $(COMMON_INCLUDES) -TT_METAL_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -include $(TT_METAL_HOME)/tt_metal/impl/module.mk -include $(TT_METAL_HOME)/tt_metal/detail/module.mk - -TT_METAL_SRCS = \ - tt_metal/tt_metal.cpp - -TT_METAL_OBJS = $(addprefix $(OBJDIR)/, $(TT_METAL_SRCS:.cpp=.o)) -TT_METAL_DEPS = $(addprefix $(OBJDIR)/, $(TT_METAL_SRCS:.cpp=.d)) - --include $(TT_METAL_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tt_metal: $(TT_METAL_LIB) tools - -TT_METAL_AND_DEPS_OBJS = $(COMMON_OBJS) $(TT_METAL_OBJS) $(DEVICE_OBJS) $(TT_METAL_IMPL_OBJS) $(TT_METAL_DETAIL_OBJS) $(LLRT_OBJS) $(JIT_BUILD_OBJS) $(PROFILER_OBJS) $(TRACY_OBJS) - -ifeq ($(TT_METAL_CREATE_STATIC_LIB), 1) -# If production build, release all of tt_metal as a full static library for later build with Eager wheel -$(TT_METAL_LIB): $(TT_METAL_AND_DEPS_OBJS) - @mkdir -p $(LIBDIR) - ar rcs -o $@ $^ -else -$(TT_METAL_LIB): $(TT_METAL_AND_DEPS_OBJS) - @mkdir -p $(LIBDIR) - $(CXX) $(TT_METAL_CFLAGS) $(CXXFLAGS) $(SHARED_LIB_FLAGS) -o $@ $^ $(TT_METAL_LDFLAGS) -endif - -# TODO: rk: need to use a general way to do the following directives, note that using tt_metal/%.o will -# include EVERYTHING under tt_metal, forcing the build step to use only build directives in this file -# rather than the specialized ones in each submodule -$(OBJDIR)/tt_metal/tt_metal.o: tt_metal/tt_metal.cpp - @mkdir -p $(@D) - $(CXX) $(TT_METAL_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_METAL_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/add_2_integers_in_compute/module.mk b/tt_metal/programming_examples/add_2_integers_in_compute/module.mk deleted file mode 100644 index 82668ebf892..00000000000 --- a/tt_metal/programming_examples/add_2_integers_in_compute/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ADD_IN_COMPUTE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/add_2_integers_in_compute/add_2_integers_in_compute.cpp - -ADD_IN_COMPUTE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_compute.d - --include $(ADD_IN_COMPUTE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_compute -$(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_compute: $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_compute.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_compute.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_compute.o: $(ADD_IN_COMPUTE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/add_2_integers_in_riscv/module.mk b/tt_metal/programming_examples/add_2_integers_in_riscv/module.mk deleted file mode 100644 index 8b8aaa01d44..00000000000 --- a/tt_metal/programming_examples/add_2_integers_in_riscv/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ADD_IN_RISCV_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/add_2_integers_in_riscv/add_2_integers_in_riscv.cpp - -ADD_IN_RISCV_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_riscv.d - --include $(ADD_IN_RISCV_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_riscv -$(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_riscv: $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_riscv.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_riscv.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/add_2_integers_in_riscv.o: $(ADD_IN_RISCV_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/contributed/module.mk b/tt_metal/programming_examples/contributed/module.mk deleted file mode 100644 index 8209e29da8e..00000000000 --- a/tt_metal/programming_examples/contributed/module.mk +++ /dev/null @@ -1,4 +0,0 @@ -PROGRAMMING_EXAMPLES_CONTRIB_TESTDIR=$(PROGRAMMING_EXAMPLES_TESTDIR)/contributed -PROGRAMMING_EXAMPLES_CONTRIB_OBJDIR=$(PROGRAMMING_EXAMPLES_OBJDIR)/contributed - -include $(TT_METAL_HOME)/tt_metal/programming_examples/contributed/vecadd/module.mk diff --git a/tt_metal/programming_examples/contributed/vecadd/module.mk b/tt_metal/programming_examples/contributed/vecadd/module.mk deleted file mode 100644 index dd2ceee1d25..00000000000 --- a/tt_metal/programming_examples/contributed/vecadd/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOOPBACK_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/contributed/vecadd/vecadd.cpp - -LOOPBACK_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_CONTRIB_OBJDIR)/vecadd.d - --include $(LOOPBACK_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_CONTRIB_TESTDIR)/vecadd -$(PROGRAMMING_EXAMPLES_CONTRIB_TESTDIR)/vecadd: $(PROGRAMMING_EXAMPLES_CONTRIB_OBJDIR)/vecadd.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_CONTRIB_OBJDIR)/vecadd.o -$(PROGRAMMING_EXAMPLES_CONTRIB_OBJDIR)/vecadd.o: $(LOOPBACK_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/eltwise_binary/module.mk b/tt_metal/programming_examples/eltwise_binary/module.mk deleted file mode 100644 index b358c9214c4..00000000000 --- a/tt_metal/programming_examples/eltwise_binary/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ELTWISE_BINARY_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/eltwise_binary/eltwise_binary.cpp - -ELTWISE_BINARY_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_binary.d - --include $(ELTWISE_BINARY_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_binary -$(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_binary: $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_binary.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_binary.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_binary.o: $(ELTWISE_BINARY_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/eltwise_sfpu/module.mk b/tt_metal/programming_examples/eltwise_sfpu/module.mk deleted file mode 100644 index c7402ab9cf0..00000000000 --- a/tt_metal/programming_examples/eltwise_sfpu/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ELTWISE_SFPU_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/eltwise_sfpu/eltwise_sfpu.cpp - -ELTWISE_SFPU_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_sfpu.d - --include $(ELTWISE_SFPU_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_sfpu -$(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_sfpu: $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_sfpu.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_sfpu.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/eltwise_sfpu.o: $(ELTWISE_SFPU_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/hello_world_compute_kernel/module.mk b/tt_metal/programming_examples/hello_world_compute_kernel/module.mk deleted file mode 100644 index cf21b167aa4..00000000000 --- a/tt_metal/programming_examples/hello_world_compute_kernel/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ADD_IN_COMPUTE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_compute_kernel/hello_world_compute_kernel.cpp - -ADD_IN_COMPUTE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_compute_kernel.d - --include $(ADD_IN_COMPUTE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_compute_kernel -$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_compute_kernel: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_compute_kernel.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_compute_kernel.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_compute_kernel.o: $(ADD_IN_COMPUTE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/hello_world_datamovement_kernel/module.mk b/tt_metal/programming_examples/hello_world_datamovement_kernel/module.mk deleted file mode 100644 index 890a58810fe..00000000000 --- a/tt_metal/programming_examples/hello_world_datamovement_kernel/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ADD_IN_COMPUTE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_datamovement_kernel/hello_world_datamovement_kernel.cpp - -ADD_IN_COMPUTE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datamovement_kernel.d - --include $(ADD_IN_COMPUTE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datamovement_kernel -$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datamovement_kernel: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datamovement_kernel.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datamovement_kernel.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datamovement_kernel.o: $(ADD_IN_COMPUTE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/hello_world_datatypes_kernel/module.mk b/tt_metal/programming_examples/hello_world_datatypes_kernel/module.mk deleted file mode 100644 index 4c35b480f86..00000000000 --- a/tt_metal/programming_examples/hello_world_datatypes_kernel/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -ADD_IN_COMPUTE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_datatypes_kernel/hello_world_datatypes_kernel.cpp - -ADD_IN_COMPUTE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datatypes_kernel.d - --include $(ADD_IN_COMPUTE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datatypes_kernel -$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datatypes_kernel: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datatypes_kernel.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datatypes_kernel.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/hello_world_datatypes_kernel.o: $(ADD_IN_COMPUTE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/loopback/module.mk b/tt_metal/programming_examples/loopback/module.mk deleted file mode 100644 index 46c67112efe..00000000000 --- a/tt_metal/programming_examples/loopback/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOOPBACK_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/loopback/loopback.cpp - -LOOPBACK_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/loopback.d - --include $(LOOPBACK_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/loopback -$(PROGRAMMING_EXAMPLES_TESTDIR)/loopback: $(PROGRAMMING_EXAMPLES_OBJDIR)/loopback.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/loopback.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/loopback.o: $(LOOPBACK_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/matmul_common/work_split.hpp b/tt_metal/programming_examples/matmul_common/work_split.hpp index 23a76c4861e..b3e458e9c5a 100644 --- a/tt_metal/programming_examples/matmul_common/work_split.hpp +++ b/tt_metal/programming_examples/matmul_common/work_split.hpp @@ -17,52 +17,6 @@ namespace tt { namespace tt_metal { -// splits the tiles evenly between num_cores, -// with option of padding where necessary -struct TilesSplit { - int num_cores_; - int total_tiles_; - int tpc_; // unclipped tiles per core - - inline TilesSplit(int num_cores, int total_tiles) : num_cores_(num_cores), total_tiles_(total_tiles) { - tpc_ = div_up(total_tiles_, num_cores_); - } - - // number of tiles per core for div_up split - inline uint32_t get_tpc() const { return tpc_; } - - // number of tiles per core for close to even split with multiples of 8 going to each core - inline uint32_t get_clipped_tpc(int icore) const { - auto result = ( tpc_*(icore+1) > total_tiles_ ) ? ( total_tiles_ - tpc_*(icore+1) ) : tpc_; - return result; - } -}; - -struct CoreGridDesc { - uint32_t x_, y_; - CoreGridDesc(Device* dev) { auto gs = dev->compute_with_storage_grid_size(); x_ = gs.x; y_ = gs.y; TT_ASSERT(x_ > 0 && y_ > 0); } - uint32_t total_cores() const { return x_*y_; } - CoreCoord wrap_core(int icore) const { - TT_ASSERT(icore < total_cores()); - CoreCoord core = {(std::size_t) icore % x_, (std::size_t) icore / x_}; - return core; - } - - int numcores_dividing_numtiles(int num_tiles, int block_size = 1) const { - // since we will be splitting num_tiles into num_cores we need to find num_cores such that - // num_tiles % num_cores = 0, so that it's evenly divided since we don't support leftovers at the moment - // TODO(AP): optimize if needed, O(max_cores) atm - uint32_t max_cores = total_cores(); - TT_ASSERT(max_cores % block_size == 0 || max_cores == 1); - if (max_cores > num_tiles) - max_cores = num_tiles; - for (int j = max_cores; j >= 1; j--) - if (num_tiles % j == 0) - return j; - return 1; - } -}; - // Given a number of tiles and number of cores available // Set the largest number of cores less than the number of tiles // Returns the number of cores as well as the number of tiles per core diff --git a/tt_metal/programming_examples/matmul_multi_core/module.mk b/tt_metal/programming_examples/matmul_multi_core/module.mk deleted file mode 100644 index 5caf59539b6..00000000000 --- a/tt_metal/programming_examples/matmul_multi_core/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -MATMUL_MULTI_CORE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multi_core/matmul_multi_core.cpp - -MATMUL_MULTI_CORE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multi_core.d - --include $(MATMUL_MULTI_CORE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multi_core -$(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multi_core: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multi_core.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multi_core.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multi_core.o: $(MATMUL_MULTI_CORE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/matmul_multicore_reuse/module.mk b/tt_metal/programming_examples/matmul_multicore_reuse/module.mk deleted file mode 100644 index b9f559f1e46..00000000000 --- a/tt_metal/programming_examples/matmul_multicore_reuse/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -MATMUL_MULTI_CORE_REUSE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multicore_reuse/matmul_multicore_reuse.cpp - -MATMUL_MULTI_CORE_REUSE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse.d - --include $(MATMUL_MULTI_CORE_REUSE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse -$(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse.o: $(MATMUL_MULTI_CORE_REUSE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/module.mk b/tt_metal/programming_examples/matmul_multicore_reuse_mcast/module.mk deleted file mode 100644 index 343c2dc80b4..00000000000 --- a/tt_metal/programming_examples/matmul_multicore_reuse_mcast/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -MATMUL_MULTI_CORE_REUSE_MCAST_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multicore_reuse_mcast/matmul_multicore_reuse_mcast.cpp - -MATMUL_MULTI_CORE_REUSE_MCAST_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse_mcast.d - --include $(MATMUL_MULTI_CORE_REUSE_MCAST_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse_mcast -$(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse_mcast: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse_mcast.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse_mcast.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_multicore_reuse_mcast.o: $(MATMUL_MULTI_CORE_REUSE_MCAST_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/matmul_single_core/module.mk b/tt_metal/programming_examples/matmul_single_core/module.mk deleted file mode 100644 index 4e8c9a7db7f..00000000000 --- a/tt_metal/programming_examples/matmul_single_core/module.mk +++ /dev/null @@ -1,15 +0,0 @@ -MATMUL_SINGLE_CORE_EXAMPLE_SRC = $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_single_core/matmul_single_core.cpp - -MATMUL_SINGLE_CORE_EXAMPLES_DEPS = $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_single_core.d - --include $(MATMUL_SINGLE_CORE_EXAMPLES_DEPS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_single_core -$(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_single_core: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_single_core.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_single_core.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/matmul_single_core.o: $(MATMUL_SINGLE_CORE_EXAMPLE_SRC) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/programming_examples/module.mk b/tt_metal/programming_examples/module.mk deleted file mode 100644 index 450ae1c99d0..00000000000 --- a/tt_metal/programming_examples/module.mk +++ /dev/null @@ -1,60 +0,0 @@ -PROGRAMMING_EXAMPLES_TESTDIR = $(OUT)/programming_examples -PROGRAMMING_EXAMPLES_OBJDIR = $(OBJDIR)/programming_examples - -PROGRAMMING_EXAMPLES_INCLUDES = $(COMMON_INCLUDES) -PROGRAMMING_EXAMPLES_LDFLAGS = -ltt_metal -ldl -lstdc++fs -pthread -lyaml-cpp -lm - -include $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_compute_kernel/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_datamovement_kernel/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/hello_world_datatypes_kernel/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/add_2_integers_in_riscv/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/add_2_integers_in_compute/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/loopback/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/eltwise_binary/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/eltwise_sfpu/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/profiler/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_single_core/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multi_core/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multicore_reuse/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/matmul_multicore_reuse_mcast/module.mk -include $(TT_METAL_HOME)/tt_metal/programming_examples/contributed/module.mk - -# Need to depend on set_up_kernels. - -PROFILER_TESTS += \ - programming_examples/profiler/test_custom_cycle_count\ - programming_examples/profiler/test_full_buffer\ - programming_examples/profiler/test_multi_op - -CONTRIBUTED_EXAMPLES += \ - programming_examples/contributed/vecadd - -programming_examples: programming_examples/hello_world_compute_kernel \ - programming_examples/hello_world_datamovement_kernel \ - programming_examples/hello_world_datatypes_kernel \ - programming_examples/add_2_integers_in_riscv \ - programming_examples/add_2_integers_in_compute \ - programming_examples/loopback \ - programming_examples/eltwise_binary \ - programming_examples/eltwise_sfpu \ - programming_examples/matmul_single_core \ - programming_examples/matmul_multi_core \ - programming_examples/matmul_multicore_reuse \ - programming_examples/matmul_multicore_reuse_mcast \ - $(PROFILER_TESTS) \ - $(CONTRIBUTED_EXAMPLES) - -programming_examples/hello_world_compute_kernel:$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_compute_kernel -programming_examples/hello_world_datamovement_kernel:$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datamovement_kernel -programming_examples/hello_world_datatypes_kernel:$(PROGRAMMING_EXAMPLES_TESTDIR)/hello_world_datatypes_kernel -programming_examples/add_2_integers_in_riscv:$(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_riscv -programming_examples/add_2_integers_in_compute:$(PROGRAMMING_EXAMPLES_TESTDIR)/add_2_integers_in_compute -programming_examples/loopback: $(PROGRAMMING_EXAMPLES_TESTDIR)/loopback; -programming_examples/eltwise_binary: $(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_binary; -programming_examples/eltwise_sfpu: $(PROGRAMMING_EXAMPLES_TESTDIR)/eltwise_sfpu; -programming_examples/matmul_single_core: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_single_core; -programming_examples/matmul_multi_core: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multi_core; -programming_examples/matmul_multicore_reuse: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse; -programming_examples/matmul_multicore_reuse_mcast: $(PROGRAMMING_EXAMPLES_TESTDIR)/matmul_multicore_reuse_mcast; -programming_examples/profiler/%: $(PROGRAMMING_EXAMPLES_TESTDIR)/profiler/%; -programming_examples/contributed/%: $(PROGRAMMING_EXAMPLES_TESTDIR)/contributed/%; diff --git a/tt_metal/programming_examples/profiler/module.mk b/tt_metal/programming_examples/profiler/module.mk deleted file mode 100644 index 1f06240562f..00000000000 --- a/tt_metal/programming_examples/profiler/module.mk +++ /dev/null @@ -1,9 +0,0 @@ -.PRECIOUS: $(PROGRAMMING_EXAMPLES_TESTDIR)/profiler/% -$(PROGRAMMING_EXAMPLES_TESTDIR)/profiler/%: $(PROGRAMMING_EXAMPLES_OBJDIR)/profiler/%.o $(TT_METAL_LIB) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -o $@ $^ $(LDFLAGS) $(PROGRAMMING_EXAMPLES_LDFLAGS) - -.PRECIOUS: $(PROGRAMMING_EXAMPLES_OBJDIR)/profiler/%.o -$(PROGRAMMING_EXAMPLES_OBJDIR)/profiler/%.o: $(TT_METAL_HOME)/tt_metal/programming_examples/profiler/%/*.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(PROGRAMMING_EXAMPLES_INCLUDES) -c -o $@ $< diff --git a/tt_metal/python_env/module.mk b/tt_metal/python_env/module.mk deleted file mode 100644 index d2eb0997abb..00000000000 --- a/tt_metal/python_env/module.mk +++ /dev/null @@ -1,49 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -PYTHON_ENV ?= $(OUT)/python_env - -# Each module has a top level target as the entrypoint which must match the subdir name -python_env: $(PYTHON_ENV)/.installed - -python_env/dev: $(PYTHON_ENV)/.installed-dev - -python_env/dev/editable: $(PYTHON_ENV)/.installed-dev-editable - -python_env/dev/stubs: $(PYTHON_ENV)/.installed-stubs - -python_env/clean: - rm -rf $(PYTHON_ENV) - -# .PRECIOUS: $(PYTHON_ENV)/.installed $(PYTHON_ENV)/% -$(PYTHON_ENV)/.installed: - python3 -m venv $(PYTHON_ENV) - bash -c "source $(PYTHON_ENV)/bin/activate && python3 -m pip config set global.extra-index-url https://download.pytorch.org/whl/cpu" - echo "Installing python env build backend requirements..." - bash -c "source $(PYTHON_ENV)/bin/activate && python3 -m pip install --upgrade setuptools wheel" - touch $@ - -$(PYTHON_ENV)/%: $(PYTHON_ENV)/.installed - bash -c "source $(PYTHON_ENV)/bin/activate" - -ifdef TT_METAL_ENV_IS_DEV -# Once we split this out and put this python_env module.mk declaration at the end, then we'll actually properly depend on these local sos being installed -$(PYTHON_ENV)/.installed-dev: $(PYTHON_ENV)/.installed $(TT_LIB_LIB_LOCAL_SO) $(TTNN_PYBIND11_LOCAL_SO) tt_metal/python_env/requirements-dev.txt -else -$(PYTHON_ENV)/.installed-dev: $(PYTHON_ENV)/.installed tt_metal/python_env/requirements-dev.txt -endif - echo "Installing dev environment packages..." - bash -c "source $(PYTHON_ENV)/bin/activate && python3 -m pip install -r tt_metal/python_env/requirements-dev.txt" - echo "Installing editable dev version of tt_eager packages..." - bash -c "source $(PYTHON_ENV)/bin/activate && pip install -e ." - touch $@ - -$(PYTHON_ENV)/.installed-dev-editable: - echo "Installing editable dev version of ttnn package..." - bash -c "source $(PYTHON_ENV)/bin/activate && pip install -e ttnn" - touch $@ - -$(PYTHON_ENV)/.installed-stubs: $(PYTHON_ENV)/.installed-dev $(PYTHON_ENV)/.installed-dev-editable - echo "Generating stubs..." - bash -c "source $(PYTHON_ENV)/bin/activate && stubgen -m tt_lib -m tt_lib.device -m tt_lib.profiler -m tt_lib.tensor -m tt_lib.operations -m tt_lib.operations.primary -m tt_lib.operations.primary.transformers -o tt_eager" - bash -c "source $(PYTHON_ENV)/bin/activate && stubgen -p ttnn._ttnn -o ttnn" - bash -c "sed -i 's/\._C/tt_lib/g' tt_eager/tt_lib/__init__.pyi" - touch $@ diff --git a/tt_metal/third_party/sfpi b/tt_metal/third_party/sfpi index 964bd7786c8..f050df206be 160000 --- a/tt_metal/third_party/sfpi +++ b/tt_metal/third_party/sfpi @@ -1 +1 @@ -Subproject commit 964bd7786c8bccf130294acbd0546203baffb7d4 +Subproject commit f050df206be4da5e898cfb7aed1c7465997d77aa diff --git a/tt_metal/tools/module.mk b/tt_metal/tools/module.mk deleted file mode 100644 index dde5a8f5ca3..00000000000 --- a/tt_metal/tools/module.mk +++ /dev/null @@ -1,29 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -# include $(TT_METAL_HOME)/tt_metal/tools/tt_gdb/module.mk # needs to compiled after llrt and tt_metal -include $(TT_METAL_HOME)/tt_metal/tools/profiler/module.mk -include $(TT_METAL_HOME)/tt_metal/tools/watcher_dump/module.mk - -TOOLS = \ - tools/memset - -TOOLS_SRCS = $(addprefix tt_metal/, $(addsuffix .cpp, $(TOOLS))) - -TOOLS_INCLUDES = $(COMMON_INCLUDES) -I$(TT_METAL_HOME)/tools -TOOLS_LDFLAGS = $(LDFLAGS) -lyaml-cpp -lstdc++fs -lhwloc -lm - -TOOLS_DEPS = $(addprefix $(OBJDIR)/, $(TOOLS_SRCS:.cpp=.d)) - --include $(TOOLS_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tools: $(OBJDIR)/tt_metal/tools/memset tools/profiler tools/watcher_dump #tools/tt_gdb - -.PRECIOUS: $(OBJDIR)/tools/% -$(OBJDIR)/tt_metal/tools/memset: $(OBJDIR)/tt_metal/tools/memset.o $(COMMON_OBJS) $(LLRT_OBJS) $(DEVICE_OBJS) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TOOLS_INCLUDES) -o $@ $^ $(TOOLS_LDFLAGS) - -$(OBJDIR)/tt_metal/tools/memset.o: tt_metal/tools/memset.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(TOOLS_INCLUDES) -c -o $@ $< diff --git a/tt_metal/tools/profiler/module.mk b/tt_metal/tools/profiler/module.mk deleted file mode 100644 index fc930eb6262..00000000000 --- a/tt_metal/tools/profiler/module.mk +++ /dev/null @@ -1,21 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -PROFILER_INCLUDES = $(BASE_INCLUDES) - -PROFILER_DEFINES = -PROFILER_INCLUDES += -Itools/profiler -PROFILER_CFLAGS = $(CFLAGS) -Werror - -PROFILER_SRCS += \ - $(wildcard tt_metal/tools/profiler/*.cpp) - -PROFILER_OBJS = $(addprefix $(OBJDIR)/, $(PROFILER_SRCS:.cpp=.o)) -PROFILER_DEPS = $(addprefix $(OBJDIR)/, $(PROFILER_SRCS:.cpp=.d)) - --include $(PROFILER_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tools/profiler: $(PROFILER_OBJS) - -$(OBJDIR)/tt_metal/tools/profiler/%.o: tt_metal/tools/profiler/%.cpp - @mkdir -p $(@D) - $(CXX) $(PROFILER_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(PROFILER_INCLUDES) $(PROFILER_DEFINES) -c -o $@ $< diff --git a/tt_metal/tools/tt_gdb/module.mk b/tt_metal/tools/tt_gdb/module.mk deleted file mode 100644 index ad79a697d17..00000000000 --- a/tt_metal/tools/tt_gdb/module.mk +++ /dev/null @@ -1,24 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) - -TT_GDB_LIB = $(LIBDIR)/libtt_gdb.a -TT_GDB_DEFINES = -TT_GDB_INCLUDES = $(COMMON_INCLUDES) -I$(TT_METAL_HOME)/tt_metal/tools/tt_gdb -I$(TT_METAL_HOME)/tt_metal/third_party/json -TT_GDB_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast - -TT_GDB_SRCS = tt_metal/tools/tt_gdb/tt_gdb.cpp - -TT_GDB_OBJS = $(addprefix $(OBJDIR)/, $(TT_GDB_SRCS:.cpp=.o)) -TT_GDB_DEPS = $(addprefix $(OBJDIR)/, $(TT_GDB_SRCS:.cpp=.d)) - --include $(TT_GDB_DEPS) - -# Each module has a top level target as the entrypoint which must match the subdir name -tools/tt_gdb: $(TT_GDB_LIB) - -$(TT_GDB_LIB): $(COMMON_LIB) $(TT_GDB_OBJS) - @mkdir -p $(@D) - ar rcs -o $@ $(TT_GDB_OBJS) - -$(OBJDIR)/tt_metal/tools/tt_gdb/%.o: tt_metal/tools/tt_gdb/%.cpp - @mkdir -p $(@D) - $(CXX) $(TT_GDB_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TT_GDB_INCLUDES) $(TT_GDB_DEFINES) -c -o $@ $< diff --git a/tt_metal/tools/watcher_dump/module.mk b/tt_metal/tools/watcher_dump/module.mk deleted file mode 100644 index 5fd351b5619..00000000000 --- a/tt_metal/tools/watcher_dump/module.mk +++ /dev/null @@ -1,21 +0,0 @@ -# Every variable in subdir must be prefixed with subdir (emulating a namespace) -WATCHER_DUMP_INCLUDES = $(COMMON_INCLUDES) -Itools/watcher_dump -WATCHER_DUMP_DEFINES = -WATCHER_DUMP_LDFLAGS = $(LDFLAGS) -ltt_metal -lyaml-cpp - -WATCHER_DUMP_SRCS += \ - $(wildcard tt_metal/tools/watcher_dump/*.cpp) -WATCHER_DUMP_OBJS = $(addprefix $(OBJDIR)/, $(WATCHER_DUMP_SRCS:.cpp=.o)) -WATCHER_DUMP_DEPS = $(addprefix $(OBJDIR)/, $(WATCHER_DUMP_SRCS:.cpp=.d)) - --include $(WATCHER_DUMP_DEPS) -# Each module has a top level target as the entrypoint which must match the subdir name -tools/watcher_dump: $(WATCHER_DUMP_OBJS) $(OUT)/tools/watcher_dump - -$(OBJDIR)/tt_metal/tools/watcher_dump/%.o: tt_metal/tools/watcher_dump/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(WATCHER_DUMP_INCLUDES) $(WATCHER_DUMP_DEFINES) -c -o $@ $< - -$(OUT)/tools/watcher_dump: $(WATCHER_DUMP_OBJS) $(OUT)/lib/libtt_metal.so - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(WATCHER_DUMP_INCLUDES) $(WATCHER_DUMP_DEFINES) -o $@ $^ $(WATCHER_DUMP_LDFLAGS) diff --git a/tt_metal/tracy.mk b/tt_metal/tracy.mk deleted file mode 100644 index 6fb9ba2e86a..00000000000 --- a/tt_metal/tracy.mk +++ /dev/null @@ -1,36 +0,0 @@ -TRACY_LIB = $(LIBDIR)/libtracy.so -TRACY_INCLUDES = -I$(TT_METAL_HOME)/tt_metal/third_party/tracy/public/tracy/ -TRACY_LDFLAGS = $(LDFLAGS) -TRACY_DEFINES = -DTRACY_NO_CONTEXT_SWITCH -TRACY_BUILD_DIR = $(TT_METAL_HOME)/build/tools/profiler/bin -TRACY_REPO = $(TT_METAL_HOME)/tt_metal/third_party/tracy - -#TRACY_DEFINES = -DTRACY_SAMPLING_HZ=40000 -DTRACY_NO_SYSTEM_TRACING -DTRACY_NO_CALLSTACK -DTRACY_NO_CALLSTACK_INLINES -TRACY_SRCS = \ - tt_metal/third_party/tracy/public/TracyClient.cpp - -TRACY_OBJS = $(addprefix $(OBJDIR)/, $(TRACY_SRCS:.cpp=.o)) -TRACY_DEPS = $(addprefix $(OBJDIR)/, $(TRACY_SRCS:.cpp=.d)) - --include $(TRACY_DEPS) - -tracy: $(TRACY_LIB) - -$(TRACY_LIB): $(TRACY_OBJS) - @mkdir -p $(LIBDIR) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(SHARED_LIB_FLAGS) -o $@ $^ $(TRACY_LDFLAGS) - -$(OBJDIR)/tt_metal/third_party/tracy/public/%.o: tt_metal/third_party/tracy/public/%.cpp - @mkdir -p $(@D) - $(CXX) $(CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TRACY_INCLUDES) $(TRACY_DEFINES) -c -o $@ $< - -tracy_tools: - mkdir -p $(TRACY_BUILD_DIR) - cd $(TRACY_REPO)/csvexport/build/unix && $(MAKE) - cp $(TRACY_REPO)/csvexport/build/unix/csvexport-release $(TRACY_BUILD_DIR) - cd $(TRACY_REPO)/capture/build/unix && $(MAKE) - cp $(TRACY_REPO)/capture/build/unix/capture-release $(TRACY_BUILD_DIR) - -tracy_tools_clean: - cd $(TRACY_REPO)/csvexport/build/unix && $(MAKE) clean - cd $(TRACY_REPO)/capture/build/unix && $(MAKE) clean diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp index df3682824ff..6f1dcfb8919 100644 --- a/ttnn/cpp/pybind11/operations/__init__.hpp +++ b/ttnn/cpp/pybind11/operations/__init__.hpp @@ -7,22 +7,22 @@ #include #include -#include "ccl.hpp" -#include "conv2d.hpp" -#include "core.hpp" -#include "creation.hpp" -#include "data_movement.hpp" -#include "embedding.hpp" -#include "kv_cache.hpp" -#include "matmul.hpp" -#include "maxpool2d.hpp" -#include "normalization.hpp" -#include "pool.hpp" #include "pybind11/operations/binary.hpp" -#include "reduction.hpp" -#include "ternary.hpp" -#include "transformer.hpp" -#include "unary.hpp" +#include "pybind11/operations/ccl.hpp" +#include "pybind11/operations/conv2d.hpp" +#include "pybind11/operations/core.hpp" +#include "pybind11/operations/creation.hpp" +#include "pybind11/operations/data_movement.hpp" +#include "pybind11/operations/embedding.hpp" +#include "pybind11/operations/kv_cache.hpp" +#include "pybind11/operations/matmul.hpp" +#include "pybind11/operations/maxpool2d.hpp" +#include "pybind11/operations/normalization.hpp" +#include "pybind11/operations/pool.hpp" +#include "pybind11/operations/reduction.hpp" +#include "pybind11/operations/ternary.hpp" +#include "pybind11/operations/transformer.hpp" +#include "pybind11/operations/unary.hpp" namespace py = pybind11; diff --git a/ttnn/cpp/pybind11/operations/data_movement.hpp b/ttnn/cpp/pybind11/operations/data_movement.hpp index dadb3551275..0b8484a895f 100644 --- a/ttnn/cpp/pybind11/operations/data_movement.hpp +++ b/ttnn/cpp/pybind11/operations/data_movement.hpp @@ -6,6 +6,7 @@ #include #include + #include "ttnn/operations/data_movement.hpp" namespace py = pybind11; @@ -14,8 +15,9 @@ namespace ttnn { namespace operations { namespace data_movement { void py_module(py::module& module) { - - module.def("permute", &permute, + module.def( + "permute", + &permute, py::arg("input_tensor"), py::arg("order"), R"doc( @@ -34,7 +36,9 @@ Example:: )doc"); - module.def("concat", &concat, + module.def( + "concat", + &concat, py::arg("input_tensor"), py::arg("dim") = 0, py::kw_only(), @@ -75,11 +79,34 @@ The algorithms available for upsampling are 'nearest' for now. * :attr:`scale_factor`: multiplier for spatial size. Has to match input size if it is a tuple. )doc", ttnn::pybind_arguments_t{ - py::arg("input_tensor"), - py::arg("scale_factor"), - py::arg("memory_config") = std::nullopt - } - ); + py::arg("input_tensor"), py::arg("scale_factor"), py::arg("memory_config") = std::nullopt}); + + ttnn::bind_registered_operation( + module, + ttnn::repeat, + R"doc( +repeat(input_tensor: ttnn.Tensor, shape : ttnn.Shape) -> ttnn.Tensor + +Returns a new tensor filled with repetition of input :attr:`input_tensor` according to number of times specified in :attr:`shape`. + +Args: + * :attr:`input_tensor`: the input_tensor to apply the repeate operation. + * :attr:`shape`: The number of repetitions for each element. + +Keyword Args: + * :attr:`memory_config`: the memory configuration to use for the operation + +Example:: + + >>> tensor = ttnn.repeat(ttnn.from_torch(torch.tensor([[1, 2], [3, 4]]), 2,)), device) + >>> print(tensor) + tensor([[1, 2], + [1, 2], + [3, 4], + [3, 4]]) + )doc", + ttnn::pybind_arguments_t{ + py::arg("input_tensor"), py::arg("shape"), py::kw_only(), py::arg("memory_config") = std::nullopt}); } } // namespace data_movement diff --git a/ttnn/cpp/ttnn/op_library/binary/binary_op.hpp b/ttnn/cpp/ttnn/op_library/binary/binary_op.hpp index b1739c4a0fb..006097c3633 100644 --- a/ttnn/cpp/ttnn/op_library/binary/binary_op.hpp +++ b/ttnn/cpp/ttnn/op_library/binary/binary_op.hpp @@ -121,7 +121,7 @@ struct ExecuteBinary { input_shape_a[-3] == input_shape_b[-3]) { tt::log_warning(tt::LogOp, "Using repeat op to broadcast batch dim"); Shape repeats({input_shape_a[0], 1, 1, 1}); - input_tensor_b = repeat(input_tensor_b, repeats.value(), output_memory_config); + input_tensor_b = tt::tt_metal::repeat(input_tensor_b, repeats.value(), output_memory_config); } return operation::run( diff --git a/ttnn/cpp/ttnn/operations/data_movement.hpp b/ttnn/cpp/ttnn/operations/data_movement.hpp index 3c142fa9281..13ebf3eec10 100644 --- a/ttnn/cpp/ttnn/operations/data_movement.hpp +++ b/ttnn/cpp/ttnn/operations/data_movement.hpp @@ -6,6 +6,7 @@ #include "tt_eager/tt_dnn/op_library/concat/concat_op.hpp" #include "tt_eager/tt_dnn/op_library/permute/permute_op.hpp" +#include "tt_eager/tt_dnn/op_library/repeat/repeat_op.hpp" #include "tt_eager/tt_dnn/op_library/upsample/upsample_op.hpp" #include "ttnn/cpp/ttnn/operations/core.hpp" @@ -254,7 +255,36 @@ struct UpSample { } }; +struct Repeat { + static inline const std::array input_tensor_schemas() { + return {ttnn::TensorSchema{ + 4, // min rank + 4, // max rank + {ttnn::bfloat16, ttnn::bfloat8_b, ttnn::int32, ttnn::uint32}, + {ttnn::TILE_LAYOUT, ttnn::ROW_MAJOR_LAYOUT}, + true, // can_be_on_device + false, // can_be_on_cpu + false, // can_be_scalar + false}}; // is_optional + } + + template + static auto input_tensors_to_validate(const ttnn::Tensor& input_tensor, Args&&... args) { + return std::make_tuple(input_tensor); + } + + static ttnn::Tensor execute_on_worker_thread( + const ttnn::Tensor& input_tensor, + const Shape& shape, + std::optional output_mem_config = std::nullopt) { + MemoryConfig mem_config = output_mem_config.value_or(input_tensor.memory_config()); + auto output_tensor = tt::tt_metal::repeat(input_tensor, shape.value(), mem_config); + return output_tensor; + } +}; + } // namespace data_movement } // namespace operations constexpr auto upsample = ttnn::register_operation("ttnn::upsample"); +constexpr auto repeat = ttnn::register_operation("ttnn::repeat"); } // namespace ttnn diff --git a/ttnn/module.mk b/ttnn/module.mk deleted file mode 100644 index 0193bbda814..00000000000 --- a/ttnn/module.mk +++ /dev/null @@ -1,57 +0,0 @@ -TTNN_LIB = $(LIBDIR)/libttnn.a -TTNN_PYBIND11_LIB = $(LIBDIR)/_ttnn.so -TTNN_PYBIND11_LOCAL_SO = ttnn/ttnn/_ttnn.so - -TTNN_DEFINES = - -TTNN_INCLUDES = $(TT_EAGER_INCLUDES) $(TT_LIB_INCLUDES) -Ittnn/cpp -Ittnn/cpp/ttnn -TTNN_PYBIND11_INCLUDES = $(TTNN_INCLUDES) $(shell python3-config --includes) -Itt_metal/third_party/pybind11/include - -TTNN_LDFLAGS = -ltt_dnn -ltensor -ltt_metal -lyaml-cpp $(LDFLAGS) -TTNN_PYBIND11_LDFLAGS = $(TTNN_LDFLAGS) -lttnn - -TTNN_CFLAGS = $(CFLAGS) -Werror -Wno-int-to-pointer-cast -fno-var-tracking - -TTNN_SRC_DIR = ttnn/cpp/ttnn -TTNN_SRCS = $(wildcard $(TTNN_SRC_DIR)/*.cpp) $(wildcard $(TTNN_SRC_DIR)/*/*.cpp) $(wildcard $(TTNN_SRC_DIR)/*/*/*.cpp) - -TTNN_PYBIND11_SRCS = \ - ttnn/cpp/pybind11/__init__.cpp - -TTNN_OBJS = $(addprefix $(OBJDIR)/, $(TTNN_SRCS:.cpp=.o)) -TTNN_PYBIND11_OBJS = $(addprefix $(OBJDIR)/, $(TTNN_PYBIND11_SRCS:.cpp=.o)) - -TTNN_DEPS = $(addprefix $(OBJDIR)/, $(TTNN_SRCS:.cpp=.d)) -TTNN_PYBIND11_DEPS = $(addprefix $(OBJDIR)/, $(TTNN_PYBIND11_SRCS:.cpp=.d)) - --include $(TTNN_DEPS) --include $(TTNN_PYBIND11_DEPS) - -TTNN_LIBS_TO_BUILD = $(TTNN_LIB) \ - $(TTNN_PYBIND11_LIB) \ - -ifdef TT_METAL_ENV_IS_DEV -TTNN_LIBS_TO_BUILD += \ - $(TTNN_PYBIND11_LOCAL_SO) -endif - -ttnn: $(TTNN_LIBS_TO_BUILD) - -$(TTNN_LIB): $(TTNN_OBJS) $(TT_DNN_LIB) $(TENSOR_LIB) $(TT_METAL_LIB) $(TT_LIB_LIB) - @mkdir -p $(LIBDIR) - ar rcs -o $@ $(TTNN_OBJS) - -$(TTNN_PYBIND11_LIB): $(TTNN_PYBIND11_OBJS) $(TTNN_LIB) - @mkdir -p $(LIBDIR) - $(CXX) $(TTNN_CFLAGS) $(CXXFLAGS) $(SHARED_LIB_FLAGS) -o $@ $(TTNN_PYBIND11_OBJS) $(TTNN_PYBIND11_LDFLAGS) - -$(TTNN_PYBIND11_LOCAL_SO): $(TTNN_PYBIND11_LIB) - cp -fp $^ $@ - -$(OBJDIR)/ttnn/cpp/ttnn/%.o: ttnn/cpp/ttnn/%.cpp - @mkdir -p $(@D) - $(CXX) $(TTNN_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TTNN_INCLUDES) -MMD -MP -c -o $@ $< - -$(OBJDIR)/ttnn/cpp/pybind11/%.o: ttnn/cpp/pybind11/%.cpp - @mkdir -p $(@D) - $(CXX) $(TTNN_CFLAGS) $(CXXFLAGS) $(STATIC_LIB_FLAGS) $(TTNN_PYBIND11_INCLUDES) -MMD -MP -c -o $@ $< diff --git a/ttnn/ttnn/operations/data_movement.py b/ttnn/ttnn/operations/data_movement.py index b2ea41764bc..2dd2bf9a5f8 100644 --- a/ttnn/ttnn/operations/data_movement.py +++ b/ttnn/ttnn/operations/data_movement.py @@ -370,62 +370,7 @@ def _golden_function(tensor, shape, **_): return tensor.repeat(shape[0], shape[1], shape[2], shape[3]) -def _repeat_validate_input_tensors(operation_name, input_tensor, *args, **kwargs): - ttnn.validate_input_tensor( - operation_name, - input_tensor, - ranks=(2, 3, 4), - dtypes=(ttnn.bfloat16, ttnn.bfloat8_b, ttnn.uint16, ttnn.int32, ttnn.uint32), - layouts=(ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT), - can_be_on_device=True, - can_be_on_cpu=True, - ) - - -@ttnn.register_operation( - name="ttnn.repeat", - validate_input_tensors=_repeat_validate_input_tensors, - golden_function=_golden_function, -) -def repeat( - input_tensor: ttnn.Tensor, - shape: ttnn.Shape, - memory_config: ttnn.MemoryConfig = ttnn.DRAM_MEMORY_CONFIG, -) -> ttnn.Tensor: - r""" - repeat(input_tensor: ttnn.Tensor, shape : ttnn.Shape) -> ttnn.Tensor - - Returns a new tensor filled with repetition of input :attr:`input_tensor` according to number of times specified in :attr:`shape`. - - Args: - * :attr:`input_tensor`: the input_tensor to apply the repeate operation. - * :attr:`shape`: The number of repetitions for each element. - - Example:: - - >>> tensor = ttnn.repeat(ttnn.from_torch(torch.tensor([[1, 2], [3, 4]]), 2,)), device) - >>> print(tensor) - tensor([[1, 2], - [1, 2], - [3, 4], - [3, 4]]) - - """ - - if not isinstance(shape, ttnn.Shape): - raise RuntimeError("ttnn: Expected shape to be a ttnn.Shape") - - rank = len(input_tensor.shape) - if rank == 4: - output_tensor = ttl.tensor.repeat(input_tensor, shape, output_mem_config=memory_config) - *batch, _, _ = output_tensor.shape - *_, h, w = output_tensor.shape - *_, padded_h, padded_w = output_tensor.shape.with_tile_padding() - - output_tensor = ttnn.reshape(output_tensor, shape=ttnn.Shape(batch + [h, w], batch + [padded_h, padded_w])) - return output_tensor - else: - raise NotImplementedError +repeat = ttnn.register_operation(golden_function=_golden_function)(ttnn._ttnn.operations.data_movement.repeat) def _golden_function(input_tensor: ttnn.Tensor, scale_factor: Tuple[float, float], **_):