Merge branch 'main' into BERT_large_sharded

tenstorrent · Dec 4, 2023 · 65447a0 · 65447a0
2 parents 2121a9a + 04a1f3a
commit 65447a0
Show file tree

Hide file tree

Showing 15 changed files with 213 additions and 160 deletions.
diff --git a/.github/actions/install-metal-deps/dependencies.json b/.github/actions/install-metal-deps/dependencies.json
@@ -2,7 +2,7 @@
   "ubuntu-20.04": [
     "software-properties-common=0.99.9.12",
     "build-essential=12.8ubuntu1.1",
-    "python3.8-venv=3.8.10-0ubuntu1~20.04.8",
+    "python3.8-venv=3.8.10-0ubuntu1~20.04.9",
     "libgoogle-glog-dev=0.4.0-1build1",
     "libyaml-cpp-dev=0.6.2-4ubuntu1",
     "libboost-all-dev=1.71.0.0ubuntu2",

diff --git a/.github/workflows/all-static-checks.yaml b/.github/workflows/all-static-checks.yaml
@@ -5,6 +5,9 @@ on:
   push:
     branches: ["main"]
   workflow_call:
+  pull_request:
+    branches:
+      - "main"
 
 jobs:
   check-syseng-assets-dev:

diff --git a/.gitignore b/.gitignore
@@ -101,7 +101,7 @@ tests/end_to_end_tests/env
 
 # Exclude files that should not be here
 tt_metal/device/
-device/
+/device/
 src/firmware/riscv/targets/erisc/src/api/
 src/firmware/riscv/targets/erisc/src/eth_routing.cpp
 src/firmware/riscv/targets/erisc/src/eth_routing_v2.cpp

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -48,8 +48,7 @@ All contributions require:
     feature support request or bug report under Issues to get help with finding
     an appropriate project to get a maintainer's attention.
 - a pull request (PR).
-  - Your PR must be approved by appropriate reviewers. We do not accept PRs
-    from forked repositories.
+  - Your PR must be approved by appropriate reviewers.
 
 Furthermore, all PRs must follow the [contribution
 standards](#contribution-standards).
@@ -267,13 +266,16 @@ If you are using a machine with bare metal machine specs, please use
 - A PR must be opened for any code change with the following criteria:
   - Be approved, by a maintaining team member and any codeowners whose modules
     are relevant for the PR.
-  - Pass post-commit tests.
-  - Pass model performance tests.
-  - Pass profiler regression post-commit tests.
-  - Pass Python packaging post-commit tests.
+  - Pass any required post-commit pipelines rebased on the latest main. These
+    pipelines will generally, but not always, be defined in
+    `.github/workflows/all-post-commit-workflows.yaml`.
   - Pass any acceptance criteria mandated in the original issue.
   - Pass any testing criteria mandated by codeowners whose modules are relevant
     for the PR.
+- Avoid opening, re-opening, or pushing new commits to PRs before you're ready
+  for review and ready to start running pipelines. This is because we don't
+  want to clog our pipelines with unnecessary runs that developers may know
+  will fail anyway.
 
 ### New feature and design specifications
 
@@ -288,7 +290,6 @@ If you are using a machine with bare metal machine specs, please use
 - Any release must be externally-available artifacts generated by a workflow
   on a protected branch.
 
-
 ### Logging, assertions, and exceptions
 
 - Use Loguru for Python logging.

diff --git a/README.md b/README.md
@@ -77,7 +77,7 @@ First, perform an update and install the dependencies:
 
 ```
 sudo apt update
-sudo apt install software-properties-common=0.99.9.12 build-essential=12.8ubuntu1.1 python3.8-venv=3.8.10-0ubuntu1~20.04.8 libgoogle-glog-dev=0.4.0-1build1 libyaml-cpp-dev=0.6.2-4ubuntu1 libboost-all-dev=1.71.0.0ubuntu2 libsndfile1=1.0.28-7ubuntu0.2 libhwloc-dev
+sudo apt install software-properties-common=0.99.9.12 build-essential=12.8ubuntu1.1 python3.8-venv=3.8.10-0ubuntu1~20.04.9 libgoogle-glog-dev=0.4.0-1build1 libyaml-cpp-dev=0.6.2-4ubuntu1 libboost-all-dev=1.71.0.0ubuntu2 libsndfile1=1.0.28-7ubuntu0.2 libhwloc-dev
 ```
 
 2. Now continue to following sections to [install](#installing-accelerator-level-dependencies) accelerator-level dependencies and then the [required](#installing-system-level-dependencies-after-accelerator-level-dependencies) system-level dependencies that require the driver.

diff --git a/infra/machine_setup/scripts/download_public_machine_setup_assets.sh b/infra/machine_setup/scripts/download_public_machine_setup_assets.sh
@@ -34,7 +34,7 @@ GS_TT_SMI_FILENAME=tt-smi_2023-06-16-0283a02404487eea
 WH_TT_SMI_FILENAME=tt-smi-8.6.0.0_2023-08-22-492ad2b9ef82a243
 GS_TT_FLASH_FILENAME=tt-flash_2023-06-28-91e1cc1ef8caea8f
 WH_TT_FLASH_FILENAME=tt-flash_7.D.0.0_2023-08-08-7ab3bd015206a6ff
-GS_TT_DRIVER_FILENAME=install_ttkmd_1.23.bash
+GS_TT_DRIVER_FILENAME=install_ttkmd_1.26.bash
 
 PYBUDA_GS_RELEASE_ID=$(curl -L -H "Authorization: Bearer $GITHUB_TOKEN" \
 	-H "X-GitHub-Api-Version: 2022-11-28" \

diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv.py
@@ -40,8 +40,6 @@ def run_conv_as_large_matmul(conv_op_test_params, pytorch_inputs_and_golden, dev
     pad_h = ctp.pad_h
     pad_w = ctp.pad_w
 
-    # torch.manual_seed(0)
-
     A_pyt = pytorch_inputs_and_golden[0]
     B_pyt = pytorch_inputs_and_golden[1]
 
@@ -102,7 +100,9 @@ def run_conv_as_large_matmul(conv_op_test_params, pytorch_inputs_and_golden, dev
 
     return passing_pcc
 
+
 def test_sweep_conv_tt(device):
+    torch.manual_seed(27182)
     test_bench = generate_conv_tb()
     pytorch_conv_golden_tb = generate_conv_tb_with_pytorch_golden(test_bench)
     passing = True
@@ -130,9 +130,7 @@ def test_sweep_conv_tt(device):
             assert conv_op_test_params.test_level == TestLevel.OP_FULL_COMPUTE
             full_op_compute_tests += 1
         try:
-            passing_ = run_conv_as_large_matmul(
-                conv_op_test_params, pytorch_inputs_and_golden, device
-            )
+            passing_ = run_conv_as_large_matmul(conv_op_test_params, pytorch_inputs_and_golden, device)
             if passing_:
                 passing_tests.append(conv_op_test_params)
             else:

diff --git a/tests/tt_eager/python_api_testing/unit_testing/test_max_pool.py b/tests/tt_eager/python_api_testing/unit_testing/test_max_pool.py
@@ -16,12 +16,12 @@
 from models.utility_functions import comp_pcc
 from models.utility_functions import skip_for_wormhole_b0
 
+from functools import reduce
+import operator
+
 
 def volume(shape):
-    vol = 1.0
-    for d in shape:
-        vol *= d
-    return vol
+    return reduce(operator.mul, shape, 1)
 
 
 ## max-pool params:

diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/test_utils.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/test_utils.hpp
@@ -80,8 +80,9 @@ inline bool FilesMatchesString(string file_name, const string& expected) {
 
     // Go through line-by-line
     string line_a, line_b;
-    int line_num = 1;
+    int line_num = 0;
     while (getline(file, line_a) && getline(expect_stream, line_b)) {
+        line_num++;
         if (line_a != line_b) {
             tt::log_info(
                 tt::LogTest,
@@ -93,23 +94,24 @@ inline bool FilesMatchesString(string file_name, const string& expected) {
             );
             return false;
         }
-        line_num++;
     }
 
     // Make sure that there's no lines left over in either stream
     if (getline(file, line_a)) {
         tt::log_info(
             tt::LogTest,
-            "Test Error: file {} has more lines than expected.",
-            file_name
+            "Test Error: file {} has more lines than expected (>{}).",
+            file_name,
+            line_num
         );
         return false;
     }
     if (getline(expect_stream, line_b)) {
         tt::log_info(
             tt::LogTest,
-            "Test Error: file {} has less lines than expected.",
-            file_name
+            "Test Error: file {} has less lines than expected ({}).",
+            file_name,
+            line_num
         );
         return false;
     }

diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/dprint/test_raise_wait.cpp
@@ -217,9 +217,6 @@ TestConstCharStrNC{4,4}
 TestStrBR{4,4}
 +++++++++++++++)";
 TEST_F(CommandQueueWithDPrintFixture, TestPrintRaiseWait) {
-    // Disable for now, see https://github.com/tenstorrent-metal/tt-metal/issues/4096
-    GTEST_SKIP();
-
     // Device already set up by gtest fixture.
     Device *device = this->device_;
 

diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp
@@ -330,7 +330,7 @@ namespace tt::tt_metal{
             // Create valid PCIe address ranges
             // This implementation assumes contiguous ranges and aggregates the ranges into one bounds check
             // TODO: consider checking multiple ranges to detect straddling transactions
-            uint64_t pcie_chan_base_addr = tt::Cluster::instance().get_pcie_base_addr_from_device();
+            uint64_t pcie_chan_base_addr = tt::Cluster::instance().get_pcie_base_addr_from_device(device->id());
             uint64_t pcie_chan_end_addr = pcie_chan_base_addr;
             for (int pcie_chan = 0; pcie_chan < tt::Cluster::instance().get_num_host_channels(device->id()); pcie_chan++) {
                 pcie_chan_end_addr += tt::Cluster::instance().get_host_channel_size(device->id(), pcie_chan);

diff --git a/tt_metal/impl/debug/dprint_server.cpp b/tt_metal/impl/debug/dprint_server.cpp
@@ -109,13 +109,14 @@ struct DebugPrintServerContext {
 
     void SetMute(bool mute_print_server) { mute_print_server_ = mute_print_server; }
 
-    void WaitForNoNewDataProcessed() {
-        // Simply poll the flag every few ms to check whether new data is still being processed.
+    void WaitForPrintsFinished() {
+        // Simply poll the flag every few ms to check whether new data is still being processed,
+        // or whether any cores are waiting for a signal to be raised.
         // TODO(dma): once we have access to the device is there a way we can poll the device to
-        // help here?
+        // check whether more print data is coming?
         do {
             std::this_thread::sleep_for(std::chrono::milliseconds(5));
-        } while (new_data_processed_);
+        } while (hart_waiting_on_signal_.size() > 0 || new_data_processed_);
     }
 
 private:
@@ -377,8 +378,12 @@ void DebugPrintServerContext::thread_poll(
     // Main print loop, go through all chips/cores/harts on the device and poll for any print data
     // written.
     while (true) {
-        if (stop_print_server_)
-            break;
+        if (stop_print_server_) {
+            // If the stop signal was received, exit the print server thread, but wait for any
+            // existing prints to be wrapped up first.
+            if (hart_waiting_on_signal_.size() == 0 && !new_data_processed_)
+                break;
+        }
 
         // Flag for whether any new print data was found in this round of polling.
         bool new_print_data = false;
@@ -452,7 +457,7 @@ void tt_await_debug_print_server() {
         // Call the wait function for the print server, with a timeout
         auto future = std::async(
             std::launch::async,
-            &DebugPrintServerContext::WaitForNoNewDataProcessed,
+            &DebugPrintServerContext::WaitForPrintsFinished,
             DebugPrintServerContext::inst
         );
         if (future.wait_for(std::chrono::seconds(1)) == std::future_status::timeout) {

diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp
@@ -46,7 +46,7 @@ class SystemMemoryWriter {
    public:
     SystemMemoryCQWriteInterface cq_write_interface;
     SystemMemoryWriter(Device* device) :
-        m_dma_buf_size(tt::Cluster::instance().get_m_dma_buf_size()),
+        m_dma_buf_size(tt::Cluster::instance().get_m_dma_buf_size(device->id())),
         hugepage_start((char*) tt::Cluster::instance().host_dma_address(0, device->id(), 0)),
         fast_write_callable(
             tt::Cluster::instance().get_fast_pcie_static_tlb_write_callable(device->id())) {