Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixup some error messages #4209

Merged
merged 4 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ operation::ProgramWithCallbacks moreh_layernorm_impl(
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_rows_per_core = num_rows_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges.");
TT_THROW("Core not in specified core ranges.");
}

const std::vector<uint32_t> reader_runtime_args{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_gamma_beta_grad_impl(
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_cols_per_core = num_cols_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges.");
TT_THROW("Core not in specified core ranges.");
}

const std::vector<uint32_t> reader_runtime_args{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_input_grad_impl(
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_rows_per_core = num_rows_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges.");
TT_THROW("Core not in specified core ranges.");
}

const std::vector<uint32_t> reader_runtime_args{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ operation::ProgramWithCallbacks moreh_matmul_multi_core(
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_output_tiles_per_core = num_output_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

tt_metal::SetRuntimeArgs(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ operation::ProgramWithCallbacks moreh_sum_multi_core(const Tensor &src, const Te
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_cols_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges.");
TT_THROW("Core not in specified core ranges.");
}

SetRuntimeArgs(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ operation::ProgramWithCallbacks moreh_softmax_c_large(const Tensor &input, Tenso
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

vector<uint32_t> reader_args = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ operation::ProgramWithCallbacks moreh_softmax_h_large(const Tensor &input, Tenso
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ operation::ProgramWithCallbacks moreh_softmax_h_small(const Tensor &input, Tenso
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ operation::ProgramWithCallbacks moreh_softmax_w_large(const Tensor &input, Tenso
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ operation::ProgramWithCallbacks moreh_softmax_w_small(const Tensor &input, Tenso
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ operation::ProgramWithCallbacks moreh_softmax_backward_c_large(const Tensor &out
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

vector<uint32_t> reader_args = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ operation::ProgramWithCallbacks moreh_softmax_backward_h_large(const Tensor &out
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ operation::ProgramWithCallbacks moreh_softmax_backward_h_small(const Tensor &out
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ operation::ProgramWithCallbacks moreh_softmax_backward_w_large(const Tensor &out
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ operation::ProgramWithCallbacks moreh_softmax_backward_w_small(const Tensor &out
} else if (core_group_2.core_coord_in_core_ranges(core)) {
num_tiles_per_core = num_tiles_per_core_group_2;
} else {
TT_ASSERT(false, "Core not in specified core ranges");
TT_THROW("Core not in specified core ranges");
}

float scaler = 1.0f;
Expand Down
28 changes: 26 additions & 2 deletions tt_metal/common/assert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,44 @@ void tt_assert_message(std::ostream& os, T const& t, Ts const&... ts) {
tt_assert_message(os, ts...);
}

template <typename... Ts>
void tt_assert_log_message(Ts const&... ts) {
std::string fmt;
for (int i = 0; i < sizeof...(ts); i++) {
fmt += "{} ";
}
log_fatal(fmt.c_str(), ts...);
}

template <typename... Ts>
void tt_assert_log_message(const char *t, Ts const&... ts) {
log_fatal(t, ts...);
}

template <typename... Ts>
void tt_assert_log_message(const std::string& t, Ts const&... ts) {
log_fatal(t.c_str(), ts...);
}

template <typename... Ts>
[[ noreturn ]] void tt_throw(char const* file, int line, const std::string& assert_type, char const* condition_str, Ts const&... messages) {
std::stringstream trace_message_ss = {};
trace_message_ss << assert_type << " @ " << file << ":" << line << ": " << condition_str << std::endl;
if constexpr (sizeof...(messages) > 0) {
trace_message_ss << "info:" << std::endl;
tt_assert_message(trace_message_ss, messages...);
tt_assert_log_message(messages...);
}
trace_message_ss << "backtrace:\n";
trace_message_ss << tt::assert::backtrace_to_string(100, 3, " --- ");
trace_message_ss << std::flush;
Logger::get().flush();
throw std::runtime_error(trace_message_ss.str());
}

template <typename... Ts>
void tt_assert(char const* file, int line, const std::string& assert_type, char const* condition_str, Ts const&... messages) {
::tt::assert::tt_throw(file, line, assert_type, condition_str, messages...);
}

template <typename... Ts>
Expand All @@ -133,14 +157,14 @@ void tt_assert(char const* file, int line, const std::string& assert_type, bool
// https://stackoverflow.com/questions/55933541/else-without-previous-if-error-when-defining-macro-with-arguments/55933720#55933720
#ifdef DEBUG
#ifndef TT_ASSERT
#define TT_ASSERT(condition, ...) do{ if (not (condition)) ::tt::assert::tt_assert(__FILE__, __LINE__, "TT_ASSERT", (condition), #condition, ##__VA_ARGS__); }while(0)
#define TT_ASSERT(condition, ...) do{ if (not (condition)) tt::assert::tt_assert(__FILE__, __LINE__, "TT_ASSERT", (condition), #condition, ##__VA_ARGS__); } while(0)
#endif
#else
#define TT_ASSERT(condition, ...)
#endif

#ifndef TT_THROW
#define TT_THROW(...) ::tt::assert::tt_throw(__FILE__, __LINE__, "TT_THROW", "tt::exception", ##__VA_ARGS__)
#define TT_THROW(...) tt::assert::tt_throw(__FILE__, __LINE__, "TT_THROW", "tt::exception", ##__VA_ARGS__)
#endif

#ifndef TT_FATAL
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/common/bfloat8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ inline std::vector<float> unpack_bfp8_tiles_into_float_vec(const std::vector<uin
man_shifted = _mm256_and_si256(_mm256_sll_epi32(man_shifted, _mm_set_epi64x(0, 1)), _mm256_set1_epi32(0x7f)); // One more shift to clear 6th bit
man = _mm256_blendv_epi8(man_shifted, man, select_mask); // Choose new mantissa or keep old mantissa based on 0 initial condition.
// Assert if the exponent and corresponding mantissa for a datum are non-zero and the subtraction bias (shift_cnt) for that data is greater than the exponent value
TT_ASSERT(!(_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(exp_vector, _mm256_setzero_si256()))) & _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(shift_cnt, exp_vector))) & !_mm256_movemask_ps(_mm256_castsi256_ps(select_mask))) , tt::LogModel, "Device returned incorrect data for Bfp8 formats: The Shift Count for a non-zero exponent is greater than the exponent value.");
TT_ASSERT(!(_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(exp_vector, _mm256_setzero_si256()))) & _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(shift_cnt, exp_vector))) & !_mm256_movemask_ps(_mm256_castsi256_ps(select_mask))) , "Device returned incorrect data for Bfp8 formats: The Shift Count for a non-zero exponent is greater than the exponent value.");
exp_vector = _mm256_blendv_epi8(_mm256_sub_epi32(exp_vector, _mm256_add_epi32(rebias_offset, shift_cnt)), _mm256_setzero_si256(), select_mask); // Choose new (rebiased exponent) or keep previous exponent based on mantissa intiial condition

sign = _mm256_sll_epi32(sign, _mm_set_epi64x(0, 31)); // Shift sign
Expand Down
10 changes: 10 additions & 0 deletions tt_metal/common/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,16 @@ static void log_error(char const* fmt, Args&&... args) {
log_error(LogAlways, fmt, std::forward<Args>(args)...);
}

template <typename... Args>
static void log_fatal(LogType type, char const* fmt, Args&&... args) {
Logger::get().log_level_type(Logger::Level::Fatal, type, fmt, std::forward<Args>(args)...);
}

template <typename... Args>
static void log_fatal(char const* fmt, Args&&... args) {
log_fatal(LogAlways, fmt, std::forward<Args>(args)...);
}

#undef LOGGER_TYPES

} // namespace tt
3 changes: 1 addition & 2 deletions tt_metal/common/metal_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,7 @@ void metal_SocDescriptor::load_dispatch_and_banking_config(uint32_t harvesting_m
uint32_t num_harvested_noc_rows = mask_bitset.count();

if (num_harvested_noc_rows > 2) {
TT_THROW(
tt::LogDevice, "At most two rows can be harvested, but detected {} harvested rows", num_harvested_noc_rows);
TT_THROW("At most two rows can be harvested, but detected {} harvested rows", num_harvested_noc_rows);
}
if (num_harvested_noc_rows == 1 and this->arch == tt::ARCH::GRAYSKULL) {
TT_THROW("One row harvested Grayskull is not supported");
Expand Down
10 changes: 1 addition & 9 deletions tt_metal/impl/buffers/circular_buffer_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class CircularBufferConfig {
}
if (total_size > buffer.size()) {
TT_THROW(
tt::LogMetal,
"Requested {} B but dynamic circular buffer cannot be larger than allocated L1 buffer of {} B",
total_size,
buffer.size());
Expand All @@ -50,28 +49,24 @@ class CircularBufferConfig {
CircularBufferConfig set_page_size(uint8_t buffer_index, uint32_t page_size) {
if (buffer_index > NUM_CIRCULAR_BUFFERS - 1) {
TT_THROW(
tt::LogMetal,
"Buffer index ({}) exceeds max number of circular buffers per core ({})",
buffer_index,
NUM_CIRCULAR_BUFFERS);
}
if (this->buffer_indices_.find(buffer_index) == this->buffer_indices_.end()) {
TT_THROW(
tt::LogMetal,
"Illegal circular buffer index {}. Page size can only be specified for buffer indices configured "
"during config creation",
buffer_index);
}
if (this->total_size_ % page_size != 0) {
TT_THROW(
tt::LogMetal,
"Total circular buffer size {} B must be divisible by page size {} B",
this->total_size_,
page_size);
}
if (page_size % sizeof(uint32_t) != 0) {
TT_THROW(
tt::LogMetal, "Page size must be divisible by sizeof(uint32_t) because buffers holds uint32_t values");
TT_THROW("Page size must be divisible by sizeof(uint32_t) because buffers holds uint32_t values");
}

this->page_sizes_[buffer_index] = page_size;
Expand All @@ -81,7 +76,6 @@ class CircularBufferConfig {
CircularBufferConfig set_total_size(uint32_t total_size) {
if (dynamic_cb_ and total_size > this->max_size_.value()) {
TT_THROW(
tt::LogMetal,
"Cannot grow circular buffer to {} B. This is larger than associated dynamically allocated L1 buffer "
"of {} B",
total_size,
Expand Down Expand Up @@ -118,7 +112,6 @@ class CircularBufferConfig {
void set_config(const std::map<uint8_t, tt::DataFormat> &data_format_spec) {
if (data_format_spec.size() > NUM_CIRCULAR_BUFFERS) {
TT_THROW(
tt::LogMetal,
"Only {} circular buffer slots are available but data formats are specified for {} indices",
NUM_CIRCULAR_BUFFERS,
data_format_spec.size());
Expand All @@ -127,7 +120,6 @@ class CircularBufferConfig {
for (const auto &[buffer_index, data_format] : data_format_spec) {
if (buffer_index > NUM_CIRCULAR_BUFFERS - 1) {
TT_THROW(
tt::LogMetal,
"Buffer index ({}) exceeds max number of circular buffers per core ({})",
buffer_index,
NUM_CIRCULAR_BUFFERS);
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/llrt/llrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,10 @@ void wait_until_cores_done(chip_id_t device_id,
std::unordered_set<CoreCoord>& not_done_phys_cores) {

// poll the cores until the set of not done cores is empty
int loop_count = 0;
int loop_count = 1;
while (!not_done_phys_cores.empty()) {
// Print not-done cores
if (loop_count % 20 == 0) {
if (loop_count % 1000 == 0) {
string not_done_cores_str = "Not done phys cores: ";
for (const auto &core : not_done_phys_cores) {
not_done_cores_str += (core.str() + " ");
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/llrt/tt_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ tt_device &Cluster::get_driver(chip_id_t device_id) const {

const metal_SocDescriptor &Cluster::get_soc_desc(chip_id_t chip) const {
if (this->sdesc_per_chip_.find(chip) == this->sdesc_per_chip_.end()) {
TT_FATAL("Cannot access soc descriptor for {} before device driver is initialized! Call initialize_device_driver({}) first", chip, chip);
TT_THROW("Cannot access soc descriptor for {} before device driver is initialized! Call initialize_device_driver({}) first", chip, chip);
}
return this->sdesc_per_chip_.at(chip);
}
Expand Down
Loading