From a7a6e96171f4bc09dc04307616c4c5695f641e6b Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Fri, 6 Sep 2024 20:20:56 +0000 Subject: [PATCH] Replace expensive computations Replace hot FDIVs with FMULs through precomputing inverse values outside the loops. FDIVs are always more expensive than FMULs, although the amount varies per CPU microarchitecture. --- src/simplex/HEkk.cpp | 7 ++++--- src/simplex/HEkkDual.cpp | 14 ++++++++------ src/simplex/HEkkDual.h | 1 + src/util/HFactor.cpp | 11 ++++++----- src/util/HFactor.h | 1 + src/util/HighsSparseMatrix.cpp | 3 ++- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/simplex/HEkk.cpp b/src/simplex/HEkk.cpp index b94d0959f6..0aa3788341 100644 --- a/src/simplex/HEkk.cpp +++ b/src/simplex/HEkk.cpp @@ -2124,6 +2124,7 @@ void HEkk::updateDualSteepestEdgeWeights( const double col_aq_scale = simplex_nla_.variableScaleFactor(variable_in); const double col_ap_scale = simplex_nla_.basicColScaleFactor(row_out); + const double inv_col_ap_scale = 1.0 / col_ap_scale; const bool DSE_check = false; HVector alt_dual_steepest_edge_column; @@ -2163,7 +2164,7 @@ void HEkk::updateDualSteepestEdgeWeights( HighsInt DSE_array_count = 0; for (HighsInt iRow = 0; iRow < num_row; iRow++) { const double dual_steepest_edge_array_value = - dual_steepest_edge_array[iRow] / col_ap_scale; + dual_steepest_edge_array[iRow] * inv_col_ap_scale; if (dual_steepest_edge_array_value) DSE_array_count++; if (std::abs(dual_steepest_edge_array_value) > dse_column_value_tolerance || @@ -2192,7 +2193,7 @@ void HEkk::updateDualSteepestEdgeWeights( (int)alt_dual_steepest_edge_column.count, (int)DSE_array_count); for (HighsInt iRow = 0; iRow < num_row; iRow++) { const double dual_steepest_edge_array_value = - dual_steepest_edge_array[iRow] / col_ap_scale; + dual_steepest_edge_array[iRow] * inv_col_ap_scale; if (alt_dual_steepest_edge_column.array[iRow] != 0 && dual_steepest_edge_array_value != 0) { const double dse_column_error = @@ -2234,7 +2235,7 @@ void HEkk::updateDualSteepestEdgeWeights( double basic_col_scale = simplex_nla_.basicColScaleFactor(iRow); aa_iRow /= basic_col_scale; aa_iRow *= col_aq_scale; - dual_steepest_edge_array_value /= col_ap_scale; + dual_steepest_edge_array_value *= inv_col_ap_scale; } if (DSE_check) { const double pivotal_column_error = diff --git a/src/simplex/HEkkDual.cpp b/src/simplex/HEkkDual.cpp index 6a6ca9d13e..5e2e49755f 100644 --- a/src/simplex/HEkkDual.cpp +++ b/src/simplex/HEkkDual.cpp @@ -402,6 +402,7 @@ void HEkkDual::initialiseInstance() { solver_num_col = ekk_instance_.lp_.num_col_; solver_num_row = ekk_instance_.lp_.num_row_; solver_num_tot = solver_num_col + solver_num_row; + inv_solver_num_row = 1.0 / solver_num_row; a_matrix = &ekk_instance_.lp_.a_matrix_; simplex_nla = &ekk_instance_.simplex_nla_; @@ -1276,7 +1277,7 @@ void HEkkDual::iterateTasks() { chooseRow(); // Disable slice when too sparse - if (1.0 * row_ep.count / solver_num_row < 0.01) slice_PRICE = 0; + if (1.0 * row_ep.count * inv_solver_num_row < 0.01) slice_PRICE = 0; analysis->simplexTimerStart(Group1Clock); // #pragma omp parallel @@ -1498,7 +1499,7 @@ void HEkkDual::chooseRow() { move_out = delta_primal < 0 ? -1 : 1; // Update the record of average row_ep (pi_p) density. This ignores // any BTRANs done for skipped candidates - const double local_row_ep_density = (double)row_ep.count / solver_num_row; + const double local_row_ep_density = (double)row_ep.count * inv_solver_num_row; ekk_instance_.updateOperationResultDensity( local_row_ep_density, ekk_instance_.info_.row_ep_density); } @@ -1798,7 +1799,7 @@ void HEkkDual::chooseColumnSlice(HVector* row_ep) { analysis->simplexTimerStop(Chuzc0Clock); // const HighsInt solver_num_row = ekk_instance_.lp_.num_row_; - const double local_density = 1.0 * row_ep->count / solver_num_row; + const double local_density = 1.0 * row_ep->count * inv_solver_num_row; bool use_col_price; bool use_row_price_w_switch; HighsSimplexInfo& info = ekk_instance_.info_; @@ -1961,7 +1962,7 @@ void HEkkDual::updateFtran() { analysis->pointer_serial_factor_clocks); if (analysis->analyse_simplex_summary_data) analysis->operationRecordAfter(kSimplexNlaFtran, col_aq); - const double local_col_aq_density = (double)col_aq.count / solver_num_row; + const double local_col_aq_density = (double)col_aq.count * inv_solver_num_row; ekk_instance_.updateOperationResultDensity( local_col_aq_density, ekk_instance_.info_.col_aq_density); // Save the pivot value computed column-wise - used for numerical checking @@ -2002,7 +2003,8 @@ void HEkkDual::updateFtranBFRT() { if (time_updateFtranBFRT) { analysis->simplexTimerStop(FtranBfrtClock); } - const double local_col_BFRT_density = (double)col_BFRT.count / solver_num_row; + const double local_col_BFRT_density = + (double)col_BFRT.count * inv_solver_num_row; ekk_instance_.updateOperationResultDensity( local_col_BFRT_density, ekk_instance_.info_.col_BFRT_density); } @@ -2043,7 +2045,7 @@ void HEkkDual::updateFtranDSE(HVector* DSE_Vector) { analysis->operationRecordAfter(kSimplexNlaFtranDse, *DSE_Vector); analysis->simplexTimerStop(FtranDseClock); const double local_row_DSE_density = - (double)DSE_Vector->count / solver_num_row; + (double)DSE_Vector->count * inv_solver_num_row; ekk_instance_.updateOperationResultDensity( local_row_DSE_density, ekk_instance_.info_.row_DSE_density); } diff --git a/src/simplex/HEkkDual.h b/src/simplex/HEkkDual.h index 573abc921d..d80826c637 100644 --- a/src/simplex/HEkkDual.h +++ b/src/simplex/HEkkDual.h @@ -388,6 +388,7 @@ class HEkkDual { HighsInt solver_num_row; HighsInt solver_num_col; HighsInt solver_num_tot; + double inv_solver_num_row; // 1.0 / solver_num_row const HighsSparseMatrix* a_matrix; const HSimplexNla* simplex_nla; diff --git a/src/util/HFactor.cpp b/src/util/HFactor.cpp index c7436fe55a..baf1bd1370 100644 --- a/src/util/HFactor.cpp +++ b/src/util/HFactor.cpp @@ -211,6 +211,7 @@ void HFactor::setupGeneral( num_row = num_row_; num_col = num_col_; num_basic = num_basic_; + inv_num_row = 1.0 / num_row; this->a_matrix_valid = true; a_start = a_start_; a_index = a_index_; @@ -1543,7 +1544,7 @@ void HFactor::ftranL(HVector& rhs, const double expected_density, } // Determine style of solve - double current_density = 1.0 * rhs.count / num_row; + double current_density = 1.0 * rhs.count * inv_num_row; const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel || expected_density > kHyperFtranL; if (sparse_solve) { @@ -1591,7 +1592,7 @@ void HFactor::btranL(HVector& rhs, const double expected_density, factor_timer.start(FactorBtranLower, factor_timer_clock_pointer); // Determine style of solve - const double current_density = 1.0 * rhs.count / num_row; + const double current_density = 1.0 * rhs.count * inv_num_row; const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel || expected_density > kHyperBtranL; if (sparse_solve) { @@ -1666,7 +1667,7 @@ void HFactor::ftranU(HVector& rhs, const double expected_density, // The regular part // // Determine style of solve - const double current_density = 1.0 * rhs.count / num_row; + const double current_density = 1.0 * rhs.count * inv_num_row; const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel || expected_density > kHyperFtranU; if (sparse_solve) { @@ -1720,7 +1721,7 @@ void HFactor::ftranU(HVector& rhs, const double expected_density, rhs_synthetic_tick * 15 + (u_pivot_count - num_row) * 10; factor_timer.stop(use_clock, factor_timer_clock_pointer); if (report_ftran_upper_sparse) { - const double final_density = 1.0 * rhs.count / num_row; + const double final_density = 1.0 * rhs.count * inv_num_row; printf( "FactorFtranUpperSps: expected_density = %10.4g; current_density = " "%10.4g; final_density = %10.4g\n", @@ -1773,7 +1774,7 @@ void HFactor::btranU(HVector& rhs, const double expected_density, // The regular part // // Determine style of solve - const double current_density = 1.0 * rhs.count / num_row; + const double current_density = 1.0 * rhs.count * inv_num_row; const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel || expected_density > kHyperBtranU; if (sparse_solve) { diff --git a/src/util/HFactor.h b/src/util/HFactor.h index d06a79af67..2f75260d8a 100644 --- a/src/util/HFactor.h +++ b/src/util/HFactor.h @@ -347,6 +347,7 @@ class HFactor { HighsInt num_row; HighsInt num_col; HighsInt num_basic; + double inv_num_row; // 1.0/num_row private: bool a_matrix_valid; diff --git a/src/util/HighsSparseMatrix.cpp b/src/util/HighsSparseMatrix.cpp index ce8e37695b..16648e1506 100644 --- a/src/util/HighsSparseMatrix.cpp +++ b/src/util/HighsSparseMatrix.cpp @@ -1383,6 +1383,7 @@ void HighsSparseMatrix::priceByRowWithSwitch( assert(HighsInt(result.size) == this->num_col_); assert(HighsInt(result.index.size()) == this->num_col_); if (expected_density <= kHyperPriceDensity) { + double inv_num_col = 1.0 / this->num_col_; for (HighsInt ix = next_index; ix < column.count; ix++) { HighsInt iRow = column.index[ix]; // Determine whether p_end_ or the next start_ ends the loop @@ -1394,7 +1395,7 @@ void HighsSparseMatrix::priceByRowWithSwitch( } // Possibly switch to standard row-wise price HighsInt row_num_nz = to_iEl - this->start_[iRow]; - double local_density = (1.0 * result.count) / this->num_col_; + double local_density = (1.0 * result.count) * inv_num_col; bool switch_to_dense = result.count + row_num_nz >= this->num_col_ || local_density > switch_density; if (switch_to_dense) break;