Skip to content

Commit

Permalink
Replace expensive computations
Browse files Browse the repository at this point in the history
Replace hot FDIVs with FMULs by precomputing reciprocal values
outside the loops. FDIVs are always more expensive than FMULs,
although the size of the difference varies per CPU microarchitecture.
  • Loading branch information
heshpdx committed Sep 6, 2024
1 parent 5ce7a27 commit a7a6e96
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 15 deletions.
7 changes: 4 additions & 3 deletions src/simplex/HEkk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2124,6 +2124,7 @@ void HEkk::updateDualSteepestEdgeWeights(

const double col_aq_scale = simplex_nla_.variableScaleFactor(variable_in);
const double col_ap_scale = simplex_nla_.basicColScaleFactor(row_out);
const double inv_col_ap_scale = 1.0 / col_ap_scale;

const bool DSE_check = false;
HVector alt_dual_steepest_edge_column;
Expand Down Expand Up @@ -2163,7 +2164,7 @@ void HEkk::updateDualSteepestEdgeWeights(
HighsInt DSE_array_count = 0;
for (HighsInt iRow = 0; iRow < num_row; iRow++) {
const double dual_steepest_edge_array_value =
dual_steepest_edge_array[iRow] / col_ap_scale;
dual_steepest_edge_array[iRow] * inv_col_ap_scale;
if (dual_steepest_edge_array_value) DSE_array_count++;
if (std::abs(dual_steepest_edge_array_value) >
dse_column_value_tolerance ||
Expand Down Expand Up @@ -2192,7 +2193,7 @@ void HEkk::updateDualSteepestEdgeWeights(
(int)alt_dual_steepest_edge_column.count, (int)DSE_array_count);
for (HighsInt iRow = 0; iRow < num_row; iRow++) {
const double dual_steepest_edge_array_value =
dual_steepest_edge_array[iRow] / col_ap_scale;
dual_steepest_edge_array[iRow] * inv_col_ap_scale;
if (alt_dual_steepest_edge_column.array[iRow] != 0 &&
dual_steepest_edge_array_value != 0) {
const double dse_column_error =
Expand Down Expand Up @@ -2234,7 +2235,7 @@ void HEkk::updateDualSteepestEdgeWeights(
double basic_col_scale = simplex_nla_.basicColScaleFactor(iRow);
aa_iRow /= basic_col_scale;
aa_iRow *= col_aq_scale;
dual_steepest_edge_array_value /= col_ap_scale;
dual_steepest_edge_array_value *= inv_col_ap_scale;
}
if (DSE_check) {
const double pivotal_column_error =
Expand Down
14 changes: 8 additions & 6 deletions src/simplex/HEkkDual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ void HEkkDual::initialiseInstance() {
solver_num_col = ekk_instance_.lp_.num_col_;
solver_num_row = ekk_instance_.lp_.num_row_;
solver_num_tot = solver_num_col + solver_num_row;
inv_solver_num_row = 1.0 / solver_num_row;

a_matrix = &ekk_instance_.lp_.a_matrix_;
simplex_nla = &ekk_instance_.simplex_nla_;
Expand Down Expand Up @@ -1276,7 +1277,7 @@ void HEkkDual::iterateTasks() {
chooseRow();

// Disable slice when too sparse
if (1.0 * row_ep.count / solver_num_row < 0.01) slice_PRICE = 0;
if (1.0 * row_ep.count * inv_solver_num_row < 0.01) slice_PRICE = 0;

analysis->simplexTimerStart(Group1Clock);
// #pragma omp parallel
Expand Down Expand Up @@ -1498,7 +1499,7 @@ void HEkkDual::chooseRow() {
move_out = delta_primal < 0 ? -1 : 1;
// Update the record of average row_ep (pi_p) density. This ignores
// any BTRANs done for skipped candidates
const double local_row_ep_density = (double)row_ep.count / solver_num_row;
const double local_row_ep_density = (double)row_ep.count * inv_solver_num_row;
ekk_instance_.updateOperationResultDensity(
local_row_ep_density, ekk_instance_.info_.row_ep_density);
}
Expand Down Expand Up @@ -1798,7 +1799,7 @@ void HEkkDual::chooseColumnSlice(HVector* row_ep) {
analysis->simplexTimerStop(Chuzc0Clock);

// const HighsInt solver_num_row = ekk_instance_.lp_.num_row_;
const double local_density = 1.0 * row_ep->count / solver_num_row;
const double local_density = 1.0 * row_ep->count * inv_solver_num_row;
bool use_col_price;
bool use_row_price_w_switch;
HighsSimplexInfo& info = ekk_instance_.info_;
Expand Down Expand Up @@ -1961,7 +1962,7 @@ void HEkkDual::updateFtran() {
analysis->pointer_serial_factor_clocks);
if (analysis->analyse_simplex_summary_data)
analysis->operationRecordAfter(kSimplexNlaFtran, col_aq);
const double local_col_aq_density = (double)col_aq.count / solver_num_row;
const double local_col_aq_density = (double)col_aq.count * inv_solver_num_row;
ekk_instance_.updateOperationResultDensity(
local_col_aq_density, ekk_instance_.info_.col_aq_density);
// Save the pivot value computed column-wise - used for numerical checking
Expand Down Expand Up @@ -2002,7 +2003,8 @@ void HEkkDual::updateFtranBFRT() {
if (time_updateFtranBFRT) {
analysis->simplexTimerStop(FtranBfrtClock);
}
const double local_col_BFRT_density = (double)col_BFRT.count / solver_num_row;
const double local_col_BFRT_density =
(double)col_BFRT.count * inv_solver_num_row;
ekk_instance_.updateOperationResultDensity(
local_col_BFRT_density, ekk_instance_.info_.col_BFRT_density);
}
Expand Down Expand Up @@ -2043,7 +2045,7 @@ void HEkkDual::updateFtranDSE(HVector* DSE_Vector) {
analysis->operationRecordAfter(kSimplexNlaFtranDse, *DSE_Vector);
analysis->simplexTimerStop(FtranDseClock);
const double local_row_DSE_density =
(double)DSE_Vector->count / solver_num_row;
(double)DSE_Vector->count * inv_solver_num_row;
ekk_instance_.updateOperationResultDensity(
local_row_DSE_density, ekk_instance_.info_.row_DSE_density);
}
Expand Down
1 change: 1 addition & 0 deletions src/simplex/HEkkDual.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ class HEkkDual {
HighsInt solver_num_row;
HighsInt solver_num_col;
HighsInt solver_num_tot;
double inv_solver_num_row; // 1.0 / solver_num_row

const HighsSparseMatrix* a_matrix;
const HSimplexNla* simplex_nla;
Expand Down
11 changes: 6 additions & 5 deletions src/util/HFactor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ void HFactor::setupGeneral(
num_row = num_row_;
num_col = num_col_;
num_basic = num_basic_;
inv_num_row = 1.0 / num_row;
this->a_matrix_valid = true;
a_start = a_start_;
a_index = a_index_;
Expand Down Expand Up @@ -1543,7 +1544,7 @@ void HFactor::ftranL(HVector& rhs, const double expected_density,
}

// Determine style of solve
double current_density = 1.0 * rhs.count / num_row;
double current_density = 1.0 * rhs.count * inv_num_row;
const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel ||
expected_density > kHyperFtranL;
if (sparse_solve) {
Expand Down Expand Up @@ -1591,7 +1592,7 @@ void HFactor::btranL(HVector& rhs, const double expected_density,
factor_timer.start(FactorBtranLower, factor_timer_clock_pointer);

// Determine style of solve
const double current_density = 1.0 * rhs.count / num_row;
const double current_density = 1.0 * rhs.count * inv_num_row;
const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel ||
expected_density > kHyperBtranL;
if (sparse_solve) {
Expand Down Expand Up @@ -1666,7 +1667,7 @@ void HFactor::ftranU(HVector& rhs, const double expected_density,
// The regular part
//
// Determine style of solve
const double current_density = 1.0 * rhs.count / num_row;
const double current_density = 1.0 * rhs.count * inv_num_row;
const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel ||
expected_density > kHyperFtranU;
if (sparse_solve) {
Expand Down Expand Up @@ -1720,7 +1721,7 @@ void HFactor::ftranU(HVector& rhs, const double expected_density,
rhs_synthetic_tick * 15 + (u_pivot_count - num_row) * 10;
factor_timer.stop(use_clock, factor_timer_clock_pointer);
if (report_ftran_upper_sparse) {
const double final_density = 1.0 * rhs.count / num_row;
const double final_density = 1.0 * rhs.count * inv_num_row;
printf(
"FactorFtranUpperSps: expected_density = %10.4g; current_density = "
"%10.4g; final_density = %10.4g\n",
Expand Down Expand Up @@ -1773,7 +1774,7 @@ void HFactor::btranU(HVector& rhs, const double expected_density,
// The regular part
//
// Determine style of solve
const double current_density = 1.0 * rhs.count / num_row;
const double current_density = 1.0 * rhs.count * inv_num_row;
const bool sparse_solve = rhs.count < 0 || current_density > kHyperCancel ||
expected_density > kHyperBtranU;
if (sparse_solve) {
Expand Down
1 change: 1 addition & 0 deletions src/util/HFactor.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ class HFactor {
HighsInt num_row;
HighsInt num_col;
HighsInt num_basic;
double inv_num_row; // 1.0/num_row

private:
bool a_matrix_valid;
Expand Down
3 changes: 2 additions & 1 deletion src/util/HighsSparseMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,7 @@ void HighsSparseMatrix::priceByRowWithSwitch(
assert(HighsInt(result.size) == this->num_col_);
assert(HighsInt(result.index.size()) == this->num_col_);
if (expected_density <= kHyperPriceDensity) {
double inv_num_col = 1.0 / this->num_col_;
for (HighsInt ix = next_index; ix < column.count; ix++) {
HighsInt iRow = column.index[ix];
// Determine whether p_end_ or the next start_ ends the loop
Expand All @@ -1394,7 +1395,7 @@ void HighsSparseMatrix::priceByRowWithSwitch(
}
// Possibly switch to standard row-wise price
HighsInt row_num_nz = to_iEl - this->start_[iRow];
double local_density = (1.0 * result.count) / this->num_col_;
double local_density = (1.0 * result.count) * inv_num_col;
bool switch_to_dense = result.count + row_num_nz >= this->num_col_ ||
local_density > switch_density;
if (switch_to_dense) break;
Expand Down

0 comments on commit a7a6e96

Please sign in to comment.