Skip to content

Commit

Permalink
Improve performance of add_row_number (#8076)
Browse files Browse the repository at this point in the history
Fixes #8055
  • Loading branch information
radeusgd authored Oct 17, 2023
1 parent b51b986 commit e9fa127
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 90 deletions.
Original file line number Diff line number Diff line change
@@ -1,36 +1,37 @@
from Standard.Base import all
import Standard.Base.Errors.Common.Unsupported_Argument_Types
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

import project.Data.Column.Column
import project.Data.Set_Mode.Set_Mode
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
import project.Data.Type.Value_Type.Bits
import project.Internal.Java_Problems
import project.Internal.Multi_Value_Key.Ordered_Multi_Value_Key
import project.Internal.Multi_Value_Key.Unordered_Multi_Value_Key
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Table_Helpers
from project.Errors import Duplicate_Output_Column_Names
from project.Internal.Java_Exports import make_long_builder

polyglot java import org.enso.base.arrays.MutableLongArrayWrapper
polyglot java import java.lang.ArithmeticException
polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
polyglot java import org.enso.table.data.column.storage.numeric.LongStorage
polyglot java import org.enso.table.operations.AddRowNumber

## PRIVATE
add_row_number : Table -> Text -> Integer -> Integer -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table
add_row_number table name from step group_by order_by on_problems =
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
Unordered_Multi_Value_Key.validate_grouping_columns grouping_columns problem_builder
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
problem_builder.attach_problems_before on_problems <|

handle_arithmetic_exception _ =
Error.throw (Illegal_Argument.Error "The row number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")

problem_builder.attach_problems_before on_problems <| Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
new_column = case ordering.is_empty of
True ->
case grouping_columns.is_empty of
True -> make_range_column name from step table.row_count
False -> make_grouped_enumeration name grouping_columns from step
False -> make_grouped_ordered_enumeration name grouping_columns ordering from step
False -> make_grouped_enumeration name grouping_columns from step on_problems
False -> make_grouped_ordered_enumeration name grouping_columns ordering from step on_problems

renamed_table = rename_columns_if_needed table name on_problems Table.new
renamed_table.set new_column name set_mode=Set_Mode.Add
Expand Down Expand Up @@ -60,48 +61,26 @@ make_range_column name start step length =
Column.from_storage name storage

## PRIVATE
make_grouped_enumeration name grouping_columns start step =
n = grouping_columns.at 0 . length
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
column_builder = make_long_builder n bits=Bits.Bits_64 java_problem_aggregator
0.up_to n . fold Map.empty grouping_counters-> ix->
key = Unordered_Multi_Value_Key.from_row grouping_columns ix
enum_index = grouping_counters.get key 0
column_builder.appendLong (nth_index start step enum_index)
new_counters = grouping_counters.insert key (enum_index + 1)
new_counters
storage = column_builder.seal
Column.from_storage name storage
make_grouped_enumeration name grouping_columns start step on_problems =
# Builds a new column called `name` by delegating the grouped numbering to the Java `AddRowNumber` helper.
# Each grouping column is unwrapped to its `java_column` so that it can be handed over to the Java side.
grouping_java_columns = grouping_columns.map .java_column
# The Java call runs with a problem aggregator configured from `on_problems`.
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
# NOTE(review): presumably numbers the rows separately within each group, beginning at `start` and advancing by `step` - confirm against `AddRowNumber.create_grouped_numbering`.
new_storage = AddRowNumber.create_grouped_numbering start step grouping_java_columns java_problem_aggregator
# Wrap the storage returned by Java into a column with the requested name.
Column.from_storage name new_storage

## PRIVATE
If the `grouping_columns` are empty, all rows are considered to be in the same group.
make_grouped_ordered_enumeration name grouping_columns ordering from step =
ordering_columns = ordering.map .column
ordering_flip_directions = ordering.map descriptor-> case descriptor.associated_selector.direction of
Sort_Direction.Ascending -> False
Sort_Direction.Descending -> True
n = ordering_columns.at 0 . length
grouped_rows = (0.up_to n).fold Map.empty grouped_rows-> ix->
key = Unordered_Multi_Value_Key.from_row grouping_columns ix
new_grouped_rows = case grouped_rows.get key of
Nothing ->
builder = Vector.new_builder
builder.append ix
grouped_rows.insert key builder
existing_builder ->
existing_builder.append ix
grouped_rows
new_grouped_rows
make_grouped_ordered_enumeration name grouping_columns ordering from step on_problems =
ordering_columns = ordering.map c->
c.column.java_column
directions = ordering.map c->
c.associated_selector.direction.to_sign

long_array = MutableLongArrayWrapper.new n
grouping_java_columns = grouping_columns.map .java_column

grouped_rows.each row_group_builder->
row_group = row_group_builder.to_vector
sorted_group = row_group.sort on=ix->
Ordered_Multi_Value_Key.from_row ordering_columns ordering_flip_directions ix
sorted_group.each_with_index enum_ix-> row_ix->
enum_value = nth_index from step enum_ix
long_array.set row_ix enum_value

storage = LongStorage.fromArray long_array.getUnderlyingArray
Column.from_storage name storage
new_storage = case grouping_java_columns.is_empty of
True ->
AddRowNumber.create_ordered_numbering from step ordering_columns directions
False ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
AddRowNumber.create_grouped_ordered_numbering from step ordering_columns directions grouping_java_columns java_problem_aggregator
Column.from_storage name new_storage
11 changes: 7 additions & 4 deletions distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,14 @@ type Bench
duration_ns = conf.iterations * conf.seconds * 1000000000
phase_start = System.nano_time
stop_ns = phase_start + duration_ns
go = durations -> cur_ns ->
if cur_ns > stop_ns then durations else
durations_builder = Vector.new_builder
go cur_ns =
if cur_ns > stop_ns then Nothing else
dur = Bench.single_call act
@Tail_Call go (durations + [dur]) (cur_ns + dur)
durations = go [] phase_start
durations_builder.append dur
@Tail_Call go (cur_ns + dur)
go phase_start
durations = durations_builder.to_vector
sum = durations.reduce (_ + _)
run_iters = durations.length
avg = (sum / run_iters) / 1000000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public int compare(Object thisValue, Object thatValue) {

// Booleans
if (thisValue instanceof Boolean thisBool && thatValue instanceof Boolean thatBool) {
if (thisBool == thatBool) {
if (thisBool.booleanValue() == thatBool.booleanValue()) {
return 0;
}
return thisBool ? 1 : -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ public LongRangeStorage(long start, long step, int size) {
super(size);
this.start = start;
this.step = step;
verifyBounds();
}

/**
 * Verifies that the last value of this range can be represented as a 64-bit signed integer.
 *
 * <p>The values of the range are {@code start + step * i}, so they change monotonically with the
 * index. The first value is {@code start} itself, so it is enough to check the last one.
 *
 * <p>An empty range holds no values and is always accepted. Without that guard the check would
 * evaluate {@code start - step}, which may overflow even though no such value is ever stored.
 *
 * @throws ArithmeticException if computing the last value of a non-empty range overflows
 */
@SuppressWarnings("unused")
private void verifyBounds() throws ArithmeticException {
  if (size <= 0) {
    // Nothing to verify: there is no last element whose value could overflow.
    return;
  }

  long lastIdx = size - 1;
  // Computing this value will throw an exception if it overflows. The result itself is not needed,
  // hence the `unused` suppression on the method.
  long lastValue = Math.addExact(start, Math.multiplyExact(step, lastIdx));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;
import org.enso.table.problems.ProblemAggregator;
import org.enso.table.util.ConstantList;
Expand Down Expand Up @@ -85,15 +84,8 @@ private MultiValueIndex(
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
KeyType key = keyFactory.apply(i);

if (key.hasFloatValues()) {
final int row = i;
key.floatColumnPositions()
.forEach(
columnIx ->
groupingProblemAggregator.reportColumnAggregatedProblem(
new FloatingPointGrouping(keyColumns[columnIx].getName(), row)));
}
key.checkAndReportFloatingEquality(
groupingProblemAggregator, columnIx -> keyColumns[columnIx].getName());

List<Integer> ids = this.locs.computeIfAbsent(key, x -> new ArrayList<>());
ids.add(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;

/** The base class for keys used for sorting/grouping rows by a set of columns. */
public abstract class MultiValueKeyBase {
Expand Down Expand Up @@ -59,7 +61,8 @@ public boolean hasAnyNulls() {

/* Checks if any cell contains float values.
It takes value folding into account, i.e. a float value that can be coerced to an integer without loss of precision is not considered floating.
It takes value folding into account, i.e. a float value that can be coerced to an integer without loss of
precision is not considered floating.
*/
public boolean hasFloatValues() {
if (!floatsComputed) {
Expand All @@ -70,28 +73,30 @@ public boolean hasFloatValues() {
return hasFloatValues;
}

private boolean findFloats() {
for (int i = 0; i < storages.length; i++) {
Object value = this.get(i);
if (NumericConverter.isFloatLike(value)) {
return true;
/**
 * Maps the position of a column within a key to the name of that column.
 *
 * <p>It has a single abstract method, so it can be implemented with a lambda or a method reference.
 */
@FunctionalInterface
public interface ColumnNameMapping {
  /**
   * Returns the name of the column at the given position.
   *
   * @param columnIx the position of the column within the key
   * @return the name of that column
   */
  String getColumnName(int columnIx);
}

public void checkAndReportFloatingEquality(
ColumnAggregatedProblemAggregator problemAggregator, ColumnNameMapping columnNameMapping) {
if (hasFloatValues()) {
for (int columnIx = 0; columnIx < storages.length; columnIx++) {
Object value = this.get(columnIx);
if (NumericConverter.isFloatLike(value)) {
problemAggregator.reportColumnAggregatedProblem(
new FloatingPointGrouping(columnNameMapping.getColumnName(columnIx), rowIndex));
}
}
}
return false;
}

/**
* Finds which columns contain a float value at this index position and returns their positions in
* this index.
*/
public List<Integer> floatColumnPositions() {
List<Integer> result = new ArrayList<>();
private boolean findFloats() {
for (int i = 0; i < storages.length; i++) {
Object value = this.get(i);
if (NumericConverter.isFloatLike(value)) {
result.add(i);
return true;
}
}
return result;
return false;
}
}
Loading

0 comments on commit e9fa127

Please sign in to comment.