Implement Decimal support for Postgres backend (#10216)

* treat scale nothing as unspecifed * cast to decimal * float int biginteger * conversion failure ints * loss of decimal precision * precision loss for mixed column to float * mixed columns * loss of precision on inexact float conversion * cleanup, reuse * changelog * review * no fits bd * no warning on 0.1 conversion * fmt * big_decimal_fetcher * default fetcher and statement setting * round-trip d * fix warning * expr +10 * double builder retype to bigdecimal * Use BD fetcher for underspecified postgres numeric column, not inferred builder, and do not use biginteger builder for integral bigdecimal values * fix tests * fix test * cast_op_type * no-ops for other dialects * Types * sum + avg * avg + sum test * fix test * update agg type inference test * wip * is_int8, stddev * more doc, overflow check * fmt * finish round-trip test * wip
enso-org · Jul 2, 2024 · 48fb999 · 48fb999
1 parent 08ec3ac
commit 48fb999
Show file tree

Hide file tree

Showing 16 changed files with 225 additions and 49 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -57,6 +57,7 @@
 - [Implemented fallback to Windows-1252 encoding for `Encoding.Default`.][10190]
 - [Added Table.duplicates component][10323]
 - [Renamed `Table.order_by` to `Table.sort`][10372]
+- [Implemented `Decimal` support for Postgres backend.][10216]
 
 [debug-shortcuts]:
 
@@ -67,6 +68,7 @@
 [10190]: https://github.com/enso-org/enso/pull/10190
 [10323]: https://github.com/enso-org/enso/pull/10323
 [10372]: https://github.com/enso-org/enso/pull/10372
+[10216]: https://github.com/enso-org/enso/pull/10216
 
 <br/>![Release Notes](/docs/assets/tags/release_notes.svg)
 

diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso
@@ -130,6 +130,20 @@ type Redshift_Dialect
         _ = [approximate_result_type, infer_result_type_from_database_callback]
         column
 
+    ## PRIVATE
+       Add an extra cast to adjust the output type of certain operations with
+       certain arguments.
+
+       It is used when the normal type inference provided by the database engine
+       needs to be adjusted.
+
+       In most cases this method will just return the expression unchanged, it
+       is used only to override the type in cases where the default one that the
+       database uses is not what we want.
+    cast_op_type self (op_kind:Text) (args:(Vector Internal_Column)) (expression:SQL_Expression) =
+        _ = [op_kind, args]
+        expression
+
     ## PRIVATE
     prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
     prepare_fetch_types_query self expression context =

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso
@@ -29,7 +29,7 @@ make_aggregate_column : DB_Table -> Aggregate_Column -> Text -> Dialect -> (Any
 make_aggregate_column table aggregate as dialect infer_return_type problem_builder =
     is_non_empty_selector v = v.is_nothing.not && v.not_empty
     simple_aggregate op_kind columns =
-        expression = SQL_Expression.Operation op_kind (columns.map c->c.expression)
+        expression = dialect.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
         sql_type_ref = infer_return_type op_kind columns expression
         Internal_Column.Value as sql_type_ref expression
 

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso
@@ -97,6 +97,19 @@ big_integer_fetcher =
         make_builder_from_java_object_builder java_builder
     Column_Fetcher.Value fetch_value make_builder
 
+## PRIVATE
+big_decimal_fetcher : Column_Fetcher
+big_decimal_fetcher =
+    fetch_value rs i =
+        big_decimal = rs.getBigDecimal i
+        if rs.wasNull then Nothing else
+            big_decimal
+    make_builder initial_size java_problem_aggregator =
+        _ = java_problem_aggregator
+        java_builder = Java_Exports.make_bigdecimal_builder initial_size
+        make_builder_from_java_object_builder java_builder
+    Column_Fetcher.Value fetch_value make_builder
+
 ## PRIVATE
 text_fetcher : Value_Type -> Column_Fetcher
 text_fetcher value_type =
@@ -178,14 +191,14 @@ default_fetcher_for_value_type value_type =
         # We currently don't distinguish timestamps without a timezone on the Enso value side.
         Value_Type.Date_Time has_timezone ->
             if has_timezone then date_time_fetcher else local_date_time_fetcher
-        # If we can determine that scale = 0
+        ## If we can determine that scale <= 0, we use BigIntegerBuilder.
+           Otherwise, we use BigDecimalBuilder, since it's possible some values
+           will be BigDecimal.
         Value_Type.Decimal _ scale ->
             is_guaranteed_integer = scale.is_nothing.not && scale <= 0
             case is_guaranteed_integer of
                 True  -> big_integer_fetcher
-                # If we cannot guarantee that the column is integer, we will fall back to Float values, since there is no BigDecimal implementation yet.
-                # In another place this will trigger a Inexact_Type_Coercion warning.
-                False -> double_fetcher
+                False -> big_decimal_fetcher
         _ -> fallback_fetcher
 
 ## PRIVATE

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -39,6 +39,8 @@ import project.SQL_Type.SQL_Type
 from project.Errors import SQL_Error, Unsupported_Database_Operation
 from project.Internal.IR.Operation_Metadata import Date_Period_Metadata
 
+polyglot java import java.sql.Types
+
 ## PRIVATE
 
    The dialect of PostgreSQL databases.
@@ -195,6 +197,36 @@ type Postgres_Dialect
                         Illegal_State.Error "The type computed by our logic is Char, but the Database computed a non-text type ("+db_type.to_display_text+"). This should never happen and should be reported as a bug in the Database library."
             False -> column
 
+    ## PRIVATE
+       Add an extra cast to adjust the output type of certain operations with
+       certain arguments.
+
+       It is used when the normal type inference provided by the database engine
+       needs to be adjusted.
+
+       In most cases this method will just return the expression unchanged, it
+       is used only to override the type in cases where the default one that the
+       database uses is not what we want.
+    cast_op_type self (op_kind:Text) (args:(Vector Internal_Column)) (expression:SQL_Expression) =
+        is_int8 ic = ic.sql_type_reference.get.typeid == Types.BIGINT
+        is_int ic =
+            typeid = ic.sql_type_reference.get.typeid
+            typeid == Types.SMALLINT || typeid == Types.INTEGER || typeid == Types.BIGINT
+
+        cast_to = case op_kind of
+            "SUM" ->
+                if is_int8 (args.at 0) then "numeric(1000,0)" else Nothing
+            "AVG" ->
+                if is_int (args.at 0) then "float8" else Nothing
+            "STDDEV_POP" ->
+                if is_int (args.at 0) then "float8" else Nothing
+            "STDDEV_SAMP" ->
+                if is_int (args.at 0) then "float8" else Nothing
+            _ -> Nothing
+
+        if cast_to.is_nothing then expression else
+            SQL_Expression.Operation "CAST" [expression, SQL_Expression.Literal cast_to]
+
     ## PRIVATE
     prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
     prepare_fetch_types_query self expression context =

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
@@ -184,6 +184,20 @@ type SQLite_Dialect
                 new_sql_type_reference = SQL_Type_Reference.from_constant sql_type
                 Internal_Column.Value column.name new_sql_type_reference new_expression
 
+    ## PRIVATE
+       Add an extra cast to adjust the output type of certain operations with
+       certain arguments.
+
+       It is used when the normal type inference provided by the database engine
+       needs to be adjusted.
+
+       In most cases this method will just return the expression unchanged, it
+       is used only to override the type in cases where the default one that the
+       database uses is not what we want.
+    cast_op_type self (op_kind:Text) (args:(Vector Internal_Column)) (expression:SQL_Expression) =
+        _ = [op_kind, args]
+        expression
+
     ## PRIVATE
     prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
     prepare_fetch_types_query self expression context =

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso
@@ -41,7 +41,8 @@ fill_hole_default stmt i type_hint value = case value of
             big_decimal = NumericConverter.bigIntegerAsBigDecimal value
             stmt.setBigDecimal i big_decimal
         False -> stmt.setLong i value
-    _ : Float   -> stmt.setDouble i value
+    _ : Decimal   -> stmt.setBigDecimal i value.big_decimal
+    _ : Float     -> stmt.setDouble i value
     _ : Text      -> stmt.setString i value
     _ : Date_Time ->
         has_timezone = case type_hint of

diff --git a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso
@@ -184,6 +184,20 @@ type Snowflake_Dialect
         _ = [approximate_result_type, infer_result_type_from_database_callback]
         column
 
+    ## PRIVATE
+       Add an extra cast to adjust the output type of certain operations with
+       certain arguments.
+
+       It is used when the normal type inference provided by the database engine
+       needs to be adjusted.
+
+       In most cases this method will just return the expression unchanged, it
+       is used only to override the type in cases where the default one that the
+       database uses is not what we want.
+    cast_op_type self (op_kind:Text) (args:(Vector Internal_Column)) (expression:SQL_Expression) =
+        _ = [op_kind, args]
+        expression
+
     ## PRIVATE
     prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
     prepare_fetch_types_query self expression context =

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Java_Exports.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Java_Exports.enso
@@ -5,6 +5,7 @@ import project.Internal.Storage
 import project.Value_Type.Bits
 import project.Value_Type.Value_Type
 
+polyglot java import org.enso.table.data.column.builder.BigDecimalBuilder
 polyglot java import org.enso.table.data.column.builder.BigIntegerBuilder
 polyglot java import org.enso.table.data.column.builder.BoolBuilder
 polyglot java import org.enso.table.data.column.builder.DateBuilder
@@ -36,6 +37,11 @@ make_biginteger_builder : Integer -> ProblemAggregator -> BigIntegerBuilder
 make_biginteger_builder initial_size java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
     BigIntegerBuilder.new initial_size java_problem_aggregator
 
+## PRIVATE
+make_bigdecimal_builder : Integer -> BigDecimalBuilder
+make_bigdecimal_builder initial_size =
+    BigDecimalBuilder.new initial_size
+
 ## PRIVATE
 make_string_builder : Integer -> Value_Type -> StringBuilder
 make_string_builder initial_size value_type=Value_Type.Char =

diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigDecimalBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigDecimalBuilder.java
@@ -36,11 +36,6 @@ public void appendRawNoGrow(BigDecimal value) {
     data[currentSize++] = value;
   }
 
-  @Override
-  public void append(Object o) {
-    appendNoGrow(o);
-  }
-
   @Override
   public boolean accepts(Object o) {
     return o instanceof BigDecimal;

diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigIntegerBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigIntegerBuilder.java
@@ -7,6 +7,7 @@
 import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
 import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
 import org.enso.table.data.column.storage.type.AnyObjectType;
+import org.enso.table.data.column.storage.type.BigDecimalType;
 import org.enso.table.data.column.storage.type.BigIntegerType;
 import org.enso.table.data.column.storage.type.FloatType;
 import org.enso.table.data.column.storage.type.IntegerType;
@@ -37,7 +38,9 @@ public void retypeToMixed(Object[] items) {
 
   @Override
   public boolean canRetypeTo(StorageType type) {
-    return type instanceof FloatType || type instanceof AnyObjectType;
+    return type instanceof FloatType
+        || type instanceof BigDecimalType
+        || type instanceof AnyObjectType;
   }
 
   @Override
@@ -53,6 +56,16 @@ public TypedBuilder retypeTo(StorageType type) {
         }
       }
       return res;
+    } else if (type instanceof BigDecimalType) {
+      BigDecimalBuilder res = new BigDecimalBuilder(currentSize);
+      for (int i = 0; i < currentSize; i++) {
+        if (data[i] == null) {
+          res.appendNulls(1);
+        } else {
+          res.appendNoGrow(data[i]);
+        }
+      }
+      return res;
     } else if (type instanceof AnyObjectType) {
       Object[] widenedData = Arrays.copyOf(data, data.length, Object[].class);
       ObjectBuilder res = new MixedBuilder(widenedData);

diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java
@@ -11,6 +11,7 @@
 import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
 import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
 import org.enso.table.data.column.storage.numeric.DoubleStorage;
+import org.enso.table.data.column.storage.type.BigDecimalType;
 import org.enso.table.data.column.storage.type.BigIntegerType;
 import org.enso.table.data.column.storage.type.BooleanType;
 import org.enso.table.data.column.storage.type.FloatType;
@@ -39,12 +40,26 @@ public void retypeToMixed(Object[] items) {
 
   @Override
   public boolean canRetypeTo(StorageType type) {
-    return false;
+    return type instanceof BigDecimalType;
   }
 
   @Override
   public TypedBuilder retypeTo(StorageType type) {
-    throw new UnsupportedOperationException();
+    if (type instanceof BigDecimalType) {
+      BigDecimalBuilder res = new BigDecimalBuilder(currentSize);
+      for (int i = 0; i < currentSize; i++) {
+        if (isNothing.get(i)) {
+          res.appendNulls(1);
+        } else {
+          double d = Double.longBitsToDouble(data[i]);
+          BigDecimal bigDecimal = BigDecimal.valueOf(d);
+          res.appendNoGrow(bigDecimal);
+        }
+      }
+      return res;
+    } else {
+      throw new UnsupportedOperationException();
+    }
   }
 
   @Override

diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java
@@ -9,6 +9,7 @@
 import org.enso.base.polyglot.NumericConverter;
 import org.enso.base.polyglot.Polyglot_Utils;
 import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.column.storage.type.BigDecimalType;
 import org.enso.table.data.column.storage.type.BigIntegerType;
 import org.enso.table.data.column.storage.type.BooleanType;
 import org.enso.table.data.column.storage.type.DateTimeType;
@@ -138,9 +139,7 @@ private record RetypeInfo(Class<?> clazz, StorageType type) {}
           new RetypeInfo(Long.class, IntegerType.INT_64),
           new RetypeInfo(Double.class, FloatType.FLOAT_64),
           new RetypeInfo(String.class, TextType.VARIABLE_LENGTH),
-          // TODO [RW] I think BigDecimals should not be coerced to floats, we should add Decimal
-          // support to in-memory tables at some point
-          // new RetypeInfo(BigDecimal.class, StorageType.FLOAT_64),
+          new RetypeInfo(BigDecimal.class, BigDecimalType.INSTANCE),
           new RetypeInfo(LocalDate.class, DateType.INSTANCE),
           new RetypeInfo(LocalTime.class, TimeOfDayType.INSTANCE),
           new RetypeInfo(ZonedDateTime.class, DateTimeType.INSTANCE),

diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso
@@ -947,7 +947,7 @@ add_specs suite_builder setup =
                 result.to_vector.should_equal [0, 1, 3, 4, 0, -1, -3, -4]
                 result.name . should_equal "round([x])"
 
-            group_builder.specify "should allow round on a float column (to >0 decimal places)" <|
+            group_builder.specify "should allow round on a "+type.to_text+" column (to >0 decimal places)" <|
                 table = table_builder [["x", [0.51, 0.59, 3.51, 3.59, -0.51, -0.59, -3.51, -3.59]]]
                 result = table.at "x" . cast type . round 1
                 # TODO why it's becoming an Int?
@@ -956,7 +956,7 @@ add_specs suite_builder setup =
                 result.to_vector.should_equal [0.5, 0.6, 3.5, 3.6, -0.5, -0.6, -3.5, -3.6]
                 result.name . should_equal "round([x])"
 
-            group_builder.specify "should allow round on a float column (to <0 decimal places)" <|
+            group_builder.specify "should allow round on a "+type.to_text+" column (to <0 decimal places)" <|
                 table = table_builder [["x", [51.2, 59.3, 351.45, 359.11, -51.2, -59.3, -351.23, -359.69]]]
                 result = table.at "x" . cast type . round -1
                 result.to_vector.should_equal [50.0, 60.0, 350.0, 360.0, -50.0, -60.0, -350.0, -360.0]
@@ -981,8 +981,9 @@ add_specs suite_builder setup =
                 result.name . should_equal "floor([x])"
 
         test_floatlike Value_Type.Float
-        if setup.test_selection.supports_decimal_type then
-            test_floatlike Value_Type.Decimal
+        group_builder.specify "should allow round on a Decimal column" pending="https://github.com/enso-org/enso/issues/10344" <|
+            if setup.test_selection.supports_decimal_type then
+              test_floatlike Value_Type.Decimal
 
         group_builder.specify "should allow round on an int column" <|
             table = table_builder [["x", [1, 9, 31, 39, -1, -9, -31, -39]]]
@@ -1020,7 +1021,7 @@ add_specs suite_builder setup =
                 decimal_col.value_type.is_decimal . should_be_true
                 decimal_col2 = decimal_col + decimal_col*decimal_col
                 [(.floor), (.ceil), (.truncate), (x-> x.round 0), (x-> x.round 2)].each op->
-                    op decimal_col2 . to_vector . should_equal [i1 + i1*i1 . to_float]
+                    op decimal_col2 . to_vector . should_equal [i1 + i1*i1]
 
         group_builder.specify "should allow Nothing/NULL" <|
             table = table_builder [["x", [Nothing, 0.51, 0.59, 3.51, Nothing, 3.59, -0.51, -0.59, -3.51, -3.59]]]