Support for BigInteger values in Table (#7715)

- Fixes #7354
- Also closes #7712
- Refactors how we handle numeric ops, ensuring that the 'kernels' are all placed in one place and selected based on storage types.
enso-org · Sep 12, 2023 · 8b6e70b · 8b6e70b
1 parent a7fc333
commit 8b6e70b
Show file tree

Hide file tree

Showing 78 changed files with 2,584 additions and 985 deletions.
diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso
@@ -1,9 +1,9 @@
 from Standard.Base import all
 
-from Standard.Table import Aggregate_Column
-from Standard.Table import Value_Type
+from Standard.Table import Aggregate_Column, Value_Type
 
 import Standard.Database.Connection.Connection.Connection
+import Standard.Database.Data.Column.Column
 import Standard.Database.Data.Dialect
 import Standard.Database.Data.SQL.Builder
 import Standard.Database.Data.SQL_Statement.SQL_Statement
@@ -168,3 +168,10 @@ type Redshift_Dialect
     fetch_primary_key : Connection -> Text -> Vector Text ! Nothing
     fetch_primary_key self connection table_name =
         Dialect.default_fetch_primary_key connection table_name
+
+    ## PRIVATE
+    value_type_for_upload_of_existing_column : Column -> Value_Type
+    value_type_for_upload_of_existing_column self column =
+        ## TODO special behaviour for big integer columns should be added here, once we start testing this dialect again
+           See: https://docs.aws.amazon.com/redshift/latest/dg/r_Numeric_types201.html#r_Numeric_types201-decimal-or-numeric-type
+        column.value_type
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
@@ -5,6 +5,7 @@ import Standard.Table.Internal.Problem_Builder.Problem_Builder
 from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
 
 import project.Connection.Connection.Connection
+import project.Data.Column.Column
 import project.Data.SQL.Builder
 import project.Data.SQL_Statement.SQL_Statement
 import project.Data.SQL_Type.SQL_Type
@@ -231,6 +232,17 @@ type Dialect
         _ = [replace_params, action]
         Unimplemented.throw "This is an interface only."
 
+    ## PRIVATE
+       Determines the value type to use when uploading the given column to the
+       Database.
+
+       This will usually just be `column.value_type`, but it allows the database
+       to do custom fallback handling for datatypes that are not supported.
+    value_type_for_upload_of_existing_column : Column -> Value_Type
+    value_type_for_upload_of_existing_column self column =
+        _ = column
+        Unimplemented.throw "This is an interface only."
+
 ## PRIVATE
 
    The dialect of SQLite databases.

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -2027,7 +2027,15 @@ type Table
             False ->
                 sql = preprocessed.to_sql
                 column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
-                self.connection.read_statement sql column_type_suggestions
+                materialized_table = self.connection.read_statement sql column_type_suggestions
+
+                expected_types = self.columns.map .value_type
+                actual_types = materialized_table.columns.map .value_type
+                expected_types.zip actual_types . fold materialized_table acc-> types_pair->
+                    expected_type = types_pair.first
+                    actual_type = types_pair.second
+                    if expected_type == actual_type then acc else
+                        Warning.attach (Inexact_Type_Coercion.Warning expected_type actual_type) acc
 
     ## PRIVATE
        Creates a query corresponding to this table.

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Column_Fetcher.enso
@@ -1,4 +1,5 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 
 import Standard.Table.Data.Column.Column as Materialized_Column
 import Standard.Table.Data.Type.Value_Type.Bits
@@ -83,6 +84,18 @@ long_fetcher bits =
         Builder.Value append (seal_java_builder java_builder)
     Column_Fetcher.Value fetch_value make_builder
 
+## PRIVATE
+big_integer_fetcher : Column_Fetcher
+big_integer_fetcher =
+    fetch_value rs i =
+        big_decimal = rs.getBigDecimal i
+        if rs.wasNull then Nothing else
+            big_decimal.toBigIntegerExact
+    make_builder initial_size =
+        java_builder = Java_Exports.make_biginteger_builder initial_size
+        make_builder_from_java_object_builder java_builder
+    Column_Fetcher.Value fetch_value make_builder
+
 ## PRIVATE
 text_fetcher : Value_Type -> Column_Fetcher
 text_fetcher value_type =
@@ -145,6 +158,14 @@ default_fetcher_for_value_type value_type =
         Value_Type.Time -> time_fetcher
         # We currently don't distinguish timestamps without a timezone on the Enso value side.
         Value_Type.Date_Time _ -> date_time_fetcher
+        # If we can determine that scale <= 0, the values are guaranteed to be integers, so they can be fetched as big integers.
+        Value_Type.Decimal _ scale ->
+            is_guaranteed_integer = scale.is_nothing.not && scale <= 0
+            case is_guaranteed_integer of
+                True  -> big_integer_fetcher
+                # If we cannot guarantee that the column is integer, we will fall back to Float values, since there is no BigDecimal implementation yet.
+                # TODO I think we should add a warning somewhere
+                False -> double_fetcher
         _ -> fallback_fetcher
 
 ## PRIVATE

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -4,6 +4,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State
 import Standard.Base.Errors.Unimplemented.Unimplemented
 
 import Standard.Table.Data.Aggregate_Column.Aggregate_Column
+import Standard.Table.Data.Column.Column as Materialized_Column
 import Standard.Table.Internal.Problem_Builder.Problem_Builder
 import Standard.Table.Internal.Vector_Builder.Vector_Builder
 from Standard.Table import Value_Type
@@ -252,6 +253,29 @@ type Postgres_Dialect
     if_replace_params_supports self replace_params ~action =
         if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported
 
+
+    ## PRIVATE
+    value_type_for_upload_of_existing_column : Column | Materialized_Column -> Value_Type
+    value_type_for_upload_of_existing_column self column = case column of
+        # Return the type as-is for database columns.
+        _ : Column -> column.value_type
+        _ : Materialized_Column ->
+            base_type = column.value_type
+            case base_type of
+                Value_Type.Decimal precision scale ->
+                    # We cannot have a specified scale and no precision, so special handling is needed for this:
+                    case precision.is_nothing && scale.is_nothing.not of
+                        True ->
+                            needed_precision = column.java_column.getStorage.getMaxPrecisionStored
+                            new_type = case needed_precision <= 1000 of
+                                # If the precision is small enough that our number will fit, we create a column with maximum supported precision.
+                                True -> Value_Type.Decimal 1000 scale
+                                # If the needed precision is too big, we cannot set it, so we set the precision to unlimited. This loses scale.
+                                False -> Value_Type.Decimal Nothing Nothing
+                            Warning.attach (Inexact_Type_Coercion.Warning base_type new_type) new_type
+                        False -> base_type
+                _ -> base_type
+
 ## PRIVATE
 make_internal_generator_dialect =
     cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]

diff --git a/...ribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/...ribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso
@@ -34,8 +34,11 @@ type Postgres_Type_Mapping
                 SQL_Type.Value Types.REAL "float4"
             Value_Type.Float Bits.Bits_64 ->
                 SQL_Type.Value Types.DOUBLE "float8"
-            Value_Type.Decimal precision scale ->
-                SQL_Type.Value Types.DECIMAL "decimal" precision scale
+            Value_Type.Decimal precision scale -> case precision of
+                # If precision is not set, scale is also lost because SQL is unable to express a scale without a precision.
+                Nothing -> SQL_Type.Value Types.DECIMAL "decimal" Nothing Nothing
+                # Scale can be set or not, if precision is given, so no check needed.
+                _       -> SQL_Type.Value Types.DECIMAL "decimal" precision scale
             Value_Type.Char size variable ->
                 case variable of
                     True  ->

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQL_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQL_Type_Mapping.enso
@@ -1,4 +1,5 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Unimplemented.Unimplemented
 
 import Standard.Table.Data.Type.Value_Type.Value_Type
@@ -90,7 +91,11 @@ type SQL_Type_Mapping
 
 ## PRIVATE
 default_sql_type_to_text sql_type =
-    suffix = if sql_type.precision.is_nothing then "" else
-        if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
-            " (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
+    suffix = case sql_type.precision of
+        Nothing ->
+            if sql_type.scale.is_nothing.not then Error.throw (Illegal_Argument.Error "It is not possible to specify a scale but no precision in SQL, but got "+sql_type.to_text) else
+                ""
+        _ : Integer ->
+            if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
+                " (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
     sql_type.name.trim + suffix
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
@@ -3,9 +3,8 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State
 import Standard.Base.Runtime.Ref.Ref
 
-import Standard.Table.Data.Aggregate_Column.Aggregate_Column
 import Standard.Table.Internal.Problem_Builder.Problem_Builder
-from Standard.Table import Value_Type
+from Standard.Table import Value_Type, Aggregate_Column
 from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
 
 import project.Connection.Connection.Connection
@@ -269,6 +268,10 @@ type SQLite_Dialect
     if_replace_params_supports self replace_params ~action =
         if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported
 
+    ## PRIVATE
+    value_type_for_upload_of_existing_column : Column -> Value_Type
+    value_type_for_upload_of_existing_column self column = column.value_type
+
 ## PRIVATE
 make_internal_generator_dialect =
     text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace]]+concat_ops+trim_ops

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso
@@ -1,9 +1,11 @@
 from Standard.Base import all
 import Standard.Base.Errors.Illegal_State.Illegal_State
 
+polyglot java import java.math.BigDecimal as Java_Big_Decimal
 polyglot java import java.sql.PreparedStatement
 polyglot java import java.sql.Types as Java_Types
 
+polyglot java import org.enso.base.polyglot.NumericConverter
 polyglot java import org.enso.database.JDBCUtils
 
 type Statement_Setter
@@ -31,7 +33,11 @@ type Statement_Setter
 fill_hole_default stmt i value = case value of
     Nothing       -> stmt.setNull i Java_Types.NULL
     _ : Boolean   -> stmt.setBoolean i value
-    _ : Integer   -> stmt.setLong i value
+    _ : Integer   -> case NumericConverter.isBigInteger value of
+        True  ->
+            big_decimal = NumericConverter.bigIntegerAsBigDecimal value
+            stmt.setBigDecimal i big_decimal
+        False -> stmt.setLong i value
     _ : Decimal   -> stmt.setDouble i value
     _ : Text      -> stmt.setString i value
     _ : Date_Time -> JDBCUtils.setZonedDateTime stmt i value

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso
@@ -59,7 +59,7 @@ create_table_implementation connection table_name structure primary_key temporar
    Does not check if the table already exists - so if it does, it may fail with
    `SQL_Error`. The caller should perform the check for better error handling.
 internal_create_table_structure connection table_name structure primary_key temporary on_problems =
-    aligned_structure = align_structure structure
+    aligned_structure = align_structure connection structure
     resolved_primary_key = resolve_primary_key aligned_structure primary_key
     validate_structure connection.base_connection.column_naming_helper aligned_structure <|
         create_table_statement = prepare_create_table_statement connection table_name aligned_structure resolved_primary_key temporary on_problems
@@ -217,21 +217,26 @@ raise_duplicated_primary_key_error source_table primary_key original_panic =
 
 ## PRIVATE
-align_structure : Database_Table | In_Memory_Table | Vector Column_Description -> Vector Column_Description
-align_structure table_or_columns = case table_or_columns of
-    vector : Vector -> if vector.is_empty then Error.throw (Illegal_Argument.Error "A table with no columns cannot be created. The `structure` must consist of at list one column description.") else
+align_structure : Any -> Database_Table | In_Memory_Table | Vector Column_Description -> Vector Column_Description
+align_structure connection table_or_columns = case table_or_columns of
+    vector : Vector         -> align_vector_structure vector
+    table : Database_Table  -> structure_from_existing_table connection table
+    table : In_Memory_Table -> structure_from_existing_table connection table
+
+## PRIVATE
+align_vector_structure vector =
+    if vector.is_empty then Error.throw (Illegal_Argument.Error "A table with no columns cannot be created. The `structure` must consist of at least one column description.") else
         vector.map def-> case def of
             _ : Column_Description -> def
             _ : Function ->
                 Error.throw (Illegal_Argument.Error "The structure should be a vector of Column_Description. Maybe some arguments of Column_Description are missing?")
             _ ->
                 Error.throw (Illegal_Argument.Error "The structure must be an existing Table or vector of Column_Description.")
-    table : Database_Table  -> structure_from_existing_table table
-    table : In_Memory_Table -> structure_from_existing_table table
 
 ## PRIVATE
-structure_from_existing_table table =
+structure_from_existing_table connection table =
     table.columns.map column->
-        Column_Description.Value column.name column.value_type
+        value_type = connection.dialect.value_type_for_upload_of_existing_column column
+        Column_Description.Value column.name value_type
 
 ## PRIVATE
    Verifies that the provided structure is valid, and runs the provided action
@@ -255,9 +260,10 @@ validate_structure column_naming_helper structure ~action =
    Returns the name of the first column in the provided table structure.
    It also verifies that the structure is correct.
    Used to provide the default value for `primary_key` in `create_table`.
-first_column_name_in_structure structure =
-    aligned = align_structure structure
-    aligned.first.name
+first_column_name_in_structure structure = case structure of
+    vector : Vector -> align_vector_structure vector . first . name
+    table : Database_Table  -> table.column_names.first
+    table : In_Memory_Table -> table.column_names.first
 
 ## PRIVATE
    Creates a statement that will create a table with structure determined by the

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
@@ -60,7 +60,7 @@ type Column
              example_from_vector =
                  Column.from_vector "My Column" [1, 2, 3, 4, 5]
     from_vector : Text -> Vector -> Value_Type | Auto -> Column ! Invalid_Value_Type
-    from_vector name items value_type=Auto =
+    from_vector (name : Text) (items : Vector) (value_type : Auto | Value_Type = Auto) =
         ## If the type does not accept date-time-like values, we can skip the
            additional logic for polyglot conversions that would normally be used,
            which is quite costly - so if we can guarantee it is unnecessary,
@@ -1118,12 +1118,12 @@ type Column
         common_type.if_not_error <|
             storage = self.java_column.getStorage
             storage_type = Storage.from_value_type_strict common_type
-            new_st = case default of
+            new_st = Java_Problems.unpack_value_with_aggregated_problems Problem_Behavior.Report_Warning <| case default of
                 Column.Value java_col ->
                     other_storage = java_col.getStorage
                     storage.fillMissingFrom other_storage storage_type
                 _ ->
-                    storage.fillMissing default
+                    storage.fillMissing default storage_type
             col = Java_Column.new self.name new_st
             Column.Value col
 
@@ -1768,7 +1768,7 @@ type Column
     cast self value_type on_problems=Problem_Behavior.Report_Warning =
         Cast_Helpers.check_cast_compatibility self.value_type value_type <|
             target_storage_type = Storage.from_value_type value_type on_problems
-            cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name value_type
+            cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name target_storage_type
             new_storage = self.java_column.getStorage.cast target_storage_type cast_problem_builder.to_java
             problems = cast_problem_builder.get_problems
             on_problems.attach_problems_before problems <|
@@ -1937,7 +1937,7 @@ type Column
 
              example_at = Examples.integer_column.at 0
     at : Integer -> (Any | Nothing) ! Index_Out_Of_Bounds
-    at self index =
+    at self (index : Integer) =
         valid_index = (index >= 0) && (index < self.length)
         if valid_index.not then Error.throw (Index_Out_Of_Bounds.Error index self.length) else
             storage = self.java_column.getStorage
@@ -2204,9 +2204,10 @@ type Column
 run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Text|Nothing -> Boolean -> Column
 run_vectorized_many_op column name fallback_fn operands new_name=Nothing skip_nulls=False =
     effective_operands = Vector.unify_vector_or_element operands
+    all_operands = [column]+effective_operands
     effective_new_name = new_name.if_nothing <|
-        naming_helper.function_name name [column]+effective_operands
-    common_type = Value_Type_Helpers.find_common_type_for_arguments effective_operands
+        naming_helper.function_name name all_operands
+    common_type = Value_Type_Helpers.find_common_type_for_arguments all_operands
     common_type.if_not_error <|
         problem_builder = MapOperationProblemBuilder.new effective_new_name
         storage_type = resolve_storage_type common_type

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Enso_Types.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Enso_Types.enso
@@ -5,6 +5,7 @@ import project.Data.Type.Value_Type.Bits
 import project.Data.Type.Value_Type.Value_Type
 
 polyglot java import org.enso.table.data.column.storage.type.IntegerType
+polyglot java import org.enso.base.polyglot.NumericConverter
 
 ## PRIVATE
    Finds the most specific `Value_Type` that can be used to hold the given
@@ -22,13 +23,16 @@ most_specific_value_type value use_smallest=False =
         _ : Date        -> Value_Type.Date
         _ : Time_Of_Day -> Value_Type.Time
         _ : Date_Time   -> Value_Type.Date_Time
-        i : Integer     -> case use_smallest of
-            False -> Value_Type.Integer Bits.Bits_64
-            True ->
-                storage_type = IntegerType.smallestFitting i
-                value_type = Storage.to_value_type storage_type
-                # We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
-                if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
+        i : Integer     ->
+            case NumericConverter.isBigInteger i of
+                False -> case use_smallest of
+                    False -> Value_Type.Integer Bits.Bits_64
+                    True ->
+                        storage_type = IntegerType.smallestFitting i
+                        value_type = Storage.to_value_type storage_type
+                        # We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
+                        if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
+                True -> Value_Type.Decimal precision=Nothing scale=0
         text : Text     -> case use_smallest of
             False -> Value_Type.Char size=Nothing variable_length=True
             True  -> Value_Type.Char size=text.length variable_length=False