Skip to content

Commit

Permalink
Support for BigInteger values in Table (#7715)
Browse files Browse the repository at this point in the history
- Fixes #7354
- And also closes #7712
- Refactors how we handle numeric ops, ensuring that the 'kernels' are all kept in one place and selected based on storage types.
  • Loading branch information
radeusgd authored Sep 12, 2023
1 parent a7fc333 commit 8b6e70b
Show file tree
Hide file tree
Showing 78 changed files with 2,584 additions and 985 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from Standard.Base import all

from Standard.Table import Aggregate_Column
from Standard.Table import Value_Type
from Standard.Table import Aggregate_Column, Value_Type

import Standard.Database.Connection.Connection.Connection
import Standard.Database.Data.Column.Column
import Standard.Database.Data.Dialect
import Standard.Database.Data.SQL.Builder
import Standard.Database.Data.SQL_Statement.SQL_Statement
Expand Down Expand Up @@ -168,3 +168,10 @@ type Redshift_Dialect
fetch_primary_key : Connection -> Text -> Vector Text ! Nothing
fetch_primary_key self connection table_name =
Dialect.default_fetch_primary_key connection table_name

## PRIVATE
   Picks the value type used when an existing column is uploaded through
   this dialect. At present the column's own type is passed through as-is.
value_type_for_upload_of_existing_column : Column -> Value_Type
value_type_for_upload_of_existing_column self column =
    # TODO special behaviour for big integer columns should be added here, once we start testing this dialect again
    # See: https://docs.aws.amazon.com/redshift/latest/dg/r_Numeric_types201.html#r_Numeric_types201-decimal-or-numeric-type
    upload_type = column.value_type
    upload_type
12 changes: 12 additions & 0 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type

import project.Connection.Connection.Connection
import project.Data.Column.Column
import project.Data.SQL.Builder
import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
Expand Down Expand Up @@ -231,6 +232,17 @@ type Dialect
_ = [replace_params, action]
Unimplemented.throw "This is an interface only."

## PRIVATE
   Chooses the value type under which the given column should be uploaded
   to the Database.

   Most dialects simply return `column.value_type`; the hook exists so that
   a database can substitute a fallback for datatypes it does not support.
value_type_for_upload_of_existing_column : Column -> Value_Type
value_type_for_upload_of_existing_column self column =
    # The argument is deliberately discarded - this is only an interface stub.
    _ = [column]
    Unimplemented.throw "This is an interface only."

## PRIVATE

The dialect of SQLite databases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2027,7 +2027,15 @@ type Table
False ->
sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
self.connection.read_statement sql column_type_suggestions
materialized_table = self.connection.read_statement sql column_type_suggestions

expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types . fold materialized_table acc-> types_pair->
expected_type = types_pair.first
actual_type = types_pair.second
if expected_type == actual_type then acc else
Warning.attach (Inexact_Type_Coercion.Warning expected_type actual_type) acc

## PRIVATE
Creates a query corresponding to this table.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

import Standard.Table.Data.Column.Column as Materialized_Column
import Standard.Table.Data.Type.Value_Type.Bits
Expand Down Expand Up @@ -83,6 +84,18 @@ long_fetcher bits =
Builder.Value append (seal_java_builder java_builder)
Column_Fetcher.Value fetch_value make_builder

## PRIVATE
   A fetcher that reads each cell through `getBigDecimal` and converts every
   non-null value to an exact big integer.
big_integer_fetcher : Column_Fetcher
big_integer_fetcher =
    # The value is read first and `wasNull` is consulted afterwards, in that order.
    read_cell result_set index =
        fetched = result_set.getBigDecimal index
        case result_set.wasNull of
            True -> Nothing
            False -> fetched.toBigIntegerExact
    new_builder capacity =
        make_builder_from_java_object_builder (Java_Exports.make_biginteger_builder capacity)
    Column_Fetcher.Value read_cell new_builder

## PRIVATE
text_fetcher : Value_Type -> Column_Fetcher
text_fetcher value_type =
Expand Down Expand Up @@ -145,6 +158,14 @@ default_fetcher_for_value_type value_type =
Value_Type.Time -> time_fetcher
# We currently don't distinguish timestamps without a timezone on the Enso value side.
Value_Type.Date_Time _ -> date_time_fetcher
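# If we can determine that the scale is non-positive (scale <= 0), the values are guaranteed to be integers, so they can be fetched as big integers.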
Value_Type.Decimal _ scale ->
is_guaranteed_integer = scale.is_nothing.not && scale <= 0
case is_guaranteed_integer of
True -> big_integer_fetcher
# If we cannot guarantee that the column is integer, we will fall back to Float values, since there is no BigDecimal implementation yet.
# TODO I think we should add a warning somewhere
False -> double_fetcher
_ -> fallback_fetcher

## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Errors.Unimplemented.Unimplemented

import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Data.Column.Column as Materialized_Column
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Vector_Builder.Vector_Builder
from Standard.Table import Value_Type
Expand Down Expand Up @@ -252,6 +253,29 @@ type Postgres_Dialect
if_replace_params_supports self replace_params ~action =
if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported


## PRIVATE
   Determines the value type under which an existing column is uploaded to
   Postgres.

   Database columns keep their type. For in-memory columns, a `Decimal`
   type that has a scale but no precision is adjusted, because such a type
   cannot be expressed; an `Inexact_Type_Coercion` warning is attached to
   the adjusted type. All other types are returned unchanged.
value_type_for_upload_of_existing_column : Column | Materialized_Column -> Value_Type
value_type_for_upload_of_existing_column self column = case column of
    # Return the type as-is for database columns.
    _ : Column -> column.value_type
    _ : Materialized_Column ->
        base_type = column.value_type
        case base_type of
            Value_Type.Decimal precision scale ->
                # We cannot have a specified scale and no precision, so special handling is needed for this:
                case precision.is_nothing && scale.is_nothing.not of
                    True ->
                        # The largest precision that is declared explicitly for an uploaded column.
                        max_precision = 1000
                        needed_precision = column.java_column.getStorage.getMaxPrecisionStored
                        new_type = case needed_precision <= max_precision of
                            # If the precision is small enough that our number will fit, we create a column with maximum supported precision.
                            True -> Value_Type.Decimal max_precision scale
                            # If the needed precision is too big, we cannot set it, so we set the precision to unlimited. This loses scale.
                            False -> Value_Type.Decimal Nothing Nothing
                        Warning.attach (Inexact_Type_Coercion.Warning base_type new_type) new_type
                    False -> base_type
            _ -> base_type

## PRIVATE
make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ type Postgres_Type_Mapping
SQL_Type.Value Types.REAL "float4"
Value_Type.Float Bits.Bits_64 ->
SQL_Type.Value Types.DOUBLE "float8"
Value_Type.Decimal precision scale ->
SQL_Type.Value Types.DECIMAL "decimal" precision scale
Value_Type.Decimal precision scale -> case precision of
# If precision is not set, scale is also lost because SQL is unable to express a scale without a precision.
Nothing -> SQL_Type.Value Types.DECIMAL "decimal" Nothing Nothing
# Scale can be set or not, if precision is given, so no check needed.
_ -> SQL_Type.Value Types.DECIMAL "decimal" precision scale
Value_Type.Char size variable ->
case variable of
True ->
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Unimplemented.Unimplemented

import Standard.Table.Data.Type.Value_Type.Value_Type
Expand Down Expand Up @@ -90,7 +91,11 @@ type SQL_Type_Mapping

## PRIVATE
default_sql_type_to_text sql_type =
suffix = if sql_type.precision.is_nothing then "" else
if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
" (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
suffix = case sql_type.precision of
Nothing ->
if sql_type.scale.is_nothing.not then Error.throw (Illegal_Argument.Error "It is not possible to specify a scale but no precision in SQL, but got "+sql_type.to_text) else
""
_ : Integer ->
if sql_type.scale.is_nothing then "(" + sql_type.precision.to_text + ")" else
" (" + sql_type.precision.to_text + "," + sql_type.scale.to_text + ")"
sql_type.name.trim + suffix
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Ref.Ref

import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table import Value_Type
from Standard.Table import Value_Type, Aggregate_Column
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all

import project.Connection.Connection.Connection
Expand Down Expand Up @@ -269,6 +268,10 @@ type SQLite_Dialect
if_replace_params_supports self replace_params ~action =
if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported

## PRIVATE
   Existing columns are uploaded with their own value type; this dialect
   makes no adjustment.
value_type_for_upload_of_existing_column : Column -> Value_Type
value_type_for_upload_of_existing_column self column =
    column.value_type

## PRIVATE
make_internal_generator_dialect =
text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace]]+concat_ops+trim_ops
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_State.Illegal_State

polyglot java import java.math.BigDecimal as Java_Big_Decimal
polyglot java import java.sql.PreparedStatement
polyglot java import java.sql.Types as Java_Types

polyglot java import org.enso.base.polyglot.NumericConverter
polyglot java import org.enso.database.JDBCUtils

type Statement_Setter
Expand Down Expand Up @@ -31,7 +33,11 @@ type Statement_Setter
fill_hole_default stmt i value = case value of
Nothing -> stmt.setNull i Java_Types.NULL
_ : Boolean -> stmt.setBoolean i value
_ : Integer -> stmt.setLong i value
_ : Integer -> case NumericConverter.isBigInteger value of
True ->
big_decimal = NumericConverter.bigIntegerAsBigDecimal value
stmt.setBigDecimal i big_decimal
False -> stmt.setLong i value
_ : Decimal -> stmt.setDouble i value
_ : Text -> stmt.setString i value
_ : Date_Time -> JDBCUtils.setZonedDateTime stmt i value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ create_table_implementation connection table_name structure primary_key temporar
Does not check if the table already exists - so if it does, it may fail with
`SQL_Error`. The caller should perform the check for better error handling.
internal_create_table_structure connection table_name structure primary_key temporary on_problems =
aligned_structure = align_structure structure
aligned_structure = align_structure connection structure
resolved_primary_key = resolve_primary_key aligned_structure primary_key
validate_structure connection.base_connection.column_naming_helper aligned_structure <|
create_table_statement = prepare_create_table_statement connection table_name aligned_structure resolved_primary_key temporary on_problems
Expand Down Expand Up @@ -217,21 +217,26 @@ raise_duplicated_primary_key_error source_table primary_key original_panic =

## PRIVATE
align_structure : Database_Table | In_Memory_Table | Vector Column_Description -> Vector Column_Description
align_structure table_or_columns = case table_or_columns of
vector : Vector -> if vector.is_empty then Error.throw (Illegal_Argument.Error "A table with no columns cannot be created. The `structure` must consist of at list one column description.") else
align_structure connection table_or_columns = case table_or_columns of
vector : Vector -> align_vector_structure vector
table : Database_Table -> structure_from_existing_table connection table
table : In_Memory_Table -> structure_from_existing_table connection table

## PRIVATE
   Checks a vector-based table structure: it must be non-empty and every
   entry must be a `Column_Description`. Otherwise an `Illegal_Argument`
   error is raised. On success the checked entries are returned.
align_vector_structure vector =
    # A `Function` entry gets its own message, hinting at a `Column_Description` with missing arguments.
    check_entry entry = case entry of
        _ : Column_Description -> entry
        _ : Function -> Error.throw (Illegal_Argument.Error "The structure should be a vector of Column_Description. Maybe some arguments of Column_Description are missing?")
        _ -> Error.throw (Illegal_Argument.Error "The structure must be an existing Table or vector of Column_Description.")
    case vector.is_empty of
        True -> Error.throw (Illegal_Argument.Error "A table with no columns cannot be created. The `structure` must consist of at list one column description.")
        False -> vector.map check_entry
table : Database_Table -> structure_from_existing_table table
table : In_Memory_Table -> structure_from_existing_table table

## PRIVATE
structure_from_existing_table table =
structure_from_existing_table connection table =
table.columns.map column->
Column_Description.Value column.name column.value_type
value_type = connection.dialect.value_type_for_upload_of_existing_column column
Column_Description.Value column.name value_type

## PRIVATE
Verifies that the provided structure is valid, and runs the provided action
Expand All @@ -255,9 +260,10 @@ validate_structure column_naming_helper structure ~action =
Returns the name of the first column in the provided table structure.
It also verifies that the structure is correct.
Used to provide the default value for `primary_key` in `create_table`.
first_column_name_in_structure structure =
aligned = align_structure structure
aligned.first.name
first_column_name_in_structure structure = case structure of
    # A vector of descriptions is checked first, so a malformed structure surfaces as an error here.
    descriptions : Vector ->
        validated = align_vector_structure descriptions
        validated.first.name
    # For existing tables the first column name is read directly from the table.
    database_table : Database_Table -> database_table.column_names.first
    in_memory_table : In_Memory_Table -> in_memory_table.column_names.first

## PRIVATE
Creates a statement that will create a table with structure determined by the
Expand Down
15 changes: 8 additions & 7 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ type Column
example_from_vector =
Column.from_vector "My Column" [1, 2, 3, 4, 5]
from_vector : Text -> Vector -> Value_Type | Auto -> Column ! Invalid_Value_Type
from_vector name items value_type=Auto =
from_vector (name : Text) (items : Vector) (value_type : Auto | Value_Type = Auto) =
## If the type does not accept date-time-like values, we can skip the
additional logic for polyglot conversions that would normally be used,
which is quite costly - so if we can guarantee it is unnecessary,
Expand Down Expand Up @@ -1118,12 +1118,12 @@ type Column
common_type.if_not_error <|
storage = self.java_column.getStorage
storage_type = Storage.from_value_type_strict common_type
new_st = case default of
new_st = Java_Problems.unpack_value_with_aggregated_problems Problem_Behavior.Report_Warning <| case default of
Column.Value java_col ->
other_storage = java_col.getStorage
storage.fillMissingFrom other_storage storage_type
_ ->
storage.fillMissing default
storage.fillMissing default storage_type
col = Java_Column.new self.name new_st
Column.Value col

Expand Down Expand Up @@ -1768,7 +1768,7 @@ type Column
cast self value_type on_problems=Problem_Behavior.Report_Warning =
Cast_Helpers.check_cast_compatibility self.value_type value_type <|
target_storage_type = Storage.from_value_type value_type on_problems
cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name value_type
cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name target_storage_type
new_storage = self.java_column.getStorage.cast target_storage_type cast_problem_builder.to_java
problems = cast_problem_builder.get_problems
on_problems.attach_problems_before problems <|
Expand Down Expand Up @@ -1937,7 +1937,7 @@ type Column

example_at = Examples.integer_column.at 0
at : Integer -> (Any | Nothing) ! Index_Out_Of_Bounds
at self index =
at self (index : Integer) =
valid_index = (index >= 0) && (index < self.length)
if valid_index.not then Error.throw (Index_Out_Of_Bounds.Error index self.length) else
storage = self.java_column.getStorage
Expand Down Expand Up @@ -2204,9 +2204,10 @@ type Column
run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Text|Nothing -> Boolean -> Column
run_vectorized_many_op column name fallback_fn operands new_name=Nothing skip_nulls=False =
effective_operands = Vector.unify_vector_or_element operands
all_operands = [column]+effective_operands
effective_new_name = new_name.if_nothing <|
naming_helper.function_name name [column]+effective_operands
common_type = Value_Type_Helpers.find_common_type_for_arguments effective_operands
naming_helper.function_name name all_operands
common_type = Value_Type_Helpers.find_common_type_for_arguments all_operands
common_type.if_not_error <|
problem_builder = MapOperationProblemBuilder.new effective_new_name
storage_type = resolve_storage_type common_type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import project.Data.Type.Value_Type.Bits
import project.Data.Type.Value_Type.Value_Type

polyglot java import org.enso.table.data.column.storage.type.IntegerType
polyglot java import org.enso.base.polyglot.NumericConverter

## PRIVATE
Finds the most specific `Value_Type` that can be used to hold the given
Expand All @@ -22,13 +23,16 @@ most_specific_value_type value use_smallest=False =
_ : Date -> Value_Type.Date
_ : Time_Of_Day -> Value_Type.Time
_ : Date_Time -> Value_Type.Date_Time
i : Integer -> case use_smallest of
False -> Value_Type.Integer Bits.Bits_64
True ->
storage_type = IntegerType.smallestFitting i
value_type = Storage.to_value_type storage_type
# We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
i : Integer ->
case NumericConverter.isBigInteger i of
False -> case use_smallest of
False -> Value_Type.Integer Bits.Bits_64
True ->
storage_type = IntegerType.smallestFitting i
value_type = Storage.to_value_type storage_type
# We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
True -> Value_Type.Decimal precision=Nothing scale=0
text : Text -> case use_smallest of
False -> Value_Type.Char size=Nothing variable_length=True
True -> Value_Type.Char size=text.length variable_length=False
Expand Down
Loading

0 comments on commit 8b6e70b

Please sign in to comment.