Skip to content

Commit

Permalink
Handle ints and longs in normalization (#14362)
Browse files Browse the repository at this point in the history
* generate airbyte_type:integer

* normalization accepts `airbyte_type: integer`

* handles ints+longs

* update avro for consistency

* delete long type for now, treat all ints as longs

* update avro type mappings

{type:number, airbyte_type:integer} -> long
{type:number, airbyte_type:big_integer} -> string (i.e. "unbounded integer")

* fix test

* remove long handling

* Revert "remove long handling"

This reverts commit 33ade8d.

* Revert "update avro type mappings"

This reverts commit 5b0349b.

* Revert "delete long type for now, treat all ints as longs"

This reverts commit 018efd4.

* Revert "update avro for consistency"

This reverts commit bcf47c6.

* newline@eof

* update test

* slightly better local tests

* fix test

* missed a few cases

* postgres tests use correct hostnames

* fix normalization

* fix int macro

* add test case

* normalization test output

* handle int/long correctly

* fix types for other DBs

* uint32 -> bigint; tests

* add type value assertions

* more test updates

* regenerate output

* reconcile big_integer to match docs

* update comment

* fix type

* fix mysql constructor call

* bigint only has 38 digits

* fix s3 ints, fix DAT test case

* big_integer should be string

* reduce to 28 digit big_ints

* fix test setup, mysql

* kill big_integer tests

* regenerate output

* version bumps

* auto-bump connector version [ci skip]

* auto-bump connector version [ci skip]

* auto-bump connector version [ci skip]

* auto-bump connector version [ci skip]

Co-authored-by: Octavia Squidington III <[email protected]>
  • Loading branch information
edgao and octavia-squidington-iii authored Jul 26, 2022
1 parent d465461 commit b2dd470
Show file tree
Hide file tree
Showing 143 changed files with 1,242 additions and 610 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
- name: BigQuery
sourceDefinitionId: bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c
dockerRepository: airbyte/source-bigquery
dockerImageTag: 0.1.9
dockerImageTag: 0.2.0
documentationUrl: https://docs.airbyte.io/integrations/sources/bigquery
icon: bigquery.svg
sourceType: database
Expand Down Expand Up @@ -604,7 +604,7 @@
- name: MySQL
sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
dockerRepository: airbyte/source-mysql
dockerImageTag: 0.5.17
dockerImageTag: 0.6.0
documentationUrl: https://docs.airbyte.io/integrations/sources/mysql
icon: mysql.svg
sourceType: database
Expand Down Expand Up @@ -754,7 +754,7 @@
- name: Postgres
sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
dockerRepository: airbyte/source-postgres
dockerImageTag: 0.4.37
dockerImageTag: 0.4.38
documentationUrl: https://docs.airbyte.io/integrations/sources/postgres
icon: postgresql.svg
sourceType: database
Expand Down Expand Up @@ -961,7 +961,7 @@
- name: TiDB
sourceDefinitionId: 0dad1a35-ccf8-4d03-b73e-6788c00b13ae
dockerRepository: airbyte/source-tidb
dockerImageTag: 0.1.5
dockerImageTag: 0.2.0
documentationUrl: https://docs.airbyte.io/integrations/sources/tidb
icon: tidb.svg
sourceType: database
Expand Down
8 changes: 4 additions & 4 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-bigquery:0.1.9"
- dockerImage: "airbyte/source-bigquery:0.2.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/bigquery"
connectionSpecification:
Expand Down Expand Up @@ -5755,7 +5755,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-mysql:0.5.17"
- dockerImage: "airbyte/source-mysql:0.6.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql"
connectionSpecification:
Expand Down Expand Up @@ -7040,7 +7040,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-postgres:0.4.37"
- dockerImage: "airbyte/source-postgres:0.4.38"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
connectionSpecification:
Expand Down Expand Up @@ -9343,7 +9343,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-tidb:0.1.5"
- dockerImage: "airbyte/source-tidb:0.2.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/tidb"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ public Date getDateValue(final FieldValue fieldValue, final DateFormat dateForma
public JsonSchemaType getJsonType(final StandardSQLTypeName bigQueryType) {
return switch (bigQueryType) {
case BOOL -> JsonSchemaType.BOOLEAN;
case INT64, FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaType.NUMBER;
case INT64 -> JsonSchemaType.INTEGER;
case FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaType.NUMBER;
case STRING, BYTES, TIMESTAMP, DATE, TIME, DATETIME -> JsonSchemaType.STRING;
case ARRAY -> JsonSchemaType.ARRAY;
case STRUCT -> JsonSchemaType.OBJECT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ public JDBCType getFieldType(final JsonNode field) {
}

@Override
public JsonSchemaType getJsonType(JDBCType jdbcType) {
public JsonSchemaType getJsonType(final JDBCType jdbcType) {
return switch (jdbcType) {
case BIT, BOOLEAN -> JsonSchemaType.BOOLEAN;
case TINYINT, SMALLINT -> JsonSchemaType.NUMBER;
case INTEGER -> JsonSchemaType.NUMBER;
case BIGINT -> JsonSchemaType.NUMBER;
case TINYINT, SMALLINT -> JsonSchemaType.INTEGER;
case INTEGER -> JsonSchemaType.INTEGER;
case BIGINT -> JsonSchemaType.INTEGER;
case FLOAT, DOUBLE -> JsonSchemaType.NUMBER;
case REAL -> JsonSchemaType.NUMBER;
case NUMERIC, DECIMAL -> JsonSchemaType.NUMBER;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,9 @@ private static void assertExpectedOutputTypes(final Connection connection) throw
final Map<String, JsonSchemaType> expected = ImmutableMap.<String, JsonSchemaType>builder()
.put("bit", JsonSchemaType.BOOLEAN)
.put("boolean", JsonSchemaType.BOOLEAN)
.put("smallint", JsonSchemaType.NUMBER)
.put("int", JsonSchemaType.NUMBER)
.put("bigint", JsonSchemaType.NUMBER)
.put("smallint", JsonSchemaType.INTEGER)
.put("int", JsonSchemaType.INTEGER)
.put("bigint", JsonSchemaType.INTEGER)
.put("float", JsonSchemaType.NUMBER)
.put("double", JsonSchemaType.NUMBER)
.put("real", JsonSchemaType.NUMBER)
Expand Down
4 changes: 2 additions & 2 deletions airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.2.11
LABEL io.airbyte.name=airbyte/normalization
LABEL io.airbyte.version=0.2.12
LABEL io.airbyte.name=airbyte/normalization
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@

{# int ------------------------------------------------- #}
{% macro default__type_int() %}
int
{% endmacro %}

{% macro mysql__type_int() %}
signed
{% endmacro %}

Expand Down Expand Up @@ -116,6 +120,48 @@
{% endmacro %}


{# very_large_integer --------------------------------------- --#}
{#
Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
Our type terminology unfortunately collides with DB terminology (i.e. "big_integer" means different things in different contexts)
so this macro needs to be called very_large_integer.
#}
{%- macro type_very_large_integer() -%}
{{ adapter.dispatch('type_very_large_integer')() }}
{%- endmacro -%}
{% macro default__type_very_large_integer() %}
numeric
{% endmacro %}
{% macro snowflake__type_very_large_integer() %}
numeric
{% endmacro %}
{% macro mysql__type_very_large_integer() %}
decimal(38, 0)
{% endmacro %}
{% macro clickhouse__type_very_large_integer() %}
decimal128(0)
{% endmacro %}
{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
time
{% endmacro %}
{%- macro sqlserver__type_timestamp() -%}
{#-- in TSQL timestamp is really datetime --#}
{#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
datetime
{%- endmacro -%}
{% macro clickhouse__type_timestamp() %}
DateTime64
{% endmacro %}
{# timestamp with time zone ------------------------------------------------- #}
{%- macro type_timestamp_with_timezone() -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ vars:
multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts
types_testing_ab1: test_normalization._airbyte_raw_types_testing
types_testing_ab2: test_normalization._airbyte_raw_types_testing
types_testing_stg: test_normalization._airbyte_raw_types_testing
types_testing_scd: test_normalization._airbyte_raw_types_testing
types_testing: test_normalization._airbyte_raw_types_testing
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ sources:
- name: _airbyte_raw_multiple_column_names_conflicts
- name: _airbyte_raw_pos_dedup_cdcx
- name: _airbyte_raw_renamed_dedup_cdc_excluded
- name: _airbyte_raw_types_testing
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name: airbyte_utils
version: "1.0"
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- models
- models
docs-paths:
- docs
- docs
analysis-paths:
- analysis
- analysis
test-paths:
- tests
- tests
seed-paths:
- data
- data
macro-paths:
- macros
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
- build
- dbt_modules
quoting:
database: true
schema: false
Expand All @@ -42,10 +42,10 @@ models:
+tags: airbyte_internal_views
+materialized: view
dispatch:
- macro_namespace: dbt_utils
search_order:
- airbyte_utils
- dbt_utils
- macro_namespace: dbt_utils
search_order:
- airbyte_utils
- dbt_utils
vars:
json_column: _airbyte_data
models_to_source:
Expand Down Expand Up @@ -83,3 +83,8 @@ vars:
multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts
types_testing_ab1: test_normalization._airbyte_raw_types_testing
types_testing_ab2: test_normalization._airbyte_raw_types_testing
types_testing_stg: test_normalization._airbyte_raw_types_testing
types_testing_scd: test_normalization._airbyte_raw_types_testing
types_testing: test_normalization._airbyte_raw_types_testing
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ sources:
- name: _airbyte_raw_multiple_column_names_conflicts
- name: _airbyte_raw_pos_dedup_cdcx
- name: _airbyte_raw_renamed_dedup_cdc_excluded
- name: _airbyte_raw_types_testing
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ vars:
unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias
unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias
unnest_alias: test_normalization._airbyte_raw_unnest_alias
arrays_ab1: test_normalization._airbyte_raw_arrays
arrays_ab2: test_normalization._airbyte_raw_arrays
arrays_ab3: test_normalization._airbyte_raw_arrays
arrays: test_normalization._airbyte_raw_arrays
nested_stream_with_co_2g_names_partition_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names
nested_stream_with_co_2g_names_partition_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names
nested_stream_with_co_2g_names_partition_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names
Expand All @@ -91,6 +95,10 @@ vars:
unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias
unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias
unnest_alias_children: test_normalization._airbyte_raw_unnest_alias
arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays
arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays
arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays
arrays_nested_array_parent: test_normalization._airbyte_raw_arrays
nested_stream_with_co_3double_array_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names
nested_stream_with_co_3double_array_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names
nested_stream_with_co_3double_array_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ sources:
schema: false
identifier: false
tables:
- name: _airbyte_raw_arrays
- name: _airbyte_raw_conflict_stream_array
- name: _airbyte_raw_conflict_stream_name
- name: _airbyte_raw_conflict_stream_scalar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,8 @@ vars:
multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts
multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts
types_testing_ab1: test_normalization._airbyte_raw_types_testing
types_testing_ab2: test_normalization._airbyte_raw_types_testing
types_testing_stg: test_normalization._airbyte_raw_types_testing
types_testing_scd: test_normalization._airbyte_raw_types_testing
types_testing: test_normalization._airbyte_raw_types_testing
Loading

0 comments on commit b2dd470

Please sign in to comment.