From bf39198b2dc5f338102c296b564d6e6f8e63d8f5 Mon Sep 17 00:00:00 2001 From: Charlie Bini <5003326+cbini@users.noreply.github.com> Date: Thu, 14 Sep 2023 19:55:45 +0000 Subject: [PATCH 1/2] rework partitions --- src/dbt/kippmiami/dbt_project.yml | 3 + src/dbt/kippmiami/models/fldoe/sources.yml | 15 ++ .../models/fldoe/staging/stg_fldoe__fast.sql | 152 ++++++++++++++++++ src/teamster/core/fivetran/assets.py | 8 +- .../kippmiami/fldoe/config/assets.yaml | 28 ++-- tests/fldoe/test_fldoe_resource.py | 22 ++- 6 files changed, 210 insertions(+), 18 deletions(-) create mode 100644 src/dbt/kippmiami/models/fldoe/sources.yml create mode 100644 src/dbt/kippmiami/models/fldoe/staging/stg_fldoe__fast.sql diff --git a/src/dbt/kippmiami/dbt_project.yml b/src/dbt/kippmiami/dbt_project.yml index 27c2c5950e..aad28e8116 100644 --- a/src/dbt/kippmiami/dbt_project.yml +++ b/src/dbt/kippmiami/dbt_project.yml @@ -36,6 +36,9 @@ models: extracts: +schema: extracts +materialized: table + fldoe: + +schema: fldoe + +materialized: table deanslist: +schema: deanslist iready: diff --git a/src/dbt/kippmiami/models/fldoe/sources.yml b/src/dbt/kippmiami/models/fldoe/sources.yml new file mode 100644 index 0000000000..8d80d228a1 --- /dev/null +++ b/src/dbt/kippmiami/models/fldoe/sources.yml @@ -0,0 +1,15 @@ +version: 2 + +sources: + - name: fldoe + schema: kippmiami_fldoe + tags: + - stage_external_sources + tables: + - name: src_fldoe__fast + external: + location: "{{ var('cloud_storage_uri_base') }}/fldoe/fast/*" + options: + hive_partition_uri_prefix: "{{ var('cloud_storage_uri_base') }}/fldoe/fast/" + format: AVRO + enable_logical_types: true diff --git a/src/dbt/kippmiami/models/fldoe/staging/stg_fldoe__fast.sql b/src/dbt/kippmiami/models/fldoe/staging/stg_fldoe__fast.sql new file mode 100644 index 0000000000..21c5a7ab05 --- /dev/null +++ b/src/dbt/kippmiami/models/fldoe/staging/stg_fldoe__fast.sql @@ -0,0 +1,152 @@ +select + _dagster_partition_school_year, + _dagster_partition_term, + 
student_id, + local_id, + student_name, + test_reason, + test_oppnumber, + primary_exceptionality, + section_504, + english_language_learner_ell_status, + ethnicity, + gender_k_12, + gender_postsecondary_only, + enrolled_district, + enrolled_grade, + enrolled_school, + testing_location, + safe_cast(date_taken as date) as date_taken, + safe_cast(test_completion_date as date) as test_completion_date, + {# student_dob, #} + coalesce( + fast_grade_3_ela_reading_achievement_level, + grade_3_fast_ela_reading_achievement_level, + fast_grade_4_ela_reading_achievement_level, + grade_4_fast_ela_reading_achievement_level, + fast_grade_5_ela_reading_achievement_level, + grade_5_fast_ela_reading_achievement_level, + fast_grade_6_ela_reading_achievement_level, + grade_6_fast_ela_reading_achievement_level, + fast_grade_7_ela_reading_achievement_level, + grade_7_fast_ela_reading_achievement_level, + fast_grade_8_ela_reading_achievement_level, + grade_8_fast_ela_reading_achievement_level + ) as ela_reading_achievement_level, + + coalesce( + fast_grade_3_ela_reading_percentile_rank, + grade_3_fast_ela_reading_percentile_rank, + fast_grade_4_ela_reading_percentile_rank, + grade_4_fast_ela_reading_percentile_rank, + fast_grade_5_ela_reading_percentile_rank, + grade_5_fast_ela_reading_percentile_rank, + fast_grade_6_ela_reading_percentile_rank, + grade_6_fast_ela_reading_percentile_rank, + fast_grade_7_ela_reading_percentile_rank, + grade_7_fast_ela_reading_percentile_rank, + fast_grade_8_ela_reading_percentile_rank, + grade_8_fast_ela_reading_percentile_rank + ) as ela_reading_percentile_rank, + + coalesce( + fast_grade_3_ela_reading_scale_score, + grade_3_fast_ela_reading_scale_score.long_value, + safe_cast(grade_3_fast_ela_reading_scale_score.string_value as int), + fast_grade_4_ela_reading_scale_score, + grade_4_fast_ela_reading_scale_score, + fast_grade_5_ela_reading_scale_score, + grade_5_fast_ela_reading_scale_score, + fast_grade_6_ela_reading_scale_score, + 
grade_6_fast_ela_reading_scale_score, + fast_grade_7_ela_reading_scale_score, + grade_7_fast_ela_reading_scale_score, + fast_grade_8_ela_reading_scale_score, + grade_8_fast_ela_reading_scale_score + ) as ela_reading_scale_score, + + coalesce( + fast_grade_3_mathematics_achievement_level, + grade_3_fast_mathematics_achievement_level, + fast_grade_4_mathematics_achievement_level, + grade_4_fast_mathematics_achievement_level, + fast_grade_5_mathematics_achievement_level, + grade_5_fast_mathematics_achievement_level, + fast_grade_6_mathematics_achievement_level, + grade_6_fast_mathematics_achievement_level, + fast_grade_7_mathematics_achievement_level, + grade_7_fast_mathematics_achievement_level, + fast_grade_8_mathematics_achievement_level, + grade_8_fast_mathematics_achievement_level + ) as mathematics_achievement_level, + + coalesce( + fast_grade_3_mathematics_percentile_rank, + grade_3_fast_mathematics_percentile_rank, + fast_grade_4_mathematics_percentile_rank, + grade_4_fast_mathematics_percentile_rank, + fast_grade_5_mathematics_percentile_rank, + grade_5_fast_mathematics_percentile_rank, + fast_grade_6_mathematics_percentile_rank, + grade_6_fast_mathematics_percentile_rank, + fast_grade_7_mathematics_percentile_rank, + grade_7_fast_mathematics_percentile_rank, + fast_grade_8_mathematics_percentile_rank, + grade_8_fast_mathematics_percentile_rank + ) as mathematics_percentile_rank, + + coalesce( + safe_cast(fast_grade_3_mathematics_scale_score as int), + grade_3_fast_mathematics_scale_score.long_value, + safe_cast(grade_3_fast_mathematics_scale_score.string_value as int), + fast_grade_4_mathematics_scale_score.long_value, + safe_cast(fast_grade_4_mathematics_scale_score.string_value as int), + grade_4_fast_mathematics_scale_score.long_value, + safe_cast(grade_4_fast_mathematics_scale_score.string_value as int), + fast_grade_5_mathematics_scale_score.long_value, + safe_cast(fast_grade_5_mathematics_scale_score.string_value as int), + 
grade_5_fast_mathematics_scale_score.long_value, + safe_cast(grade_5_fast_mathematics_scale_score.string_value as int), + fast_grade_6_mathematics_scale_score.long_value, + safe_cast(fast_grade_6_mathematics_scale_score.string_value as int), + grade_6_fast_mathematics_scale_score.long_value, + safe_cast(grade_6_fast_mathematics_scale_score.string_value as int), + fast_grade_7_mathematics_scale_score.long_value, + safe_cast(fast_grade_7_mathematics_scale_score.string_value as int), + grade_7_fast_mathematics_scale_score.long_value, + safe_cast(grade_7_fast_mathematics_scale_score.string_value as int), + fast_grade_8_mathematics_scale_score.long_value, + safe_cast(fast_grade_8_mathematics_scale_score.string_value as int), + grade_8_fast_mathematics_scale_score.long_value, + safe_cast(grade_8_fast_mathematics_scale_score.string_value as int) + ) as mathematics_scale_score, + + `1_reading_prose_and_poetry_performance`, + `2_reading_informational_text_performance`, + `3_reading_across_genres_vocabulary_performance`, + + `1_number_sense_and_additive_reasoning_performance`, + `1_number_sense_and_operations_and_algebraic_reasoning_performance`, + `1_number_sense_and_operations_and_probability_performance`, + `1_number_sense_and_operations_performance`, + `1_number_sense_and_operations_with_whole_numbers_performance`, + `2_number_sense_and_multiplicative_reasoning_performance`, + `2_number_sense_and_operations_with_fractions_and_decimals_performance`, + + `2_algebraic_reasoning_performance`, + `3_algebraic_reasoning_performance`, + + `2_proportional_reasoning_and_relationships_performance`, + + `3_fractional_reasoning_performance`, + + `3_geometric_reasoning_data_analysis_and_probability_performance`, + `3_geometric_reasoning_measurement_and_data_analysis_and_probability_performance`, + `3_geometric_reasoning_performance`, + `4_geometric_reasoning_measurement_and_data_analysis_and_probability_performance`, + `4_geometric_reasoning_performance`, + + 
`3_linear_relationships_data_analysis_and_functions_performance`, + + `4_data_analysis_and_probability_performance`, +from {{ source('fldoe', 'src_fldoe__fast') }} diff --git a/src/teamster/core/fivetran/assets.py b/src/teamster/core/fivetran/assets.py index cb79a98c04..f552679185 100644 --- a/src/teamster/core/fivetran/assets.py +++ b/src/teamster/core/fivetran/assets.py @@ -16,9 +16,9 @@ def generate_materializations( - tracked_asset_keys: list[AssetKey], + asset_keys: list[AssetKey], ) -> Iterator[AssetMaterialization]: - for asset_key in tracked_asset_keys: + for asset_key in asset_keys: yield AssetMaterialization( asset_key=asset_key, description="Table generated via Fivetran sync" ) @@ -79,7 +79,7 @@ def _assets(context: OpExecutionContext) -> Any: output_name="_".join(materialization.asset_key.path), metadata=materialization.metadata, ) - # else: - # yield materialization + else: + yield materialization return [_assets] diff --git a/src/teamster/kippmiami/fldoe/config/assets.yaml b/src/teamster/kippmiami/fldoe/config/assets.yaml index 269e0f5ea0..f42fd6b979 100644 --- a/src/teamster/kippmiami/fldoe/config/assets.yaml +++ b/src/teamster/kippmiami/fldoe/config/assets.yaml @@ -1,12 +1,22 @@ assets: - asset_name: fast - remote_filepath: /teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year>\d+)/(?P<term>PM\d) - remote_file_regex: .*\.csv + remote_filepath: /teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year_term>) + remote_file_regex: .*(?P<grade_level_subject>).*\.csv partition_keys: - school_year: - - "2023" - - "2022" - term: - - PM1 - - PM2 - - PM3 + school_year_term: + - 2022/PM1 + - 2022/PM2 + - 2022/PM3 + grade_level_subject: + - 3\w*ELAReading + - 3\w*Mathematics + - 4\w*ELAReading + - 4\w*Mathematics + - 5\w*ELAReading + - 5\w*Mathematics + - 6\w*ELAReading + - 6\w*Mathematics + - 7\w*ELAReading + - 7\w*Mathematics + - 8\w*ELAReading + - 8\w*Mathematics diff --git a/tests/fldoe/test_fldoe_resource.py b/tests/fldoe/test_fldoe_resource.py index d3e398abb8..42ffc0ffaf 100644 --- 
a/tests/fldoe/test_fldoe_resource.py +++ b/tests/fldoe/test_fldoe_resource.py @@ -39,16 +39,28 @@ def _test_resource(remote_filepath, remote_file_regex): def test_fast(): - remote_filepath = ( - r"/teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year>\d+)/(?P<term>PM\d)" - ) + remote_filepath = r"/teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year_term>)" + remote_file_regex = r"(?P<grade_level_subject>)\.csv" remote_filepath_regex_composed = regex_pattern_replace( - pattern=remote_filepath, replacements={"school_year": "2022", "term": "PM3"} + pattern=remote_filepath, + replacements={ + "school_year_term": "2022/PM1", + "grade_level_subject": "3\w*ELAReading", + }, + ) + + remote_file_regex_composed = regex_pattern_replace( + pattern=remote_file_regex, + replacements={ + "school_year_term": "2022/PM1", + "grade_level_subject": "3\w*ELAReading", + }, ) print(remote_filepath_regex_composed) _test_resource( - remote_filepath=remote_filepath_regex_composed, remote_file_regex=r".*\.csv" + remote_filepath=remote_filepath_regex_composed, + remote_file_regex=remote_file_regex_composed, ) From bfe08226f38d536f4d77a774c7ed5a6f96f19ba9 Mon Sep 17 00:00:00 2001 From: Charlie Bini <5003326+cbini@users.noreply.github.com> Date: Thu, 14 Sep 2023 20:39:42 +0000 Subject: [PATCH 2/2] fix regex --- src/teamster/kippmiami/fldoe/assets.py | 8 +++++--- tests/fldoe/test_fldoe_resource.py | 19 +++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/teamster/kippmiami/fldoe/assets.py b/src/teamster/kippmiami/fldoe/assets.py index 08ccc0b6b0..b6839d1825 100644 --- a/src/teamster/kippmiami/fldoe/assets.py +++ b/src/teamster/kippmiami/fldoe/assets.py @@ -19,10 +19,12 @@ ssh_resource_key="ssh_couchdrop", partitions_def=MultiPartitionsDefinition( { - "school_year": StaticPartitionsDefinition( - a["partition_keys"]["school_year"] + "school_year_term": StaticPartitionsDefinition( + a["partition_keys"]["school_year_term"] + ), + "grade_level_subject": StaticPartitionsDefinition( + 
a["partition_keys"]["grade_level_subject"] ), - "term": StaticPartitionsDefinition(a["partition_keys"]["term"]), } ), **a, diff --git a/tests/fldoe/test_fldoe_resource.py b/tests/fldoe/test_fldoe_resource.py index 42ffc0ffaf..fe5bb6d69f 100644 --- a/tests/fldoe/test_fldoe_resource.py +++ b/tests/fldoe/test_fldoe_resource.py @@ -39,23 +39,18 @@ def _test_resource(remote_filepath, remote_file_regex): def test_fast(): - remote_filepath = r"/teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year_term>)" - remote_file_regex = r"(?P<grade_level_subject>)\.csv" + partition_keys = { + "school_year_term": "2022/PM2", + "grade_level_subject": "3\w*ELAReading", + } remote_filepath_regex_composed = regex_pattern_replace( - pattern=remote_filepath, - replacements={ - "school_year_term": "2022/PM1", - "grade_level_subject": "3\w*ELAReading", - }, + pattern=r"/teamster-kippmiami/couchdrop/fldoe/fast/(?P<school_year_term>)", + replacements=partition_keys, ) remote_file_regex_composed = regex_pattern_replace( - pattern=remote_file_regex, - replacements={ - "school_year_term": "2022/PM1", - "grade_level_subject": "3\w*ELAReading", - }, + pattern=r".*(?P<grade_level_subject>).*\.csv", replacements=partition_keys ) print(remote_filepath_regex_composed)