forked from dbt-labs/dbt-project-evaluator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbt_project.yml
182 lines (163 loc) · 5.62 KB
/
dbt_project.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Package identity and the dbt versions it declares compatibility with.
name: "dbt_project_evaluator"
version: "1.0.0"
config-version: 2
require-dbt-version:
  - ">=1.6.0-rc1"
  - "<2.0.0"

# Resource directories, one entry per resource type.
model-paths:
  - "models"
analysis-paths:
  - "analysis"
test-paths:
  - "tests"
seed-paths:
  - "seeds"
macro-paths:
  - "macros"
snapshot-paths:
  - "snapshots"

# Directory which will store compiled SQL files.
target-path: "target"

# Directories to be removed by `dbt clean`.
clean-targets:
  - "target"
  - "dbt_packages"
# Dispatch rule for the `dbt` macro namespace: this package is listed
# ahead of `dbt` in the search order.
dispatch:
  - macro_namespace: dbt
    search_order:
      - "dbt_project_evaluator"
      - "dbt"
# Model configuration for the dbt_project_evaluator package.
# Keys without a `+` prefix are folder or model names; keys with a `+` prefix
# are configs applied at that level.
models:
  dbt_project_evaluator:
    +schema: dbt_project_evaluator
    # Package-wide default: tables on DuckDB, views on every other adapter.
    +materialized: "{{ 'table' if target.type in ['duckdb'] else 'view' }}"

    marts:
      core:
        int_all_graph_resources:
          +materialized: table
        int_direct_relationships:
          # required for BigQuery, Redshift, and Databricks for performance/memory reasons
          +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
        int_all_dag_relationships:
          # required for BigQuery, Redshift, and Databricks for performance/memory reasons
          +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"

      # Disable entire coverage suite.
      # Tests need to be enabled one by one or for the entire suite.
      # As of the first half of 2024 only a handful will be covered at the NYT.
      dag:
        +materialized: table
        fct_staging_dependent_on_staging:
          +enabled: false
        fct_source_fanout:
          +enabled: false
        fct_rejoining_of_upstream_concepts:
          +enabled: false
        fct_model_fanout:
          +enabled: false
        fct_marts_or_intermediate_dependent_on_source:
          +enabled: false
        fct_direct_join_to_source:
          +enabled: false
        fct_duplicate_sources:
          +enabled: true
        fct_hard_coded_references:
          +enabled: true
        fct_multiple_sources_joined:
          +enabled: false
        fct_root_models:
          +enabled: false
        fct_staging_dependent_on_marts_or_intermediate:
          +enabled: false
        fct_unused_sources:
          +enabled: false
        fct_too_many_joins:
          +enabled: false

      tests:
        fct_missing_primary_key_tests:
          +enabled: true
        fct_test_coverage:
          +enabled: false

      documentation:
        fct_undocumented_models:
          +enabled: true
        fct_documentation_coverage:
          +enabled: false
        fct_undocumented_source_tables:
          +enabled: true
        fct_undocumented_sources:
          +enabled: true

      structure:
        fct_model_naming_conventions:
          +enabled: false
        fct_source_directories:
          +enabled: true
        fct_model_directories:
          +enabled: true
        fct_test_directories:
          +enabled: true

      performance:
        fct_chained_views_dependencies:
          +enabled: false
        fct_exposure_parents_materializations:
          +enabled: true

      governance:
        fct_public_models_without_contracts:
          +enabled: true
        fct_exposures_dependent_on_private_models:
          +enabled: true
        fct_undocumented_public_models:
          +enabled: true

    staging:
      graph:
        stg_node_relationships:
          +materialized: table
      variables:
        stg_naming_convention_folders:
          # required for Redshift because listagg runs only on tables
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"
        stg_naming_convention_prefixes:
          # required for Redshift because listagg runs only on tables
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"
# Test-level configuration.
tests:
  # Override the schema so that downstream packages use dbt_project_evaluator.
  +schema: "dbt_project_evaluator"
# Variables read by the evaluator models and macros.
vars:
  # NOTE(review): presumably the packages skipped by the evaluator; confirm
  # against the package documentation.
  exclude_packages:
    - "dig_dbt_sources"

  # -- Tests and docs coverage variables --
  documentation_coverage_target: 100
  test_coverage_target: 100
  primary_key_test_macros:
    - ["dbt.test_unique", "dbt.test_not_null"]
    - ["dbt_utils.test_unique_combination_of_columns"]

  # -- Graph variables --
  # Node types to test for primary key coverage.
  # Acceptable node types: model, source, snapshot, seed.
  enforced_primary_key_node_types:
    - "model"

  # -- DAG variables --
  models_fanout_threshold: 3
  too_many_joins_threshold: 7

  # -- Naming conventions variables --
  # To add a new "model type", update the variable model_types and create new
  # variables named <model_type>_folder_name and/or <model_type>_prefixes.
  model_types:
    - "base"
    - "staging"
    - "intermediate"
    - "marts"
    - "other"

  base_folder_name: "base"
  staging_folder_name: "staging"
  intermediate_folder_name: "intermediate"
  marts_folder_name: "marts"

  base_prefixes: ["base_"]
  staging_prefixes: ["stg_"]
  intermediate_prefixes: ["int_"]
  marts_prefixes: ["fct_", "dim_"]
  other_prefixes: ["rpt_"]

  # -- Performance variables --
  chained_views_threshold: "{{ 5 if target.type not in ['athena', 'trino'] else 4 }}"

  # -- Execution variables --
  insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}"
  max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino'] else -1 }}"

  # -- Code complexity variables --
  comment_chars:
    - "--"
  # Cost assigned to each SQL token.
  token_costs:
    "and": 0.1
    "or": 0.1
    "when": 0.5
    "coalesce": 1
    "distinct": 1
    "greatest": 1
    "least": 1
    "group": 1
    "join": 1
    "order": 1
    "select": 1
    "where": 1
    "having": 2
    "flatten": 3
    "unnest": 3
    "pivot": 3
    "partition by": 3
    "qualify": 3