# dbt_project.yml

# Project identity and the range of dbt versions this project accepts.
name: "dbt_project_evaluator"
version: "1.0.0"
config-version: 2

require-dbt-version:
  - ">=1.6.0-rc1"
  - "<2.0.0"

# Directories for each resource type.
model-paths:
  - "models"
analysis-paths:
  - "analysis"
test-paths:
  - "tests"
seed-paths:
  - "seeds"
macro-paths:
  - "macros"
snapshot-paths:
  - "snapshots"

# Directory which will store compiled SQL files.
target-path: "target"

# Directories to be removed by `dbt clean`.
clean-targets:
  - "target"
  - "dbt_packages"


# Lookup order for macros dispatched under the `dbt` namespace:
# `dbt_project_evaluator` first, then `dbt`.
dispatch:
  - macro_namespace: dbt
    search_order:
      - 'dbt_project_evaluator'
      - 'dbt'

models:
  dbt_project_evaluator:
    +schema: dbt_project_evaluator
    # Package-wide default materialization: `table` when the target type is
    # duckdb, `view` otherwise. Folders and models below may override it.
    +materialized: "{{ 'table' if target.type in ['duckdb'] else 'view' }}"
    marts:
      core:
        # Always materialized as a table, whatever the target.
        int_all_graph_resources:
          +materialized: table
        # Table on BigQuery, Redshift, and Databricks for performance/memory
        # reasons; view on every other target.
        int_direct_relationships:
          +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
        # Table on BigQuery, Redshift, and Databricks for performance/memory
        # reasons; view on every other target.
        int_all_dag_relationships:
          +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
      # Most of the evaluator suite is disabled below.
      # Checks need to be enabled one by one, or for the entire suite.
      # As of the first half of 2024, only a handful are covered at the NYT.
      dag:
        # Every model in this folder is materialized as a table.
        +materialized: table

        # Per-model switches, sorted alphabetically by model name.
        fct_direct_join_to_source:
          +enabled: false
        fct_duplicate_sources:
          +enabled: true
        fct_hard_coded_references:
          +enabled: true
        fct_marts_or_intermediate_dependent_on_source:
          +enabled: false
        fct_model_fanout:
          +enabled: false
        fct_multiple_sources_joined:
          +enabled: false
        fct_rejoining_of_upstream_concepts:
          +enabled: false
        fct_root_models:
          +enabled: false
        fct_source_fanout:
          +enabled: false
        fct_staging_dependent_on_marts_or_intermediate:
          +enabled: false
        fct_staging_dependent_on_staging:
          +enabled: false
        fct_too_many_joins:
          +enabled: false
        fct_unused_sources:
          +enabled: false
      tests:
        # Per-model switches for the test-related checks.
        fct_missing_primary_key_tests:
          +enabled: true
        fct_test_coverage:
          +enabled: false
      documentation:
        # Per-model switches, sorted alphabetically by model name.
        fct_documentation_coverage:
          +enabled: false
        fct_undocumented_models:
          +enabled: true
        fct_undocumented_source_tables:
          +enabled: true
        fct_undocumented_sources:
          +enabled: true
      structure:
        # Per-model switches, sorted alphabetically by model name.
        fct_model_directories:
          +enabled: true
        fct_model_naming_conventions:
          +enabled: false
        fct_source_directories:
          +enabled: true
        fct_test_directories:
          +enabled: true
      performance:
        # Per-model switches for the performance checks.
        fct_chained_views_dependencies:
          +enabled: false
        fct_exposure_parents_materializations:
          +enabled: true
      governance:
        # All governance checks are enabled; sorted alphabetically by model name.
        fct_exposures_dependent_on_private_models:
          +enabled: true
        fct_public_models_without_contracts:
          +enabled: true
        fct_undocumented_public_models:
          +enabled: true

    # Materialization overrides for individual staging models.
    staging:
      graph:
        # Always materialized as a table, whatever the target.
        stg_node_relationships:
          +materialized: table
      variables:
        stg_naming_convention_folders:
          # required for Redshift because listagg runs only on tables
          # (table on Redshift, view on every other target)
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"
        stg_naming_convention_prefixes:
          # required for Redshift because listagg runs only on tables
          # (table on Redshift, view on every other target)
          +materialized: "{{ 'table' if target.type == 'redshift' else 'view' }}"

tests:
  # Override the schema so that downstream packages use `dbt_project_evaluator`.
  +schema: dbt_project_evaluator

vars:
  exclude_packages:
    - "dig_dbt_sources"

  # -- Tests and docs coverage variables --
  documentation_coverage_target: 100
  test_coverage_target: 100

  # Each inner list is one set of test macros.
  primary_key_test_macros:
    - ["dbt.test_unique", "dbt.test_not_null"]
    - ["dbt_utils.test_unique_combination_of_columns"]

  # -- Graph variables --
  # Node types to test for primary key coverage.
  # Acceptable node types: model, source, snapshot, seed.
  enforced_primary_key_node_types:
    - "model"

  # -- DAG variables --
  models_fanout_threshold: 3
  too_many_joins_threshold: 7

  # -- Naming conventions variables --
  # To add a new "model type", append it to `model_types` and create the
  # matching `<model_type>_folder_name` and/or `<model_type>_prefixes` variables.
  model_types:
    - 'base'
    - 'staging'
    - 'intermediate'
    - 'marts'
    - 'other'

  base_folder_name: 'base'
  staging_folder_name: 'staging'
  intermediate_folder_name: 'intermediate'
  marts_folder_name: 'marts'

  base_prefixes:
    - 'base_'
  staging_prefixes:
    - 'stg_'
  intermediate_prefixes:
    - 'int_'
  marts_prefixes:
    - 'fct_'
    - 'dim_'
  other_prefixes:
    - 'rpt_'

  # -- Performance variables --
  # Resolves to 4 on athena and trino targets, 5 on every other target.
  chained_views_threshold: "{{ 5 if target.type not in ['athena', 'trino'] else 4 }}"

  # -- Execution variables --
  # Resolves to 500 on athena and bigquery targets, 10000 on every other target.
  insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}"
  # Resolves to 9 on bigquery, spark, and databricks; 4 on athena and trino;
  # -1 on every other target.
  max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino'] else -1 }}"

  # -- Code complexity variables --
  comment_chars:
    - "--"

  # Cost assigned to each SQL token.
  # NOTE(review): presumably summed into a per-model complexity score;
  # confirm against the macro that reads this variable.
  token_costs:
    "and": 0.1
    "or": 0.1
    "when": 0.5
    "coalesce": 1
    "distinct": 1
    "greatest": 1
    "least": 1
    "group": 1
    "join": 1
    "order": 1
    "select": 1
    "where": 1
    "having": 2
    "flatten": 3
    "unnest": 3
    "pivot": 3
    "partition by": 3
    "qualify": 3