diff --git a/dev/404.html b/dev/404.html deleted file mode 100644 index 78e5f544..00000000 --- a/dev/404.html +++ /dev/null @@ -1,958 +0,0 @@ - - - -
- - - - - - - - - - - - - - - -brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule
-
-
-¶get_schedule(wf_id: str, **args: str)
-
-¶Function that the sensors defined while deriving this class should -override.
- - -get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)
-
-¶Function that the sensors defined while deriving this class should -override.
- -brickflow_plugins/airflow/operators/external_tasks.py
brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper(okta_conn_id: str)
-
-¶
- Bases: MapDagSchedule
brickflow_plugins/airflow/operators/external_tasks.py
get_access_token() -> str
-
-¶brickflow_plugins/airflow/operators/external_tasks.py
get_airflow_api_url(cluster_id: str) -> str
-
-¶get_okta_client_id() -> str
-
-¶get_okta_client_secret() -> str
-
-¶get_okta_url() -> str
-
-¶get_schedule(wf_id: str, **kwargs: str)
-
-¶get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)
-
-¶brickflow_plugins/airflow/operators/external_tasks.py
get_version(cluster_id: str) -> str
-
-¶brickflow_plugins/airflow/operators/external_tasks.py
brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor(external_dag_id, external_task_id, okta_conn_id, allowed_states = None, execution_delta = None, execution_delta_json = None, cluster_id = None, *args, **kwargs)
-
-¶
- Bases: BaseSensorOperator
brickflow_plugins/airflow/operators/external_tasks.py
allowed_states = allowed_states
-
-
- instance-attribute
-
-
-¶cluster_id = cluster_id
-
-
- instance-attribute
-
-
-¶dbx_wf_id = kwargs.get('dbx_wf_id')
-
-
- instance-attribute
-
-
-¶execution_delta = execution_delta
-
-
- instance-attribute
-
-
-¶execution_delta_json = execution_delta_json
-
-
- instance-attribute
-
-
-¶external_dag_id = external_dag_id
-
-
- instance-attribute
-
-
-¶external_task_id = external_task_id
-
-
- instance-attribute
-
-
-¶okta_conn_id = okta_conn_id
-
-
- instance-attribute
-
-
-¶poke(context)
-
-¶brickflow_plugins/airflow/operators/external_tasks.py
brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier
-
-
-¶brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier
-
-
-¶brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier
-
-
-¶brickflow.cli.CdktfCmd
-
-
-¶
- Bases: click.Group
get_command(ctx: click.Context, cmd_name: str) -> Optional[click.Command]
-
-¶brickflow/cli/__init__.py
brickflow.cli.bundle() -> None
-
-¶CLI for proxying to databricks bundles cli.
- - -brickflow.cli.bundles_proxy_command() -> click.Command
-
-¶brickflow/cli/__init__.py
brickflow.cli.cdktf() -> None
-
-¶CLI for proxying to cdktf cli.
- - -brickflow.cli.cdktf_command(base_command: Optional[str] = None) -> click.Command
-
-¶brickflow/cli/__init__.py
brickflow.cli.cdktf_env_set_options(f: Callable) -> Callable
-
-¶brickflow/cli/__init__.py
123 -124 -125 -126 -127 -128 -129 -130 -131 -132 -133 -134 -135 -136 -137 -138 -139 -140 -141 -142 -143 -144 -145 -146 -147 -148 -149 -150 -151 -152 -153 -154 -155 -156 -157 -158 -159 -160 -161 -162 -163 -164 -165 -166 -167 -168 -169 -170 -171 -172 -173 -174 -175 -176 -177 -178 -179 -180 -181 -182 -183 -184 -185 -186 -187 -188 -189 -190 -191 -192 -193 -194 -195 -196 -197 -198 -199 -200 -201 -202 -203 -204 -205 -206 -207 -208 -209 -210 -211 -212 -213 -214 -215 -216 -217 -218 -219 -220 -221 -222 |
|
brickflow.cli.cli() -> None
-
-¶CLI for managing Databricks Workflows
- - -brickflow.cli.deploy(**kwargs: Any) -> None
-
-¶CLI for deploying workflow projects.
- -brickflow/cli/__init__.py
brickflow.cli.destroy(**kwargs: Any) -> None
-
-¶CLI for destroying workflow projects.
- -brickflow/cli/__init__.py
brickflow.cli.diff(**kwargs: Any) -> None
-
-¶CLI for identifying diff in projects (only cdktf supported).
- -brickflow/cli/__init__.py
brickflow.cli.disable_project_name_in_env() -> None
-
-¶brickflow/cli/__init__.py
brickflow.cli.docs() -> None
-
-¶Use to open docs in your browser...
- -brickflow/cli/__init__.py
brickflow.cli.get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]
-
-¶brickflow.cli.get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode
-
-¶brickflow/cli/__init__.py
brickflow.cli.make_cdktf_json(**kwargs: Any) -> None
-
-¶brickflow/cli/__init__.py
brickflow.cli.sync(**kwargs: Any) -> None
-
-¶Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).
- -brickflow/cli/__init__.py
brickflow.engine.compute.Cluster
-
-
-
- dataclass
-
-
-¶aws_attributes: Optional[Dict[str, Any]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶custom_tags: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶data_security_mode: str = DataSecurityMode.SINGLE_USER
-
-
- class-attribute
- instance-attribute
-
-
-¶dlt_auto_scale_mode: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶driver_instance_pool_id: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶driver_node_type_id: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶enable_elastic_disk: Optional[bool] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶existing_cluster_id: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶init_scripts: Optional[List[Dict[str, str]]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶instance_pool_id: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶is_new_job_cluster: bool
-
-
- property
-
-
-¶job_task_field_dict: Dict[str, str]
-
-
- property
-
-
-¶max_workers: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶min_workers: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶name: str
-
-
- instance-attribute
-
-
-¶node_type_id: str
-
-
- instance-attribute
-
-
-¶num_workers: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶policy_id: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶runtime_engine: Optional[Literal['STANDARD', 'PHOTON']] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶spark_conf: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶spark_env_vars: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶spark_version: str
-
-
- instance-attribute
-
-
-¶__hash__() -> int
-
-¶__post_init__() -> None
-
-¶as_dict(is_dlt_cluster: bool = False, allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> Dict[str, Any]
-
-¶brickflow/engine/compute.py
autoscale(is_dlt_cluster: bool = False) -> Dict[str, Any]
-
-¶brickflow/engine/compute.py
cleanup(d: Dict[str, Any], allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> None
-
-
- staticmethod
-
-
-¶brickflow/engine/compute.py
from_existing_cluster(existing_cluster_id: str) -> 'Cluster'
-
-
- classmethod
-
-
-¶brickflow/engine/compute.py
validate() -> None
-
-¶brickflow/engine/compute.py
brickflow.engine.compute.Runtimes
-
-
-¶RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS = '10.4.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS = '10.4.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS = '10.4.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS = '10.4.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS = '10.4.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_4_X_SCALA2_12_LTS = '10.4.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12 = '10.5.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_AARCH64_SCALA2_12 = '10.5.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_CPU_ML_SCALA2_12 = '10.5.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_GPU_ML_SCALA2_12 = '10.5.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_PHOTON_SCALA2_12 = '10.5.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_10_5_X_SCALA2_12 = '10.5.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12 = '11.0.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_AARCH64_SCALA2_12 = '11.0.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_CPU_ML_SCALA2_12 = '11.0.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_GPU_ML_SCALA2_12 = '11.0.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_PHOTON_SCALA2_12 = '11.0.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_0_X_SCALA2_12 = '11.0.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12 = '11.1.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_AARCH64_SCALA2_12 = '11.1.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_CPU_ML_SCALA2_12 = '11.1.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_GPU_ML_SCALA2_12 = '11.1.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_PHOTON_SCALA2_12 = '11.1.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_1_X_SCALA2_12 = '11.1.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12 = '11.2.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_AARCH64_SCALA2_12 = '11.2.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_CPU_ML_SCALA2_12 = '11.2.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_GPU_ML_SCALA2_12 = '11.2.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_PHOTON_SCALA2_12 = '11.2.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_2_X_SCALA2_12 = '11.2.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12 = '11.3.x-aarch64-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_AARCH64_SCALA2_12 = '11.3.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_CPU_ML_SCALA2_12 = '11.3.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_GPU_ML_SCALA2_12 = '11.3.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_PHOTON_SCALA2_12 = '11.3.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_11_3_X_SCALA2_12 = '11.3.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS = '7.3.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS = '7.3.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_7_3_X_HLS_SCALA2_12_LTS = '7.3.x-hls-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_7_3_X_SCALA2_12_LTS = '7.3.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS = '9.1.x-aarch64-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS = '9.1.x-cpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS = '9.1.x-gpu-ml-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS = '9.1.x-photon-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶RUNTIME_9_1_X_SCALA2_12_LTS = '9.1.x-scala2.12'
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.context.context.BRANCH_SKIP_EXCEPT = 'branch_skip_except'
-
-
- module-attribute
-
-
-¶brickflow.context.context.RETURN_VALUE_KEY = 'return_value'
-
-
- module-attribute
-
-
-¶brickflow.context.context.SKIP_EXCEPT_HACK = 'brickflow_hack_skip_all'
-
-
- module-attribute
-
-
-¶brickflow.context.context.T = TypeVar('T')
-
-
- module-attribute
-
-
-¶brickflow.context.context.ctx = Context()
-
-
- module-attribute
-
-
-¶brickflow.context.context.BrickflowBuiltInTaskVariables
-
-
-¶
- Bases: Enum
job_id = 'brickflow_job_id'
-
-
- class-attribute
- instance-attribute
-
-
-¶parent_run_id = 'brickflow_parent_run_id'
-
-
- class-attribute
- instance-attribute
-
-
-¶run_id = 'brickflow_run_id'
-
-
- class-attribute
- instance-attribute
-
-
-¶start_date = 'brickflow_start_date'
-
-
- class-attribute
- instance-attribute
-
-
-¶start_time = 'brickflow_start_time'
-
-
- class-attribute
- instance-attribute
-
-
-¶task_key = 'brickflow_task_key'
-
-
- class-attribute
- instance-attribute
-
-
-¶task_retry_count = 'brickflow_task_retry_count'
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.context.context.BrickflowInternalVariables
-
-
-¶
- Bases: Enum
env = BrickflowEnvVars.BRICKFLOW_ENV.value.lower()
-
-
- class-attribute
- instance-attribute
-
-
-¶only_run_tasks = 'brickflow_internal_only_run_tasks'
-
-
- class-attribute
- instance-attribute
-
-
-¶task_id = 'brickflow_internal_task_name'
-
-
- class-attribute
- instance-attribute
-
-
-¶workflow_id = 'brickflow_internal_workflow_name'
-
-
- class-attribute
- instance-attribute
-
-
-¶workflow_prefix = 'brickflow_internal_workflow_prefix'
-
-
- class-attribute
- instance-attribute
-
-
-¶workflow_suffix = 'brickflow_internal_workflow_suffix'
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.context.context.BrickflowTaskComs
-
-
-
- dataclass
-
-
-¶dbutils: Optional[Any] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶storage: Dict[str, Any] = field(init=False, default_factory=lambda : {})
-
-
- class-attribute
- instance-attribute
-
-
-¶get(task_id: str, key: Optional[str] = None) -> Any
-
-¶brickflow/context/context.py
put(task_id: str, key: str, value: Any) -> None
-
-¶brickflow/context/context.py
brickflow.context.context.BrickflowTaskComsDict
-
-
-
- dataclass
-
-
-¶brickflow.context.context.BrickflowTaskComsObject
-
-
-
- dataclass
-
-
-¶to_encoded_value: str
-
-
- property
-
-
-¶value: Any
-
-
- property
-
-
-¶from_encoded_value(encoded_value: Union[str, bytes]) -> BrickflowTaskComsObject
-
-
- classmethod
-
-
-¶brickflow/context/context.py
brickflow.context.context.Context() -> None
-
-¶brickflow/context/context.py
current_project: Optional[str]
-
-
- property
-
-
-¶current_task: Optional[str]
-
-
- property
-
-
-¶dbutils: DBUtils
-
-
- property
-
-
-¶env: str
-
-
- property
-
-
-¶log: logging.Logger
-
-
- property
-
-
-¶spark: SparkSession
-
-
- property
-
-
-¶task_coms: BrickflowTaskComs
-
-
- property
-
-
-¶dbutils_widget_get_or_else(key: str, debug: Optional[str]) -> Optional[str]
-
-¶brickflow/context/context.py
get_by_env(purpose: str, *, default: Optional[T] = None, local: Optional[T] = None, dev: Optional[T] = None, non_prod: Optional[T] = None, test: Optional[T] = None, qa: Optional[T] = None, prod: Optional[T] = None, uat: Optional[T] = None, **kwargs: Optional[T]) -> Optional[T]
-
-¶brickflow/context/context.py
get_parameter(key: str, debug: Optional[str] = None) -> Optional[str]
-
-¶brickflow/context/context.py
get_return_value(task_key: Union[str, Callable]) -> Any
-
-¶is_local() -> bool
-
-¶job_id(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the job_id value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
parent_run_id(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the parent_run_id value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
run_id(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the run_id value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
set_current_project(project: str) -> None
-
-¶skip_all_except(branch_task: Union[Callable, str]) -> None
-
-¶brickflow/context/context.py
skip_all_following() -> None
-
-¶start_date(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the start_date value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
start_time(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the start_time value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
task_key(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the task_key value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
task_retry_count(*, debug: Optional[str] = None) -> Any
-
-¶This function fetches the task_retry_count value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.
- -brickflow/context/context.py
brickflow.context.context.ContextMode
-
-
-¶brickflow.context.context.TaskComsObjectResult
-
-
-¶brickflow.context.context.bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable
-
-¶brickflow/context/context.py
brickflow.engine.project.Project
-
-
-
- dataclass
-
-
-¶batch: bool = True
-
-
- class-attribute
- instance-attribute
-
-
-¶bundle_base_path: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶bundle_obj_name: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶codegen_kwargs: Optional[Dict[str, Any]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶codegen_mechanism: Optional[Type[CodegenInterface]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶debug_execute_task: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶debug_execute_workflow: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶entry_point_path: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶git_reference: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶git_repo: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶libraries: Optional[List[TaskLibrary]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶mode: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶name: str
-
-
- instance-attribute
-
-
-¶provider: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶s3_backend: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶__enter__() -> _Project
-
-¶brickflow/engine/project.py
__exit__(exc_type, exc_val, exc_tb) -> None
-
-¶brickflow/engine/project.py
__post_init__() -> None
-
-¶brickflow/engine/project.py
169 -170 -171 -172 -173 -174 -175 -176 -177 -178 -179 -180 -181 -182 -183 -184 -185 -186 -187 -188 -189 -190 -191 -192 -193 -194 -195 -196 -197 -198 -199 -200 -201 -202 -203 -204 -205 -206 -207 -208 -209 -210 -211 -212 -213 -214 -215 -216 -217 -218 -219 -220 -221 -222 -223 -224 -225 -226 -227 -228 -229 -230 -231 -232 -233 -234 -235 -236 -237 -238 -239 -240 |
|
get_git_ref() -> Optional[str]
-
-¶brickflow/engine/project.py
brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND = 'brickflow_secrets_backend'
-
-
- module-attribute
-
-
-¶brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl = pluggy.HookimplMarker(BRICKFLOW_SECRETS_BACKEND)
-
-
- module-attribute
-
-
-¶brickflow_plugins.secrets.brickflow_secrets_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_SECRETS_BACKEND)
-
-
- module-attribute
-
-
-¶brickflow_plugins.secrets.AbstractSecretsHelper
-
-
-¶
- Bases: abc.ABC
PROTOCOL_STARTS_WITH: Optional[Union[str, List[str]]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶get_secret_value_from_url(url_parsed_result: ParseResult)
-
-¶brickflow_plugins/secrets/__init__.py
brickflow_plugins.secrets.B64SecretsHelper
-
-
-¶
- Bases: AbstractSecretsHelper
brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl
-
-
-¶
- Bases: BrickflowSecretPluginSpec
brickflow_plugins.secrets.BrickflowSecretPluginSpec
-
-
-¶brickflow_plugins.secrets.BrickflowSecretsBackend
-
-
-¶brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl
-
-
-¶
- Bases: BrickflowSecretPluginSpec
brickflow_plugins.secrets.CerberusSecretsHelper
-
-
-¶
- Bases: AbstractSecretsHelper
PROTOCOL_STARTS_WITH = 'cerberus'
-
-
- class-attribute
- instance-attribute
-
-
-¶parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]
-
-
- staticmethod
-
-
-¶brickflow_plugins/secrets/__init__.py
brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl
-
-
-¶
- Bases: BrickflowSecretPluginSpec
brickflow_plugins.secrets.get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec
-
-
- cached
-
-
-¶brickflow_plugins/secrets/__init__.py
brickflow.engine.task.Task
-
-
-
- dataclass
-
-
-¶brickflow_default_params: Dict[str, str]
-
-
- property
-
-
-¶builtin_notebook_params: Dict[str, str]
-
-
- property
-
-
-¶cluster: Cluster
-
-
- instance-attribute
-
-
-¶custom_execute_callback: Optional[Callable] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶custom_task_parameters: Dict[str, str]
-
-
- property
-
-
-¶databricks_task_type_str: str
-
-
- property
-
-
-¶depends_on: List[Union[Callable, str]] = field(default_factory=lambda : [])
-
-
- class-attribute
- instance-attribute
-
-
-¶depends_on_names: Iterator[str]
-
-
- property
-
-
-¶description: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶libraries: List[TaskLibrary] = field(default_factory=lambda : [])
-
-
- class-attribute
- instance-attribute
-
-
-¶name: str
-
-
- property
-
-
-¶parents: List[str]
-
-
- property
-
-
-¶task_func: Callable
-
-
- instance-attribute
-
-
-¶task_func_name: str
-
-
- property
-
-
-¶task_id: str
-
-
- instance-attribute
-
-
-¶task_settings: Optional[TaskSettings] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶task_type: TaskType = TaskType.BRICKFLOW_TASK
-
-
- class-attribute
- instance-attribute
-
-
-¶trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS
-
-
- class-attribute
- instance-attribute
-
-
-¶workflow: Workflow
-
-
- instance-attribute
-
-
-¶execute() -> Any
-
-¶brickflow/engine/task.py
get_obj_dict(entrypoint: str) -> Dict[str, Any]
-
-¶brickflow/engine/task.py
get_runtime_parameter_values() -> Dict[str, Any]
-
-¶brickflow/engine/task.py
handle_notebook_path(entrypoint: str) -> str
-
-
- staticmethod
-
-
-¶brickflow/engine/task.py
is_valid_task_signature() -> None
-
-¶brickflow/engine/task.py
should_skip() -> Tuple[bool, Optional[str]]
-
-¶brickflow/engine/task.py
brickflow.engine.task.EmailNotifications
-
-
-
- dataclass
-
-
-¶on_failure: Optional[List[str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶on_start: Optional[List[str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶on_success: Optional[List[str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶to_tf_dict() -> Dict[str, Optional[List[str]]]
-
-¶brickflow.engine.task.JarTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: StorageBasedTaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
jar |
-
- str
- |
-
-
-
- String to s3/dbfs path for jar - |
- - required - | -
brickflow.engine.task.EggTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: StorageBasedTaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
egg |
-
- str
- |
-
-
-
- String to s3/dbfs path for egg - |
- - required - | -
brickflow.engine.task.WheelTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: StorageBasedTaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
whl |
-
- str
- |
-
-
-
- String to s3/dbfs path for whl - |
- - required - | -
brickflow.engine.task.PypiTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: TaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
package |
-
- str
- |
-
-
-
- The package in pypi i.e. requests, requests==x.y.z, git+https://github.com/Nike-Inc/brickflow.git - |
- - required - | -
repo |
-
- Optional[str]
- |
-
-
-
- The repository where the package can be found. By default pypi is used - |
-
- None
- |
-
dict: Dict[str, Union[str, Dict[str, str]]]
-
-
- property
-
-
-¶package: str
-
-
- instance-attribute
-
-
-¶repo: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.engine.task.MavenTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: TaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
coordinates |
-
- str
- |
-
-
-
- Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2. - |
- - required - | -
repo |
-
- Optional[str]
- |
-
-
-
- Maven repo to install the Maven package from. -If omitted, both Maven Central Repository and Spark Packages are searched. - |
-
- None
- |
-
exclusions |
-
- Optional[List[str]]
- |
-
-
-
- List of dependences to exclude. For example: ["slf4j:slf4j", "*:hadoop-client"]. -Maven dependency exclusions: -https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. - |
-
- None
- |
-
coordinates: str
-
-
- instance-attribute
-
-
-¶dict: Dict[str, Union[str, Dict[str, str]]]
-
-
- property
-
-
-¶exclusions: Optional[List[str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶repo: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.engine.task.CranTaskLibrary
-
-
-
- dataclass
-
-
-¶
- Bases: TaskLibrary
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
package |
-
- str
- |
-
-
-
- The name of the CRAN package to install. - |
- - required - | -
repo |
-
- Optional[str]
- |
-
-
-
- The repository where the package can be found. If not specified, the default CRAN repo is used. - |
-
- None
- |
-
dict: Dict[str, Union[str, Dict[str, str]]]
-
-
- property
-
-
-¶package: str
-
-
- instance-attribute
-
-
-¶repo: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.engine.task.BrickflowTriggerRule
-
-
-¶brickflow.engine.task.BrickflowTaskEnvVars
-
-
-¶brickflow.engine.task.TaskSettings
-
-
-
- dataclass
-
-
-¶email_notifications: Optional[EmailNotifications] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶max_retries: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶min_retry_interval_millis: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶notification_settings: Optional[TaskNotificationSettings] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶retry_on_timeout: Optional[bool] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶timeout_seconds: Optional[int] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶merge(other: Optional['TaskSettings']) -> 'TaskSettings'
-
-¶brickflow/engine/task.py
to_tf_dict() -> Dict[str, Optional[str] | Optional[int] | Optional[bool] | Optional[Dict[str, Optional[List[str]]]]]
-
-¶brickflow/engine/task.py
brickflow.engine.task.TaskType
-
-
-¶
- Bases: Enum
BRICKFLOW_TASK = 'brickflow_task'
-
-
- class-attribute
- instance-attribute
-
-
-¶CUSTOM_PYTHON_TASK = 'custom_python_task'
-
-
- class-attribute
- instance-attribute
-
-
-¶DLT = 'pipeline_task'
-
-
- class-attribute
- instance-attribute
-
-
-¶NOTEBOOK_TASK = 'notebook_task'
-
-
- class-attribute
- instance-attribute
-
-
-¶SQL = 'sql_task'
-
-
- class-attribute
- instance-attribute
-
-
-¶brickflow.engine.workflow.Workflow
-
-
-
- dataclass
-
-
-¶active_task: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶bfs_layers: List[str]
-
-
- property
-
-
-¶clusters: List[Cluster] = field(default_factory=lambda : [])
-
-
- class-attribute
- instance-attribute
-
-
-¶common_task_parameters: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶default_cluster: Optional[Cluster] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶default_task_settings: TaskSettings = TaskSettings()
-
-
- class-attribute
- instance-attribute
-
-
-¶email_notifications: Optional[WorkflowEmailNotifications] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶graph: nx.DiGraph = field(default_factory=nx.DiGraph)
-
-
- class-attribute
- instance-attribute
-
-
-¶libraries: List[TaskLibrary] = field(default_factory=lambda : [])
-
-
- class-attribute
- instance-attribute
-
-
-¶max_concurrent_runs: int = 1
-
-
- class-attribute
- instance-attribute
-
-
-¶max_tasks_in_workflow: int = 100
-
-
- class-attribute
- instance-attribute
-
-
-¶name: str
-
-
- property
-
-
-¶notification_settings: Optional[WorkflowNotificationSettings] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶permissions: WorkflowPermissions = WorkflowPermissions()
-
-
- class-attribute
- instance-attribute
-
-
-¶prefix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_PREFIX', ''))
-
-
- class-attribute
- instance-attribute
-
-
-¶run_as_service_principal: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶run_as_user: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶schedule_quartz_expression: Optional[str] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶suffix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_SUFFIX', ''))
-
-
- class-attribute
- instance-attribute
-
-
-¶tags: Optional[Dict[str, str]] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶tasks: Dict[str, Task] = field(default_factory=lambda : {})
-
-
- class-attribute
- instance-attribute
-
-
-¶timezone: str = 'UTC'
-
-
- class-attribute
- instance-attribute
-
-
-¶trigger: Optional[Trigger] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶unique_new_clusters: List[Cluster]
-
-
- property
-
-
-¶webhook_notifications: Optional[WorkflowWebhookNotifications] = None
-
-
- class-attribute
- instance-attribute
-
-
-¶bfs_task_iter() -> Iterator[Task]
-
-¶check_no_active_task() -> None
-
-¶dlt_task(task_func: Optional[Callable] = None, name: Optional[str] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable
-
-¶brickflow/engine/workflow.py
get_task(task_id: str) -> Task
-
-¶notebook_task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_settings: Optional[TaskSettings] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable
-
-¶brickflow/engine/workflow.py
parents(node: str) -> Iterator
-
-¶pop_task(task_id: str) -> None
-
-¶task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_type: TaskType = TaskType.BRICKFLOW_TASK, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS, custom_execute_callback: Optional[Callable] = None, task_settings: Optional[TaskSettings] = None) -> Callable
-
-¶brickflow/engine/workflow.py
task_exists(task_id: str) -> bool
-
-¶task_iter() -> Iterator[Task]
-
-¶unique_new_clusters_dict() -> List[Dict[str, Any]]
-
-¶brickflow/engine/workflow.py
validate_new_clusters_with_unique_names() -> None
-
-¶brickflow/engine/workflow.py
brickflow.engine.workflow.User
-
-
-¶brickflow.engine.workflow.Group
-
-
-¶brickflow.engine.workflow.ServicePrincipal
-
-
-¶
- Bases: ScimEntity
brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor(databricks_host: str, databricks_secrets_scope: str, databricks_secrets_key: str, dependency_job_id: int, delta: timedelta, timeout_seconds: int, poke_interval_seconds: int = 60)
-
-¶This is used to have dependencies on the databricks workflow
- -WorkflowDependencySensor( - databricks_host=https://your_workspace_url.cloud.databricks.com, - databricks_secrets_scope="brickflow-demo-tobedeleted", - databricks_secrets_key="service_principle_id" - dependency_job_id=job_id, - poke_interval=20, - timeout=60, - delta=timedelta(days=1) -)
-brickflow_plugins/databricks/workflow_dependency_sensor.py
databricks_host = databricks_host
-
-
- instance-attribute
-
-
-¶databricks_secrets_key = databricks_secrets_key
-
-
- instance-attribute
-
-
-¶databricks_secrets_scope = databricks_secrets_scope
-
-
- instance-attribute
-
-
-¶delta = delta
-
-
- instance-attribute
-
-
-¶dependency_job_id = dependency_job_id
-
-
- instance-attribute
-
-
-¶log = logging
-
-
- instance-attribute
-
-
-¶poke_interval = poke_interval_seconds
-
-
- instance-attribute
-
-
-¶start_time = time.time()
-
-
- instance-attribute
-
-
-¶timeout = timeout_seconds
-
-
- instance-attribute
-
-
-¶execute()
-
-¶brickflow_plugins/databricks/workflow_dependency_sensor.py
get_http_session()
-
-
- cached
-
-
-¶brickflow_plugins/databricks/workflow_dependency_sensor.py
get_retry_class(max_retries)
-
-¶brickflow_plugins/databricks/workflow_dependency_sensor.py
get_the_execution_date() -> str
-
-¶brickflow_plugins/databricks/workflow_dependency_sensor.py
get_token()
-
-
- cached
-
-
-¶brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException
-
-
-¶
- Bases: Exception
brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorTimeOutException
-
-
-¶
- Bases: TimeoutError
pip install databricks-cli
and then databricks configure -t
which
- will configure the databricks cli with a token. The first step is to initialize the project. It will do the following:
-To initialize the project inside the bfs shell run:
- -It will prompt you for the:
-For now all the bundle.yml files will be code generated so you can add the following to your .gitignore file:
- -To deploy run the following command
- -To destroy run the following command
- -This page provides documentation for our command line tools.
-CLI for managing Databricks Workflows
-Usage:
- -Options:
- -CLI for proxying to databricks bundles cli.
-Usage:
- -Options:
- -CLI for proxying to cdktf cli.
-Usage:
- -Options:
- -CLI for deploying workflow projects.
-Usage:
- -Options:
- --auto-approve Auto approve brickflow pipeline without
- being prompted to approve.
- --deploy-mode [cdktf|bundle] Which deployment framework to use to deploy.
- [default: cdktf]
- --force-acquire-lock Force acquire lock for databricks bundles
- deploy.
- -p, --profile TEXT The databricks profile to use for
- authenticating to databricks during
- deployment.
- --git-provider TEXT The github provider for brickflow this is
- used for configuring github on DBX jobs.
- --git-ref TEXT The commit/tag/branch to use in github.
- -r, --repo-url TEXT The github url in which to run brickflow
- with.
- -e, --env TEXT Set the environment value, certain tags
- [TBD] get added to the workflows based on
- this value.
- -w, --workflow TEXT Provide the workflow file names which you
- want to deploy, each file name separated by
- space! Example: bf deploy -p DEFAULT -l -w
- wf1.py -w wf2.py
- -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to
- be deployed
- -l, --local-mode Set the environment flag to local and other
- components [TBD] are disabled in local mode.
- --help Show this message and exit.
-
CLI for destroying workflow projects.
-Usage:
- -Options:
- --auto-approve Auto approve brickflow pipeline without
- being prompted to approve.
- --deploy-mode [cdktf|bundle] Which deployment framework to use to deploy.
- [default: cdktf]
- --force-acquire-lock Force acquire lock for databricks bundles
- destroy.
- -p, --profile TEXT The databricks profile to use for
- authenticating to databricks during
- deployment.
- --git-provider TEXT The github provider for brickflow this is
- used for configuring github on DBX jobs.
- --git-ref TEXT The commit/tag/branch to use in github.
- -r, --repo-url TEXT The github url in which to run brickflow
- with.
- -e, --env TEXT Set the environment value, certain tags
- [TBD] get added to the workflows based on
- this value.
- -w, --workflow TEXT Provide the workflow file names which you
- want to deploy, each file name separated by
- space! Example: bf deploy -p DEFAULT -l -w
- wf1.py -w wf2.py
- -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to
- be deployed
- -l, --local-mode Set the environment flag to local and other
- components [TBD] are disabled in local mode.
- --help Show this message and exit.
-
CLI for identifying diff in projects (only cdktf supported).
-Usage:
- -Options:
- -p, --profile TEXT The databricks profile to use for
- authenticating to databricks during
- deployment.
- --git-provider TEXT The github provider for brickflow this is
- used for configuring github on DBX jobs.
- --git-ref TEXT The commit/tag/branch to use in github.
- -r, --repo-url TEXT The github url in which to run brickflow
- with.
- -e, --env TEXT Set the environment value, certain tags
- [TBD] get added to the workflows based on
- this value.
- -w, --workflow TEXT Provide the workflow file names which you
- want to deploy, each file name separated by
- space! Example: bf deploy -p DEFAULT -l -w
- wf1.py -w wf2.py
- -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to
- be deployed
- -l, --local-mode Set the environment flag to local and other
- components [TBD] are disabled in local mode.
- --help Show this message and exit.
-
Use to open docs in your browser...
-Usage:
- -Options:
- -Initialize your project with Brickflow...
-Usage:
- -Options:
- -n, --project-name TEXT
- -g, --git-https-url TEXT Provide the github URL for your project,
- example: https://github.com/nike-eda-
- apla/brickflow
- -wd, --workflows-dir DIRECTORY
- -bfv, --brickflow-version TEXT
- -sev, --spark-expectations-version TEXT
- --help Show this message and exit.
-
Manage one to many brickflow projects
-Usage:
- -Options:
- -Adds a project to the brickflow-multi-project.yml file and a entrypoint.py file in workflows dir
-Usage:
- -Options:
- --name TEXT Name of the project
- --path-from-repo-root-to-project-root DIRECTORY
- Path from repo root to project root
- --path-project-root-to-workflows-dir TEXT
- Path from project root to workflows dir
- --deployment-mode [bundle] Deployment mode
- -g, --git-https-url TEXT Provide the github URL for your project,
- example: https://github.com/nike-eda-
- apla/brickflow
- -bfv, --brickflow-version TEXT
- -sev, --spark-expectations-version TEXT
- --skip-entrypoint Skip creating entrypoint.py file
- --help Show this message and exit.
-
Deploy projects in the brickflow-multi-project.yml file
-Usage:
- -Options:
- --force-acquire-lock Force acquire lock for databricks bundles destroy.
- --auto-approve Auto approve brickflow pipeline without being prompted
- to approve.
- -p, --profile TEXT The databricks profile to use for authenticating to
- databricks during deployment.
- --project [] Select the project of workflows you would like to
- deploy.
- -e, --env TEXT Set the environment value, certain tags [TBD] get
- added to the workflows based on this value.
- --help Show this message and exit.
-
Destroy projects in the brickflow-multi-project.yml file
-Usage:
- -Options:
- --force-acquire-lock Force acquire lock for databricks bundles destroy.
- --auto-approve Auto approve brickflow pipeline without being prompted
- to approve.
- -p, --profile TEXT The databricks profile to use for authenticating to
- databricks during deployment.
- --project [] Select the project of workflows you would like to
- deploy.
- -e, --env TEXT Set the environment value, certain tags [TBD] get
- added to the workflows based on this value.
- --help Show this message and exit.
-
Lists all projects in the brickflow-multi-project.yml file
-Usage:
- -Options:
- -Removes a project from the brickflow-multi-project.yml file
-Usage:
- -Options:
- -Synth the bundle.yml for project
-Usage:
- -Options:
- -p, --profile TEXT The databricks profile to use for authenticating to
- databricks during deployment.
- --project [] Select the project of workflows you would like to
- deploy.
- -e, --env TEXT Set the environment value, certain tags [TBD] get added
- to the workflows based on this value.
- --help Show this message and exit.
-
Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).
-Usage:
- -Options:
- --deploy-mode [bundle] Which deployment framework to use to deploy.
- [default: bundle]
- --watch Enable filewatcher to sync files over.
- --full Run a full sync.
- --interval-duration TEXT File system polling interval (for --watch).
- --debug TEXT File system polling interval (for --watch).
- -p, --profile TEXT The databricks profile to use for
- authenticating to databricks during
- deployment.
- --git-provider TEXT The github provider for brickflow this is
- used for configuring github on DBX jobs.
- --git-ref TEXT The commit/tag/branch to use in github.
- -r, --repo-url TEXT The github url in which to run brickflow
- with.
- -e, --env TEXT Set the environment value, certain tags
- [TBD] get added to the workflows based on
- this value.
- -w, --workflow TEXT Provide the workflow file names which you
- want to deploy, each file name separated by
- space! Example: bf deploy -p DEFAULT -l -w
- wf1.py -w wf2.py
- -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to
- be deployed
- -l, --local-mode Set the environment flag to local and other
- components [TBD] are disabled in local mode.
- --help Show this message and exit.
-
Environment Variable | -Default Value | -Deploment Mode Support | -Description | -
---|---|---|---|
BRICKFLOW_ENV | -local | -bundle & cdktf (deprecated) | -The environment name for Brickflow | -
BRICKFLOW_FORCE_DEPLOY | -False | -cdktf (deprecated) | -Flag indicating whether to force deployment | -
BRICKFLOW_DEPLOYMENT_MODE | -cdktf (deprecated) | -bundle & cdktf (deprecated) | -The deployment mode for Brickflow (cdktf, bundles) | -
BRICKFLOW_GIT_REPO | -N/A | -bundle & cdktf (deprecated) | -The URL of the Git repository for Brickflow | -
BRICKFLOW_GIT_REF | -N/A | -bundle & cdktf (deprecated) | -The Git reference (branch, tag, commit) for Brickflow | -
BRICKFLOW_GIT_PROVIDER | -github | -bundle & cdktf (deprecated) | -The Git provider (e.g., GitHub, GitLab) for Brickflow | -
DATABRICKS_CONFIG_PROFILE | -default | -bundle & cdktf (deprecated) | -The profile name for Databricks configuration | -
BRICKFLOW_DEPLOY_ONLY_WORKFLOWS | -N/A | -bundle & cdktf (deprecated) | -List of workflows to deploy exclusively | -
BRICKFLOW_WORKFLOW_PREFIX | -N/A | -bundle & cdktf (deprecated) | -Prefix to add to workflow names during deployment | -
BRICKFLOW_WORKFLOW_SUFFIX | -N/A | -bundle & cdktf (deprecated) | -Suffix to add to workflow names during deployment | -
BRICKFLOW_S3_BACKEND_BUCKET | -N/A | -cdktf (deprecated) | -The name of the S3 bucket for Brickflow backend | -
BRICKFLOW_S3_BACKEND_KEY | -N/A | -cdktf (deprecated) | -The key or path in the S3 bucket for Brickflow backend | -
BRICKFLOW_S3_BACKEND_REGION | -N/A | -cdktf (deprecated) | -The AWS region for the S3 backend | -
BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE | -N/A | -cdktf (deprecated) | -The DynamoDB table name for tracking S3 backend | -
BRICKFLOW_INTERACTIVE_MODE | -True | -bundle & cdktf (deprecated) | -Flag indicating whether to enable interactive mode | -
BRICKFLOW_BUNDLE_BASE_PATH | -/Users/ ${workspace.current_user.userName} |
-bundle | -The base path for the bundle in the S3 backend | -
BRICKFLOW_BUNDLE_OBJ_NAME | -.brickflow_bundles | -bundle | -The name of the folder post appended to your base path | -
BRICKFLOW_BUNDLE_CLI_EXEC | -databricks | -bundle | -The executable command for bundle execution. By default it will be downloaded on the fly. | -
BRICKFLOW_BUNDLE_NO_DOWNLOAD | -False | -bundle | -Flag indicating whether to skip downloading the databricks bundle cli. Useful if you are in locked down network. | -
BRICKFLOW_BUNDLE_CLI_VERSION | -0.200.0 | -bundle | -The version of the bundle CLI tool | -
BRICKFLOW_MONOREPO_PATH_TO_BUNDLE_ROOT | -N/A | -bundle & cdktf (deprecated) | -The path to the bundle root directory in a monorepo. Default assumes you are not using a monorepo | -
This allows for adding suffixes or prefixes in the name of the workflow:
-Setting the above is semantically the same as doing this in code:
-wf = Workflow(
- "thanks",
- prefix="so_long_", # same as BRICKFLOW_WORKFLOW_PREFIX
- suffix="_and_thanks_for_all_the_fish" # same as BRICKFLOW_WORKFLOW_SUFFIX
-)
-
wf.name
would then result in "so_long_and_thanks_for_all_the_fish"
this is to allow 'unique' names while deploying the same workflow to same environments while still needing to keep them -separate.
-For example, consider this scenario:
-inventory_upsert
;feature_1
and feature_2
;dev_inventory_upsert
;dev_inventory_upsert_feature_1
and dev_inventory_upsert_feature_2
.Ideal usage for this is in CI/CD pipelines.
- - - - - - - - -Airflow Operator | -Databricks Native Equivalent | -Will Implement | -Link to Issues | -Link to Impl | -Link to Docs | -
---|---|---|---|---|---|
Snowflake Operator | -- | - | - | - | - |
Branch Python Operator | -- | - | - | - | - |
Slack Operator | -- | - | - | - | - |
Email Operator | -- | - | - | - | - |
Task Dependency Sensor | -- | - | - | - | - |
Canary Operator | -- | - | - | - | - |
Bash Operator | -- | - | - | - | - |
Short Circuit Operator | -- | - | - | - | - |
S3 Sensor | -- | - | - | - | - |
Compute Bash Operator | -Look at Bash Operator | -- | - | - | - |
Compute Python Operator | -Use a task | -- | - | - | - |
EMR Operator | -Use a task | -- | - | - | - |
Spark Operator | -Use a task | -- | - | - | - |
Python Operator | -Use a task | -- | - | - | - |
Dummy Operator | -Use a task | -- | - | - | - |
Genie Snowflake Operator | -Look at snowflake operator | -- | - | - | - |
Genie Hive Operator | -N/A | -- | - | - | - |
Genie S3 Dist CP Operator | -N/A | -- | - | - | - |
Athena Operator | -Use DBSQL | -- | - | - | - |
Nike EMR Operator | -Use a task | -- | - | - | - |
Nike Spark Submit Operator | -Use a task | -- | - | - | - |
Compute S3 Prefix Sensor | -Look at S3 sensor | -- | - | - | - |
The objective of Brickflow is to provide a thin layer on top of databricks workflows to help deploy -and manage workflows in Databricks. It also provides plugins/extras to be able to run airflow -operators directly in the workflows.
-Object | -Airflow | -Brickflow | -
---|---|---|
Collection of Workflows | -Airflow Cluster (Airflow Dag Bag) | -Project/Entrypoint | -
Workflow | -Airflow Dag | -Workflow | -
Task | -Airflow Operator | -Task | -
Schedule | -Unix Cron | -Quartz Cron | -
Inter Task Communication | -XComs | -Task Values | -
Connections to External Services | -Airflow Connections | -Cerberus Connection Builder | -
Variables to Tasks | -Variables | -Task Parameters | -
Context values (execution_date, etc.) | -Airflow Macros, context["ti"] | -ctx.<task parameter> | -
BrickFlow is a CLI tool for development and deployment of Python based Databricks Workflows in a declarative way.
-brickflow
aims to improve development experience for building any pipelines on databricks via:
Issues with brickflow
? Found a bug?
-Have a great idea for an addition? Want to improve the documentation? Please feel
-free to file an issue.
To contribute please fork and create a pull request.
- - - - - - - - -The project is similar to a map cluster it can be composed of various different Workflows or dags.
-Here is an example of an entrypoint. -Click the plus buttons to understand all the parts of the entrypoint file.
-# Databricks notebook source # (1)!
-
-import examples.brickflow_examples.workflows
-
-from brickflow import Project, PypiTaskLibrary, MavenTaskLibrary
-
-ARTIFACTORY = ""
-
-
-def main() -> None:
- """Project entrypoint"""
- with Project(
- "brickflow-demo", # (3)!
- git_repo="https://github.com/Nike-Inc/brickflow", # (4)!
- provider="github", # (5)!
- libraries=[ # (6)!
- PypiTaskLibrary(package="brickflow==1.0.0 --extra-index-url " + ARTIFACTORY),
- MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
- ],
- ) as f:
- f.add_pkg(examples.brickflow_examples.workflows) # (7)!
-
-
-if __name__ == "__main__": # (2)!
- main()
-
bf init
bf init
pip install databricks-cli
and then databricks configure -t
which
- will configure the databricks cli with a token. We recommend to use docker container for development purposes as it's easier to have version upgrades by changing the docker version.
-Add the following alias to your profile or zsh_profile:
- -Please change your directory to the root of your project. Then run the bfs
command.
This will launch the bash shell inside the container. It will do the following:
-~/.ssh
directory as read-only to the ~/.ssh
in the container.~/.databrickscfg
file as read-only to the ~/.databrickscfg
in the container.You will also need to install any required packages of your respective project inside the docker container.
-If the brickflow version in your container is outdated and needed to upgrade then run the below command in your shell which pull the latest docker image
- -Alternatively instead of docker you can install locally but you will need to resolve all the deps.
-The project relies on terraform and cdktf to deploy your python projects.
-brew install node
npm install -g cdktf-cli
pip install brickflow[deploy]
pip install brickflow[cerberus]
pip install brickflow[airflow]
The first step is to initialize the project. It will do the following:
-To initialize the project inside the bfs shell run:
- -It will prompt you for the:
-BrickFlow is a CLI tool for development and deployment of Python based Databricks Workflows in a declarative way.
"},{"location":"#concept","title":"Concept","text":"brickflow
aims to improve development experience for building any pipelines on databricks via:
Issues with brickflow
? Found a bug? Have a great idea for an addition? Want to improve the documentation? Please feel free to file an issue.
To contribute please fork and create a pull request.
"},{"location":"bundles-quickstart/","title":"Bundles (Recommended)","text":""},{"location":"bundles-quickstart/#prerequisites","title":"Prerequisites","text":"pip install databricks-cli
and then databricks configure -t
which will configure the databricks cli with a token. The first step is to initialize the project. It will do the following:
To initialize the project inside the bfs shell run:
bf init\n
It will prompt you for the:
For now all the bundle.yml files will be code generated so you can add the following to your .gitignore file:
**/bundle.yml\n
To deploy run the following command
bf deploy --deploy-mode=bundle -p \"<profile>\" -wd <workflows directory>\n
To destroy run the following command
bf destroy --deploy-mode=bundle -p \"<profile>\" -wd <workflows directory>\n
This allows for adding suffixes or prefixes in the name of the workflow:
Setting the above is semantically the same as doing this in code:
wf = Workflow(\n\"thanks\",\nprefix=\"so_long_\", # same as BRICKFLOW_WORKFLOW_PREFIX\nsuffix=\"_and_thanks_for_all_the_fish\" # same as BRICKFLOW_WORKFLOW_SUFFIX\n)\n
wf.name
would then result in \"so_long_and_thanks_for_all_the_fish\"
this is to allow 'unique' names while deploying the same workflow to same environments while still needing to keep them separate.
For example, consider this scenario:
inventory_upsert
;feature_1
and feature_2
;dev_inventory_upsert
;dev_inventory_upsert_feature_1
and dev_inventory_upsert_feature_2
.Ideal usage for this is in CI/CD pipelines.
"},{"location":"highlevel/","title":"HighLevel","text":""},{"location":"highlevel/#brickflow-overview","title":"Brickflow Overview","text":"The objective of Brickflow is to provide a thin layer on top of databricks workflows to help deploy and manage workflows in Databricks. It also provides plugins/extras to be able to run airflow operators directly in the workflows.
"},{"location":"highlevel/#brickflow-to-airflow-term-mapping","title":"Brickflow to Airflow Term Mapping","text":"Object Airflow Brickflow Collection of Workflows Airflow Cluster (Airflow Dag Bag) Project/Entrypoint Workflow Airflow Dag Workflow Task Airflow Operator Task Schedule Unix Cron Quartz Cron Inter Task Communication XComs Task Values Connections to External Services Airflow Connections Cerberus Connection Builder Variables to Tasks Variables Task Parameters Context values (execution_date, etc.) Airflow Macros, context[\"ti\"] ctx.<task parameter>"},{"location":"limitations/","title":"Limitations","text":"The project is similar to a map cluster it can be composed of various different Workflows or dags.
Here is an example of an entrypoint. Click the plus buttons to understand all the parts of the entrypoint file.
entrypoint.py# Databricks notebook source # (1)!\nimport examples.brickflow_examples.workflows\nfrom brickflow import Project, PypiTaskLibrary, MavenTaskLibrary\nARTIFACTORY = \"\"\ndef main() -> None:\n\"\"\"Project entrypoint\"\"\"\nwith Project(\n\"brickflow-demo\", # (3)!\ngit_repo=\"https://github.com/Nike-Inc/brickflow\", # (4)!\nprovider=\"github\", # (5)!\nlibraries=[ # (6)!\nPypiTaskLibrary(package=\"brickflow==1.0.0 --extra-index-url \" + ARTIFACTORY),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\n],\n) as f:\nf.add_pkg(examples.brickflow_examples.workflows) # (7)!\nif __name__ == \"__main__\": # (2)!\nmain()\n
bf init
bf init
pip install databricks-cli
and then databricks configure -t
which will configure the databricks cli with a token. We recommend to use docker container for development purposes as it's easier to have version upgrades by changing the docker version.
Add the following alias to your profile or zsh_profile:
alias bfs='docker run -it --rm --name brickflow -v \"$PWD\":/usr/src/brickflow -v ~/.databrickscfg:/root/.databrickscfg:ro -v ~/.ssh:/root/.ssh:ro -w /usr/src/brickflow <DOCKERHUB_URL_REPLACE>/brickflow:latest'\n
Please change your directory to the root of your project. Then run the bfs
command.
bfs\n
This will launch the bash shell inside the container. It will do the following:
~/.ssh
directory as read-only to the ~/.ssh
in the container.~/.databrickscfg
file as read-only to the ~/.databrickscfg
in the container.You will also need to install any required packages of your respective project inside the docker container.
If the brickflow version in your container is outdated and needed to upgrade then run the below command in your shell which pull the latest docker image
docker pull <DOCKERHUB_URL_REPLACE>/brickflow:latest\n
Alternatively instead of docker you can install locally but you will need to resolve all the deps.
The project relies on terraform and cdktf to deploy your python projects.
brew install node
npm install -g cdktf-cli
pip install brickflow[deploy]
pip install brickflow[cerberus]
pip install brickflow[airflow]
The first step is to initialize the project. It will do the following:
To initialize the project inside the bfs shell run:
bf init\n
It will prompt you for the:
A task in Databricks workflows refers to a single unit of work that is executed as part of a larger data processing pipeline. Tasks are typically designed to perform a specific set of operations on data, such as loading data from a source, transforming the data, and storing it in a destination. In brickflow, tasks as designed in such a way that
Assuming, that this is already read - workflow and workflow object is created
"},{"location":"tasks/#task","title":"Task","text":"Databricks workflow task can be created by decorating a python function with brickflow's task function
taskfrom brickflow import Workflow\nwf = Workflow(...)\n@wf.task # (1)!\ndef start():\npass\n@wf.task(name=\"custom_end\") # (2)!\ndef end():\npass\n
Define task dependency by using a variable \"depends_on\" in the task function. You can provide the dependent tasks as direct python callables or string or list of callables/strings
task_dependencyfrom brickflow import Workflow\nwf = Workflow(...)\n@wf.task\ndef start():\npass\n@wf.task(depends_on=start) # (1)!\ndef bronze_layer():\npass\n@wf.task(depends_on=\"bronze_layer\") # (2)!\ndef x_silver():\npass\n@wf.task(depends_on=bronze_layer)\ndef y_silver():\npass\n@wf.task(depends_on=[x_silver, y_silver]) # (3)!\ndef xy_gold():\npass\n@wf.task(name=\"custom_z_gold\", depends_on=[x_silver, \"y_silver\"]) # (4)!\ndef z_gold():\npass\n@wf.task(depends_on=[\"xy_gold\", \"custom_z_gold\"]) # (5)!\ndef end():\npass\n
Task parameters can be defined as key value pairs in the function definition on which task is defined
task_parametersfrom brickflow import Workflow\nwf = Workflow(...)\n@wf.task\ndef task_function(*, test=\"var\", test1=\"var1\"): # (1)!\nprint(test)\nprint(test1)\n
In the workflows section, we saw how the common task parameters are created at the workflow level. Now in this section, we shall see how to use the common task parameters
use_common_task_parametersfrom brickflow import Workflow, ctx\nwf = Workflow(...)\n@wf.task\ndef common_params():\nimport some_pyspark_function # (1)!\ncatalog_env = ctx.dbutils_widget_get_or_else(key=\"catalog\", debug=\"local\") # (2)!\nsome_pyspark_function(catalog_env) # (3)!\n
There are many inbuilt task parameters that be accessed using brickflow context like above
inbuilt_task_parametersfrom brickflow import Workflow, ctx\nwf = Workflow(...)\n@wf.task\ndef inbuilt_params():\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_env\", # (1)! \ndebug=\"local\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_run_id\", # (2)! \ndebug=\"788868\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_job_id\", # (3)! \ndebug=\"987987987987987\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_start_date\", # (4)! \ndebug=\"2023-05-03\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_start_time\", # (5)! \ndebug=\"1683102411626\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_task_retry_count\", # (6)! \ndebug=\"2\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_parent_run_id\", # (7)! \ndebug=\"788869\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_task_key\", # (8)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_name\", # (9)! \ndebug=\"Sample_Workflow\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_task_name\", # (10)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_prefix\", # (11)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_suffix\", # (12)! \ndebug=\"inbuilt_params\"))\n
There is a flexibility to use different clusters for each task or assign custom clusters
clustersfrom brickflow import Workflow, Cluster\nwf = Workflow(...)\n@wf.task(cluster=Cluster(...)) # (1)!\ndef custom_cluster():\npass\n
There is a flexibility to use specific libraries for a particular task
librariesfrom brickflow import Workflow\nwf = Workflow(...)\n@wf.task(libraries=[...]) # (1)!\ndef custom_libraries():\npass\n
There are different task types that are supported by brickflow right now. The default task type that is used by brickflow is NOTEBOOK
task_typesfrom brickflow import Workflow, TaskType, BrickflowTriggerRule, TaskResponse\nwf = Workflow(...)\n@wf.task\ndef notebook_task():\npass\n@wf.task(task_type=TaskType.DLT)\ndef dlt_task():\npass\n@wf.task(\ntask_type=TaskType.CUSTOM_PYTHON_TASK, # (1)!\ntrigger_rule=BrickflowTriggerRule.NONE_FAILED, # (2)!\ncustom_execute_callback=lambda x: TaskResponse(x.name, \npush_return_value=True), # (3)!\n)\ndef custom_python_task():\npass\n
There are two types of trigger rules that can be applied on a task. It can be either ALL_SUCCESS or NONE_FAILED
task_typesfrom brickflow import Workflow, BrickflowTriggerRule\nwf = Workflow(...)\n@wf.task(\ntrigger_rule=BrickflowTriggerRule.NONE_FAILED # (1)!\n)\ndef none_failed_task():\npass\n@wf.task(\ntrigger_rule=BrickflowTriggerRule.ALL_SUCCESS # (2)!\n)\ndef all_success_task():\npass\n
We have adopted/extended certain airflow operators that might be needed to run as a task in databricks workflows. Typically for airflow operators we return the operator and brickflow will execute the operator based on task return type.
"},{"location":"tasks/#bash-operator","title":"Bash Operator","text":"You will be able to use bash operator as below
bash_operatorfrom brickflow import Workflow\nfrom brickflow_plugins import BashOperator\nwf = Workflow(...)\n@wf.task\ndef bash_task():\nreturn BashOperator(task_id=bash_task.__name__, \nbash_command=\"ls -ltr\") # (1)!\n
Even if you migrate to databricks workflows, brickflow gives you the flexibility to have a dependency on the airflow job
task_dependency_sensorfrom brickflow import Workflow, ctx\nfrom brickflow_plugins import TaskDependencySensor\nwf = Workflow(...)\n@wf.task\ndef airflow_external_task_dependency_sensor():\nimport base64\ndata = base64.b64encode(\nctx.dbutils.secrets.get(\"brickflow-demo-tobedeleted\", \"okta_conn_id\").encode(\n\"utf-8\"\n)\n).decode(\"utf-8\")\nreturn TaskDependencySensor(\ntask_id=\"sensor\",\ntimeout=180,\nokta_conn_id=f\"b64://{data}\",\nexternal_dag_id=\"external_airlfow_dag\",\nexternal_task_id=\"hello\",\nallowed_states=[\"success\"],\nexecution_delta=None,\nexecution_delta_json=None,\ncluster_id=\"your_cluster_id\",\n)\n
"},{"location":"workflows/","title":"Workflows","text":"A Workflow is similar to an Airflow dag that lets you encapsulate a set of tasks.
Here is an example of a workflow. Click the plus buttons to understand all the parts of the workflow file.
workflow.pyfrom datetime import timedelta\nfrom brickflow import Workflow, Cluster, WorkflowPermissions, User, \\\n TaskSettings, EmailNotifications, PypiTaskLibrary, MavenTaskLibrary\nwf = Workflow( # (1)!\n\"wf_test\", # (2)!\ndefault_cluster=Cluster.from_existing_cluster(\"your_existing_cluster_id\"), # (3)!\n# Optional parameters below\nschedule_quartz_expression=\"0 0/20 0 ? * * *\", # (4)!\ntimezone=\"UTC\", # (5)!\ndefault_task_settings=TaskSettings( # (6)!\nemail_notifications=EmailNotifications(\non_start=[\"email@nike.com\"],\non_success=[\"email@nike.com\"],\non_failure=[\"email@nike.com\"]\n),\ntimeout_seconds=timedelta(hours=2).seconds\n),\nlibraries=[ # (7)!\nPypiTaskLibrary(package=\"requests\"),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\n],\ntags={ # (8)!\n\"product_id\": \"brickflow_demo\",\n\"slack_channel\": \"nike-sole-brickflow-support\"\n},\nmax_concurrent_runs=1, # (9)!\npermissions=WorkflowPermissions( # (10)!\ncan_manage_run=[User(\"abc@abc.com\")],\ncan_view=[User(\"abc@abc.com\")],\ncan_manage=[User(\"abc@abc.com\")],\n),\nprefix=\"feature-jira-xxx\", # (11)!\nsuffix=\"_qa1\", # (12)!\ncommon_task_parameters={ # (13)!\n\"catalog\": \"development\",\n\"database\": \"your_database\"\n},\n)\n@wf.task() # (14)!\ndef task_function(*, test=\"var\"):\nreturn \"hello world\"\n
There are two ways to define the cluster for the workflow or a task
"},{"location":"workflows/#using-an-existing-cluster","title":"Using an existing cluster","text":"existing_clusterfrom brickflow import Cluster\ndefault_cluster=Cluster.from_existing_cluster(\"your_existing_cluster_id\")\n
"},{"location":"workflows/#use-a-job-cluster","title":"Use a job cluster","text":"job_clusterfrom brickflow import Cluster\ndefault_cluster=Cluster(\nname=\"your_cluster_name\",\nspark_version='11.3.x-scala2.12',\nnode_type_id='m6g.xlarge',\ndriver_node_type_id='m6g.xlarge',\nmin_workers=1,\nmax_workers=3,\nenable_elastic_disk=True,\npolicy_id='your_policy_id',\naws_attributes={\n\"first_on_demand\": 1,\n\"availability\": \"SPOT_WITH_FALLBACK\",\n\"instance_profile_arn\": \"arn:aws:iam::XXXX:instance-profile/XXXX/group/XX\",\n\"spot_bid_price_percent\": 100,\n\"ebs_volume_type\": \"GENERAL_PURPOSE_SSD\",\n\"ebs_volume_count\": 3,\n\"ebs_volume_size\": 100\n}\n)\n
"},{"location":"workflows/#permissions","title":"Permissions","text":"Brickflow provides an opportunity to manage permissions on the workflows. You can provide individual users or to a group or to a ServicePrincipal that can help manage, run or view the workflows.
Below example is for reference
manage_permissionsfrom brickflow import WorkflowPermissions, User, Group, ServicePrincipal\npermissions=WorkflowPermissions(\ncan_manage_run=[\nUser(\"abc@abc.com\"), \nGroup(\"app.xyz.team.Developer\"), \nServicePrincipal(\"ServicePrinciple_dbx_url.app.xyz.team.Developer\")\n],\ncan_view=[User(\"abc@abc.com\")],\ncan_manage=[User(\"abc@abc.com\")],\n)\n
"},{"location":"workflows/#tags","title":"Tags","text":"Using brickflow, custom tags can be created on the workflow - but there are also some default tags that are created while the job is deployed.
The defaults tags that gets automatically attached to the workflow are below
Use the below reference to define more tags and attach to the workflow. These can be used for collecting various metrics and build dashboards.
configure_tagstags={\n\"product_id\": \"brickflow_demo\",\n\"slack_channel\": \"nike-sole-brickflow-support\"\n}\n
"},{"location":"workflows/#schedule","title":"Schedule","text":"Databricks workflows uses Quartz cron expression unlike airflow's unix based cron scheduler. A typical Quartz cron expression have six or seven fields, seperated by spaces
second minute hour day_of_month month day_of_week year(optional)\n
Below is a sample quartz_cron_expressionschedule_quartz_expression=\"0 0/20 0 ? * * *\"\n
"},{"location":"workflows/#tasksettings","title":"Tasksettings","text":"Task setting at workflow level can be used to have common setting defined that will be applicable for all the tasks. Below is a sample that can be used for reference and all the parameters in TaskSettings are optional task_settings
from datetime import timedelta\nfrom brickflow import TaskSettings, EmailNotifications\ndefault_task_settings=TaskSettings(\nemail_notifications=EmailNotifications(\non_start=[\"email@nike.com\"],\non_success=[\"email@nike.com\"],\non_failure=[\"email@nike.com\"]\n),\ntimeout_seconds=timedelta(hours=2).seconds,\nmax_retries=2,\nmin_retry_interval_millis=60000,\nretry_on_timeout=True\n)\n
"},{"location":"workflows/#libraries","title":"Libraries","text":"Brickflow allows to specify libraries that are need to be installed and used across different tasks. There are many ways to install library from different repositories/sources
librariesfrom brickflow import PypiTaskLibrary, MavenTaskLibrary, StorageBasedTaskLibrary, \\\n JarTaskLibrary, EggTaskLibrary, WheelTaskLibrary\nlibraries=[\nPypiTaskLibrary(package=\"requests\"),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\nStorageBasedTaskLibrary(\"s3://...\"),\nStorageBasedTaskLibrary(\"dbfs://...\"),\nJarTaskLibrary(\"s3://...\"),\nJarTaskLibrary(\"dbfs://...\"),\nEggTaskLibrary(\"s3://...\"),\nEggTaskLibrary(\"dbfs://...\"),\nWheelTaskLibrary(\"s3://...\"),\nWheelTaskLibrary(\"dbfs://...\"),\n]\n
"},{"location":"workflows/#common-task-parameters","title":"Common task parameters","text":"Define the common parameters that can be used in all the tasks. Example could be database name, secrets_id etc
common_task_parameterscommon_task_parameters={\n\"catalog\": \"development\",\n\"database\": \"your_database\"\n}\n
"},{"location":"api/airflow_external_task_dependency/","title":"AirflowTaskDependencySensor","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks-attributes","title":"Attributes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks-classes","title":"Classes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule","title":"brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule.get_schedule","title":"get_schedule(wf_id: str, **args: str)
","text":"Function that the sensors defined while deriving this class should override.
Source code inbrickflow_plugins/airflow/operators/external_tasks.py
def get_schedule(self, wf_id: str, **args):\n\"\"\"\n Function that the sensors defined while deriving this class should\n override.\n \"\"\"\nraise Exception(\"Override me.\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule.get_task_run_status","title":"get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)
","text":"Function that the sensors defined while deriving this class should override.
Source code inbrickflow_plugins/airflow/operators/external_tasks.py
def get_task_run_status(\nself, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args\n):\n\"\"\"\n Function that the sensors defined while deriving this class should\n override.\n \"\"\"\nraise Exception(\"Override me.\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper","title":"brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper(okta_conn_id: str)
","text":" Bases: MapDagSchedule
brickflow_plugins/airflow/operators/external_tasks.py
def __init__(self, okta_conn_id: str):\nself._okta_conn: Connection = Connection.get_connection_from_secrets(\nokta_conn_id\n)\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_access_token","title":"get_access_token() -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_access_token(self) -> str:\nokta_url = self.get_okta_url()\nclient_id = self.get_okta_client_id()\nclient_secret = self.get_okta_client_secret()\nokta_url = os.getenv(\"OKTA_URL\", okta_url)\npayload = (\n\"client_id=\"\n+ client_id\n+ \"&client_secret=\"\n+ client_secret\n+ \"&grant_type=client_credentials\"\n)\nheaders = {\n\"Content-Type\": \"application/x-www-form-urlencoded\",\n\"cache-control\": \"no-cache\",\n}\nresponse = requests.post(okta_url, data=payload, headers=headers, timeout=600)\nif (\nresponse.status_code < HTTPStatus.OK\nor response.status_code > HTTPStatus.PARTIAL_CONTENT\n):\nlog.error(\n\"Failed request to Okta for JWT status_code={} response={} client_id={}\".format(\nresponse.status_code, response.text, client_id\n)\n)\ntoken_data = response.json()[\"access_token\"]\nreturn token_data\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_airflow_api_url","title":"get_airflow_api_url(cluster_id: str) -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_airflow_api_url(self, cluster_id: str) -> str:\n# TODO: templatize this to a env variable\nbase_api_url = f\"https://proxy.us-east-1.map.nike.com/{cluster_id}\"\nreturn base_api_url\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_client_id","title":"get_okta_client_id() -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_client_id(self) -> str:\nreturn self._okta_conn.login\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_client_secret","title":"get_okta_client_secret() -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_client_secret(self) -> str:\nreturn self._okta_conn.get_password()\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_url","title":"get_okta_url() -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_url(self) -> str:\nconn_type = self._okta_conn.conn_type\nhost = self._okta_conn.host\nschema = self._okta_conn.schema\nreturn f\"{conn_type}://{host}/{schema}\"\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_schedule","title":"get_schedule(wf_id: str, **kwargs: str)
","text":"get work flow schedule cron syntax
Source code inbrickflow_plugins/airflow/operators/external_tasks.py
def get_schedule(self, wf_id: str, **kwargs):\n\"\"\"\n get work flow schedule cron syntax\n \"\"\"\nraise Exception(\"Do not have implementation\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_task_run_status","title":"get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_task_run_status(\nself, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args\n):\ntoken_data = self.get_access_token()\napi_url = self.get_airflow_api_url(cluster_id)\nversion_nr = self.get_version(cluster_id)\ndag_id = wf_id\nheaders = {\n\"Content-Type\": \"application/json\",\n\"cache-control\": \"no-cache\",\n\"Authorization\": \"Bearer \" + token_data,\n}\no_task_status = \"UKN\"\nsession = requests.Session()\nretries = Retry(\ntotal=5, backoff_factor=1, status_forcelist=[502, 503, 504, 500]\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nif version_nr.startswith(\"1.\"):\nlog.info(\"this is 1.x cluster\")\nurl = (\napi_url\n+ \"/api/experimental\"\n+ \"/dags/\"\n+ dag_id\n+ \"/dag_runs/\"\n+ run_date\n+ \"/tasks/\"\n+ task_id\n)\nelse:\nurl = (\napi_url\n+ \"/api/v1/dags/\"\n+ dag_id\n+ \"/dagRuns/scheduled__\"\n+ run_date\n+ \"/taskInstances/\"\n+ task_id\n)\nlog.info(f\"url= {url.replace(' ', '')}\")\nresponse = session.get(url.replace(\" \", \"\"), headers=headers)\nlog.info(\nf\"response.status_code= {response.status_code} response.text= {response.text}\"\n)\nif response.status_code == 200:\nlog.info(f\"response= {response.text}\")\njson_obj = json.loads(response.text)\nif type(json_obj) == dict:\no_task_status = json_obj[\"state\"]\nreturn o_task_status\nreturn o_task_status\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_version","title":"get_version(cluster_id: str) -> str
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_version(self, cluster_id: str) -> str:\nsession = requests.Session()\nretries = Retry(\ntotal=10, backoff_factor=1, status_forcelist=[502, 503, 504, 500]\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nversion_check_url = (\nself.get_airflow_api_url(cluster_id) + \"/admin/rest_api/api?api=version\"\n)\nlogging.info(version_check_url)\notoken = self.get_access_token()\nheaders = {\"Authorization\": \"Bearer \" + otoken, \"Accept\": \"application/json\"}\nout_version = \"UKN\"\nresponse = session.get(version_check_url, headers=headers, verify=False)\nif response.status_code == HTTPStatus.OK:\nout_version = response.json()[\"output\"]\nlog.info(response.text.encode(\"utf8\"))\nsession.close()\nreturn out_version\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor","title":"brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor(external_dag_id, external_task_id, okta_conn_id, allowed_states = None, execution_delta = None, execution_delta_json = None, cluster_id = None, *args, **kwargs)
","text":" Bases: BaseSensorOperator
brickflow_plugins/airflow/operators/external_tasks.py
def __init__(\nself,\nexternal_dag_id,\nexternal_task_id,\nokta_conn_id,\nallowed_states=None,\nexecution_delta=None,\nexecution_delta_json=None,\ncluster_id=None,\n*args,\n**kwargs,\n):\nsuper(TaskDependencySensor, self).__init__(*args, **kwargs)\nself.okta_conn_id = okta_conn_id\nself.allowed_states = allowed_states or [\"success\"]\nif execution_delta_json and execution_delta:\nraise Exception(\n\"Only one of `execution_date` or `execution_delta_json` maybe provided to Sensor; not more than one.\"\n)\nself.external_dag_id = external_dag_id\nself.external_task_id = external_task_id\nself.allowed_states = allowed_states\nself.execution_delta = execution_delta\nself.execution_delta_json = execution_delta_json\nself.cluster_id = cluster_id\nself._poke_count = 0\nself.dbx_wf_id = kwargs.get(\"dbx_wf_id\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor-attributes","title":"Attributes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.allowed_states","title":"allowed_states = allowed_states
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.cluster_id","title":"cluster_id = cluster_id
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.dbx_wf_id","title":"dbx_wf_id = kwargs.get('dbx_wf_id')
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.execution_delta","title":"execution_delta = execution_delta
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.execution_delta_json","title":"execution_delta_json = execution_delta_json
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.external_dag_id","title":"external_dag_id = external_dag_id
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.external_task_id","title":"external_task_id = external_task_id
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.okta_conn_id","title":"okta_conn_id = okta_conn_id
instance-attribute
","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.poke","title":"poke(context)
","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def poke(self, context):\nlog.info(f\"executing poke.. {self._poke_count}\")\nself._poke_count = self._poke_count + 1\nlogging.info(\"Poking.. {0} round\".format(str(self._poke_count)))\nexec_time = context[\"execution_date\"]\ntask_status = MapDagScheduleHelper(self.okta_conn_id).get_task_run_status(\nwf_id=self.external_dag_id,\ntask_id=self.external_task_id,\nrun_date=exec_time,\ncluster_id=self.cluster_id,\n)\nlog.info(f\"task_status= {task_status}\")\nif task_status not in self.allowed_states:\ncount = 0\nelse:\ncount = 1\nreturn count\n
"},{"location":"api/airflow_native_operators/","title":"AirflowNativeOperators","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-attributes","title":"Attributes","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-classes","title":"Classes","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier
","text":" Bases: OperatorModifier
modify(operator: BashOperator, task: Task, workflow: Workflow) -> Optional[BashOperator]
","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BashOperator)\ndef modify(\nself, operator: BashOperator, task: Task, workflow: Workflow\n) -> Optional[\"BashOperator\"]:\nf = types.MethodType(_bash_execute, operator)\noperator.execute = f\noperator.on_kill = _bash_empty_on_kill\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier
","text":" Bases: OperatorModifier
modify(operator: BranchPythonOperator, task: Task, workflow: Workflow) -> Optional[BranchPythonOperator]
","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BranchPythonOperator)\ndef modify(\nself, operator: BranchPythonOperator, task: Task, workflow: Workflow\n) -> Optional[\"BranchPythonOperator\"]:\nf = types.MethodType(_skip_all_except, operator)\noperator.skip_all_except = f\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier
","text":" Bases: OperatorModifier
modify(operator: ShortCircuitOperator, task: Task, workflow: Workflow) -> Optional[ShortCircuitOperator]
","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(ShortCircuitOperator)\ndef modify(\nself, operator: ShortCircuitOperator, task: Task, workflow: Workflow\n) -> Optional[\"ShortCircuitOperator\"]:\nf = types.MethodType(_short_circuit_execute, operator)\noperator.execute = f\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-functions","title":"Functions","text":""},{"location":"api/cli/","title":"CLI","text":""},{"location":"api/cli/#brickflow.cli-attributes","title":"Attributes","text":""},{"location":"api/cli/#brickflow.cli-classes","title":"Classes","text":""},{"location":"api/cli/#brickflow.cli.CdktfCmd","title":"brickflow.cli.CdktfCmd
","text":" Bases: click.Group
get_command(ctx: click.Context, cmd_name: str) -> Optional[click.Command]
","text":"Source code in brickflow/cli/__init__.py
def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Command]:\nif cmd_name == BrickflowDeployMode.CDKTF.value:\nreturn cdktf_command()\nelif cmd_name == BrickflowDeployMode.BUNDLE.value:\nreturn bundles_proxy_command()\n# elif cmd_name in [\"deploy\", \"diff\"]:\n# return cdktf_command(cmd_name)\nelse:\nrv = click.Group.get_command(self, ctx, cmd_name)\nif rv is not None:\nreturn rv\nraise ctx.fail(f\"No such command '{cmd_name}'.\")\n
"},{"location":"api/cli/#brickflow.cli-functions","title":"Functions","text":""},{"location":"api/cli/#brickflow.cli.bundle","title":"brickflow.cli.bundle() -> None
","text":"CLI for proxying to databricks bundles cli.
Source code inbrickflow/cli/__init__.py
@cli.command\ndef bundle() -> None:\n\"\"\"CLI for proxying to databricks bundles cli.\"\"\"\n# Hack for having bundle show up as a command in brickflow\n# with documentation.\npass # pragma: no cover\n
"},{"location":"api/cli/#brickflow.cli.bundles_proxy_command","title":"brickflow.cli.bundles_proxy_command() -> click.Command
","text":"Source code in brickflow/cli/__init__.py
def bundles_proxy_command() -> click.Command:\ndef run_bundle_command(args: Optional[List[str]] = None, **_: Any) -> None:\nbundle_cli_setup()\nbundle_cli = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value, \"databricks\"\n)\nlog_important_versions(bundle_cli)\nexec_command(bundle_cli, \"bundle\", args or [])\n@click.command(\nname=\"bundles_cmd\",\nshort_help=\"CLI for proxying to databricks bundles cli..\",\ncontext_settings={\"ignore_unknown_options\": True},\nadd_help_option=False,\n)\n@click.argument(\"args\", nargs=-1)\ndef cmd(args: List[str]) -> None:\n# check to make sure you are in project root and then set python path to whole dir\nrun_bundle_command(args=args)\nreturn cmd\n
"},{"location":"api/cli/#brickflow.cli.cdktf","title":"brickflow.cli.cdktf() -> None
","text":"CLI for proxying to cdktf cli.
Source code inbrickflow/cli/__init__.py
@cli.command\ndef cdktf() -> None:\n\"\"\"CLI for proxying to cdktf cli.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\npass # pragma: no cover\n
"},{"location":"api/cli/#brickflow.cli.cdktf_command","title":"brickflow.cli.cdktf_command(base_command: Optional[str] = None) -> click.Command
","text":"Source code in brickflow/cli/__init__.py
def cdktf_command(base_command: Optional[str] = None) -> click.Command:\n@click.command(\nname=\"cdktf_cmd\",\nshort_help=\"CLI for proxying to CDKTF cli.\",\ncontext_settings={\"ignore_unknown_options\": True},\nadd_help_option=False,\ndeprecated=True,\n)\n@click.argument(\"args\", nargs=-1)\ndef cmd(args: Tuple[str]) -> None:\n# check to make sure you are in project root and then set python path to whole dir\nexec_cdktf_command(base_command, args)\nreturn cmd\n
"},{"location":"api/cli/#brickflow.cli.cdktf_env_set_options","title":"brickflow.cli.cdktf_env_set_options(f: Callable) -> Callable
","text":"Source code in brickflow/cli/__init__.py
def cdktf_env_set_options(f: Callable) -> Callable:\ndef local_mode_callback(ctx: click.Context, param: str, value: Any) -> None: # noqa\n# pylint: disable=unused-argument\nif value is not None and value is True:\n_ilog.info(\n\"Configuring environment to %s...\",\nBrickflowDefaultEnvs.LOCAL.value,\n)\nos.environ[\nBrickflowEnvVars.BRICKFLOW_ENV.value\n] = BrickflowDefaultEnvs.LOCAL.value\ndef deploy_only_workflows(\nctx: click.Context, param: str, value: Any\n) -> None: # noqa\n# pylint: disable=unused-argument\nif value:\nfor file in value:\nif file[-3:] != \".py\":\nraise ClickException(\"Should pass only python files as workflows\")\n_ilog.info(\"Brickflow will only deploy workflows: %s\", \", \".join(value))\nos.environ[\nBrickflowEnvVars.BRICKFLOW_DEPLOY_ONLY_WORKFLOWS.value\n] = \",\".join(value)\ndef set_up_cdktf_for_workflow_dir(\nctx: click.Context, param: str, value: Any # noqa\n) -> None:\nif value is not None:\nreturn value\noptions = [\nclick.option(\n\"--local-mode\",\n\"-l\",\nis_flag=True,\ncallback=local_mode_callback,\nhelp=\"Set the environment flag to local and other components [TBD] are disabled in local mode.\",\n),\nclick.option(\n\"--workflows-dir\",\n\"-wd\",\ntype=click.Path(exists=True, file_okay=False),\nprompt=INTERACTIVE_MODE,\ncallback=set_up_cdktf_for_workflow_dir,\nhelp=\"Provide the workflow directory that has to be deployed\",\n),\nclick.option(\n\"--workflow\",\n\"-w\",\ntype=str,\nmultiple=True,\ncallback=deploy_only_workflows,\nhelp=\"\"\"Provide the workflow file names which you want to deploy, each file name separated by space!\n Example: bf deploy -p DEFAULT -l -w wf1.py -w wf2.py\"\"\",\n),\nclick.option(\n\"--env\",\n\"-e\",\ndefault=BrickflowDefaultEnvs.LOCAL.value,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_ENV.value),\nhelp=\"Set the environment value, certain tags [TBD] get added to the workflows based on this 
value.\",\n),\nclick.option(\n\"--repo-url\",\n\"-r\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REPO.value),\nhelp=\"The github url in which to run brickflow with.\",\n),\nclick.option(\n\"--git-ref\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REF.value),\nhelp=\"The commit/tag/branch to use in github.\",\n),\nclick.option(\n\"--git-provider\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value),\nhelp=\"The github provider for brickflow this is used for configuring github on DBX jobs.\",\n),\nclick.option(\n\"--profile\",\n\"-p\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(\nBrickflowEnvVars.BRICKFLOW_DATABRICKS_CONFIG_PROFILE.value\n),\nhelp=\"The databricks profile to use for authenticating to databricks during deployment.\",\n),\n]\nfor option in options:\nf = option(f)\nreturn f\n
"},{"location":"api/cli/#brickflow.cli.cli","title":"brickflow.cli.cli() -> None
","text":"CLI for managing Databricks Workflows
Source code inbrickflow/cli/__init__.py
@click.group(invoke_without_command=True, no_args_is_help=True, cls=CdktfCmd)\n@click.version_option(prog_name=\"brickflow\")\ndef cli() -> None:\n\"\"\"CLI for managing Databricks Workflows\"\"\"\n
"},{"location":"api/cli/#brickflow.cli.deploy","title":"brickflow.cli.deploy(**kwargs: Any) -> None
","text":"CLI for deploying workflow projects.
Source code inbrickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--auto-approve\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Auto approve brickflow pipeline without being prompted to approve.\",\n)\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"cdktf\", \"bundle\"]),\nshow_default=True,\ndefault=\"cdktf\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--force-acquire-lock\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Force acquire lock for databricks bundles deploy.\",\n)\n@cdktf_env_set_options\ndef deploy(**kwargs: Any) -> None:\n\"\"\"CLI for deploying workflow projects.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.CDKTF:\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"deploy\", get_cdktf_specific_args(**kwargs))\nelse:\ndisable_project_name_in_env()\nbundle_deploy(**kwargs)\n
"},{"location":"api/cli/#brickflow.cli.destroy","title":"brickflow.cli.destroy(**kwargs: Any) -> None
","text":"CLI for destroying workflow projects.
Source code inbrickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--auto-approve\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Auto approve brickflow pipeline without being prompted to approve.\",\n)\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"cdktf\", \"bundle\"]),\nshow_default=True,\ndefault=\"cdktf\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--force-acquire-lock\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Force acquire lock for databricks bundles destroy.\",\n)\n@cdktf_env_set_options\ndef destroy(**kwargs: Any) -> None:\n\"\"\"CLI for destroying workflow projects.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.CDKTF:\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"destroy\", get_cdktf_specific_args(**kwargs))\nelse:\ndisable_project_name_in_env()\nbundle_destroy(**kwargs)\n
"},{"location":"api/cli/#brickflow.cli.diff","title":"brickflow.cli.diff(**kwargs: Any) -> None
","text":"CLI for identifying diff in projects (only cdktf supported).
Source code inbrickflow/cli/__init__.py
@cli.command\n@cdktf_env_set_options\ndef diff(**kwargs: Any) -> None:\n\"\"\"CLI for identifying diff in projects (only cdktf supported).\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"diff\", [])\n
"},{"location":"api/cli/#brickflow.cli.disable_project_name_in_env","title":"brickflow.cli.disable_project_name_in_env() -> None
","text":"Source code in brickflow/cli/__init__.py
def disable_project_name_in_env() -> None:\n# TODO: delete this when deploy commands are gone\n# used for legacy bundles deploy and destroy commands\n# disable multiple projects in same directory\nos.environ[BrickflowEnvVars.BRICKFLOW_USE_PROJECT_NAME.value] = \"False\"\n
"},{"location":"api/cli/#brickflow.cli.docs","title":"brickflow.cli.docs() -> None
","text":"Use to open docs in your browser...
Source code inbrickflow/cli/__init__.py
@cli.command\ndef docs() -> None:\n\"\"\"Use to open docs in your browser...\"\"\"\ndocs_site = \"https://verbose-garbanzo-6b8a1ae2.pages.github.io/\"\nwebbrowser.open(docs_site, new=2)\nclick.echo(f\"Opening browser for docs... site: {docs_site}\")\n
"},{"location":"api/cli/#brickflow.cli.get_cdktf_specific_args","title":"brickflow.cli.get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]
","text":"Source code in brickflow/cli/__init__.py
def get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]:\nargs = []\nif kwargs.get(\"auto_approve\", False) is True:\nargs.append(\"--auto-approve\")\nreturn args\n
"},{"location":"api/cli/#brickflow.cli.get_deployment_mode","title":"brickflow.cli.get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode
","text":"Source code in brickflow/cli/__init__.py
def get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode:\n# set deployment mode for cdktf or bundle\nos.environ[BrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value] = str(\nkwargs.get(\"deploy_mode\", BrickflowDeployMode.CDKTF.value)\n)\nif (\nkwargs.get(\"deploy_mode\", BrickflowDeployMode.CDKTF.value)\n== BrickflowDeployMode.CDKTF.value\n):\nreturn BrickflowDeployMode.CDKTF\nelse:\nreturn BrickflowDeployMode.BUNDLE\n
"},{"location":"api/cli/#brickflow.cli.make_cdktf_json","title":"brickflow.cli.make_cdktf_json(**kwargs: Any) -> None
","text":"Source code in brickflow/cli/__init__.py
def make_cdktf_json(**kwargs: Any) -> None:\nwd: Optional[str] = kwargs.get(\"workflows_dir\")\nif wd is None:\nraise ValueError(\n\"workflows_dir not set, please set it using --workflows-dir or -wd\"\n)\nidempotent_cdktf_out(wd)\n
"},{"location":"api/cli/#brickflow.cli.sync","title":"brickflow.cli.sync(**kwargs: Any) -> None
","text":"Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).
Source code inbrickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"bundle\"]),\nshow_default=True,\ndefault=\"bundle\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--watch\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Enable filewatcher to sync files over.\",\n)\n@click.option(\n\"--full\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Run a full sync.\",\n)\n@click.option(\n\"--interval-duration\",\ntype=str,\nshow_default=True,\ndefault=None,\nhelp=\"File system polling interval (for --watch).\",\n)\n@click.option(\n\"--debug\",\ntype=str,\nshow_default=True,\ndefault=None,\nhelp=\"File system polling interval (for --watch).\",\n)\n@cdktf_env_set_options\ndef sync(**kwargs: Any) -> None:\n\"\"\"Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).\"\"\"\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.BUNDLE:\nbundle_sync(**kwargs)\nelse:\nraise ClickException(\n\"Unsupported deploy mode for sync; currently only supports bundle deploy mode.\"\n)\n
"},{"location":"api/compute/","title":"Compute","text":""},{"location":"api/compute/#brickflow.engine.compute-classes","title":"Classes","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster","title":"brickflow.engine.compute.Cluster
dataclass
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster-attributes","title":"Attributes","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.aws_attributes","title":"aws_attributes: Optional[Dict[str, Any]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.custom_tags","title":"custom_tags: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.data_security_mode","title":"data_security_mode: str = DataSecurityMode.SINGLE_USER
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.dlt_auto_scale_mode","title":"dlt_auto_scale_mode: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.driver_instance_pool_id","title":"driver_instance_pool_id: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.driver_node_type_id","title":"driver_node_type_id: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.enable_elastic_disk","title":"enable_elastic_disk: Optional[bool] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.existing_cluster_id","title":"existing_cluster_id: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.init_scripts","title":"init_scripts: Optional[List[Dict[str, str]]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.instance_pool_id","title":"instance_pool_id: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.is_new_job_cluster","title":"is_new_job_cluster: bool
property
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.job_task_field_dict","title":"job_task_field_dict: Dict[str, str]
property
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.max_workers","title":"max_workers: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.min_workers","title":"min_workers: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.name","title":"name: str
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.node_type_id","title":"node_type_id: str
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.num_workers","title":"num_workers: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.policy_id","title":"policy_id: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.runtime_engine","title":"runtime_engine: Optional[Literal['STANDARD', 'PHOTON']] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_conf","title":"spark_conf: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_env_vars","title":"spark_env_vars: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_version","title":"spark_version: str
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster-functions","title":"Functions","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.__hash__","title":"__hash__() -> int
","text":"Source code in brickflow/engine/compute.py
def __hash__(self) -> int:\n# dedupe dicts and lists which are default un hashable. Easiest way to identify dupes.\nreturn hash(json.dumps(self.as_dict()))\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.__post_init__","title":"__post_init__() -> None
","text":"Source code in brickflow/engine/compute.py
def __post_init__(self) -> None:\nself.validate()\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.as_dict","title":"as_dict(is_dlt_cluster: bool = False, allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> Dict[str, Any]
","text":"Source code in brickflow/engine/compute.py
def as_dict(\nself,\nis_dlt_cluster: bool = False,\nallowed_fields: Optional[List[str]] = None,\nremove_fields: Optional[List[str]] = None,\n) -> Dict[str, Any]:\nd = dataclasses.asdict(self)\nd = {**d, **self.autoscale(is_dlt_cluster=is_dlt_cluster)}\n# if allowed fields are provided and check if value is in set\nself.cleanup(d, allowed_fields=allowed_fields, remove_fields=remove_fields)\nreturn d\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.autoscale","title":"autoscale(is_dlt_cluster: bool = False) -> Dict[str, Any]
","text":"Source code in brickflow/engine/compute.py
def autoscale(self, is_dlt_cluster: bool = False) -> Dict[str, Any]:\nif self.min_workers is not None and self.max_workers is not None:\nresp: Dict[str, Dict[str, Optional[str | int]]] = {\n\"autoscale\": {\n\"min_workers\": self.min_workers,\n\"max_workers\": self.max_workers,\n}\n}\nif is_dlt_cluster is True:\nresp[\"autoscale\"][\"mode\"] = self.dlt_auto_scale_mode\nreturn resp\nreturn {}\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.cleanup","title":"cleanup(d: Dict[str, Any], allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> None
staticmethod
","text":"Source code in brickflow/engine/compute.py
@staticmethod\ndef cleanup(\nd: Dict[str, Any],\nallowed_fields: Optional[List[str]] = None,\nremove_fields: Optional[List[str]] = None,\n) -> None:\nd.pop(\"min_workers\", None)\nd.pop(\"max_workers\", None)\nd.pop(\"dlt_auto_scale_mode\", None)\nd.pop(\"existing_cluster_id\", None)\nremove_fields = remove_fields or []\nfor k in list(d.keys()):\n# if allowed fields are provided and check if value is in set\nif allowed_fields and k not in allowed_fields:\nd.pop(k, None)\nif k in remove_fields:\nd.pop(k, None)\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.from_existing_cluster","title":"from_existing_cluster(existing_cluster_id: str) -> 'Cluster'
classmethod
","text":"Source code in brickflow/engine/compute.py
@classmethod\ndef from_existing_cluster(cls, existing_cluster_id: str) -> \"Cluster\":\n# just some stub value\nreturn Cluster(\nexisting_cluster_id,\nexisting_cluster_id,\nexisting_cluster_id,\nexisting_cluster_id=existing_cluster_id,\n)\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.validate","title":"validate() -> None
","text":"Source code in brickflow/engine/compute.py
def validate(self) -> None:\nassert not (\nself.num_workers is not None\nand self.min_workers is not None\nand self.max_workers is not None\n), \"Num workers should not be provided with min and max workers\"\nassert not (\n(self.min_workers is None and self.max_workers is not None)\nor (self.min_workers is not None and self.max_workers is None)\n), \"Both min workers and max workers should be present if one is provided\"\n# noinspection PyTypeChecker\nassert not (\n(self.min_workers is not None and self.max_workers is not None)\nand (self.min_workers > self.max_workers)\n), \"Min workers should be less than max workers\"\n
"},{"location":"api/compute/#brickflow.engine.compute.Runtimes","title":"brickflow.engine.compute.Runtimes
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes-attributes","title":"Attributes","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS","title":"RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS = '10.4.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS","title":"RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS = '10.4.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS = '10.4.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS = '10.4.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS","title":"RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS = '10.4.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_SCALA2_12_LTS","title":"RUNTIME_10_4_X_SCALA2_12_LTS = '10.4.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12 = '10.5.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_AARCH64_SCALA2_12","title":"RUNTIME_10_5_X_AARCH64_SCALA2_12 = '10.5.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_CPU_ML_SCALA2_12","title":"RUNTIME_10_5_X_CPU_ML_SCALA2_12 = '10.5.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_GPU_ML_SCALA2_12","title":"RUNTIME_10_5_X_GPU_ML_SCALA2_12 = '10.5.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_PHOTON_SCALA2_12","title":"RUNTIME_10_5_X_PHOTON_SCALA2_12 = '10.5.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_SCALA2_12","title":"RUNTIME_10_5_X_SCALA2_12 = '10.5.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12 = '11.0.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_AARCH64_SCALA2_12","title":"RUNTIME_11_0_X_AARCH64_SCALA2_12 = '11.0.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_0_X_CPU_ML_SCALA2_12 = '11.0.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_0_X_GPU_ML_SCALA2_12 = '11.0.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_PHOTON_SCALA2_12","title":"RUNTIME_11_0_X_PHOTON_SCALA2_12 = '11.0.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_SCALA2_12","title":"RUNTIME_11_0_X_SCALA2_12 = '11.0.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12 = '11.1.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_AARCH64_SCALA2_12","title":"RUNTIME_11_1_X_AARCH64_SCALA2_12 = '11.1.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_1_X_CPU_ML_SCALA2_12 = '11.1.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_1_X_GPU_ML_SCALA2_12 = '11.1.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_PHOTON_SCALA2_12","title":"RUNTIME_11_1_X_PHOTON_SCALA2_12 = '11.1.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_SCALA2_12","title":"RUNTIME_11_1_X_SCALA2_12 = '11.1.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12 = '11.2.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_AARCH64_SCALA2_12","title":"RUNTIME_11_2_X_AARCH64_SCALA2_12 = '11.2.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_2_X_CPU_ML_SCALA2_12 = '11.2.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_2_X_GPU_ML_SCALA2_12 = '11.2.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_PHOTON_SCALA2_12","title":"RUNTIME_11_2_X_PHOTON_SCALA2_12 = '11.2.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_SCALA2_12","title":"RUNTIME_11_2_X_SCALA2_12 = '11.2.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12 = '11.3.x-aarch64-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_AARCH64_SCALA2_12","title":"RUNTIME_11_3_X_AARCH64_SCALA2_12 = '11.3.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_3_X_CPU_ML_SCALA2_12 = '11.3.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_3_X_GPU_ML_SCALA2_12 = '11.3.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_PHOTON_SCALA2_12","title":"RUNTIME_11_3_X_PHOTON_SCALA2_12 = '11.3.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_SCALA2_12","title":"RUNTIME_11_3_X_SCALA2_12 = '11.3.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS = '7.3.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS = '7.3.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_HLS_SCALA2_12_LTS","title":"RUNTIME_7_3_X_HLS_SCALA2_12_LTS = '7.3.x-hls-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_SCALA2_12_LTS","title":"RUNTIME_7_3_X_SCALA2_12_LTS = '7.3.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS","title":"RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS = '9.1.x-aarch64-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS = '9.1.x-cpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS = '9.1.x-gpu-ml-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS","title":"RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS = '9.1.x-photon-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_SCALA2_12_LTS","title":"RUNTIME_9_1_X_SCALA2_12_LTS = '9.1.x-scala2.12'
class-attribute
instance-attribute
","text":""},{"location":"api/context/","title":"Context","text":""},{"location":"api/context/#brickflow.context.context-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BRANCH_SKIP_EXCEPT","title":"brickflow.context.context.BRANCH_SKIP_EXCEPT = 'branch_skip_except'
module-attribute
","text":""},{"location":"api/context/#brickflow.context.context.RETURN_VALUE_KEY","title":"brickflow.context.context.RETURN_VALUE_KEY = 'return_value'
module-attribute
","text":""},{"location":"api/context/#brickflow.context.context.SKIP_EXCEPT_HACK","title":"brickflow.context.context.SKIP_EXCEPT_HACK = 'brickflow_hack_skip_all'
module-attribute
","text":""},{"location":"api/context/#brickflow.context.context.T","title":"brickflow.context.context.T = TypeVar('T')
module-attribute
","text":""},{"location":"api/context/#brickflow.context.context.ctx","title":"brickflow.context.context.ctx = Context()
module-attribute
","text":""},{"location":"api/context/#brickflow.context.context-classes","title":"Classes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables","title":"brickflow.context.context.BrickflowBuiltInTaskVariables
","text":" Bases: Enum
job_id = 'brickflow_job_id'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.parent_run_id","title":"parent_run_id = 'brickflow_parent_run_id'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.run_id","title":"run_id = 'brickflow_run_id'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.start_date","title":"start_date = 'brickflow_start_date'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.start_time","title":"start_time = 'brickflow_start_time'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.task_key","title":"task_key = 'brickflow_task_key'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.task_retry_count","title":"task_retry_count = 'brickflow_task_retry_count'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables","title":"brickflow.context.context.BrickflowInternalVariables
","text":" Bases: Enum
env = BrickflowEnvVars.BRICKFLOW_ENV.value.lower()
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.only_run_tasks","title":"only_run_tasks = 'brickflow_internal_only_run_tasks'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.task_id","title":"task_id = 'brickflow_internal_task_name'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_id","title":"workflow_id = 'brickflow_internal_workflow_name'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_prefix","title":"workflow_prefix = 'brickflow_internal_workflow_prefix'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_suffix","title":"workflow_suffix = 'brickflow_internal_workflow_suffix'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs","title":"brickflow.context.context.BrickflowTaskComs
dataclass
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.dbutils","title":"dbutils: Optional[Any] = None
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.storage","title":"storage: Dict[str, Any] = field(init=False, default_factory=lambda : {})
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.get","title":"get(task_id: str, key: Optional[str] = None) -> Any
","text":"Source code in brickflow/context/context.py
def get(self, task_id: str, key: Optional[str] = None) -> Any:\nif key is None:\nreturn BrickflowTaskComsDict(task_id=task_id, task_coms=self)\nif self.dbutils is not None:\nencoded_value = self.dbutils.jobs.taskValues.get(\nkey=key, taskKey=task_id, debugValue=\"debug\"\n)\nreturn BrickflowTaskComsObject.from_encoded_value(encoded_value).value\nelse:\n# TODO: logging using local task coms\nencoded_value = self.storage[self._key(task_id, key)]\nreturn BrickflowTaskComsObject.from_encoded_value(encoded_value).value\n
"},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.put","title":"put(task_id: str, key: str, value: Any) -> None
","text":"Source code in brickflow/context/context.py
def put(self, task_id: str, key: str, value: Any) -> None:\nencoded_value = BrickflowTaskComsObject(value).to_encoded_value\nif self.dbutils is not None:\nself.dbutils.jobs.taskValues.set(key, encoded_value)\nelse:\n# TODO: logging using local task coms\nself.storage[self._key(task_id, key)] = encoded_value\n
"},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict","title":"brickflow.context.context.BrickflowTaskComsDict
dataclass
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict.task_coms","title":"task_coms: BrickflowTaskComs
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict.task_id","title":"task_id: str
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject","title":"brickflow.context.context.BrickflowTaskComsObject
dataclass
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.to_encoded_value","title":"to_encoded_value: str
property
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.value","title":"value: Any
property
","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.from_encoded_value","title":"from_encoded_value(encoded_value: Union[str, bytes]) -> BrickflowTaskComsObject
classmethod
","text":"Source code in brickflow/context/context.py
@classmethod\ndef from_encoded_value(\ncls, encoded_value: Union[str, bytes]\n) -> \"BrickflowTaskComsObject\":\ntry:\n_encoded_value = (\nencoded_value\nif isinstance(encoded_value, bytes)\nelse encoded_value.encode(\"utf-8\")\n)\nb64_bytes = base64.b64decode(_encoded_value)\nreturn cls(pickle.loads(b64_bytes).value)\nexcept binascii.Error:\n_decoded_value = (\nencoded_value.decode(\"utf-8\")\nif isinstance(encoded_value, bytes)\nelse encoded_value\n)\nreturn cls(_decoded_value)\n
"},{"location":"api/context/#brickflow.context.context.Context","title":"brickflow.context.context.Context() -> None
","text":"Source code in brickflow/context/context.py
def __init__(self) -> None:\n# Order of init matters todo: fix this\nself._dbutils: Optional[Any] = None\nself._spark: Optional[Any] = None\nself._task_coms: BrickflowTaskComs\nself._current_task: Optional[str] = None\nself._configure()\nself._current_project: Optional[str] = None\n
"},{"location":"api/context/#brickflow.context.context.Context-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.Context.current_project","title":"current_project: Optional[str]
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.current_task","title":"current_task: Optional[str]
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.dbutils","title":"dbutils: DBUtils
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.env","title":"env: str
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.log","title":"log: logging.Logger
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.spark","title":"spark: SparkSession
property
","text":""},{"location":"api/context/#brickflow.context.context.Context.task_coms","title":"task_coms: BrickflowTaskComs
property
","text":""},{"location":"api/context/#brickflow.context.context.Context-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.Context.dbutils_widget_get_or_else","title":"dbutils_widget_get_or_else(key: str, debug: Optional[str]) -> Optional[str]
","text":"Source code in brickflow/context/context.py
@deprecated\ndef dbutils_widget_get_or_else(\nself, key: str, debug: Optional[str]\n) -> Optional[str]:\ntry:\nreturn self.dbutils.widgets.get(key)\nexcept Exception:\n# todo: log error\nreturn debug\n
"},{"location":"api/context/#brickflow.context.context.Context.get_by_env","title":"get_by_env(purpose: str, *, default: Optional[T] = None, local: Optional[T] = None, dev: Optional[T] = None, non_prod: Optional[T] = None, test: Optional[T] = None, qa: Optional[T] = None, prod: Optional[T] = None, uat: Optional[T] = None, **kwargs: Optional[T]) -> Optional[T]
","text":"Source code in brickflow/context/context.py
def get_by_env(\nself,\npurpose: str,\n*,\ndefault: Optional[T] = None,\nlocal: Optional[T] = None,\ndev: Optional[T] = None,\nnon_prod: Optional[T] = None,\ntest: Optional[T] = None,\nqa: Optional[T] = None,\nprod: Optional[T] = None,\nuat: Optional[T] = None,\n**kwargs: Optional[T],\n) -> Optional[T]:\n# deep copy without modifying kwargs\ndef add_if_not_none(\n_d: Dict[str, Optional[T]], _k: str, _v: Optional[T]\n) -> None:\nif _v is None:\nreturn\n_d[_k] = _v\n_dict = copy.deepcopy(kwargs)\nadd_if_not_none(_dict, \"local\", local)\nadd_if_not_none(_dict, \"non_prod\", non_prod)\nadd_if_not_none(_dict, \"dev\", dev)\nadd_if_not_none(_dict, \"test\", test)\nadd_if_not_none(_dict, \"qa\", qa)\nadd_if_not_none(_dict, \"prod\", prod)\nadd_if_not_none(_dict, \"uat\", uat)\n_env = self.env\n_ilog.info(\"Configuring: %s; Using env: '%s' to fetch value...\", purpose, _env)\nif _env not in _dict and default is None:\nraise KeyError(\nf\"Configuring: {purpose}; Unable to find environment key: {_env}, \"\nf\"only found env definitions: {list(_dict.keys())}\"\n)\nif _env not in _dict and default is not None:\n_ilog.info(\n\"Configuring: %s; Found no value configured with env: '%s' using default value...\",\npurpose,\n_env,\n)\nres = _dict.get(_env, default)\nreturn res\n
"},{"location":"api/context/#brickflow.context.context.Context.get_parameter","title":"get_parameter(key: str, debug: Optional[str] = None) -> Optional[str]
","text":"Source code in brickflow/context/context.py
def get_parameter(self, key: str, debug: Optional[str] = None) -> Optional[str]:\ntry:\nreturn self.dbutils.widgets.get(key)\nexcept Exception:\n# todo: log error\n_ilog.debug(\"Unable to get parameter: %s from dbutils\", key)\nreturn debug\n
"},{"location":"api/context/#brickflow.context.context.Context.get_return_value","title":"get_return_value(task_key: Union[str, Callable]) -> Any
","text":"Source code in brickflow/context/context.py
def get_return_value(self, task_key: Union[str, Callable]) -> Any:\ntask_key = task_key.__name__ if callable(task_key) else task_key\nreturn self.task_coms.get(task_key, RETURN_VALUE_KEY)\n
"},{"location":"api/context/#brickflow.context.context.Context.is_local","title":"is_local() -> bool
","text":"Source code in brickflow/context/context.py
def is_local(self) -> bool:\nreturn self.env == BrickflowDefaultEnvs.LOCAL.value\n
"},{"location":"api/context/#brickflow.context.context.Context.job_id","title":"job_id(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the job_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.job_id)\ndef job_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the job_id value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.parent_run_id","title":"parent_run_id(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the parent_run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.parent_run_id)\ndef parent_run_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the parent_run_id value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.run_id","title":"run_id(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.run_id)\ndef run_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the run_id value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.set_current_project","title":"set_current_project(project: str) -> None
","text":"Source code in brickflow/context/context.py
def set_current_project(self, project: str) -> None:\nself._current_project = project\n
"},{"location":"api/context/#brickflow.context.context.Context.skip_all_except","title":"skip_all_except(branch_task: Union[Callable, str]) -> None
","text":"Source code in brickflow/context/context.py
def skip_all_except(self, branch_task: Union[Callable, str]) -> None:\nif self._current_task is None:\nraise RuntimeError(\"Current task is empty unable to skip...\")\nbranch_task_key = (\nbranch_task.__name__\nif callable(branch_task) and hasattr(branch_task, \"__name__\") is True\nelse branch_task\n)\nself._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, branch_task_key)\n
"},{"location":"api/context/#brickflow.context.context.Context.skip_all_following","title":"skip_all_following() -> None
","text":"Source code in brickflow/context/context.py
def skip_all_following(self) -> None:\nif self._current_task is None:\nraise RuntimeError(\"Current task is empty unable to skip...\")\nself._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)\n
"},{"location":"api/context/#brickflow.context.context.Context.start_date","title":"start_date(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the start_date value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_date)\ndef start_date(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the start_date value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.start_time","title":"start_time(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the start_time value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_time)\ndef start_time(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the start_time value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.task_key","title":"task_key(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the task_key value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.task_key)\ndef task_key(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the task_key value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.task_retry_count","title":"task_retry_count(*, debug: Optional[str] = None) -> Any
","text":"This function fetches the task_retry_count value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.
Source code inbrickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.task_retry_count)\ndef task_retry_count(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n This function fetches the task_retry_count value using the bind_variable decorator.\n The implementation is intentionally empty because the decorator handles the logic.\n \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.ContextMode","title":"brickflow.context.context.ContextMode
","text":" Bases: Enum
databricks = 'databricks'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.ContextMode.not_databricks","title":"not_databricks = 'not_databricks'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context.TaskComsObjectResult","title":"brickflow.context.context.TaskComsObjectResult
","text":" Bases: Enum
NO_RESULTS = 'NO_RESULTS'
class-attribute
instance-attribute
","text":""},{"location":"api/context/#brickflow.context.context-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.bind_variable","title":"brickflow.context.context.bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable
","text":"Source code in brickflow/context/context.py
def bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable:\ndef wrapper(f: Callable) -> Callable:\n@functools.wraps(f)\ndef func(*args, **kwargs): # type: ignore\n_self: Context = args[0] # type: ignore\ndebug = kwargs[\"debug\"]\nf(*args, **kwargs) # no-op\nif _self.dbutils is not None:\nreturn _self.get_parameter(builtin.value, debug)\nreturn debug\nreturn func\nreturn wrapper\n
"},{"location":"api/misc/","title":"Misc","text":""},{"location":"api/project/","title":"Project","text":""},{"location":"api/project/#brickflow.engine.project-classes","title":"Classes","text":""},{"location":"api/project/#brickflow.engine.project.Project","title":"brickflow.engine.project.Project
dataclass
","text":""},{"location":"api/project/#brickflow.engine.project.Project-attributes","title":"Attributes","text":""},{"location":"api/project/#brickflow.engine.project.Project.batch","title":"batch: bool = True
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.bundle_base_path","title":"bundle_base_path: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.bundle_obj_name","title":"bundle_obj_name: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.codegen_kwargs","title":"codegen_kwargs: Optional[Dict[str, Any]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.codegen_mechanism","title":"codegen_mechanism: Optional[Type[CodegenInterface]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.debug_execute_task","title":"debug_execute_task: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.debug_execute_workflow","title":"debug_execute_workflow: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.entry_point_path","title":"entry_point_path: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.git_reference","title":"git_reference: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.git_repo","title":"git_repo: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.libraries","title":"libraries: Optional[List[TaskLibrary]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.mode","title":"mode: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.name","title":"name: str
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.provider","title":"provider: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project.s3_backend","title":"s3_backend: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/project/#brickflow.engine.project.Project-functions","title":"Functions","text":""},{"location":"api/project/#brickflow.engine.project.Project.__enter__","title":"__enter__() -> _Project
","text":"Source code in brickflow/engine/project.py
def __enter__(self) -> \"_Project\":\nself._project = _Project(\nself.name,\nself.git_repo,\nself.provider,\nself.git_reference,\nself.s3_backend,\nself.entry_point_path,\nlibraries=self.libraries or [],\nbatch=self.batch,\nbundle_obj_name=self.bundle_obj_name,\nbundle_base_path=self.bundle_base_path,\n)\nreturn self._project\n
"},{"location":"api/project/#brickflow.engine.project.Project.__exit__","title":"__exit__(exc_type, exc_val, exc_tb) -> None
","text":"Source code in brickflow/engine/project.py
def __exit__(self, exc_type, exc_val, exc_tb) -> None: # type: ignore\nif exc_type is not None:\nerror_types = {Stage.deploy: DeployError, Stage.execute: ExecuteError}\nraise error_types[self._mode](\nf\"Oops... failed during: {self._mode}\"\n) from exc_val\nif len(self._project.workflows) == 0:\n_ilog.info(\"Doing nothing no workflows...\")\nreturn\nif self._mode.value == Stage.deploy.value:\n_ilog.info(\"Deploying changes... to %s\", ctx.env)\nif self.codegen_mechanism is None:\nraise ValueError(\n\"codegen_mechanism cannot be None; please raise a github issue for this.\"\n)\ncodegen = self.codegen_mechanism(\nproject=self._project,\nid_=f\"{ctx.env}_{self.name}\",\nenv=ctx.env,\n**(self.codegen_kwargs or {}),\n)\ncodegen.synth()\nif self._mode.value == Stage.execute.value:\nwf_id = ctx.get_parameter(\nBrickflowInternalVariables.workflow_id.value,\nself.debug_execute_workflow,\n)\nt_id = ctx.get_parameter(\nBrickflowInternalVariables.task_id.value, self.debug_execute_task\n)\nif wf_id is None or t_id is None:\n_ilog.info(\n\"No workflow id or task key was able to found; doing nothing...\"\n)\nreturn\nworkflow = self._project.get_workflow(wf_id)\ntask = workflow.get_task(t_id)\ntask.execute()\n
"},{"location":"api/project/#brickflow.engine.project.Project.__post_init__","title":"__post_init__() -> None
","text":"Source code in brickflow/engine/project.py
def __post_init__(self) -> None:\nself._mode = Stage[\nconfig(BrickflowEnvVars.BRICKFLOW_MODE.value, default=Stage.execute.value)\n]\nself.entry_point_path = self.entry_point_path or get_caller_info()\n# setup current_project\nenv_project_name = config(\nBrickflowEnvVars.BRICKFLOW_PROJECT_NAME.value, default=None\n)\nif (\nenv_project_name is not None\nand self.name is not None\nand env_project_name != self.name\n):\nraise ValueError(\n\"Project name in config files and entrypoint must be the same\"\n)\nctx.set_current_project(self.name or env_project_name) # always setup first\n# populate bundle info via env vars\nself.bundle_obj_name = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_OBJ_NAME.value,\ndefault=\".brickflow_bundles\",\n)\nself.bundle_base_path = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_BASE_PATH.value,\ndefault=\"/Users/${workspace.current_user.userName}\",\n)\nself.git_reference = config(\nBrickflowEnvVars.BRICKFLOW_GIT_REF.value, default=self.get_git_ref()\n)\nif (\nself._mode == Stage.deploy\nand ctx.is_local() is False\nand self.git_reference is None\n):\nraise ValueError(\n\"git_reference must be set when deploying to non-local envs\"\n)\nself.provider = config(\nBrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value, default=self.provider\n)\nself.git_repo = config(\nBrickflowEnvVars.BRICKFLOW_GIT_REPO.value, default=self.git_repo\n)\nif self.s3_backend is None:\nself.s3_backend = {\n\"bucket\": config(\"BRICKFLOW_S3_BACKEND_BUCKET\", default=None),\n\"key\": config(\"BRICKFLOW_S3_BACKEND_KEY\", default=None),\n\"region\": config(\"BRICKFLOW_S3_BACKEND_REGION\", default=None),\n\"dynamodb_table\": config(\n\"BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE\", default=None\n),\n}\nif all(value is None for value in self.s3_backend.values()):\nself.s3_backend = None\ndeployment_mode = config(\nBrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value, default=\"cdktf\"\n)\nif deployment_mode == BrickflowDeployMode.CDKTF.value:\nself.codegen_mechanism = HashicorpCDKTFGen\nelif 
deployment_mode == BrickflowDeployMode.BUNDLE.value:\nself.codegen_mechanism = DatabricksBundleCodegen\nif self.codegen_kwargs is None:\nself.codegen_kwargs = {}\n
"},{"location":"api/project/#brickflow.engine.project.Project.get_git_ref","title":"get_git_ref() -> Optional[str]
","text":"Source code in brickflow/engine/project.py
def get_git_ref(self) -> Optional[str]:\nif self._mode == Stage.deploy:\nif self.git_reference is not None:\nreturn self.git_reference\nelse:\ntry:\nreturn f\"commit/{get_current_commit()}\"\nexcept Exception:\n_ilog.warning(\n\"Unable to get current commit; defaulting to empty string\"\n)\nreturn \"commit/fake-local-stub\" if ctx.is_local() else None\nelse:\nreturn self.git_reference if self.git_reference is not None else \"\"\n
"},{"location":"api/secrets/","title":"Secrets","text":""},{"location":"api/secrets/#brickflow_plugins.secrets-attributes","title":"Attributes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND","title":"brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND = 'brickflow_secrets_backend'
module-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl","title":"brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl = pluggy.HookimplMarker(BRICKFLOW_SECRETS_BACKEND)
module-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.brickflow_secrets_plugin_spec","title":"brickflow_plugins.secrets.brickflow_secrets_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_SECRETS_BACKEND)
module-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets-classes","title":"Classes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper","title":"brickflow_plugins.secrets.AbstractSecretsHelper
","text":" Bases: abc.ABC
PROTOCOL_STARTS_WITH: Optional[Union[str, List[str]]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper.get_secret_value_from_url","title":"get_secret_value_from_url(url_parsed_result: ParseResult)
","text":"Source code in brickflow_plugins/secrets/__init__.py
def get_secret_value_from_url(self, url_parsed_result: ParseResult):\nallowed_protocols = (\n[self.PROTOCOL_STARTS_WITH]\nif isinstance(self.PROTOCOL_STARTS_WITH, str)\nelse self.PROTOCOL_STARTS_WITH\n)\nif self.PROTOCOL_STARTS_WITH is not None and not any(\n[\nurl_parsed_result.scheme.lower().startswith(protocol)\nfor protocol in allowed_protocols\n]\n):\nreturn None\nreturn self._get_secret_value_from_url(url_parsed_result)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper","title":"brickflow_plugins.secrets.B64SecretsHelper
","text":" Bases: AbstractSecretsHelper
PROTOCOL_STARTS_WITH = ['base64', 'b64']
class-attribute
instance-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl
","text":" Bases: BrickflowSecretPluginSpec
get_secret_value(url_parsed_result: ParseResult) -> Optional['str']
staticmethod
","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\nreturn B64SecretsHelper().get_secret_value_from_url(url_parsed_result)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec","title":"brickflow_plugins.secrets.BrickflowSecretPluginSpec
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec.get_secret_value","title":"get_secret_value(url_parsed_result: ParseResult) -> Optional['str']
staticmethod
","text":"Custom execute method that is able to be plugged in.
Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_plugin_spec(firstresult=True)\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\n\"\"\"Custom execute method that is able to be plugged in.\"\"\"\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend","title":"brickflow_plugins.secrets.BrickflowSecretsBackend
","text":" Bases: BaseSecretsBackend
get_conn_value(conn_id: str) -> str | None
","text":"Source code in brickflow_plugins/secrets/__init__.py
def get_conn_value(self, conn_id: str) -> str | None:\nparsed_url = urlparse(conn_id)\nreturn get_brickflow_tasks_hook().get_secret_value(url_parsed_result=parsed_url)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend.set_backend_env","title":"set_backend_env()
","text":"Source code in brickflow_plugins/secrets/__init__.py
def set_backend_env(self):\nfor k, v in self._get_secrets_backend_env().items():\nos.environ[k] = v\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend.unset_backend_env","title":"unset_backend_env()
","text":"Source code in brickflow_plugins/secrets/__init__.py
def unset_backend_env(self):\nfor k in self._get_secrets_backend_env().keys():\nos.environ.pop(k, None)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl
","text":" Bases: BrickflowSecretPluginSpec
get_secret_value(url_parsed_result: ParseResult) -> Optional['str']
staticmethod
","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\nreturn CerberusSecretsHelper().get_secret_value_from_url(url_parsed_result)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper","title":"brickflow_plugins.secrets.CerberusSecretsHelper
","text":" Bases: AbstractSecretsHelper
PROTOCOL_STARTS_WITH = 'cerberus'
class-attribute
instance-attribute
","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper.parse_path_and_key","title":"parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]
staticmethod
","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\ndef parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]:\nif path is not None:\n_cleaned_path = path.lstrip(\"/\").rstrip(\"/\")\nreturn \"/\".join(_cleaned_path.split(\"/\")[:-1]), _cleaned_path.split(\"/\")[-1]\nreturn None\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl
","text":" Bases: BrickflowSecretPluginSpec
get_secret_value(url_parsed_result: ParseResult) -> Optional['str']
staticmethod
","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\n# not implemented yet\nreturn None\n
"},{"location":"api/secrets/#brickflow_plugins.secrets-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.get_brickflow_tasks_hook","title":"brickflow_plugins.secrets.get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec
cached
","text":"Source code in brickflow_plugins/secrets/__init__.py
@functools.lru_cache\ndef get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec:\npm = pluggy.PluginManager(BRICKFLOW_SECRETS_BACKEND)\npm.add_hookspecs(BrickflowSecretPluginSpec)\npm.load_setuptools_entrypoints(BRICKFLOW_SECRETS_BACKEND)\npm.register(CerberusBrickflowSecretPluginImpl())\npm.register(Base64BrickflowSecretPluginImpl())\nfor name, plugin_instance in pm.list_name_plugin():\nlog.info(\n\"Loaded plugin with name: %s and class: %s\",\nname,\nplugin_instance.__class__.__name__,\n)\nreturn pm.hook\n
"},{"location":"api/task/","title":"Task","text":""},{"location":"api/task/#brickflow.engine.task-classes","title":"Classes","text":""},{"location":"api/task/#brickflow.engine.task.Task","title":"brickflow.engine.task.Task
dataclass
","text":""},{"location":"api/task/#brickflow.engine.task.Task-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.Task.brickflow_default_params","title":"brickflow_default_params: Dict[str, str]
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.builtin_notebook_params","title":"builtin_notebook_params: Dict[str, str]
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.cluster","title":"cluster: Cluster
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.custom_execute_callback","title":"custom_execute_callback: Optional[Callable] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.custom_task_parameters","title":"custom_task_parameters: Dict[str, str]
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.databricks_task_type_str","title":"databricks_task_type_str: str
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.depends_on","title":"depends_on: List[Union[Callable, str]] = field(default_factory=lambda : [])
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.depends_on_names","title":"depends_on_names: Iterator[str]
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.description","title":"description: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.libraries","title":"libraries: List[TaskLibrary] = field(default_factory=lambda : [])
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.name","title":"name: str
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.parents","title":"parents: List[str]
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_func","title":"task_func: Callable
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_func_name","title":"task_func_name: str
property
","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_id","title":"task_id: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_settings","title":"task_settings: Optional[TaskSettings] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_type","title":"task_type: TaskType = TaskType.BRICKFLOW_TASK
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.trigger_rule","title":"trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task.workflow","title":"workflow: Workflow
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.Task-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.Task.execute","title":"execute() -> Any
","text":"Source code in brickflow/engine/task.py
@with_brickflow_logger\ndef execute(self) -> Any:\n# Workflow is:\n# 1. Check to see if there selected tasks and if there are is this task in the list\n# 2. Check to see if the previous task is skipped and trigger rule.\n# 3. Check to see if this a custom python task and execute it\n# 4. Execute the task function\nctx._set_current_task(self.name)\n_select_task_skip, _select_task_skip_reason = self._skip_because_not_selected()\nif _select_task_skip is True:\n# check if this task is skipped due to task selection\n_ilog.info(\n\"Skipping task... %s for reason: %s\",\nself.name,\n_select_task_skip_reason,\n)\nctx._reset_current_task()\nreturn\n_skip, reason = self.should_skip()\nif _skip is True:\n_ilog.info(\"Skipping task... %s for reason: %s\", self.name, reason)\nctx.task_coms.put(self.name, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)\nctx._reset_current_task()\nreturn\ninitial_resp: TaskResponse = get_brickflow_tasks_hook().task_execute(\ntask=self, workflow=self.workflow\n)\nresp: TaskResponse = get_brickflow_tasks_hook().handle_results(\nresp=initial_resp, task=self, workflow=self.workflow\n)\nif resp.push_return_value is True:\nctx.task_coms.put(self.name, RETURN_VALUE_KEY, resp.response)\nctx._reset_current_task()\nreturn resp.response\n
"},{"location":"api/task/#brickflow.engine.task.Task.get_obj_dict","title":"get_obj_dict(entrypoint: str) -> Dict[str, Any]
","text":"Source code in brickflow/engine/task.py
def get_obj_dict(self, entrypoint: str) -> Dict[str, Any]:\nreturn {\n\"notebook_path\": self.handle_notebook_path(entrypoint),\n\"base_parameters\": {\n**self.builtin_notebook_params,\n**self.brickflow_default_params,\n**self.custom_task_parameters, # type: ignore\n# **(self.custom_unique_task_parameters or {}),\n# TODO: implement only after validating limit on parameters\n},\n}\n
"},{"location":"api/task/#brickflow.engine.task.Task.get_runtime_parameter_values","title":"get_runtime_parameter_values() -> Dict[str, Any]
","text":"Source code in brickflow/engine/task.py
def get_runtime_parameter_values(self) -> Dict[str, Any]:\n# if dbutils returns None then return v instead\nreturn {\nk: (ctx.get_parameter(k, str(v)) or v)\nfor k, v in (\ninspect.getfullargspec(self.task_func).kwonlydefaults or {}\n).items()\n}\n
"},{"location":"api/task/#brickflow.engine.task.Task.handle_notebook_path","title":"handle_notebook_path(entrypoint: str) -> str
staticmethod
","text":"Source code in brickflow/engine/task.py
@staticmethod\ndef handle_notebook_path(entrypoint: str) -> str:\n# local will get created as workspace notebook job and not a git source job\nif ctx.env == BrickflowDefaultEnvs.LOCAL.value:\n# check and ensure suffix has .py extension\nreturn entrypoint if entrypoint.endswith(\".py\") else f\"{entrypoint}.py\"\nreturn entrypoint\n
"},{"location":"api/task/#brickflow.engine.task.Task.is_valid_task_signature","title":"is_valid_task_signature() -> None
","text":"Source code in brickflow/engine/task.py
def is_valid_task_signature(self) -> None:\n# only supports kwonlyargs with defaults\nspec: inspect.FullArgSpec = inspect.getfullargspec(self.task_func)\nsig: inspect.Signature = inspect.signature(self.task_func)\nsignature_error_msg = (\n\"Task signatures only supports kwargs with defaults. or catch all varkw **kwargs\"\n\"For example def execute(*, variable_a=None, variable_b=None, **kwargs). \"\nf\"Please fix function def {self.task_func_name}{sig}: ...\"\n)\nkwargs_default_error_msg = (\nf\"Keyword arguments must be Strings. \"\nf\"Please handle booleans and numbers via strings. \"\nf\"Please fix function def {self.task_func_name}{sig}: ...\"\n)\nvalid_case = spec.args == [] and spec.varargs is None and spec.defaults is None\nfor _, v in (spec.kwonlydefaults or {}).items():\n# in python boolean is a type of int must be captured here via short circuit\nif not (isinstance(v, str) or v is None):\nraise InvalidTaskSignatureDefinition(kwargs_default_error_msg)\nif valid_case:\nreturn\nraise InvalidTaskSignatureDefinition(signature_error_msg)\n
"},{"location":"api/task/#brickflow.engine.task.Task.should_skip","title":"should_skip() -> Tuple[bool, Optional[str]]
","text":"Source code in brickflow/engine/task.py
def should_skip(self) -> Tuple[bool, Optional[str]]:\n# return true or false and reason\nnode_skip_checks = []\nfor parent in self.parents:\nif parent != ROOT_NODE:\ntry:\ntask_to_not_skip = ctx.task_coms.get(parent, BRANCH_SKIP_EXCEPT)\nif self.name != task_to_not_skip:\n# set this task to skip hack to keep to empty to trigger failure\n# key look up will fail\nnode_skip_checks.append(True)\nelse:\nnode_skip_checks.append(False)\nexcept Exception:\n# ignore errors as it probably doesnt exist\n# TODO: log errors\nnode_skip_checks.append(False)\nif not node_skip_checks:\nreturn False, None\nif self.trigger_rule == BrickflowTriggerRule.NONE_FAILED:\n# by default a task failure automatically skips\nreturn self._get_skip_with_reason(\nall(node_skip_checks),\n\"At least one task before this were not successful\",\n)\n# default is BrickflowTriggerRule.ALL_SUCCESS\nreturn self._get_skip_with_reason(\nany(node_skip_checks), \"All tasks before this were not successful\"\n)\n
"},{"location":"api/task/#brickflow.engine.task.EmailNotifications","title":"brickflow.engine.task.EmailNotifications
dataclass
","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_failure","title":"on_failure: Optional[List[str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_start","title":"on_start: Optional[List[str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_success","title":"on_success: Optional[List[str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.to_tf_dict","title":"to_tf_dict() -> Dict[str, Optional[List[str]]]
","text":"Source code in brickflow/engine/task.py
def to_tf_dict(self) -> Dict[str, Optional[List[str]]]:\nreturn {\n\"on_start\": self.on_start,\n\"on_failure\": self.on_failure,\n\"on_success\": self.on_success,\n}\n
"},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary","title":"brickflow.engine.task.JarTaskLibrary
dataclass
","text":" Bases: StorageBasedTaskLibrary
Parameters:
Name Type Description Defaultjar
str
String to s3/dbfs path for jar
required"},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary.jar","title":"jar: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary","title":"brickflow.engine.task.EggTaskLibrary
dataclass
","text":" Bases: StorageBasedTaskLibrary
Parameters:
Name Type Description Defaultegg
str
String to s3/dbfs path for egg
required"},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary.egg","title":"egg: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary","title":"brickflow.engine.task.WheelTaskLibrary
dataclass
","text":" Bases: StorageBasedTaskLibrary
Parameters:
Name Type Description Defaultwhl
str
String to s3/dbfs path for whl
required"},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary.whl","title":"whl: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary","title":"brickflow.engine.task.PypiTaskLibrary
dataclass
","text":" Bases: TaskLibrary
Parameters:
Name Type Description Defaultpackage
str
The package in pypi i.e. requests, requests==x.y.z, git+https://github.com/Nike-Inc/brickflow.git
requiredrepo
Optional[str]
The repository where the package can be found. By default pypi is used
None
"},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]]
property
","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.package","title":"package: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.repo","title":"repo: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary","title":"brickflow.engine.task.MavenTaskLibrary
dataclass
","text":" Bases: TaskLibrary
Parameters:
Name Type Description Defaultcoordinates
str
Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2.
requiredrepo
Optional[str]
Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched.
None
exclusions
Optional[List[str]]
List of dependences to exclude. For example: [\"slf4j:slf4j\", \"*:hadoop-client\"]. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.
None
"},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.coordinates","title":"coordinates: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]]
property
","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.exclusions","title":"exclusions: Optional[List[str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.repo","title":"repo: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary","title":"brickflow.engine.task.CranTaskLibrary
dataclass
","text":" Bases: TaskLibrary
Parameters:
Name Type Description Defaultpackage
str
The name of the CRAN package to install.
requiredrepo
Optional[str]
The repository where the package can be found. If not specified, the default CRAN repo is used.
None
"},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]]
property
","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.package","title":"package: str
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.repo","title":"repo: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule","title":"brickflow.engine.task.BrickflowTriggerRule
","text":" Bases: Enum
ALL_SUCCESS = 'all_success'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule.NONE_FAILED","title":"NONE_FAILED = 'none_failed'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTaskEnvVars","title":"brickflow.engine.task.BrickflowTaskEnvVars
","text":" Bases: Enum
BRICKFLOW_SELECT_TASKS = 'BRICKFLOW_SELECT_TASKS'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings","title":"brickflow.engine.task.TaskSettings
dataclass
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.email_notifications","title":"email_notifications: Optional[EmailNotifications] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.max_retries","title":"max_retries: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.min_retry_interval_millis","title":"min_retry_interval_millis: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.notification_settings","title":"notification_settings: Optional[TaskNotificationSettings] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.retry_on_timeout","title":"retry_on_timeout: Optional[bool] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.timeout_seconds","title":"timeout_seconds: Optional[int] = None
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.merge","title":"merge(other: Optional['TaskSettings']) -> 'TaskSettings'
","text":"Source code in brickflow/engine/task.py
def merge(self, other: Optional[\"TaskSettings\"]) -> \"TaskSettings\":\n# overrides top level values\nif other is None:\nreturn self\nreturn TaskSettings(\nother.email_notifications or self.email_notifications,\nother.notification_settings or self.notification_settings,\nother.timeout_seconds or self.timeout_seconds or 0,\nother.max_retries or self.max_retries,\nother.min_retry_interval_millis or self.min_retry_interval_millis,\nother.retry_on_timeout or self.retry_on_timeout,\n)\n
"},{"location":"api/task/#brickflow.engine.task.TaskSettings.to_tf_dict","title":"to_tf_dict() -> Dict[str, Optional[str] | Optional[int] | Optional[bool] | Optional[Dict[str, Optional[List[str]]]]]
","text":"Source code in brickflow/engine/task.py
def to_tf_dict(\nself,\n) -> Dict[\nstr,\nOptional[str]\n| Optional[int]\n| Optional[bool]\n| Optional[Dict[str, Optional[List[str]]]],\n]:\nemail_not = (\nself.email_notifications.to_tf_dict()\nif self.email_notifications is not None\nelse {}\n)\nnotification_settings = (\n{}\nif self.notification_settings is None\nelse {\"notification_settings\": self.notification_settings.dict()}\n)\nreturn {\n**notification_settings,\n\"email_notifications\": email_not,\n\"timeout_seconds\": self.timeout_seconds,\n\"max_retries\": self.max_retries,\n\"min_retry_interval_millis\": self.min_retry_interval_millis,\n\"retry_on_timeout\": self.retry_on_timeout,\n}\n
"},{"location":"api/task/#brickflow.engine.task.TaskType","title":"brickflow.engine.task.TaskType
","text":" Bases: Enum
BRICKFLOW_TASK = 'brickflow_task'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.CUSTOM_PYTHON_TASK","title":"CUSTOM_PYTHON_TASK = 'custom_python_task'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.DLT","title":"DLT = 'pipeline_task'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.NOTEBOOK_TASK","title":"NOTEBOOK_TASK = 'notebook_task'
class-attribute
instance-attribute
","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.SQL","title":"SQL = 'sql_task'
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/","title":"Workflow","text":""},{"location":"api/workflow/#brickflow.engine.workflow-classes","title":"Classes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow","title":"brickflow.engine.workflow.Workflow
dataclass
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow-attributes","title":"Attributes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.active_task","title":"active_task: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.bfs_layers","title":"bfs_layers: List[str]
property
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.clusters","title":"clusters: List[Cluster] = field(default_factory=lambda : [])
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.common_task_parameters","title":"common_task_parameters: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.default_cluster","title":"default_cluster: Optional[Cluster] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.default_task_settings","title":"default_task_settings: TaskSettings = TaskSettings()
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.email_notifications","title":"email_notifications: Optional[WorkflowEmailNotifications] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.graph","title":"graph: nx.DiGraph = field(default_factory=nx.DiGraph)
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.libraries","title":"libraries: List[TaskLibrary] = field(default_factory=lambda : [])
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.max_concurrent_runs","title":"max_concurrent_runs: int = 1
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.max_tasks_in_workflow","title":"max_tasks_in_workflow: int = 100
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.name","title":"name: str
property
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.notification_settings","title":"notification_settings: Optional[WorkflowNotificationSettings] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.permissions","title":"permissions: WorkflowPermissions = WorkflowPermissions()
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.prefix","title":"prefix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_PREFIX', ''))
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.run_as_service_principal","title":"run_as_service_principal: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.run_as_user","title":"run_as_user: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.schedule_quartz_expression","title":"schedule_quartz_expression: Optional[str] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.suffix","title":"suffix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_SUFFIX', ''))
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.tags","title":"tags: Optional[Dict[str, str]] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.tasks","title":"tasks: Dict[str, Task] = field(default_factory=lambda : {})
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.timezone","title":"timezone: str = 'UTC'
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.trigger","title":"trigger: Optional[Trigger] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.unique_new_clusters","title":"unique_new_clusters: List[Cluster]
property
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.webhook_notifications","title":"webhook_notifications: Optional[WorkflowWebhookNotifications] = None
class-attribute
instance-attribute
","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow-functions","title":"Functions","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.bfs_task_iter","title":"bfs_task_iter() -> Iterator[Task]
","text":"Source code in brickflow/engine/workflow.py
def bfs_task_iter(self) -> Iterator[Task]:\nfor layer in self.bfs_layers:\nfor task_key in layer:\nyield self.get_task(task_key)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.check_no_active_task","title":"check_no_active_task() -> None
","text":"Source code in brickflow/engine/workflow.py
def check_no_active_task(self) -> None:\nif self.active_task is not None:\nraise AnotherActiveTaskError(\n\"You are calling another active task in another task. \"\n\"Please abstract the code more.\"\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.dlt_task","title":"dlt_task(task_func: Optional[Callable] = None, name: Optional[str] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable
","text":"Source code in brickflow/engine/workflow.py
def dlt_task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\n) -> Callable:\nreturn self.task(task_func, name, task_type=TaskType.DLT, depends_on=depends_on)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.get_task","title":"get_task(task_id: str) -> Task
","text":"Source code in brickflow/engine/workflow.py
@wraps_keyerror(TaskNotFoundError, \"Unable to find task: \")\ndef get_task(self, task_id: str) -> Task:\nreturn self.tasks[task_id]\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.notebook_task","title":"notebook_task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_settings: Optional[TaskSettings] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable
","text":"Source code in brickflow/engine/workflow.py
def notebook_task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ncluster: Optional[Cluster] = None,\nlibraries: Optional[List[TaskLibrary]] = None,\ntask_settings: Optional[TaskSettings] = None,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\n) -> Callable:\nreturn self.task(\ntask_func,\nname,\ncluster=cluster,\nlibraries=libraries,\ntask_type=TaskType.NOTEBOOK_TASK,\ntask_settings=task_settings,\ndepends_on=depends_on,\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.parents","title":"parents(node: str) -> Iterator
","text":"Source code in brickflow/engine/workflow.py
def parents(self, node: str) -> Iterator:\nreturn self.graph.predecessors(node)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.pop_task","title":"pop_task(task_id: str) -> None
","text":"Source code in brickflow/engine/workflow.py
@wraps_keyerror(TaskNotFoundError, \"Unable to find task: \")\ndef pop_task(self, task_id: str) -> None:\n# Pop from dict and graph\nself.tasks.pop(task_id)\nself.graph.remove_node(task_id)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task","title":"task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_type: TaskType = TaskType.BRICKFLOW_TASK, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS, custom_execute_callback: Optional[Callable] = None, task_settings: Optional[TaskSettings] = None) -> Callable
","text":"Source code in brickflow/engine/workflow.py
def task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ncluster: Optional[Cluster] = None,\nlibraries: Optional[List[TaskLibrary]] = None,\ntask_type: TaskType = TaskType.BRICKFLOW_TASK,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\ntrigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS,\ncustom_execute_callback: Optional[Callable] = None,\ntask_settings: Optional[TaskSettings] = None,\n) -> Callable:\nif len(self.tasks) >= self.max_tasks_in_workflow:\nraise ValueError(\n\"You have reached the maximum number of tasks allowed in a databricks workflow. \"\n\"Please split your workflow into multiple workflows or raise a feature request \"\n\"with your Databricks team.\"\n)\ndef task_wrapper(f: Callable) -> Callable:\ntask_id = name or f.__name__\nself._add_task(\nf,\ntask_id,\ncluster=cluster,\ntask_type=task_type,\nlibraries=libraries,\ndepends_on=depends_on,\ntrigger_rule=trigger_rule,\ncustom_execute_callback=custom_execute_callback,\ntask_settings=task_settings,\n)\n@functools.wraps(f)\ndef func(*args, **kwargs): # type: ignore\ntry:\nself.check_no_active_task()\nself._set_active_task(task_id)\nresp = f(*args, **kwargs)\nreturn resp\nexcept Exception as e:\nself._reset_active_task()\nraise e\nfinally:\nself._reset_active_task()\nreturn func\nif task_func is not None:\nif callable(task_func):\nreturn task_wrapper(task_func)\nelse:\nraise NoCallableTaskError(\n\"Please use task decorator against a callable function.\"\n)\nreturn task_wrapper\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task_exists","title":"task_exists(task_id: str) -> bool
","text":"Source code in brickflow/engine/workflow.py
def task_exists(self, task_id: str) -> bool:\nreturn task_id in self.tasks\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task_iter","title":"task_iter() -> Iterator[Task]
","text":"Source code in brickflow/engine/workflow.py
def task_iter(self) -> Iterator[Task]:\nfor task in self.bfs_task_iter():\nyield task\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.unique_new_clusters_dict","title":"unique_new_clusters_dict() -> List[Dict[str, Any]]
","text":"Source code in brickflow/engine/workflow.py
def unique_new_clusters_dict(self) -> List[Dict[str, Any]]:\nself.validate_new_clusters_with_unique_names()\nall_unique_clusters = self.unique_new_clusters\nreturn [\n# job clusters do not need names\n{\n\"job_cluster_key\": c.name,\n\"new_cluster\": c.as_dict(remove_fields=[\"name\"]),\n}\nfor c in all_unique_clusters\n]\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.validate_new_clusters_with_unique_names","title":"validate_new_clusters_with_unique_names() -> None
","text":"Source code in brickflow/engine/workflow.py
def validate_new_clusters_with_unique_names(self) -> None:\nall_unique_clusters = self.unique_new_clusters\nunique_name_list: Dict[str, Optional[str]] = {}\nduplicates = []\nfor cluster in all_unique_clusters:\nif cluster.name not in unique_name_list:\nunique_name_list[cluster.name] = None\nelse:\nduplicates.append(cluster.name)\nduplicate_list = list(set(duplicates))\nif len(duplicate_list) > 0:\nraise DuplicateClustersDefinitionError(\nf\"Found duplicate cluster definitions in your workflow: {self.name}, \"\nf\"with names: {duplicate_list}\"\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.User","title":"brickflow.engine.workflow.User
","text":" Bases: ScimEntity
to_access_control() -> Dict[str, str]
","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"user_name\": self.name}\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Group","title":"brickflow.engine.workflow.Group
","text":" Bases: ScimEntity
to_access_control() -> Dict[str, str]
","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"group_name\": self.name}\n
"},{"location":"api/workflow/#brickflow.engine.workflow.ServicePrincipal","title":"brickflow.engine.workflow.ServicePrincipal
","text":" Bases: ScimEntity
to_access_control() -> Dict[str, str]
","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"service_principal_name\": self.name}\n
"},{"location":"api/workflow_dependency_sensor/","title":"WorkflowDependencySensor","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor-attributes","title":"Attributes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor-classes","title":"Classes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor","title":"brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor(databricks_host: str, databricks_secrets_scope: str, databricks_secrets_key: str, dependency_job_id: int, delta: timedelta, timeout_seconds: int, poke_interval_seconds: int = 60)
","text":"This is used to have dependencies on the databricks workflow
Example Usage in your brickflow taskWorkflowDependencySensor( databricks_host=https://your_workspace_url.cloud.databricks.com, databricks_secrets_scope=\"brickflow-demo-tobedeleted\", databricks_secrets_key=\"service_principle_id\" dependency_job_id=job_id, poke_interval=20, timeout=60, delta=timedelta(days=1) )
Source code inbrickflow_plugins/databricks/workflow_dependency_sensor.py
def __init__(\nself,\ndatabricks_host: str,\ndatabricks_secrets_scope: str,\ndatabricks_secrets_key: str,\ndependency_job_id: int,\ndelta: timedelta,\ntimeout_seconds: int,\npoke_interval_seconds: int = 60,\n):\nself.databricks_host = databricks_host\nself.dependency_job_id = dependency_job_id\nself.databricks_secrets_scope = databricks_secrets_scope\nself.databricks_secrets_key = databricks_secrets_key\nself.poke_interval = poke_interval_seconds\nself.timeout = timeout_seconds\nself.delta = delta\nself.log = logging\nself.start_time = time.time()\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor-attributes","title":"Attributes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_host","title":"databricks_host = databricks_host
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_secrets_key","title":"databricks_secrets_key = databricks_secrets_key
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_secrets_scope","title":"databricks_secrets_scope = databricks_secrets_scope
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.delta","title":"delta = delta
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.dependency_job_id","title":"dependency_job_id = dependency_job_id
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.log","title":"log = logging
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.poke_interval","title":"poke_interval = poke_interval_seconds
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.start_time","title":"start_time = time.time()
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.timeout","title":"timeout = timeout_seconds
instance-attribute
","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor-functions","title":"Functions","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.execute","title":"execute()
","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def execute(self):\nsession = self.get_http_session()\nurl = f\"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/list\"\nheaders = {\n\"Authorization\": f\"Bearer {self.get_token()}\",\n\"Content-Type\": \"application/json\",\n}\n# http://www.unixtimestampconverter.com/\nparams = {\n\"limit\": 25,\n\"job_id\": self.dependency_job_id,\n\"expand_tasks\": \"true\",\n\"start_time_from\": self.get_the_execution_date(),\n}\nwhile True:\noffset = 0\nhas_more = True\nwhile has_more is True:\nparams[\"offset\"] = offset\nresp = session.get(url, params=params, headers=headers).json()\nfor run in resp.get(\"runs\", []):\nself.log.info(\nf\"Found the run_id: {run['run_id']}, and it's result_state is: {run.get('state', {}).get('result_state', None)}\"\n)\nif run.get(\"state\", {}).get(\"result_state\", None) == \"SUCCESS\":\nself.log.info(f\"Found a successful run: {run['run_id']}\")\nreturn\noffset += params[\"limit\"]\nhas_more = resp.get(\"has_more\", False)\nself.log.info(f\"This is offset: {offset}, this is has_more: {has_more}\")\nself.log.info(\"Didn't find a successful run yet\")\nif (\nself.timeout is not None\nand (time.time() - self.start_time) > self.timeout\n):\nraise WorkflowDependencySensorTimeOutException(f\"The job has timed out\")\nself.log.info(f\"sleeping for: {self.poke_interval}\")\ntime.sleep(self.poke_interval)\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_http_session","title":"get_http_session()
cached
","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
@functools.lru_cache(maxsize=None)\ndef get_http_session(self):\nsession = requests.Session()\nmax_retries = int(os.getenv(\"DATABRICKS_REQUEST_RETRY_COUNT\", 10))\nretries = self.get_retry_class(max_retries)(\ntotal=max_retries,\nbackoff_factor=1,\nstatus_forcelist=[500, 501, 502, 503, 504, 429],\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nsession.mount(\"http://\", HTTPAdapter(max_retries=retries))\nreturn session\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_retry_class","title":"get_retry_class(max_retries)
","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def get_retry_class(self, max_retries):\nfrom urllib3 import Retry\nlog = self.log\nclass LogRetry(Retry):\n\"\"\"\n Adding extra logs before making a retry request\n \"\"\"\ndef __init__(self, *args, **kwargs):\nif (\nkwargs.get(\"total\", None) != max_retries\nand kwargs.get(\"total\", None) > 0\n):\nlog.info(f\"Retrying with kwargs: {kwargs}\")\nsuper().__init__(*args, **kwargs)\nreturn LogRetry\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_the_execution_date","title":"get_the_execution_date() -> str
","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def get_the_execution_date(self) -> str:\nsession = self.get_http_session()\nurl = f\"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/get\"\nheaders = {\n\"Authorization\": f\"Bearer {self.get_token()}\",\n\"Content-Type\": \"application/json\",\n}\nrun_id = ctx.dbutils_widget_get_or_else(\"brickflow_parent_run_id\", None)\nif run_id is None:\nraise WorkflowDependencySensorException(\n\"run_id is empty, brickflow_parent_run_id parameter is not found \"\n\"or no value present\"\n)\nparams = {\"run_id\": run_id}\nresp = session.get(url, params=params, headers=headers).json()\n# Convert Unix timestamp to datetime object\nstart_time = datetime.fromtimestamp(resp[\"start_time\"] / 1000)\nexecution_date = start_time - self.delta\nself.log.info(start_time)\nself.log.info(execution_date)\nself.log.info(execution_date.strftime(\"%s\"))\nreturn execution_date.strftime(\"%s\")\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_token","title":"get_token()
cached
","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
@functools.lru_cache\ndef get_token(self):\nreturn ctx.dbutils.secrets.get(\nself.databricks_secrets_scope, self.databricks_secrets_key\n)\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException","title":"brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException
","text":" Bases: Exception
brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorTimeOutException
","text":" Bases: TimeoutError
This page provides documentation for our command line tools.
"},{"location":"cli/reference/#bf","title":"bf","text":"CLI for managing Databricks Workflows
Usage:
bf [OPTIONS] COMMAND [ARGS]...\n
Options:
--version Show the version and exit.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#bundle","title":"bundle","text":"CLI for proxying to databricks bundles cli.
Usage:
bf bundle [OPTIONS]\n
Options:
--help Show this message and exit.\n
"},{"location":"cli/reference/#cdktf","title":"cdktf","text":"CLI for proxying to cdktf cli.
Usage:
bf cdktf [OPTIONS]\n
Options:
--help Show this message and exit.\n
"},{"location":"cli/reference/#deploy","title":"deploy","text":"CLI for deploying workflow projects.
Usage:
bf deploy [OPTIONS]\n
Options:
--auto-approve Auto approve brickflow pipeline without\n being prompted to approve.\n --deploy-mode [cdktf|bundle] Which deployment framework to use to deploy.\n [default: cdktf]\n --force-acquire-lock Force acquire lock for databricks bundles\n deploy.\n -p, --profile TEXT The databricks profile to use for\n authenticating to databricks during\n deployment.\n --git-provider TEXT The github provider for brickflow this is\n used for configuring github on DBX jobs.\n --git-ref TEXT The commit/tag/branch to use in github.\n -r, --repo-url TEXT The github url in which to run brickflow\n with.\n -e, --env TEXT Set the environment value, certain tags\n [TBD] get added to the workflows based on\n this value.\n -w, --workflow TEXT Provide the workflow file names which you\n want to deploy, each file name separated by\n space! Example: bf deploy -p DEFAULT -l -w\n wf1.py -w wf2.py\n -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to\n be deployed\n -l, --local-mode Set the environment flag to local and other\n components [TBD] are disabled in local mode.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#destroy","title":"destroy","text":"CLI for destroying workflow projects.
Usage:
bf destroy [OPTIONS]\n
Options:
--auto-approve Auto approve brickflow pipeline without\n being prompted to approve.\n --deploy-mode [cdktf|bundle] Which deployment framework to use to deploy.\n [default: cdktf]\n --force-acquire-lock Force acquire lock for databricks bundles\n destroy.\n -p, --profile TEXT The databricks profile to use for\n authenticating to databricks during\n deployment.\n --git-provider TEXT The github provider for brickflow this is\n used for configuring github on DBX jobs.\n --git-ref TEXT The commit/tag/branch to use in github.\n -r, --repo-url TEXT The github url in which to run brickflow\n with.\n -e, --env TEXT Set the environment value, certain tags\n [TBD] get added to the workflows based on\n this value.\n -w, --workflow TEXT Provide the workflow file names which you\n want to deploy, each file name separated by\n space! Example: bf deploy -p DEFAULT -l -w\n wf1.py -w wf2.py\n -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to\n be deployed\n -l, --local-mode Set the environment flag to local and other\n components [TBD] are disabled in local mode.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#diff","title":"diff","text":"CLI for identifying diff in projects (only cdktf supported).
Usage:
bf diff [OPTIONS]\n
Options:
-p, --profile TEXT The databricks profile to use for\n authenticating to databricks during\n deployment.\n --git-provider TEXT The github provider for brickflow this is\n used for configuring github on DBX jobs.\n --git-ref TEXT The commit/tag/branch to use in github.\n -r, --repo-url TEXT The github url in which to run brickflow\n with.\n -e, --env TEXT Set the environment value, certain tags\n [TBD] get added to the workflows based on\n this value.\n -w, --workflow TEXT Provide the workflow file names which you\n want to deploy, each file name separated by\n space! Example: bf deploy -p DEFAULT -l -w\n wf1.py -w wf2.py\n -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to\n be deployed\n -l, --local-mode Set the environment flag to local and other\n components [TBD] are disabled in local mode.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#docs","title":"docs","text":"Use to open docs in your browser...
Usage:
bf docs [OPTIONS]\n
Options:
--help Show this message and exit.\n
"},{"location":"cli/reference/#init","title":"init","text":"Initialize your project with Brickflow...
Usage:
bf init [OPTIONS]\n
Options:
-n, --project-name TEXT\n -g, --git-https-url TEXT Provide the github URL for your project,\n example: https://github.com/nike-eda-\n apla/brickflow\n -wd, --workflows-dir DIRECTORY\n -bfv, --brickflow-version TEXT\n -sev, --spark-expectations-version TEXT\n --help Show this message and exit.\n
"},{"location":"cli/reference/#projects","title":"projects","text":"Manage one to many brickflow projects
Usage:
bf projects [OPTIONS] COMMAND [ARGS]...\n
Options:
--help Show this message and exit.\n
"},{"location":"cli/reference/#add","title":"add","text":"Adds a project to the brickflow-multi-project.yml file and a entrypoint.py file in workflows dir
Usage:
bf projects add [OPTIONS]\n
Options:
--name TEXT Name of the project\n --path-from-repo-root-to-project-root DIRECTORY\n Path from repo root to project root\n --path-project-root-to-workflows-dir TEXT\n Path from project root to workflows dir\n --deployment-mode [bundle] Deployment mode\n -g, --git-https-url TEXT Provide the github URL for your project,\n example: https://github.com/nike-eda-\n apla/brickflow\n -bfv, --brickflow-version TEXT\n -sev, --spark-expectations-version TEXT\n --skip-entrypoint Skip creating entrypoint.py file\n --help Show this message and exit.\n
"},{"location":"cli/reference/#deploy_1","title":"deploy","text":"Deploy projects in the brickflow-multi-project.yml file
Usage:
bf projects deploy [OPTIONS]\n
Options:
--force-acquire-lock Force acquire lock for databricks bundles destroy.\n --auto-approve Auto approve brickflow pipeline without being prompted\n to approve.\n -p, --profile TEXT The databricks profile to use for authenticating to\n databricks during deployment.\n --project [] Select the project of workflows you would like to\n deploy.\n -e, --env TEXT Set the environment value, certain tags [TBD] get\n added to the workflows based on this value.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#destroy_1","title":"destroy","text":"Destroy projects in the brickflow-multi-project.yml file
Usage:
bf projects destroy [OPTIONS]\n
Options:
--force-acquire-lock Force acquire lock for databricks bundles destroy.\n --auto-approve Auto approve brickflow pipeline without being prompted\n to approve.\n -p, --profile TEXT The databricks profile to use for authenticating to\n databricks during deployment.\n --project [] Select the project of workflows you would like to\n deploy.\n -e, --env TEXT Set the environment value, certain tags [TBD] get\n added to the workflows based on this value.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#list","title":"list","text":"Lists all projects in the brickflow-multi-project.yml file
Usage:
bf projects list [OPTIONS]\n
Options:
--help Show this message and exit.\n
"},{"location":"cli/reference/#remove","title":"remove","text":"Removes a project from the brickflow-multi-project.yml file
Usage:
bf projects remove [OPTIONS]\n
Options:
--name [] Name of the project\n --help Show this message and exit.\n
"},{"location":"cli/reference/#synth","title":"synth","text":"Synth the bundle.yml for project
Usage:
bf projects synth [OPTIONS]\n
Options:
-p, --profile TEXT The databricks profile to use for authenticating to\n databricks during deployment.\n --project [] Select the project of workflows you would like to\n deploy.\n -e, --env TEXT Set the environment value, certain tags [TBD] get added\n to the workflows based on this value.\n --help Show this message and exit.\n
"},{"location":"cli/reference/#sync","title":"sync","text":"Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).
Usage:
bf sync [OPTIONS]\n
Options:
--deploy-mode [bundle] Which deployment framework to use to deploy.\n [default: bundle]\n --watch Enable filewatcher to sync files over.\n --full Run a full sync.\n --interval-duration TEXT File system polling interval (for --watch).\n --debug TEXT File system polling interval (for --watch).\n -p, --profile TEXT The databricks profile to use for\n authenticating to databricks during\n deployment.\n --git-provider TEXT The github provider for brickflow this is\n used for configuring github on DBX jobs.\n --git-ref TEXT The commit/tag/branch to use in github.\n -r, --repo-url TEXT The github url in which to run brickflow\n with.\n -e, --env TEXT Set the environment value, certain tags\n [TBD] get added to the workflows based on\n this value.\n -w, --workflow TEXT Provide the workflow file names which you\n want to deploy, each file name separated by\n space! Example: bf deploy -p DEFAULT -l -w\n wf1.py -w wf2.py\n -wd, --workflows-dir DIRECTORY Provide the workflow directory that has to\n be deployed\n -l, --local-mode Set the environment flag to local and other\n components [TBD] are disabled in local mode.\n --help Show this message and exit.\n
"},{"location":"faq/airflow-operator-rfc/","title":"Airflow operator rfc","text":""},{"location":"faq/airflow-operator-rfc/#airflow-operator-brickflow-support-rfc","title":"Airflow Operator - Brickflow Support RFC","text":"Airflow Operator Databricks Native Equivalent Will Implement Link to Issues Link to Impl Link to Docs Snowflake Operator Branch Python Operator Slack Operator Email Operator Task Dependency Sensor Canary Operator Bash Operator Short Circuit Operator S3 Sensor Compute Bash Operator Look at Bash Operator Compute Python Operator Use a task EMR Operator Use a task Spark Operator Use a task Python Operator Use a task Dummy Operator Use a task Genie Snowflake Operator Look at snowflake operator Genie Hive Operator N/A Genie S3 Dist CP Operator N/A Athena Operator Use DBSQL Nike EMR Operator Use a task Nike Spark Submit Operator Use a task Compute S3 Prefix Sensor Look at S3 sensor"},{"location":"faq/airflow-operator-rfc/#operators","title":"Operators","text":""},{"location":"faq/airflow-operator-rfc/#snowflake-operator","title":"Snowflake operator","text":""},{"location":"faq/airflow-operator-rfc/#branch-python-operator","title":"Branch python operator","text":""},{"location":"faq/airflow-operator-rfc/#slack-operator","title":"Slack operator","text":""},{"location":"faq/airflow-operator-rfc/#email-operator","title":"Email operator","text":""},{"location":"faq/airflow-operator-rfc/#task-dependency-sensor","title":"Task dependency sensor","text":""},{"location":"faq/airflow-operator-rfc/#bash-operator","title":"Bash operator","text":""},{"location":"faq/airflow-operator-rfc/#short-circuit-operator","title":"Short circuit operator","text":""},{"location":"faq/airflow-operator-rfc/#s3-prefix-sensor","title":"S3 Prefix Sensor","text":""},{"location":"faq/airflow-operator-rfc/#operators-which-will-not-be-supported","title":"Operators which will not be supported","text":""},{"location":"faq/airflow-operator-rfc/#compute-bash-operator","title":"Compute bash 
operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#compute-python-operator","title":"Compute python operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_1","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#emr-operator","title":"Emr operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_2","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#spark-operator","title":"Spark operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_3","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#python-operator","title":"Python operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_4","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#dummy-operator","title":"Dummy operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_5","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#canary-operator","title":"Canary operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_6","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-snowflake-operator","title":"Genie snowflake operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_7","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-hive-operator","title":"Genie hive operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_8","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-s3-dist-cp-operator","title":"Genie s3 dist cp operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_9","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#athena-operator","title":"Athena 
operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_10","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#nike-emr-operator","title":"Nike emr operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_11","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#nike-spark-submit-operator","title":"Nike spark submit operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_12","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#compute-s3-prefix-sensor","title":"Compute s3 prefix sensor","text":""},{"location":"faq/airflow-operator-rfc/#alternative_13","title":"Alternative:","text":""},{"location":"faq/faq/","title":"Faq","text":""}]}
\ No newline at end of file
diff --git a/dev/sitemap.xml b/dev/sitemap.xml
deleted file mode 100644
index 7addf1bb..00000000
--- a/dev/sitemap.xml
+++ /dev/null
@@ -1,118 +0,0 @@
-
-A task in Databricks workflows refers to a single unit of work that is executed as part of a larger data processing -pipeline. Tasks are typically designed to perform a specific set of operations on data, such as loading data from a -source, transforming the data, and storing it in a destination. In brickflow, tasks as designed in such a way that
-Assuming, that this is already read - workflow and workflow object is created
-Databricks workflow task can be created by decorating a python function with brickflow's task function
-from brickflow import Workflow
-wf = Workflow(...)
-
-@wf.task # (1)!
-def start():
- pass
-
-@wf.task(name="custom_end") # (2)!
-def end():
- pass
-
Define task dependency by using a variable "depends_on" in the task function. You can provide the dependent tasks as -direct python callables or string or list of callables/strings
-from brickflow import Workflow
-wf = Workflow(...)
-
-@wf.task
-def start():
- pass
-
-@wf.task(depends_on=start) # (1)!
-def bronze_layer():
- pass
-
-@wf.task(depends_on="bronze_layer") # (2)!
-def x_silver():
- pass
-
-@wf.task(depends_on=bronze_layer)
-def y_silver():
- pass
-
-@wf.task(depends_on=[x_silver, y_silver]) # (3)!
-def xy_gold():
- pass
-
-@wf.task(name="custom_z_gold", depends_on=[x_silver, "y_silver"]) # (4)!
-def z_gold():
- pass
-
-@wf.task(depends_on=["xy_gold", "custom_z_gold"]) # (5)!
-def end():
- pass
-
Task parameters can be defined as key value pairs in the function definition on which task is defined
-from brickflow import Workflow
-wf = Workflow(...)
-
-@wf.task
-def task_function(*, test="var", test1="var1"): # (1)!
- print(test)
- print(test1)
-
In the workflows section, we saw how the common task parameters are created at -the workflow level. Now in this section, we shall see how to use the common task parameters
-from brickflow import Workflow, ctx
-wf = Workflow(...)
-
-@wf.task
-def common_params():
- import some_pyspark_function # (1)!
-
- catalog_env = ctx.dbutils_widget_get_or_else(key="catalog", debug="local") # (2)!
- some_pyspark_function(catalog_env) # (3)!
-
There are many inbuilt task parameters that be accessed using brickflow context like above
-from brickflow import Workflow, ctx
-wf = Workflow(...)
-
-@wf.task
-def inbuilt_params():
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_env", # (1)!
- debug="local"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_run_id", # (2)!
- debug="788868"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_job_id", # (3)!
- debug="987987987987987"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_start_date", # (4)!
- debug="2023-05-03"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_start_time", # (5)!
- debug="1683102411626"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_task_retry_count", # (6)!
- debug="2"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_parent_run_id", # (7)!
- debug="788869"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_task_key", # (8)!
- debug="inbuilt_params"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_internal_workflow_name", # (9)!
- debug="Sample_Workflow"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_internal_task_name", # (10)!
- debug="inbuilt_params"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_internal_workflow_prefix", # (11)!
- debug="inbuilt_params"))
- print(ctx.dbutils_widget_get_or_else(
- key="brickflow_internal_workflow_suffix", # (12)!
- debug="inbuilt_params"))
-
There is a flexibility to use different clusters for each task or assign custom clusters
-from brickflow import Workflow, Cluster
-wf = Workflow(...)
-
-@wf.task(cluster=Cluster(...)) # (1)!
-def custom_cluster():
- pass
-
There is a flexibility to use specific libraries for a particular task
-from brickflow import Workflow
-wf = Workflow(...)
-
-@wf.task(libraries=[...]) # (1)!
-def custom_libraries():
- pass
-
There are different task types that are supported by brickflow right now. The default task type that is used by -brickflow is NOTEBOOK
-from brickflow import Workflow, TaskType, BrickflowTriggerRule, TaskResponse
-wf = Workflow(...)
-
-@wf.task
-def notebook_task():
- pass
-
-@wf.task(task_type=TaskType.DLT)
-def dlt_task():
- pass
-
-@wf.task(
- task_type=TaskType.CUSTOM_PYTHON_TASK, # (1)!
- trigger_rule=BrickflowTriggerRule.NONE_FAILED, # (2)!
- custom_execute_callback=lambda x: TaskResponse(x.name,
- push_return_value=True), # (3)!
-)
-def custom_python_task():
- pass
-
There are two types of trigger rules that can be applied on a task. It can be either ALL_SUCCESS or NONE_FAILED
-from brickflow import Workflow, BrickflowTriggerRule
-wf = Workflow(...)
-
-@wf.task(
- trigger_rule=BrickflowTriggerRule.NONE_FAILED # (1)!
-)
-def none_failed_task():
- pass
-
-@wf.task(
- trigger_rule=BrickflowTriggerRule.ALL_SUCCESS # (2)!
-)
-def all_success_task():
- pass
-
We have adopted/extended certain airflow operators that might be needed to run as a task in databricks workflows. -Typically for airflow operators we return the operator and brickflow will execute the operator based on task return -type.
-You will be able to use bash operator as below
-from brickflow import Workflow
-from brickflow_plugins import BashOperator
-wf = Workflow(...)
-
-@wf.task
-def bash_task():
- return BashOperator(task_id=bash_task.__name__,
- bash_command="ls -ltr") # (1)!
-
Even if you migrate to databricks workflows, brickflow gives you the flexibility to have a dependency on the airflow job
-from brickflow import Workflow, ctx
-from brickflow_plugins import TaskDependencySensor
-
-wf = Workflow(...)
-
-@wf.task
-def airflow_external_task_dependency_sensor():
- import base64
-
- data = base64.b64encode(
- ctx.dbutils.secrets.get("brickflow-demo-tobedeleted", "okta_conn_id").encode(
- "utf-8"
- )
- ).decode("utf-8")
- return TaskDependencySensor(
- task_id="sensor",
- timeout=180,
- okta_conn_id=f"b64://{data}",
- external_dag_id="external_airlfow_dag",
- external_task_id="hello",
- allowed_states=["success"],
- execution_delta=None,
- execution_delta_json=None,
- cluster_id="your_cluster_id",
- )
-
A Workflow is similar to an Airflow dag that lets you encapsulate a set of tasks.
-Here is an example of a workflow. -Click the plus buttons to understand all the parts of the workflow file.
-from datetime import timedelta
-from brickflow import Workflow, Cluster, WorkflowPermissions, User, \
- TaskSettings, EmailNotifications, PypiTaskLibrary, MavenTaskLibrary
-
-wf = Workflow( # (1)!
- "wf_test", # (2)!
- default_cluster=Cluster.from_existing_cluster("your_existing_cluster_id"), # (3)!
-
- # Optional parameters below
- schedule_quartz_expression="0 0/20 0 ? * * *", # (4)!
- timezone="UTC", # (5)!
- default_task_settings=TaskSettings( # (6)!
- email_notifications=EmailNotifications(
- on_start=["email@nike.com"],
- on_success=["email@nike.com"],
- on_failure=["email@nike.com"]
- ),
- timeout_seconds=timedelta(hours=2).seconds
- ),
- libraries=[ # (7)!
- PypiTaskLibrary(package="requests"),
- MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
- ],
- tags={ # (8)!
- "product_id": "brickflow_demo",
- "slack_channel": "nike-sole-brickflow-support"
- },
- max_concurrent_runs=1, # (9)!
- permissions=WorkflowPermissions( # (10)!
- can_manage_run=[User("abc@abc.com")],
- can_view=[User("abc@abc.com")],
- can_manage=[User("abc@abc.com")],
- ),
- prefix="feature-jira-xxx", # (11)!
- suffix="_qa1", # (12)!
- common_task_parameters={ # (13)!
- "catalog": "development",
- "database": "your_database"
- },
-)
-
-
-@wf.task() # (14)!
-def task_function(*, test="var"):
- return "hello world"
-
There are two ways to define the cluster for the workflow or a task
-from brickflow import Cluster
-
-default_cluster=Cluster.from_existing_cluster("your_existing_cluster_id")
-
from brickflow import Cluster
-
-default_cluster=Cluster(
- name="your_cluster_name",
- spark_version='11.3.x-scala2.12',
- node_type_id='m6g.xlarge',
- driver_node_type_id='m6g.xlarge',
- min_workers=1,
- max_workers=3,
- enable_elastic_disk=True,
- policy_id='your_policy_id',
- aws_attributes={
- "first_on_demand": 1,
- "availability": "SPOT_WITH_FALLBACK",
- "instance_profile_arn": "arn:aws:iam::XXXX:instance-profile/XXXX/group/XX",
- "spot_bid_price_percent": 100,
- "ebs_volume_type": "GENERAL_PURPOSE_SSD",
- "ebs_volume_count": 3,
- "ebs_volume_size": 100
- }
-)
-
Brickflow provides an opportunity to manage permissions on the workflows. -You can provide individual users or to a group or to a ServicePrincipal that can help manage, run or -view the workflows.
-Below example is for reference
-from brickflow import WorkflowPermissions, User, Group, ServicePrincipal
-
-permissions=WorkflowPermissions(
- can_manage_run=[
- User("abc@abc.com"),
- Group("app.xyz.team.Developer"),
- ServicePrincipal("ServicePrinciple_dbx_url.app.xyz.team.Developer")
- ],
- can_view=[User("abc@abc.com")],
- can_manage=[User("abc@abc.com")],
-)
-
Using brickflow, custom tags can be created on the workflow - but there are also some default tags -that are created while the job is deployed.
-The defaults tags that gets automatically attached to the workflow are below
-Use the below reference to define more tags and attach to the workflow. These can be used for collecting various -metrics and build dashboards.
-tags={
- "product_id": "brickflow_demo",
- "slack_channel": "nike-sole-brickflow-support"
- }
-
Databricks workflows uses Quartz cron expression unlike airflow's unix based cron scheduler. -A typical Quartz cron expression have six or seven fields, seperated by spaces
- -Below is a sample - -Task setting at workflow level can be used to have common setting defined that will be applicable for -all the tasks. Below is a sample that can be used for reference and all the parameters in TaskSettings -are optional -
from datetime import timedelta
-from brickflow import TaskSettings, EmailNotifications
-
-default_task_settings=TaskSettings(
- email_notifications=EmailNotifications(
- on_start=["email@nike.com"],
- on_success=["email@nike.com"],
- on_failure=["email@nike.com"]
- ),
- timeout_seconds=timedelta(hours=2).seconds,
- max_retries=2,
- min_retry_interval_millis=60000,
- retry_on_timeout=True
-)
-
Brickflow allows to specify libraries that are need to be installed and used across different tasks. -There are many ways to install library from different repositories/sources
-from brickflow import PypiTaskLibrary, MavenTaskLibrary, StorageBasedTaskLibrary, \
- JarTaskLibrary, EggTaskLibrary, WheelTaskLibrary
-
-libraries=[
- PypiTaskLibrary(package="requests"),
- MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
- StorageBasedTaskLibrary("s3://..."),
- StorageBasedTaskLibrary("dbfs://..."),
- JarTaskLibrary("s3://..."),
- JarTaskLibrary("dbfs://..."),
- EggTaskLibrary("s3://..."),
- EggTaskLibrary("dbfs://..."),
- WheelTaskLibrary("s3://..."),
- WheelTaskLibrary("dbfs://..."),
-]
-
Define the common parameters that can be used in all the tasks. Example could be database name, secrets_id etc
- - - - - - - -